Sma Exp 03 Code Print
CODE:
2. Data Loading
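The print references tweets_df without showing how it is loaded; a minimal sketch of the assumed loading step (the CSV filename and the pandas/numpy/matplotlib/seaborn imports used throughout are assumptions, not part of the original print):

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Assumed: scraped Budget-2023 tweets were saved to a CSV beforehand (hypothetical filename)
tweets_df = pd.read_csv('budget2023_tweets.csv')
tweets_df.head()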
# Keep only the columns needed for the analysis
df1 = tweets_df[['date', 'rawContent', 'renderedContent', 'user', 'replyCount',
                 'retweetCount', 'likeCount', 'lang', 'place', 'hashtags', 'viewCount']].copy()
df1.head()
df1.shape
plt.show()
import plotly.graph_objects as go
Top_Location_Of_tweet = df1['place'].value_counts().head(10)
df1.shape
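The print imports plotly.graph_objects but does not show the figure itself; a minimal sketch of the assumed bar chart of the top tweet locations:

# Assumed use of the plotly import above: bar chart of the ten most frequent tweet locations
fig = go.Figure(go.Bar(x=Top_Location_Of_tweet.index.astype(str),
                       y=Top_Location_Of_tweet.values))
fig.update_layout(title='Top 10 locations of tweets', xaxis_title='place', yaxis_title='count')
fig.show()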
!pip install tweet-preprocessor

# Remove unnecessary characters / punctuation
punct = ['%', '/', ':', '\\', '&', ';', '?']

def remove_punctuations(text):
    for punctuation in punct:
        text = text.replace(punctuation, '')
    return text
df1['renderedContent'] = df1['renderedContent'].apply(lambda x: remove_punctuations(x))
df1['renderedContent'].replace('', np.nan, inplace=True)
df1.dropna(subset=["renderedContent"], inplace=True)
len(df1)
df1 = df1.reset_index(drop=True)
df1.head()
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
sns.set_style('whitegrid')
%matplotlib inline
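The next line extends a stop-word list named stop that is never initialised in the print; a minimal sketch, assuming NLTK's English stop words are used:

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

# Assumed initialisation of the stop-word list extended below
stop = stopwords.words('english')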
# Add domain-specific terms to the stop-word list
stop = stop + ['budget2023', 'budget', 'httpst', '2023', 'modi', 'nsitaraman', 'union',
               'pmindia', 'tax', 'india']
def plot_20_most_common_words(count_data, count_vectorizer):
    # Word/count computation reconstructed here (assumed); the print only shows the plotting calls
    words = count_vectorizer.get_feature_names_out()
    counts = np.asarray(count_data.sum(axis=0)).ravel()
    words, counts = zip(*sorted(zip(words, counts), key=lambda x: x[1], reverse=True)[:20])
    x_pos = np.arange(len(words))
    plt.figure(2, figsize=(40, 40))
    plt.subplot(title='20 most common words')
    sns.set_context('notebook', font_scale=4, rc={'lines.linewidth': 2.5})
    sns.barplot(x=list(x_pos), y=list(counts), palette='husl')
    plt.xticks(x_pos, words, rotation=90)
    plt.xlabel('words')
    plt.ylabel('counts')
    plt.show()
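The call to the function above is not shown in the print; a minimal sketch, assuming the cleaned renderedContent column is vectorised with CountVectorizer:

# Assumed vectorisation step feeding plot_20_most_common_words
count_vectorizer = CountVectorizer(stop_words=stop)
count_data = count_vectorizer.fit_transform(df1['renderedContent'])
plot_20_most_common_words(count_data, count_vectorizer)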
def get_top_n_bigram(corpus, n=None):  # header and vectoriser fit reconstructed (assumed)
    vec = CountVectorizer(ngram_range=(2, 2), stop_words=stop).fit(corpus)
    bag_of_words = vec.transform(corpus)
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    return words_freq[:n]
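The bigram_df used below is never constructed in the print; a minimal sketch, assuming it is built from the helper above:

# Assumed construction of bigram_df from the top-bigram helper
common_bigrams = get_top_n_bigram(df1['renderedContent'], 8)
bigram_df = pd.DataFrame(common_bigrams, columns=['ngram', 'count'])
bigram_df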
bigram_df.groupby('ngram').sum()['count'].sort_values(ascending=False).sort_values().plot.barh(
    title='Top 8 bigrams', color='orange', width=.4, figsize=(12, 8), stacked=True)
from textblob import TextBlob

def get_subjectivity(text):
    return TextBlob(text).sentiment.subjectivity

def get_polarity(text):
    return TextBlob(text).sentiment.polarity

df1['subjectivity'] = df1['renderedContent'].apply(get_subjectivity)
df1['polarity'] = df1['renderedContent'].apply(get_polarity)
df1.head()
df1['textblob_score'] = df1['renderedContent'].apply(lambda x: TextBlob(x).sentiment.polarity)
neutral_threshold = 0.05
df1['textblob_sentiment'] = df1['textblob_score'].apply(
    lambda c: 'positive' if c >= neutral_threshold
    else ('Negative' if c <= -neutral_threshold else 'Neutral'))
textblob_df = df1[['renderedContent', 'textblob_sentiment', 'likeCount']]
textblob_df
textblob_df["textblob_sentiment"].value_counts()
textblob_df["textblob_sentiment"].value_counts().plot.barh(title = 'Sentiment Analysis',color='orange'
, width=.4, figsize=(12,8),stacked = True)
df_positive = textblob_df[textblob_df['textblob_sentiment'] == 'positive']
df_very_positive = df_positive[df_positive['likeCount'] > 0]
df_very_positive.head()
df_negative = textblob_df[textblob_df['textblob_sentiment'] == 'Negative']
df_negative
df_neutral = textblob_df[textblob_df['textblob_sentiment'] == 'Neutral']
df_neutral
from wordcloud import WordCloud, STOPWORDS
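The print ends with the WordCloud import but does not show the generation step; a minimal sketch, assuming a word cloud is drawn over all cleaned tweets:

# Assumed word-cloud generation over the cleaned tweet text
all_text = ' '.join(df1['renderedContent'])
wordcloud = WordCloud(stopwords=set(STOPWORDS).union(stop), background_color='white',
                      width=800, height=400).generate(all_text)
plt.figure(figsize=(12, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()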