ChatGPT Twitter Sentiment Analyzer
August 4, 2024
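The first cell is missing from this export; the libraries used throughout imply imports along these lines (a plausible reconstruction, not necessarily the author's exact cell):

[1]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import string
from PIL import Image
from bs4 import BeautifulSoup
from collections import Counter
from wordcloud import WordCloud, STOPWORDS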
[2]: df=pd.read_csv("/kaggle/input/tweets-onchatgpt-chatgpt/ChatGPT.csv")
[3]: mask=np.array(Image.open("/kaggle/input/wordcloud-mask-collection/stormtrooper_mask.png"))
[5]: df.head()
Tweet \
0 #ChatGPT If only there were a movie or somethi…
1 Workshop Description: In this workshop, you wi…
2 New Post at AiNewsDrop!\n\nChatGPT is a doctor…
3 OpenAI’s GPT-4 Just Got Supercharged! #ai #Cha…
4 "Classical art" is struggling - not changed th…
Url User \
0 https://twitter.com/kirbydjohnson/status/16445… kirbydjohnson
1 https://twitter.com/UniversityWeb/status/16445… UniversityWeb
2 https://twitter.com/AiNewsDrop/status/16445436… AiNewsDrop
3 https://twitter.com/tubeblogger/status/1644543… tubeblogger
4 https://twitter.com/majorradic/status/16445432… majorradic
Description
0 NaN
1 University webinars, speech videos, & news for…
2 I'm up almost 24/7 hunting for AI News\n\nWith…
3 Exploring the Best Of The Web. Tech, Games, Ar…
4 Mediji i milosrdje\n\n …
[6]: df.shape
[7]: df.isnull().sum()
[7]: Unnamed: 0 5
Date 0
Tweet 5
Url 5
User 5
UserCreated 5
UserVerified 5
UserFollowers 5
UserFriends 5
Retweets 5
Likes 5
Location 11851
Description 2190
dtype: int64
[8]: df.dropna(inplace=True)
[9]: df.isnull().sum()
[9]: Unnamed: 0 0
Date 0
Tweet 0
Url 0
User 0
UserCreated 0
UserVerified 0
UserFollowers 0
UserFriends 0
Retweets 0
Likes 0
Location 0
Description 0
dtype: int64
[10]: df.head()
Tweet \
3 OpenAI’s GPT-4 Just Got Supercharged! #ai #Cha…
4 "Classical art" is struggling - not changed th…
5 Alibaba invites businesses to trial 'ChatGPT r…
6 Trying to stop students from using #AI and #ch…
7 I Asked ChatGPT's AI Chatbot How Can I Earn Cr…
Url User \
3 https://twitter.com/tubeblogger/status/1644543… tubeblogger
4 https://twitter.com/majorradic/status/16445432… majorradic
5 https://twitter.com/gadgetsnow/status/16445430… gadgetsnow
6 https://twitter.com/Sherab_Taye/status/1644542… Sherab_Taye
7 https://twitter.com/cryptoccentral/status/1644… cryptoccentral
7 2023-01-27 00:12:12+00:00 False 1.0 2.0
Description
3 Exploring the Best Of The Web. Tech, Games, Ar…
4 Mediji i milosrdje\n\n …
5 The official technology news website of The Ti…
6 Musician // Thinker // Metaphysical Dandy // f…
7 Get the latest updates and insights on the cry…
[11]: df.shape
[12]: import re
df['Date'] = pd.to_datetime(df['Date'])
df['New_Date'] = pd.to_datetime(df['Date']).dt.date
df['Time'] = df['Date'].dt.time
df['Hour'] = df['Date'].dt.hour
df['Day'] = df['Date'].dt.strftime('%A')
df['TweetStr'] = df['Tweet'].astype(str)
df['hashtags'] = df['TweetStr'].apply(lambda x: re.findall(r"#(\w+)", x))
[14]: df["New_Date"].value_counts()
[14]: New_Date
2023-03-30 3999
2023-03-31 3948
2023-03-29 3828
2023-03-28 3788
2023-04-04 3357
2023-04-05 3310
2023-04-03 3134
2023-04-06 3087
2023-04-01 2919
2023-04-07 2804
2023-04-02 2461
2023-03-27 616
2023-04-08 209
Name: count, dtype: int64
[15]: len(df["New_Date"].unique())
[15]: 13
[17]: df["User"].value_counts()
[17]: User
crypaudio 440
torksmith 352
yuhanito 352
JimyTuits 245
MostBay 206
…
ANPrecision 1
TeddyLumidi 1
stacha_cl 1
AlanAtWTS 1
pritenmodi 1
Name: count, Length: 20809, dtype: int64
[18]: len(df["User"])
[18]: 37460
[19]: plt.figure(figsize=(25,18))
sns.countplot(y="User",data=df[: 100],palette="Set2")
plt.yticks(rotation=45)
plt.show()
[20]: df["UserVerified"].value_counts()
[20]: UserVerified
False 35660
True 1800
Name: count, dtype: int64
[21]: sns.countplot(x="UserVerified",data=df,palette="gnuplot")
plt.show()
[22]: plt.figure(figsize=(20,18))
sns.barplot(df[: 100], y="User", x="UserFollowers",palette="brg")
plt.show()
[23]: plt.figure(figsize=(20,18))
sns.barplot(df[: 100], y="User", x="UserFriends",palette="turbo")
plt.show()
[24]: plt.figure(figsize=(20,18))
sns.barplot(df[: 100], y="User", x="Retweets",palette="rainbow")
plt.show()
[25]: print(df["Likes"].dtype)
df["Likes"] = df["Likes"].astype(int) # Convert to float
print("=============================================")
object
=============================================
[26]: plt.figure(figsize=(20,18))
sns.barplot(df[: 100], y="User", x="Likes",palette="cubehelix")
plt.show()
[27]: df["Location"].value_counts()
[27]: Location
India 760
Manchester, UK 710
United States 602
Global 523
ChatGPT & AI News 440
…
LA / NYC / Sometimes SF 1
Crypto-twitter 1
Geneva, Switzerland 1
Northern San Diego County 1
Sea Cliff, NY 1
Name: count, Length: 8634, dtype: int64
[28]: len(df["Location"].unique())
[28]: 8634
[29]: plt.figure(figsize=(20,18))
sns.countplot(y="Location",data=df[: 100],palette="gist_earth")
plt.show()
/opt/conda/lib/python3.10/site-packages/IPython/core/pylabtools.py:152:
UserWarning: Glyph 127759 (\N{EARTH GLOBE ASIA-AUSTRALIA}) missing from current
font.
fig.canvas.print_figure(bytes_io, **kw)
[30]: plt.figure(figsize=(20,18))
sns.barplot(df[: 100], y="User", x="Hour",palette="terrain")
plt.show()
[31]: df["Day"].value_counts()
[31]: Day
Tuesday 7145
Wednesday 7138
Thursday 7086
Friday 6752
Monday 3750
Saturday 3128
Sunday 2461
Name: count, dtype: int64
[32]: plt.figure(figsize=(15,10))
sns.countplot(y="Day",data=df,palette="CMRmap")
plt.show()
[33]: import emoji
def preprocess_word(word):
word = re.sub(r'(.)\1+', r'\1\1', word)
word = re.sub(r'(-|\')', '', word)
return word
def is_valid_word(word):
return (re.search(r'^[a-zA-Z][a-z0-9A-Z\._]*$', word) is not None)
def handle_emojis(tweet):
emoji_patterns = {
r'(:\s?\)|:-\)|\(\s?:|\(-:|:\'\))': ' EMO_POS ',
r'(:\s?D|:-D|x-?D|X-?D)': ' EMO_POS ',
r'(<3|:\*)': ' EMO_POS ',
r'(;-?\)|;-?D|\(-?;)': ' EMO_POS ',
r'(:\s?\(|:-\(|\)\s?:|\)-:)': ' EMO_NEG ',
r'(:,\(|:\'\(|:"\()': ' EMO_NEG '
}
for pattern, replacement in emoji_patterns.items():
tweet = re.sub(pattern, replacement, tweet)
return tweet
def remove_emoji(tweet):
return emoji.replace_emoji(tweet, replace=" ")
def preprocess_tweet(tweet):
tweet = tweet.lower()
tweet = re.sub("^\d+\s|\s\d+\s|\s\d+$", " ", tweet)
tweet = re.sub(r'((www\.[\S]+)|(https?://[\S]+))', '', tweet)
tweet = re.sub(r'@[\S]+', '', tweet)
tweet = re.sub(r'#(\S+)', '', tweet)
tweet = re.sub(r'\brt\b', '', tweet)
tweet = re.sub(r'\.{2,}', ' ', tweet)
tweet = tweet.strip(' "\'')
tweet = remove_emoji(tweet)
tweet = re.sub(r'\s+', ' ', tweet)
processed_tweet = [preprocess_word(word) for word in tweet.split()]
return ' '.join(processed_tweet)
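# (missing from this export: the line applying this cleaner, which creates the
# df["clean_tweet"] column used from cell [37] onward, and the transformers
# imports needed below; presumably something like:)
df['clean_tweet'] = df['TweetStr'].apply(preprocess_tweet)
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification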
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
2024-08-04 08:13:44.295283: E
external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register
cuDNN factory: Attempting to register factory for plugin cuDNN when one has
already been registered
2024-08-04 08:13:44.295396: E
external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register
cuFFT factory: Attempting to register factory for plugin cuFFT when one has
already been registered
2024-08-04 08:13:44.449955: E
external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to
register cuBLAS factory: Attempting to register factory for plugin cuBLAS when
one has already been registered
/opt/conda/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning:
TypedStorage is deprecated. It will be removed in the future and UntypedStorage
will be the only storage class. This should only matter to you if you are using
storages directly. To access UntypedStorage directly, use
tensor.untyped_storage() instead of tensor.storage()
return self.fget.__get__(instance, owner)()
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-
sentiment-latest were not used when initializing
RobertaForSequenceClassification: ['roberta.pooler.dense.bias',
'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from
the checkpoint of a model trained on another task or with another architecture
(e.g. initializing a BertForSequenceClassification model from a
BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification
from the checkpoint of a model that you expect to be exactly identical
(initializing a BertForSequenceClassification model from a
BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device`
argument is passed to the `Pipeline` object. Model will be on CPU.
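The cells that build the sentiment pipeline (which triggers the device warning above) and print the sample tweet below are missing from this export; a plausible reconstruction, with the `sa` name implied by cells [37]-[39]:

from transformers import pipeline
sa = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
print(df.loc[12229].Tweet)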
O Chato GPT #desenho #drawing #draw #characters #humor #natureza #cartum #charge
#vidaurbana #cidade #ilustração #desenho #robo #cerveja #beercast #robot #ia #ai
#chatgpt #inteligenciaartificial https://t.co/EZWMR52u9g
============================================================
[37]: print(df.loc[12229].clean_tweet)
print("\n")
print("============================================================")
print("\n")
test=sa(df.loc[12229].clean_tweet)
print(test)
o chato gpt
============================================================
[38]: print(df.loc[18000].Tweet)
print("\n")
print("============================================================")
print("\n")
test=sa(df.loc[18000].Tweet)
print(test)
We can assure you that #AI won’t replace lawyers because regulators won’t let AI
do all legal tasks without a qualified #lawyer to take responsibility.
============================================================
[39]: print(df.loc[18000].clean_tweet)
print("\n")
print("============================================================")
print("\n")
test=sa(df.loc[18000].clean_tweet)
print(test)
we can assure you that won’t replace lawyers because regulators won’t let ai do
all legal tasks without a qualified to take responsibility.
============================================================
[40]: # df["sentiment_label"] = "-"
# df["sentiment_score"] = -1
# df_part_1 = df[:10000]
# df_part_2 = df[10000:20000]
# df_part_3 = df[20000:30000]
# df_part_4 = df[30000:]
# tqdm.pandas()
# df_part_1["sentiment_score"] = df_part_1["clean_tweet"].progress_apply(lambda␣
↪x: sa(x)[0]['score'])
# df_part_2["sentiment_score"] = df_part_2["clean_tweet"].progress_apply(lambda␣
↪x: sa(x)[0]['score'])
# df_part_3["sentiment_score"] = df_part_3["clean_tweet"].progress_apply(lambda␣
↪x: sa(x)[0]['score'])
# df_part_4["sentiment_score"] = df_part_4["clean_tweet"].progress_apply(lambda␣
↪x: sa(x)[0]['score'])
# df_new.to_csv("tweet_gpt.csv", index=False)
[43]: df1=pd.read_csv("/kaggle/input/tweet-gpt/tweet_gpt.csv")
[44]: df1.columns
'sentiment'],
dtype='object')
[45]: df1.drop(columns=["Url","Tweet","User","UserCreated","sentiment_label"],axis=1,inplace=True)
[46]: df1["sentiment"].value_counts()
[46]: sentiment
neutral 19218
positive 11705
negative 6537
Name: count, dtype: int64
[47]: plt.figure(figsize=(10,8))
sns.countplot(y="sentiment",data=df1,palette="gist_rainbow")
plt.show()
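The cell that aggregates daily sentiment counts is missing from this export; a plausible reconstruction of `sentiment_counts` (its shape is implied by the plot call that follows):

[48]: sentiment_counts = df1.groupby(['New_Date', 'sentiment']).size().reset_index(name='Count')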
plt.figure(figsize=(12, 6))
sns.barplot(data=sentiment_counts, x='New_Date', y='Count', hue='sentiment')
plt.title('Sentiment Label Distribution Over Time')
plt.xlabel('Date')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
[49]: df1.drop(columns=["Date","UserVerified","UserFollowers","UserFriends","Retweets","Likes","Location","New_Date","Unnamed: 0"],axis=1,inplace=True) # tail of this line truncated in the export; remaining names inferred from the columns left at [53]
[50]: df1.drop(columns=["Time","Hour","Day","TweetStr","hashtags","sentiment_score"],axis=1,inplace=True)
nltk.download('stopwords')
def clean_text(text):
    # Remove HTML tags
    text = BeautifulSoup(text, 'html.parser').get_text()
    # Keep letters only
    text = re.sub(r"[^a-zA-Z]", " ", text)
    # Remove punctuation
    text = text.translate(str.maketrans("", "", string.punctuation))
    # Remove emojis
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               "]+", flags=re.UNICODE)   # closing lines reconstructed; export cut off here
    text = emoji_pattern.sub(" ", text)
    # Convert to lowercase
    text = text.lower()
    # Remove stopwords and return (tail reconstructed; the cleaned descriptions
    # shown below have English stopwords stripped)
    words = [w for w in text.split() if w not in nltk.corpus.stopwords.words('english')]
    return " ".join(words)
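The cell applying the cleaner is missing from this export; the `clean_description` column that appears in the next output implies something like:

df1['clean_description'] = df1['Description'].apply(clean_text)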
[53]: df1.head()
[53]: Description \
0 Exploring the Best Of The Web. Tech, Games, Ar…
1 Mediji i milosrdje\n\n …
2 The official technology news website of The Ti…
3 Musician // Thinker // Metaphysical Dandy // f…
4 Get the latest updates and insights on the cry…
clean_tweet sentiment \
0 openai’s gpt4 just got supercharged! positive
1 classical art" is struggling not changed the … neutral
2 alibaba invites businesses to trial chatgpt ri… neutral
3 trying to stop students from using and is like… negative
4 i asked chatgpts ai chatbot how can i earn cry… neutral
clean_description
0 NaN
1 NaN
2 NaN
3 exploring best web tech games art weirdtubes c…
4 mediji milosrdje moralno preispitivanje sport
[54]: df1=df1[["clean_tweet","clean_description","sentiment"]]
[55]: df1.isnull().sum()
[55]: clean_tweet 1
clean_description 9474
sentiment 0
dtype: int64
df1['clean_description'].fillna(df1['clean_tweet'], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work
because the intermediate object on which we are setting values always behaves as
a copy.
df1['clean_tweet'].fillna(df1['clean_description'], inplace=True)
[58]: df1.head()
[58]: clean_tweet \
0 openai’s gpt4 just got supercharged!
1 classical art" is struggling not changed the …
2 alibaba invites businesses to trial chatgpt ri…
3 trying to stop students from using and is like…
4 i asked chatgpts ai chatbot how can i earn cry…
clean_description sentiment
0 openai’s gpt4 just got supercharged! positive
1 classical art" is struggling not changed the … neutral
2 alibaba invites businesses to trial chatgpt ri… neutral
3 exploring best web tech games art weirdtubes c… negative
4 mediji milosrdje moralno preispitivanje sport neutral
[59]: df1.isnull().sum()
[59]: clean_tweet 0
clean_description 0
sentiment 0
dtype: int64
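The cell that builds the combined `text` column is missing from this export; row 0 of the next output ("openai's gpt4 just got supercharged! openai's …") suggests a simple concatenation:

[60]: df1["text"] = df1["clean_tweet"] + " " + df1["clean_description"]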
[61]: df1.head()
[61]: clean_tweet \
0 openai’s gpt4 just got supercharged!
1 classical art" is struggling not changed the …
2 alibaba invites businesses to trial chatgpt ri…
3 trying to stop students from using and is like…
4 i asked chatgpts ai chatbot how can i earn cry…
clean_description sentiment \
0 openai’s gpt4 just got supercharged! positive
1 classical art" is struggling not changed the … neutral
2 alibaba invites businesses to trial chatgpt ri… neutral
3 exploring best web tech games art weirdtubes c… negative
4 mediji milosrdje moralno preispitivanje sport neutral
text
0 openai’s gpt4 just got supercharged! openai’s …
1 classical art" is struggling not changed the …
2 alibaba invites businesses to trial chatgpt ri…
3 trying to stop students from using and is like…
4 i asked chatgpts ai chatbot how can i earn cry…
[62]: df1.drop(columns=["clean_tweet","clean_description"],axis=1,inplace=True)
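The cell that renames the target column is missing from this export; `label` is selected next, so presumably:

[63]: df1 = df1.rename(columns={"sentiment": "label"})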
[64]: df1=df1[["text","label"]]
[65]: df1.head()
[66]: df1.shape
[66]: (37460, 2)
[67]: df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37460 entries, 0 to 37459
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 text 37460 non-null object
1 label 37460 non-null object
dtypes: object(2)
memory usage: 585.4+ KB
[68]: negative_len=df1[df1["label"]=="negative"]["text"].str.len()
positive_len=df1[df1["label"]=="positive"]["text"].str.len()
neutral_len=df1[df1["label"]=="neutral"]["text"].str.len()
plt.figure(figsize=(10, 8))
plt.hist(negative_len, bins=30, label='Negative', color="black",alpha=0.9)
plt.hist(positive_len, bins=30, label='Positive', color="green",alpha=0.9)
plt.hist(neutral_len, bins=30, label='Neutral', color="darkorange",alpha=0.9)
plt.title("Length Distribution of Text Data by Sentiment", fontsize=16)
plt.xlabel("Length of Text", fontsize=14)
plt.ylabel("Frequency", fontsize=14)
plt.legend()
plt.grid(True)
plt.show()
3 Positive Data wordcloud
[69]: plt.figure(figsize=(15,15))
positive_wordcloud=df1[df1["label"]=="positive"]
positive_text=" ".join(positive_wordcloud['text'].values.tolist())
wordcloud = WordCloud(width=800, height=800, stopwords=STOPWORDS, background_color='black', max_words=800, mask=mask).generate(positive_text)  # leading lines lost in export; reconstructed to match the neutral cell (colormap unknown)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
4 Negative Data wordcloud
[70]: plt.figure(figsize=(15,15))
negative_wordcloud=df1[df1["label"]=="negative"]
negative_text=" ".join(negative_wordcloud['text'].values.tolist())
wordcloud = WordCloud(width=800, height=800, stopwords=STOPWORDS, background_color='black', max_words=800, mask=mask).generate(negative_text)  # reconstructed as above
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
5 Neutral Data wordcloud
[71]: plt.figure(figsize=(15,15))
neutral_wordcloud=df1[df1["label"]=="neutral"]
neutral_text=" ".join(neutral_wordcloud['text'].values.tolist())
wordcloud = WordCloud(width=800, height=800, stopwords=STOPWORDS, background_color='black', max_words=800, colormap="nipy_spectral", mask=mask).generate(neutral_text)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
6 40 Most Common Words from Positive Text
[72]: positive_text = df1[df1["label"] == "positive"]
data_set = positive_text["text"].str.split()
all_words = [word for sublist in data_set for word in sublist]
counter = Counter(all_words)
common_words = counter.most_common(40)
df_common_words = pd.DataFrame(common_words, columns=['Word', 'Count'])
colors = ["darkviolet", "chocolate", "mediumslateblue", "darkgreen",
          "orangered", "mediumblue", "peru", "mediumspringgreen"]  # leading entries of this list lost in export
plt.figure(figsize=(15,10))
sns.barplot(x='Count', y='Word', data=df_common_words, palette=colors)
plt.title('40 Most Common Words in Positive Texts')
plt.xlabel('Count')
plt.ylabel('Word')
plt.show()
7 40 Most Common Words from Negative Text
[73]: negative_text = df1[df1["label"] == "negative"]
data_set = negative_text["text"].str.split()
all_words = [word for sublist in data_set for word in sublist]
counter = Counter(all_words)
common_words = counter.most_common(40)
df_common_words = pd.DataFrame(common_words, columns=['Word', 'Count'])
colors = ["royalblue", "darkorchid", "darkturquoise", "darkgoldenrod", "mediumvioletred", "mediumaquamarine",
          "lightcoral", "darkslategray", "olivedrab", "dodgerblue", "indianred", "limegreen", "steelblue"]  # list truncated in export; leading cell lines reconstructed from the parallel positive/neutral cells
plt.figure(figsize=(15,10))
sns.barplot(x='Count', y='Word', data=df_common_words, palette=colors)
plt.title('40 Most Common Words in Negative Texts')
plt.xlabel('Count')
plt.ylabel('Word')
plt.show()
8 40 Most Common Words from Neutral Text
[74]: neutral_text = df1[df1["label"] == "neutral"]
data_set = neutral_text["text"].str.split()
all_words = [word for sublist in data_set for word in sublist]
counter = Counter(all_words)
common_words = counter.most_common(40)
df_common_words = pd.DataFrame(common_words, columns=['Word', 'Count'])
plt.figure(figsize=(15,10))
sns.barplot(x='Count', y='Word', data=df_common_words, palette=colors)
plt.title('40 Most Common Words in Neutral Texts')
plt.xlabel('Count')
plt.ylabel('Word')
plt.show()
9 Topic Modeling
[75]: from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
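The cells that vectorize the corpus and fit the LDA model are missing from this export; a plausible sketch given the five-topic, ten-word printout below (parameter values and the human-readable topic labels are assumptions):

vectorizer = CountVectorizer(stop_words='english', max_features=5000)
dtm = vectorizer.fit_transform(df1['text'])
lda = LatentDirichletAllocation(n_components=5, random_state=42)
lda.fit(dtm)
feature_names = vectorizer.get_feature_names_out()
for idx, topic in enumerate(lda.components_, start=1):
    # take the ten highest-weight words per topic
    top_words = [feature_names[i] for i in topic.argsort()[-10:][::-1]]
    print(f"Topic {idx}: {', '.join(top_words)}")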
Topic 1 (…): …, work
Topic 2 (Business): chatgpt, ai, marketing, business, tools, digital, data,
technology, world, web
Topic 3 (Language Processing): ai, chatgpt, like, world, prompt, language, just,
asked, intelligence, time
Topic 4 (Privacy and Security): https, ai, data, italy, privacy, chatgpt, news,
business, concerns, openai
Topic 5 (Technical Details): gt, link, chatgpt, tech, https, use, details, ai,
visit, available
[76]: df1['number_character']=df1['text'].apply(len)
df1['number_of_words'] = df1['text'].apply(lambda x: len(x.split()))
df1['number_sentence'] = df1['text'].apply(lambda x: len(nltk.sent_tokenize(x)))
[77]: plt.figure(figsize=(15,12))
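# (the subplot lines are missing from this export; presumably distribution
# plots of the three new columns, e.g.:)
for i, col in enumerate(["number_character", "number_of_words", "number_sentence"], start=1):
    plt.subplot(3, 1, i)
    sns.histplot(df1[col])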
plt.tight_layout()
plt.show()
10 Average Text Length
[78]: avg_len=df1["text"].apply(len)
avg_len=avg_len.mean()
print(f"Average Text Length is : {avg_len:.2f}")
[79]: df1.drop(columns=["number_character","number_of_words","number_sentence"],axis=1,inplace=True)
[80]: df1["label"]=df1["label"].replace({"positive":0,"neutral":1,"negative":2})
[81]: label_name=["Positive","Neutral","Negative"]
[82]: df1.head()
[83]: df1.shape
[83]: (37460, 2)
[84]: vocab_size=30000
max_len=200
[86]: df1.head()
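The cell that turns `df1["text"]` into fixed-length integer sequences is missing from this export; a plausible sketch given `vocab_size=30000` and `max_len=200`, assuming the Keras `one_hot`/`pad_sequences` helpers (padding direction assumed):

[87]: from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
encoded_doc = [one_hot(text, vocab_size) for text in df1["text"]]
embedded_doc = pad_sequences(encoded_doc, maxlen=max_len, padding='pre')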
[88]: text_data=np.array(embedded_doc)
y=df1["label"]
label_data=np.array(y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(text_data, label_data, test_size=0.2, random_state=42)
embedding_dim = 100
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_shape=(max_len,)))
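# The remaining layer lines are missing from this export; the model.summary()
# below pins down the stack exactly (dropout rates and activations assumed),
# using: from tensorflow.keras.models import Sequential; from
# tensorflow.keras.layers import Embedding, GRU, Dropout, Dense; from
# tensorflow.keras.optimizers import Adam
model.add(GRU(512, return_sequences=True))
model.add(Dropout(0.5))
model.add(GRU(256))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dense(3, activation='softmax'))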
learning_rate = 0.0001
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()
/opt/conda/lib/python3.10/site-packages/keras/src/layers/core/embedding.py:93:
UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When
using Sequential models, prefer using an `Input(shape)` object as the first
layer in the model instead.
super().__init__(**kwargs)
Model: "sequential"
����������������������������������������������������������������������������
� Layer (type) � Output Shape � Param # �
����������������������������������������������������������������������������
� embedding (Embedding) � (None, 200, 100) � 3,000,000 �
����������������������������������������������������������������������������
� gru (GRU) � (None, 200, 512) � 943,104 �
����������������������������������������������������������������������������
� dropout (Dropout) � (None, 200, 512) � 0 �
����������������������������������������������������������������������������
� gru_1 (GRU) � (None, 256) � 591,360 �
36
����������������������������������������������������������������������������
� dropout_1 (Dropout) � (None, 256) � 0 �
����������������������������������������������������������������������������
� dense (Dense) � (None, 256) � 65,792 �
����������������������������������������������������������������������������
� dense_1 (Dense) � (None, 3) � 771 �
����������������������������������������������������������������������������
[90]:
[91]: from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
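# The ReduceLROnPlateau callback and the fit call are missing from this
# export; a plausible reconstruction (batch_size inferred from the 1873
# steps per epoch on ~29,968 training rows; other values assumed):
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1)
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=40, batch_size=16, callbacks=[early_stopping, reduce_lr])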
Epoch 1/40
1873/1873 ━━━━━━━━━━━━━━━━━━━━ 439s 232ms/step - accuracy: 0.5246 - loss: 0.9971 - val_accuracy: 0.6137 - val_loss: 0.8954 - learning_rate: 1.0000e-04
Epoch 2/40
1873/1873 ━━━━━━━━━━━━━━━━━━━━ 436s 233ms/step - accuracy: 0.6666 - loss: 0.7830 - val_accuracy: 0.6849 - val_loss: 0.7147 - learning_rate: 1.0000e-04
Epoch 3/40
1873/1873 ━━━━━━━━━━━━━━━━━━━━ 435s 232ms/step - accuracy: 0.7856 - loss: 0.5333 - val_accuracy: 0.6913 - val_loss: 0.7522 - learning_rate: 1.0000e-04
Epoch 4/40
1873/1873 ━━━━━━━━━━━━━━━━━━━━ 435s 232ms/step - accuracy: 0.8400 - loss: 0.4096 - val_accuracy: 0.6995 - val_loss: 0.7919 - learning_rate: 1.0000e-04
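The cell marker and the subplot-creation line are missing from this export; presumably something like:

fig, ax = plt.subplots(1, 2, figsize=(15, 6))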
# First subplot
ax[0].plot(history.history['accuracy'],label="Accuracy",color="red")
ax[0].plot(history.history['val_accuracy'],label="Validation Accuracy",color="green")
ax[0].set_title('Model Accuracy')
ax[0].set_ylabel('Accuracy')
ax[0].set_xlabel('Epoch')
ax[0].legend(loc='best')
# Second subplot
ax[1].plot(history.history['loss'],label="Loss",color="blue")
ax[1].plot(history.history['val_loss'],label="Validation Loss",color="black")
ax[1].set_title('Model Loss')
ax[1].set_ylabel('Loss')
ax[1].set_xlabel('Epoch')
ax[1].legend(loc='best')
plt.show()
[94]: pred=model.predict(X_test)
final_pred=np.argmax(pred,axis=1)
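The intervening cell is missing from this export; it presumably imported the metrics used next:

from sklearn.metrics import classification_report, confusion_matrix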
[96]: print(classification_report(y_test,final_pred,target_names=label_name))
[97]: cf=confusion_matrix(y_test,final_pred)
plt.figure(figsize=(10,8))
sns.heatmap(cf, annot=True, cmap="gnuplot", fmt="d", xticklabels=label_name, yticklabels=label_name)  # a trailing argument ("l…") was cut off in the export
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()
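The cell that defines `y_probas` (and the scikit-plot import) is missing from this export; presumably the softmax outputs from the model:

import scikitplot as skplt
y_probas = model.predict(X_test)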
[99]: skplt.metrics.plot_precision_recall(y_test, y_probas,figsize=(10,8))
plt.show()
[100]: from sklearn.metrics import roc_auc_score
roc_auc = roc_auc_score(y_test, pred,multi_class='ovr')
plt.plot([])
plt.text(0, 0, f'ROC AUC Score: {roc_auc:.4f}', fontsize=16, ha='center', va='center', color="indigo")
plt.axis('off')
plt.xlim(-1, 1)
plt.ylim(-1,1)
plt.show()
[101]: from sklearn.metrics import log_loss,cohen_kappa_score,matthews_corrcoef
logarithm_loss=log_loss(y_test,pred)
plt.plot([])
plt.text(0, 0, f'Log Loss: {logarithm_loss:.4f}', fontsize=16, ha='center', va='center', color="black")
plt.axis('off')
plt.show()
[102]: kappa = cohen_kappa_score(y_test,final_pred)
plt.plot([])
plt.text(0, 0, f'Cohen Kappa Score: {kappa:.4f}', fontsize=16, ha='center', va='center', color="orangered")
plt.axis('off')
plt.show()
[103]: mcc = matthews_corrcoef(y_test, final_pred)
plt.plot([])
plt.text(0, 0, f'Matthews Correlation Coefficient: {mcc:.4f}', fontsize=16, ha='center', va='center', color="saddlebrown")
plt.axis('off')
plt.xlim(-1, 1)
plt.ylim(-1,1)
plt.show()
[104]: from sklearn.metrics import accuracy_score
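# (the score and display lines are missing from this export; presumably,
# following the pattern of the surrounding cells:)
accuracy = accuracy_score(y_test, final_pred)
plt.plot([])
plt.text(0, 0, f'Accuracy Score: {accuracy:.4f}', fontsize=16, ha='center', va='center')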
plt.axis('off')
plt.xlim(-1, 1)
plt.ylim(-1,1)
plt.show()
11 Custom Data prediction
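The cells that encode the custom text are missing from this export. A plausible reconstruction, assuming the same `one_hot`/`pad_sequences` encoding used for training (the corpus-plus-custom concatenation is implied by the `combined_padded_data[-len(custom_data):]` slicing below, and `custom_final_pred` is presumably the argmax of `custom_pred`):

from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences

# run after custom_text_data has been extended in each cell below:
combined_text = df1["text"].tolist() + custom_text_data
encoded = [one_hot(t, vocab_size) for t in combined_text]
combined_padded_data = pad_sequences(encoded, maxlen=max_len, padding='pre')
# and after model.predict:
custom_final_pred = np.argmax(custom_pred, axis=1)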
[105]: custom_text_data = []
custom_data = ["Great job! Your model achieved an accuracy score of"]
custom_text_data.extend(custom_data)
custom_padded_data = combined_padded_data[-len(custom_data):]
custom_pred = model.predict(custom_padded_data)
for p in custom_final_pred:
if p == 0:
print("Positive")
elif p == 1:
print("Neutral")
elif p == 2:
print("Negative")
[106]: custom_text_data = []
custom_data = ["The movie I watched last night was a complete disaster. The␣
↪plot was confusing, the acting was terrible, and the special effects were␣
↪anyone."
]
custom_text_data.extend(custom_data)
custom_padded_data = combined_padded_data[-len(custom_data):]
custom_pred = model.predict(custom_padded_data)
for p in custom_final_pred:
if p == 0:
print("Positive")
elif p == 1:
print("Neutral")
elif p == 2:
print("Negative")
[107]: custom_text_data = []
custom_data = ["I went to the grocery store and bought some fruits and␣
↪vegetables. Then, I stopped by the bank to deposit a check. After that, I␣
]
custom_text_data.extend(custom_data)
custom_padded_data = combined_padded_data[-len(custom_data):]
custom_pred = model.predict(custom_padded_data)
for p in custom_final_pred:
if p == 0:
print("Positive")
elif p == 1:
print("Neutral")
elif p == 2:
print("Negative")
[ ]: