0% found this document useful (0 votes)

15 views2 pages

Q 3

question 3

Uploaded by

Anonymous

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

15 views2 pages

Q 3

question 3

Uploaded by

Anonymous

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 2

3).

Product review dataset

In [ ]: import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [ ]: nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...

[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
Out[ ]: True

Load the dataset

In [ ]: with open('/content/dataset.txt', 'r', encoding='utf-8') as file:
lines = file.readlines()

In [ ]: # Initialize lists to store labels and reviews

labels = []
reviews = []

In [ ]: # Process each line in the dataset

for line in lines:
# Split the line by '__label__'
parts = line.split('__label__')
# Check if there are two parts
if len(parts) == 2:
# Extract label and review
label = '__label__' + parts[1].strip().split()[0] # Extracting the label
review = ' '.join(parts[1].strip().split()[1:]) # Extracting the review
labels.append(label)
reviews.append(review)

Creating a dataframe
In [ ]: # Create a DataFrame
df = pd.DataFrame({'label': labels, 'review': reviews})

# Map labels to sentiments

sentiment_map = {
'__label__1': 'positive',
'__label__2': 'negative'
}

df['sentiment'] = df['label'].map(sentiment_map)

# Drop the 'label' column

df.drop(columns=['label'], inplace=True)

# Display the DataFrame

print(df.head())

print(df.tail())

review sentiment
0 Great CD: My lovely Pat has one of the GREAT v... negative
1 One of the best game music soundtracks - for a... negative
2 Batteries died within a year ...: I bought thi... positive
3 works fine, but Maha Energy is better: Check o... negative
4 Great for the non-audiophile: Reviewed quite a... negative
review sentiment
399995 Unbelievable- In a Bad Way: We bought this Tho... positive
399996 Almost Great, Until it Broke...: My son reciev... positive
399997 Disappointed !!!: I bought this toy for my son... positive
399998 Classic Jessica Mitford: This is a compilation... negative
399999 Comedy Scene, and Not Heard: This DVD will be ... positive

Preprocessing
In [ ]: def preprocess_text(text):
text = text.lower() # Convert to lowercase
tokenizer = nltk.RegexpTokenizer(r'\w+')
tokens = tokenizer.tokenize(text) # Tokenize
stop_words = set(stopwords.words('english'))
tokens = [word for word in tokens if word not in stop_words] # Remove stopwords
stemmer = PorterStemmer()
tokens = [stemmer.stem(word) for word in tokens] # Stemming
return ' '.join(tokens)

In [ ]: df['review'] = df['review'].apply(preprocess_text)

Sentiment Distribution
In [ ]: df['sentiment'].value_counts().plot(kind='bar')
plt.title('Product Review Data Sentiment Distribution')
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.show()

Vectorization using TF-IDF

In [ ]: tfidf = TfidfVectorizer(max_features=1500, min_df=5, max_df=0.7)
X = tfidf.fit_transform(df['review']).toarray()

Split the data into training and testing sets

In [ ]: y = df['sentiment'].map({'positive': 1, 'negative': 0}).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [ ]: y

Out[ ]: array([0, 0, 1, ..., 1, 0, 1])

Logistic Regression classifier

In [ ]: lr_classifier = LogisticRegression()
lr_classifier.fit(X_train, y_train)
lr_pred = lr_classifier.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_pred)
print("Logistic Regression Accuracy:", lr_accuracy)

Logistic Regression Accuracy: 0.86795

Naive Bayes classifier

In [ ]: nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)
nb_pred = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)
print("Naive Bayes Accuracy:", nb_accuracy)

Naive Bayes Accuracy: 0.827925

In [ ]: def plot_confusion_matrix(y_true, y_pred, model_name):

cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=['Negative','Positive'],
yticklabels=['Negative','Positive'])
plt.title(f'{model_name} Confusion Matrix')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

Confusion matrix for Logistic Regression

In [ ]: plot_confusion_matrix(y_test, lr_pred, "Logistic Regression")

Confusion matrix for Naive Bayes

In [ ]: plot_confusion_matrix(y_test, nb_pred, "Naive Bayes")

Accuracy, Precision, Recall, and F1 score for Logistic Regression and Naive Bayes
In [ ]: def print_evaluation_metrics(y_true, y_pred, model_name):
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print(f"----------- {model_name} Evaluation Metrics -----------")

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

In [ ]: print_evaluation_metrics(y_test, lr_pred, "Logistic Regression")

print_evaluation_metrics(y_test, nb_pred, "Naive Bayes")

----------- Logistic Regression Evaluation Metrics -----------

Accuracy: 0.8679
Precision: 0.8680
Recall: 0.8679
F1 Score: 0.8679
----------- Naive Bayes Evaluation Metrics -----------
Accuracy: 0.8279
Precision: 0.8280
Recall: 0.8279
F1 Score: 0.8279

Sentiment Analysis On Online Reviews
No ratings yet
Sentiment Analysis On Online Reviews
11 pages
Solution T1
No ratings yet
Solution T1
9 pages
Sentiment Analysis Project Documentation
No ratings yet
Sentiment Analysis Project Documentation
2 pages
Sentiment Analysis Using Text Mining PDF
100% (1)
Sentiment Analysis Using Text Mining PDF
12 pages
Sentiment Analysis of Reviews Using Machine Learning
100% (1)
Sentiment Analysis of Reviews Using Machine Learning
33 pages
NLP Transformer-Based Models Used For Sentiment Analysis: 1. BERT
No ratings yet
NLP Transformer-Based Models Used For Sentiment Analysis: 1. BERT
98 pages
22K61A0654 2 Sasi Auto
No ratings yet
22K61A0654 2 Sasi Auto
24 pages
Sentimental Analysis
No ratings yet
Sentimental Analysis
3 pages
Machine Learning Code Explanation
No ratings yet
Machine Learning Code Explanation
33 pages
AIML IA3 Loki & SG
No ratings yet
AIML IA3 Loki & SG
31 pages
NLP Submission
No ratings yet
NLP Submission
29 pages
Ritesh Mangla ML PracticalFile
No ratings yet
Ritesh Mangla ML PracticalFile
55 pages
NLP Manual
No ratings yet
NLP Manual
21 pages
Code
No ratings yet
Code
18 pages
Ai Project
No ratings yet
Ai Project
15 pages
Research Paper Text Classification
No ratings yet
Research Paper Text Classification
17 pages
Lab Report 8
No ratings yet
Lab Report 8
11 pages
BAET Record
No ratings yet
BAET Record
19 pages
WDM - Week - I
No ratings yet
WDM - Week - I
24 pages
Group 4 MovieReview
No ratings yet
Group 4 MovieReview
10 pages
Code
No ratings yet
Code
13 pages
Sentiment Analysis With NLP Deep Learning
No ratings yet
Sentiment Analysis With NLP Deep Learning
8 pages
DS - Lab Report.
No ratings yet
DS - Lab Report.
25 pages
Module4 TextAnalytics
No ratings yet
Module4 TextAnalytics
9 pages
Neural Networks
No ratings yet
Neural Networks
8 pages
Sma Exp 10 Code Print
No ratings yet
Sma Exp 10 Code Print
7 pages
Module 5: Purposes and Functions of Language Assessment & Test
100% (1)
Module 5: Purposes and Functions of Language Assessment & Test
15 pages
Python 21to30
No ratings yet
Python 21to30
9 pages
2023 Aug How To Produce Data For A Neural networkORG
No ratings yet
2023 Aug How To Produce Data For A Neural networkORG
6 pages
2023 Aug How To Prepare Data For A Neural Network A Step-by-Step Guide
No ratings yet
2023 Aug How To Prepare Data For A Neural Network A Step-by-Step Guide
7 pages
Importing Packages: Id Label Tweet 0 1 2 3 4
No ratings yet
Importing Packages: Id Label Tweet 0 1 2 3 4
8 pages
17 Practicals
No ratings yet
17 Practicals
7 pages
Amazon Product Review - Ipynb - Colaboratory
No ratings yet
Amazon Product Review - Ipynb - Colaboratory
7 pages
Logistic Regression Example
No ratings yet
Logistic Regression Example
7 pages
UNIT 1 - Basic C Programming
No ratings yet
UNIT 1 - Basic C Programming
38 pages
ML Week10.1
No ratings yet
ML Week10.1
5 pages
NM Project
No ratings yet
NM Project
18 pages
Sentiment Analysis From H El Reviews: Data Mining For Business Intelligence
No ratings yet
Sentiment Analysis From H El Reviews: Data Mining For Business Intelligence
13 pages
Dataset Description: Amazon Reviews of Unlocked Phone
No ratings yet
Dataset Description: Amazon Reviews of Unlocked Phone
4 pages
Python CA 4
No ratings yet
Python CA 4
9 pages
RajSingh WIexp7
No ratings yet
RajSingh WIexp7
8 pages
DL 3
No ratings yet
DL 3
5 pages
Comsats University Islamabad Wah Campus (Project Report) : Submitted by
No ratings yet
Comsats University Islamabad Wah Campus (Project Report) : Submitted by
14 pages
Bert Sentiment
No ratings yet
Bert Sentiment
7 pages
Kindle Review Sentiment Analysis - Ipynb - Colab
No ratings yet
Kindle Review Sentiment Analysis - Ipynb - Colab
5 pages
DL Exp-10,11,12
No ratings yet
DL Exp-10,11,12
6 pages
Amna Bagh Ali
No ratings yet
Amna Bagh Ali
6 pages
En6G-Iig-7.3.1 En6G-Iig-7.3.2: Test - Id 32317&title Prepositional Phrases
100% (1)
En6G-Iig-7.3.1 En6G-Iig-7.3.2: Test - Id 32317&title Prepositional Phrases
15 pages
Jadavpur University: Assignment Submission
No ratings yet
Jadavpur University: Assignment Submission
9 pages
Sentiment Analysis
No ratings yet
Sentiment Analysis
4 pages
Amazon Sentiment Analysis Documentation
No ratings yet
Amazon Sentiment Analysis Documentation
4 pages
Detailed Report
No ratings yet
Detailed Report
6 pages
R002 KrishAhuja BDA Lab9.Ipynb - Colab
No ratings yet
R002 KrishAhuja BDA Lab9.Ipynb - Colab
3 pages
Configuring Oracle Workflow For OAuth 2.0 With Microsoft Office 365 Exchange Online in Oracle E-Business Suite Release 12.2 and Release 12.1.3 (Doc ID 2884072.1)
No ratings yet
Configuring Oracle Workflow For OAuth 2.0 With Microsoft Office 365 Exchange Online in Oracle E-Business Suite Release 12.2 and Release 12.1.3 (Doc ID 2884072.1)
17 pages
8-Text Classification - Jupyter Notebook
No ratings yet
8-Text Classification - Jupyter Notebook
2 pages
AI Project
No ratings yet
AI Project
6 pages
Mids Practical 3
No ratings yet
Mids Practical 3
2 pages
Maneesha Nidigonda Major Project
No ratings yet
Maneesha Nidigonda Major Project
11 pages
Capstone Project - Jaro-Prof. Babji
No ratings yet
Capstone Project - Jaro-Prof. Babji
5 pages
05 ML PDF
No ratings yet
05 ML PDF
1 page
Text Classification - Movie Review - News Wires
No ratings yet
Text Classification - Movie Review - News Wires
5 pages
Sentiment Analysis On Tweets
No ratings yet
Sentiment Analysis On Tweets
2 pages
UVM Interview Questions - VLSI Encyclopedia
No ratings yet
UVM Interview Questions - VLSI Encyclopedia
7 pages
Maneesha Nidigonda Verzeo Major Project
No ratings yet
Maneesha Nidigonda Verzeo Major Project
11 pages
Math Grade 7 DLL Q2 W7 JAN
No ratings yet
Math Grade 7 DLL Q2 W7 JAN
4 pages
Coldplay - Yellow: Were Came Wrote Was Took Was
No ratings yet
Coldplay - Yellow: Were Came Wrote Was Took Was
2 pages
Contrastive Linguistic - Morphology
No ratings yet
Contrastive Linguistic - Morphology
22 pages
Barracuda Web Filter AG PDF
No ratings yet
Barracuda Web Filter AG PDF
86 pages
Ali (A.s.) Ashja-un-Nas
0% (1)
Ali (A.s.) Ashja-un-Nas
177 pages
Comparison and Contrast
No ratings yet
Comparison and Contrast
26 pages
New+Interchange+1+ +Teacher+Book +u07
0% (1)
New+Interchange+1+ +Teacher+Book +u07
7 pages
Lesson Plan First
No ratings yet
Lesson Plan First
6 pages
A Response To David Gates: "The Door Is About To Close ": Are You Ready?
No ratings yet
A Response To David Gates: "The Door Is About To Close ": Are You Ready?
52 pages
Orientalism and Visual Culture: Imagining Mesopotamia in Nineteenth Century Europe
No ratings yet
Orientalism and Visual Culture: Imagining Mesopotamia in Nineteenth Century Europe
16 pages
Appendix 1: Lesson Plan (Template) : Lesson Plan Subject: English Trainee: Bashayer Abdul-Aziz Topic or Theme: Phonics
No ratings yet
Appendix 1: Lesson Plan (Template) : Lesson Plan Subject: English Trainee: Bashayer Abdul-Aziz Topic or Theme: Phonics
5 pages
Tercer Condicional
No ratings yet
Tercer Condicional
3 pages
Periodic Solutions of Non-Autonomous Ordinary Differential Equations
No ratings yet
Periodic Solutions of Non-Autonomous Ordinary Differential Equations
15 pages
Elt 124 Castaneda Ubbanan
No ratings yet
Elt 124 Castaneda Ubbanan
12 pages
Ubd Curriculum Template
No ratings yet
Ubd Curriculum Template
5 pages
Lesson 3 - Question Forms
No ratings yet
Lesson 3 - Question Forms
32 pages
15 Solving Problems Involving Test of Hypothesis On Population Proportion SPTC 1703 q4 FPF
No ratings yet
15 Solving Problems Involving Test of Hypothesis On Population Proportion SPTC 1703 q4 FPF
37 pages
Otaremwa Moses Ronaldo
No ratings yet
Otaremwa Moses Ronaldo
45 pages
Abdul Samad TresVista Resume
No ratings yet
Abdul Samad TresVista Resume
1 page
Reviewa 1
No ratings yet
Reviewa 1
6 pages
Example, Showing Entries in Different Databases: Relocatable
No ratings yet
Example, Showing Entries in Different Databases: Relocatable
15 pages
A Biographical Timeline
No ratings yet
A Biographical Timeline
7 pages
The Banking Concept of Education
No ratings yet
The Banking Concept of Education
6 pages
Naveen's Resume
No ratings yet
Naveen's Resume
1 page
Jamia Rahmania & Jannatia Mohila Madrasha Jamia Rahmania & Jannatia Mohila Madrasha
No ratings yet
Jamia Rahmania & Jannatia Mohila Madrasha Jamia Rahmania & Jannatia Mohila Madrasha
2 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet

Q 3

Uploaded by

Q 3

Uploaded by

3).

Product review dataset

[nltk_data] Downloading package punkt to /root/nltk_data...

Load the dataset

In [ ]: # Initialize lists to store labels and reviews

In [ ]: # Process each line in the dataset

# Map labels to sentiments

# Drop the 'label' column

# Display the DataFrame

Vectorization using TF-IDF

Split the data into training and testing sets

Out[ ]: array([0, 0, 1, ..., 1, 0, 1])

Logistic Regression classifier

Logistic Regression Accuracy: 0.86795

Naive Bayes classifier

Naive Bayes Accuracy: 0.827925

In [ ]: def plot_confusion_matrix(y_true, y_pred, model_name):

Confusion matrix for Logistic Regression

Confusion matrix for Naive Bayes

print(f"----------- {model_name} Evaluation Metrics -----------")

In [ ]: print_evaluation_metrics(y_test, lr_pred, "Logistic Regression")

----------- Logistic Regression Evaluation Metrics -----------

You might also like