0% found this document useful (0 votes)

14 views13 pages

Code

NOTHING

Uploaded by

hodcse

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

14 views13 pages

Code

NOTHING

Uploaded by

hodcse

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 13

import pandas as pd

import re
from textblob import TextBlob
import matplotlib.pyplot as plt
# 2
# Load dataset
# Path to the survey CSV; replace with your own file path.
file_path = '/content/Nri_Textual_Survey_Data.csv'
survey_data = pd.read_csv(file_path)
# 3
# ### 1. Text Preprocessing ###

from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

# Update preprocessing function to include stop word removal

def preprocess_text_with_stopwords(text, stop_words=None):
    """Normalise free text and drop stop words.

    The cleaning steps run in an order where each pattern can still match:
    HTML tags and URLs are removed *before* punctuation is stripped, because
    stripping punctuation first destroys the ``<``/``>`` and ``://`` markers
    those patterns rely on.

    Args:
        text: Value to clean. Non-string values are converted with ``str``;
            ``None`` and float NaN (missing cells) yield an empty string
            rather than the literal token "nan".
        stop_words: Optional collection of lowercase words to remove.
            Defaults to the module-level ``ENGLISH_STOP_WORDS``.

    Returns:
        The cleaned text: lowercase, ASCII-only, without tags, URLs,
        punctuation, digits or stop words, and with single spaces.
    """
    if stop_words is None:
        stop_words = ENGLISH_STOP_WORDS

    # Missing cells: None, or float NaN (the only float not equal to itself).
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    # Tags go first so a URL inside an attribute does not swallow the tag's
    # closing bracket and the text that follows it.
    text = re.sub(r'<.*?>', ' ', text)  # Remove HTML tags
    text = re.sub(r'http\S+|www\S+', ' ', text)  # Remove URLs
    text = re.sub(r'[^\x00-\x7F]+', '', text)  # Remove non-ASCII characters
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\d+', '', text)  # Remove digits

    # split()/join collapses every whitespace run (including newlines) to a
    # single space while filtering out the stop words.
    return " ".join(word for word in text.split() if word not in stop_words)

# Apply updated preprocessing to every column of a copy, so that
# survey_data keeps the raw responses.
processed_data = survey_data.copy()
for col in processed_data.columns:
    processed_data[col] = processed_data[col].apply(preprocess_text_with_stopwords)
# 4
# Save to the Colab environment (current working directory).
processed_data.to_csv('preprocessed_data.csv', index=False)

# Download the file. google.colab only exists inside Colab, so the import is
# guarded: elsewhere the CSV written above is simply left on disk.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; 'preprocessed_data.csv' was saved locally.")
else:
    files.download('preprocessed_data.csv')
# 5
### 2. Sentiment Analysis ###
def analyze_sentiment(text):
    """Classify text as 'happy', 'neutral', or 'unhappy' using TextBlob polarity.

    Args:
        text: Text to score; handed to ``TextBlob`` unchanged.

    Returns:
        'happy' when the polarity is positive, 'neutral' when it is exactly
        zero, 'unhappy' otherwise. If scoring raises for any reason the
        result is 'neutral'.
    """
    try:
        # Polarity ranges from -1 (negative) to 1 (positive)
        polarity = TextBlob(text).sentiment.polarity
    except Exception:
        # Deliberate best-effort: one unscorable cell must not abort the
        # column-wide apply(), so it is counted as neutral.
        return 'neutral'

    if polarity > 0:
        return 'happy'
    if polarity == 0:
        return 'neutral'
    return 'unhappy'

# Add sentiment columns for each facility
sentiment_data = processed_data.copy()
for col in processed_data.columns:
    sentiment_data[col + '_sentiment'] = sentiment_data[col].apply(analyze_sentiment)
# 6
### 3. Sentiment Analysis Summary ###
# Count sentiments for each facility
sentiment_labels = ['happy', 'neutral', 'unhappy']
facility_sentiment_cols = [
    col for col in sentiment_data.columns if col.endswith('_sentiment')
]
sentiment_summary = (
    sentiment_data[facility_sentiment_cols]
    .apply(pd.Series.value_counts)
    # Reindex on the full label set: a label that never occurs still gets a
    # row of zeros, and the row order is fixed. Assigning three column names
    # afterwards would raise whenever one class is absent from the data.
    .reindex(sentiment_labels)
    .fillna(0)
    .astype(int)
    .T  # rows: facilities, columns: happy / neutral / unhappy
)

# Overall sentiment counts
overall_sentiment_counts = sentiment_summary.sum()
# 7
### Visualization ###
# Overall Sentiment Distribution (Bar and Pie Charts)
# One figure, two panels: counts as bars on the left, shares as a pie on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

overall_sentiment_counts.plot(
    kind='bar',
    ax=axes[0],
    color=['green', 'orange', 'red'],
)
axes[0].set(
    title='Overall Sentiment Distribution (Bar Chart)',
    xlabel='Sentiment',
    ylabel='Count',
)

overall_sentiment_counts.plot(
    kind='pie',
    ax=axes[1],
    colors=['green', 'orange', 'red'],
    autopct='%1.1f%%',
)
# Blank y-label: pandas would otherwise print the Series name beside the pie.
axes[1].set(title='Overall Sentiment Distribution (Pie Chart)', ylabel='')

plt.tight_layout()
plt.show()
# 8
# Facility-Wise Sentiment Distribution (Stacked Bar Chart)
# One bar per facility, stacked by sentiment label.
sentiment_summary.plot.bar(
    stacked=True,
    color=['green', 'orange', 'red'],
    title='Facility-Wise Sentiment Distribution',
    figsize=(12, 8),
)
# The plot call above makes its axes current, so gca() addresses that chart.
plt.gca().set(xlabel='Facilities', ylabel='Count')
plt.legend(title='Sentiment')
plt.show()
# 9
# Individual Facility Sentiment Pie Charts
# Lay the pies out on a grid three columns wide.
n_facilities = len(sentiment_summary)
# Ceiling division; at least one row so subplots() gets a valid grid even
# when the summary is empty.
rows = max(1, -(-n_facilities // 3))
fig, axes = plt.subplots(rows, 3, figsize=(18, 5 * rows), squeeze=False)

axes = axes.flatten()
for idx, facility in enumerate(sentiment_summary.index):
    sentiment_summary.loc[facility].plot(
        kind='pie',
        ax=axes[idx],
        autopct='%1.1f%%',
        colors=['green', 'orange', 'red'],
        title=f'{facility} Sentiment Distribution'
    )
    axes[idx].set_ylabel('')

# Hide the grid cells left over when the facility count is not a multiple of 3.
for ax in axes[n_facilities:]:
    ax.axis('off')

plt.tight_layout()
plt.show()
# 10
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import matplotlib.pyplot as plt
import pandas as pd

# Assuming 'processed_data' contains the preprocessed text data

# Dynamically set the column name
column_name = 'Internship'  # Replace this with your desired column

# Bag-of-Words Extraction
bow_vectorizer = CountVectorizer(max_features=1000)  # Limit vocabulary size
bow_features = bow_vectorizer.fit_transform(processed_data[column_name])
# Column sums give one total count per vocabulary term; .A1 flattens the
# 1 x n_terms matrix returned by the sparse sum into a 1-D array.
bow_term_frequencies = bow_features.sum(axis=0).A1

# Create BoW DataFrame
bow_term_df = pd.DataFrame({
    'Term': bow_vectorizer.get_feature_names_out(),
    'Frequency': bow_term_frequencies
}).sort_values(by='Frequency', ascending=False)

# TF-IDF Extraction
tfidf_vectorizer = TfidfVectorizer(max_features=1000)  # Limit vocabulary size
tfidf_features = tfidf_vectorizer.fit_transform(processed_data[column_name])
# Summed TF-IDF weight of each term over all documents.
tfidf_term_scores = tfidf_features.sum(axis=0).A1

# Create TF-IDF DataFrame
tfidf_term_df = pd.DataFrame({
    'Term': tfidf_vectorizer.get_feature_names_out(),
    'TF-IDF Score': tfidf_term_scores
}).sort_values(by='TF-IDF Score', ascending=False)

# Merge BoW and TF-IDF for comparison
# Merge the FULL term tables first and only then keep the terms that are in
# either top 20. Merging two pre-truncated top-20 tables and filling the gaps
# with 0 would show a false zero for any term that made only one of the lists.
top_n = 20
top_terms = (
    set(bow_term_df['Term'].head(top_n)) | set(tfidf_term_df['Term'].head(top_n))
)
comparison_df = pd.merge(
    bow_term_df.rename(columns={"Frequency": "Frequency_BoW"}),
    tfidf_term_df.rename(columns={"TF-IDF Score": "Frequency_TF-IDF"}),
    on="Term",
    how="outer"
).fillna(0)
comparison_df = comparison_df[comparison_df["Term"].isin(top_terms)]

# Sort by Bag-of-Words Frequency for consistency
comparison_df = comparison_df.sort_values(by="Frequency_BoW", ascending=False)

# Plot the comparison graph
plt.figure(figsize=(12, 8))

# Bar width for side-by-side bars
bar_width = 0.35
index = range(len(comparison_df))

# Bag-of-Words Bar
plt.bar(index, comparison_df["Frequency_BoW"], bar_width,
        label="Bag-of-Words", color="skyblue")

# TF-IDF Bar, shifted right by one bar width so the pair sits side by side
plt.bar([i + bar_width for i in index], comparison_df["Frequency_TF-IDF"],
        bar_width, label="TF-IDF", color="orange")

# Add labels and title
plt.xlabel("Terms")
plt.ylabel("Frequency/TF-IDF Score")
plt.title(f"Comparison of Bag-of-Words and TF-IDF Representations for '{column_name}'")
# Centre each tick between the two bars of its term.
plt.xticks([i + bar_width / 2 for i in index], comparison_df["Term"],
           rotation=45, ha="right")
plt.legend()
plt.tight_layout()
plt.show()

# 11
from sklearn.model_selection import train_test_split

# Target variable based on the dynamic column name
target_column_name = column_name + '_sentiment'  # Append '_sentiment' dynamically
y = sentiment_data[target_column_name]  # Use the dynamic sentiment column name

# Split BoW features, TF-IDF features and labels in ONE call so all three are
# partitioned with the same row selection and stay aligned by construction.
(
    X_train_bow, X_test_bow,
    X_train_tfidf, X_test_tfidf,
    y_train, y_test,
) = train_test_split(
    bow_features, tfidf_features, y, test_size=0.2, random_state=42
)

# 12

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Initialize models
log_reg = LogisticRegression(max_iter=1000, random_state=42)
random_forest = RandomForestClassifier(random_state=42)
svm = SVC(kernel='linear', random_state=42)

# Train models on BoW features
log_reg.fit(X_train_bow, y_train)
random_forest.fit(X_train_bow, y_train)
svm.fit(X_train_bow, y_train)

# Train separate, identically configured models on TF-IDF features.
# Scoring the BoW-fitted models on TF-IDF input would not measure TF-IDF at
# all, so every "TF-IDF" number below comes from these copies. clone() yields
# an unfitted estimator with the same hyper-parameters.
log_reg_tfidf = clone(log_reg).fit(X_train_tfidf, y_train)
random_forest_tfidf = clone(random_forest).fit(X_train_tfidf, y_train)
svm_tfidf = clone(svm).fit(X_train_tfidf, y_train)
# 13
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score

# Predictions for BoW features (computed once, reused by every metric below)
y_pred_log_reg_bow = log_reg.predict(X_test_bow)
y_pred_rf_bow = random_forest.predict(X_test_bow)
y_pred_svm_bow = svm.predict(X_test_bow)

# Predictions for TF-IDF features, from the TF-IDF-trained models
y_pred_log_reg_tfidf = log_reg_tfidf.predict(X_test_tfidf)
y_pred_rf_tfidf = random_forest_tfidf.predict(X_test_tfidf)
y_pred_svm_tfidf = svm_tfidf.predict(X_test_tfidf)

# Dynamically calculate model accuracies
bow_accuracies = [
    accuracy_score(y_test, y_pred)
    for y_pred in (y_pred_log_reg_bow, y_pred_rf_bow, y_pred_svm_bow)
]

tfidf_accuracies = [
    accuracy_score(y_test, y_pred)
    for y_pred in (y_pred_log_reg_tfidf, y_pred_rf_tfidf, y_pred_svm_tfidf)
]

# Plotting model accuracy comparison
model_names = ['Logistic Regression', 'Random Forest', 'SVM']
x = range(len(model_names))
bar_width = 0.35

plt.figure(figsize=(10, 6))
plt.bar(x, bow_accuracies, width=bar_width, label='BoW', color='skyblue')
plt.bar([i + bar_width for i in x], tfidf_accuracies, width=bar_width,
        label='TF-IDF', color='orange')

# Add labels and title
plt.xticks([i + bar_width / 2 for i in x], model_names)
plt.ylabel('Accuracy')
plt.title('Model Accuracy Comparison (BoW vs. TF-IDF)')
plt.legend()
plt.tight_layout()
plt.show()
# 14
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score

# Dynamically calculate metrics for a specific model (e.g., Logistic Regression)
bow_scores = [
    precision_score(y_test, y_pred_log_reg_bow, average='weighted'),
    recall_score(y_test, y_pred_log_reg_bow, average='weighted'),
    f1_score(y_test, y_pred_log_reg_bow, average='weighted')
]

tfidf_scores = [
    precision_score(y_test, y_pred_log_reg_tfidf, average='weighted'),
    recall_score(y_test, y_pred_log_reg_tfidf, average='weighted'),
    f1_score(y_test, y_pred_log_reg_tfidf, average='weighted')
]

# Plot grouped bar chart
metrics = ['Precision', 'Recall', 'F1-Score']
x = np.arange(len(metrics))
bar_width = 0.35

plt.figure(figsize=(10, 6))
plt.bar(x, bow_scores, width=bar_width, label='BoW', color='skyblue')
plt.bar(x + bar_width, tfidf_scores, width=bar_width, label='TF-IDF',
        color='orange')

# Add labels and title
plt.xticks(x + bar_width / 2, metrics)
plt.ylabel('Score')
plt.title('Model Performance Metrics (BoW vs. TF-IDF)')
plt.legend()
plt.tight_layout()
plt.show()

# 15
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion Matrices
# labels= pins every matrix to 3x3 in a fixed class order, so the tick labels
# drawn below stay correct even if a class is missing from the test split.
class_labels = ['happy', 'neutral', 'unhappy']
cm_log_reg_bow = confusion_matrix(y_test, y_pred_log_reg_bow, labels=class_labels)
cm_rf_bow = confusion_matrix(y_test, y_pred_rf_bow, labels=class_labels)
cm_svm_bow = confusion_matrix(y_test, y_pred_svm_bow, labels=class_labels)

cm_log_reg_tfidf = confusion_matrix(y_test, y_pred_log_reg_tfidf, labels=class_labels)
cm_rf_tfidf = confusion_matrix(y_test, y_pred_rf_tfidf, labels=class_labels)
cm_svm_tfidf = confusion_matrix(y_test, y_pred_svm_tfidf, labels=class_labels)

# Plotting all confusion matrices
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Titles for the matrices (same order as conf_matrices)
titles = [
    "Logistic Regression (BoW)", "Random Forest (BoW)", "SVM (BoW)",
    "Logistic Regression (TF-IDF)", "Random Forest (TF-IDF)", "SVM (TF-IDF)"
]

# All confusion matrices
conf_matrices = [
    cm_log_reg_bow, cm_rf_bow, cm_svm_bow,
    cm_log_reg_tfidf, cm_rf_tfidf, cm_svm_tfidf
]

# Plotting each heatmap
for ax, matrix, title in zip(axes.flat, conf_matrices, titles):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=['Happy', 'Neutral', 'Unhappy'],
        yticklabels=['Happy', 'Neutral', 'Unhappy'], ax=ax
    )
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")

plt.tight_layout()
plt.show()

from sklearn.metrics.pairwise import cosine_similarity

# 16

# Step 1: Use the already defined `processed_data` from your script.

# Step 2: Combine text from all columns to build a unified vocabulary
combined_text_all = processed_data.apply(
    lambda row: ' '.join(row.astype(str)), axis=1
)

# Fit the TF-IDF vectorizer on the combined text
tfidf_vectorizer_all = TfidfVectorizer()
tfidf_vectorizer_all.fit(combined_text_all)

# Step 3: Transform each column using the unified vocabulary
tfidf_vectors_all = {
    col: tfidf_vectorizer_all.transform(processed_data[col].astype(str))
    for col in processed_data.columns
}

# Step 4: Compute Pairwise Cosine Similarity for All Labels
label_names = list(processed_data.columns)
# Identity start: self-similarity on the diagonal is fixed at 1.0.
similarity_matrix_all = np.eye(len(label_names))

for i, col1 in enumerate(label_names):
    # Only the upper triangle is computed. The score is the mean over every
    # (row of col1, row of col2) pair, which is the same for (col2, col1),
    # so it is mirrored instead of being recomputed.
    for j in range(i + 1, len(label_names)):
        col2 = label_names[j]
        score = cosine_similarity(
            tfidf_vectors_all[col1], tfidf_vectors_all[col2]
        ).mean()
        similarity_matrix_all[i, j] = score
        similarity_matrix_all[j, i] = score

# Step 5: Visualize the Similarity Matrix for All Labels
plt.figure(figsize=(12, 10))
sns.heatmap(
    similarity_matrix_all,
    xticklabels=processed_data.columns,
    yticklabels=processed_data.columns,
    cmap='coolwarm',
    annot=True,
    fmt=".2f",
    annot_kws={"size": 10},  # Customize annotation font size
    cbar_kws={"shrink": 0.8, "label": "Similarity Score"}  # Color bar customization
)
plt.title("Text Similarity Between All Labels (Cosine Similarity)", fontsize=16)
plt.xlabel("Labels", fontsize=12)
plt.ylabel("Labels", fontsize=12)
plt.xticks(rotation=45, ha='right', fontsize=10)  # Rotate x-axis labels for better readability
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()

# Step 6: Identify the Most and Least Similar Pairs Across All Labels
similarity_df_all = pd.DataFrame(
    similarity_matrix_all,
    index=processed_data.columns,
    columns=processed_data.columns
)

# Melt the matrix for pairwise comparison (one row per ordered label pair)
similarity_melted_all = similarity_df_all.reset_index().melt(
    id_vars='index',
    var_name='Label 2',
    value_name='Similarity'
).rename(columns={'index': 'Label 1'})

# Remove self-similarity (diagonal values)
similarity_melted_all = similarity_melted_all[
    similarity_melted_all['Label 1'] != similarity_melted_all['Label 2']
]

# Sort for most and least similar pairs
most_similar_all = similarity_melted_all.sort_values(
    by='Similarity', ascending=False
).head(1)
least_similar_all = similarity_melted_all.sort_values(
    by='Similarity', ascending=True
).head(1)

# Output results
print("Most Similar Pair:")
print(most_similar_all)

print("\nLeast Similar Pair:")

print(least_similar_all)
# 17
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset, DataLoader
import torch
from torch.optim import AdamW
from tqdm import tqdm
import matplotlib.pyplot as plt

# Step 1: Load and Preprocess Dataset
# Path to the survey CSV; replace with your dataset path.
file_path = '/content/Nri_Textual_Survey_Data.csv'
data = pd.read_csv(file_path)

# Preprocess text
def preprocess_text_dl(text_dl):
    """Clean text for the BERT pipeline.

    Args:
        text_dl: Value to clean; converted with ``str`` first.

    Returns:
        The lowercase text with punctuation removed and every whitespace
        run collapsed to one space, without leading or trailing spaces.
    """
    text_dl = str(text_dl).lower()
    text_dl = re.sub(r'[^\w\s]', '', text_dl)  # Remove punctuation
    return re.sub(r'\s+', ' ', text_dl).strip()  # Remove extra spaces

# Apply preprocessing to text column dynamically
# The first column of the CSV is used as the text column.
text_column = data.columns[0]
data[text_column] = data[text_column].apply(preprocess_text_dl)

# Analyze sentiment dynamically

def analyze_sentiment(text_dl):
    """Classify sentiment using TextBlob polarity, as an integer class id.

    NOTE: this redefines the string-returning ``analyze_sentiment`` declared
    earlier in this script; after this point the name returns integers.

    Args:
        text_dl: Text to score; handed to ``TextBlob`` unchanged.

    Returns:
        0 (happy) for positive polarity, 1 (neutral) for exactly zero,
        2 (unhappy) otherwise. Returns 1 if scoring raises.
    """
    from textblob import TextBlob

    try:
        polarity = TextBlob(text_dl).sentiment.polarity
    except Exception:
        # Best-effort fallback to neutral. `Exception` rather than a bare
        # `except:` so that KeyboardInterrupt and SystemExit still propagate.
        return 1

    if polarity > 0:
        return 0  # Happy
    if polarity == 0:
        return 1  # Neutral
    return 2  # Unhappy

# Label every row with its TextBlob-derived class id.
data['label'] = [analyze_sentiment(entry) for entry in data[text_column]]

# Step 2: Split Data
# 80/20 split with a fixed seed so the partition is reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data[text_column],
    data['label'],
    random_state=42,
    test_size=0.2,
)

# Step 3: Tokenize Using BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
class SentimentDataset(Dataset):
    """Map-style dataset that tokenizes one text per item.

    Args:
        texts: pandas Series of texts (accessed positionally via ``.iloc``).
        labels: pandas Series of integer class ids, same length as ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=..., return_tensors="pt")`` and
            returning a mapping with 'input_ids' and 'attention_mask'.
        max_len: Length every sequence is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized sample at position ``idx``.

        This must be the ``__getitem__`` dunder: DataLoader indexes the
        dataset with ``dataset[idx]``, which a plain method named
        ``getitem`` does not support.

        Returns:
            dict with 'input_ids' and 'attention_mask' (the tokenizer output
            with its leading batch dimension removed) and 'labels' (a scalar
            long tensor).
        """
        # .iloc is positional, so a shuffled index left over from
        # train_test_split cannot cause a lookup miss.
        text = self.texts.iloc[idx]
        label = self.labels.iloc[idx]
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            # return_tensors="pt" adds a batch dimension of 1; drop it so the
            # DataLoader can stack samples into a batch itself.
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(label, dtype=torch.long)
        }

    # Keep the original method name callable for any existing caller.
    getitem = __getitem__

train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
test_dataset = SentimentDataset(test_texts, test_labels, tokenizer)

# Shuffle only the training batches; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

# Step 4: Define BERT Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# num_labels=3 matches the three class ids (0, 1, 2) used as labels.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=3
)
model.to(device)

# Step 5: Train Model
optimizer = AdamW(model.parameters(), lr=5e-5)
epochs = 1

for epoch in range(epochs):
    # Set training mode at the start of every epoch, so adding a per-epoch
    # evaluation pass later cannot leave the model in eval mode.
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch + 1}"):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Passing labels makes the model compute the loss itself.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    # Mean loss per batch for this epoch.
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")

# Step 6: Evaluate Model
model.eval()
all_preds, all_labels = [], []

# no_grad: gradients are not needed for inference.
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row.
        preds = torch.argmax(outputs.logits, dim=1)

        all_preds.extend(preds.cpu().tolist())
        # Labels are only compared on the host, so they never need to be
        # moved to the device and back.
        all_labels.extend(batch['labels'].tolist())

# Calculate Accuracy
accuracy = accuracy_score(all_labels, all_preds)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix
# labels=[0, 1, 2] fixes the matrix to 3x3 in the order of the display labels.
cm = confusion_matrix(all_labels, all_preds, labels=[0, 1, 2])
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm, display_labels=["Happy", "Neutral", "Unhappy"]
)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix - Sentiment Analysis")
plt.show()

Star Alliance Case Solution
0% (1)
Star Alliance Case Solution
6 pages
Cisco IOS Quick Reference Cheat Sheet 2.1
No ratings yet
Cisco IOS Quick Reference Cheat Sheet 2.1
4 pages
Suzlon
No ratings yet
Suzlon
4 pages
Code
No ratings yet
Code
18 pages
Sma Exp 10 Code Print
No ratings yet
Sma Exp 10 Code Print
7 pages
Q 3
No ratings yet
Q 3
2 pages
Topic Classifierby David Caleb
No ratings yet
Topic Classifierby David Caleb
7 pages
AIML IA3 Loki & SG
No ratings yet
AIML IA3 Loki & SG
31 pages
Sentimental Analysis
No ratings yet
Sentimental Analysis
3 pages
NLP Transformer-Based Models Used For Sentiment Analysis: 1. BERT
No ratings yet
NLP Transformer-Based Models Used For Sentiment Analysis: 1. BERT
98 pages
Machine Learning Code Explanation
No ratings yet
Machine Learning Code Explanation
33 pages
Adithiyaa BR 23MBA0018 SMA DA Text Mining PDF
No ratings yet
Adithiyaa BR 23MBA0018 SMA DA Text Mining PDF
6 pages
Sentiment Analysis
No ratings yet
Sentiment Analysis
4 pages
Python CA 4
No ratings yet
Python CA 4
9 pages
Social Media Sentimental Analysis 1
No ratings yet
Social Media Sentimental Analysis 1
30 pages
Bert Sentiment
No ratings yet
Bert Sentiment
7 pages
Ex 2
No ratings yet
Ex 2
5 pages
Cyberbullying Code
No ratings yet
Cyberbullying Code
6 pages
Sma Exp 03 Code Print
No ratings yet
Sma Exp 03 Code Print
5 pages
Part C - Assignment No. 2 Mini-Project On Twitter
No ratings yet
Part C - Assignment No. 2 Mini-Project On Twitter
7 pages
WDM - Week - I
No ratings yet
WDM - Week - I
24 pages
Problem Statement
No ratings yet
Problem Statement
10 pages
DS - Lab Report.
No ratings yet
DS - Lab Report.
25 pages
Sma 3
No ratings yet
Sma 3
3 pages
NLP Transformer-Based Models Used For Sentiment Analysis
No ratings yet
NLP Transformer-Based Models Used For Sentiment Analysis
45 pages
17 - Source Code - nlp-2-5
No ratings yet
17 - Source Code - nlp-2-5
4 pages
Amazon Sentiment Analysis Documentation
No ratings yet
Amazon Sentiment Analysis Documentation
4 pages
Shreya Srivastava-27
No ratings yet
Shreya Srivastava-27
3 pages
Sentimental
No ratings yet
Sentimental
11 pages
Efficient Python Tricks and Tools For Data Scientists - by Khuyen Tran
No ratings yet
Efficient Python Tricks and Tools For Data Scientists - by Khuyen Tran
20 pages
2023 Aug How To Prepare Data For A Neural Network A Step-by-Step Guide
No ratings yet
2023 Aug How To Prepare Data For A Neural Network A Step-by-Step Guide
7 pages
Ke Record 2k21
No ratings yet
Ke Record 2k21
48 pages
LabAssignment 03ai
No ratings yet
LabAssignment 03ai
7 pages
Sentiment Analysis
No ratings yet
Sentiment Analysis
5 pages
Class Xii PDF For Practical
No ratings yet
Class Xii PDF For Practical
24 pages
DSBA+Master+Codebook+ +Text+Mining+&+TSF
No ratings yet
DSBA+Master+Codebook+ +Text+Mining+&+TSF
11 pages
British Airways Forage Report
No ratings yet
British Airways Forage Report
12 pages
Tweet-Sentiment-Extraction - Exploratory Data Analysis
No ratings yet
Tweet-Sentiment-Extraction - Exploratory Data Analysis
11 pages
NLP Sentimental Analysis 1736351356
No ratings yet
NLP Sentimental Analysis 1736351356
32 pages
Twitter Sentiment Analysis Dss
No ratings yet
Twitter Sentiment Analysis Dss
14 pages
AI Lab Report BIM
No ratings yet
AI Lab Report BIM
34 pages
2023 Aug How To Produce Data For A Neural networkORG
No ratings yet
2023 Aug How To Produce Data For A Neural networkORG
6 pages
Emotion Classification With DistilBERT
No ratings yet
Emotion Classification With DistilBERT
25 pages
Manual
No ratings yet
Manual
48 pages
Report On - Social Media Research Topic Modeling
No ratings yet
Report On - Social Media Research Topic Modeling
26 pages
121a1114 D2 Sma Exp3
No ratings yet
121a1114 D2 Sma Exp3
9 pages
Ds File
No ratings yet
Ds File
58 pages
Gokul
No ratings yet
Gokul
10 pages
Aiml 5-8
No ratings yet
Aiml 5-8
19 pages
Experiment 1
No ratings yet
Experiment 1
19 pages
7 Aiml
No ratings yet
7 Aiml
4 pages
Dataset Description: Amazon Reviews of Unlocked Phone
No ratings yet
Dataset Description: Amazon Reviews of Unlocked Phone
4 pages
Sentiment Analysis With NLP Deep Learning
No ratings yet
Sentiment Analysis With NLP Deep Learning
8 pages
ML Week10.1
No ratings yet
ML Week10.1
5 pages
Sma 5
No ratings yet
Sma 5
3 pages
R002 KrishAhuja BDA Lab9.Ipynb - Colab
No ratings yet
R002 KrishAhuja BDA Lab9.Ipynb - Colab
3 pages
Kindle Review Sentiment Analysis - Ipynb - Colab
No ratings yet
Kindle Review Sentiment Analysis - Ipynb - Colab
5 pages
Sentiment Analysis Project Documentation
No ratings yet
Sentiment Analysis Project Documentation
2 pages
Code Shabab Error 7
No ratings yet
Code Shabab Error 7
5 pages
Black and White Blank Note Document
No ratings yet
Black and White Blank Note Document
57 pages
Experiment 7 ML
No ratings yet
Experiment 7 ML
3 pages
Personalized Cancer Diagnosis
No ratings yet
Personalized Cancer Diagnosis
100 pages
CMMS OptiMaint - User Guide
No ratings yet
CMMS OptiMaint - User Guide
63 pages
Certification
No ratings yet
Certification
2 pages
Every Font Awesome 4.0.3 Icon, CSS Class, & Unicode
No ratings yet
Every Font Awesome 4.0.3 Icon, CSS Class, & Unicode
5 pages
RHD'L: Instruction-Level Parallel Processing: History, Overview and Perspective
No ratings yet
RHD'L: Instruction-Level Parallel Processing: History, Overview and Perspective
57 pages
Grade 7 Technology Memo June 2021
No ratings yet
Grade 7 Technology Memo June 2021
4 pages
Instrumentation and Measurement
No ratings yet
Instrumentation and Measurement
17 pages
HE Commercial Washer Brochure PDF
No ratings yet
HE Commercial Washer Brochure PDF
4 pages
Flight International July 2023
100% (1)
Flight International July 2023
84 pages
Wifiid
No ratings yet
Wifiid
83 pages
WorldView 2 PDF Download
No ratings yet
WorldView 2 PDF Download
19 pages
Sva Mfa Thesis Show 2015
100% (2)
Sva Mfa Thesis Show 2015
4 pages
Texto de Manual CLARK C500 Y350
33% (3)
Texto de Manual CLARK C500 Y350
51 pages
Dia 6545 001
No ratings yet
Dia 6545 001
14 pages
Trimoterm Facade System - Vertical - FTV HL Invisio PDF en
No ratings yet
Trimoterm Facade System - Vertical - FTV HL Invisio PDF en
88 pages
IYONI II - Gas Monitor and Cap Lamp Combination
No ratings yet
IYONI II - Gas Monitor and Cap Lamp Combination
2 pages
RM Eu Biomodules Brochure Eguide
No ratings yet
RM Eu Biomodules Brochure Eguide
9 pages
lDOS Notes
No ratings yet
lDOS Notes
8 pages
Experiment No-1: Title: Write A Program To Implement Basic Arithmetic Operations Using Functions. Objectives
No ratings yet
Experiment No-1: Title: Write A Program To Implement Basic Arithmetic Operations Using Functions. Objectives
4 pages
Principle of Electrolysis of Copper Sulfate Electrolyte
No ratings yet
Principle of Electrolysis of Copper Sulfate Electrolyte
5 pages
General InstallationE Instructions For Peikko Transport Anchors
No ratings yet
General InstallationE Instructions For Peikko Transport Anchors
10 pages
Ultrasonic Sensors: Ultimate Ultrasonic Sensor Solution From Sick
No ratings yet
Ultrasonic Sensors: Ultimate Ultrasonic Sensor Solution From Sick
44 pages
Text To Image Conversion
No ratings yet
Text To Image Conversion
5 pages
3rd One Week International Workshop - Brochure
No ratings yet
3rd One Week International Workshop - Brochure
4 pages
Valves Valve Leakage
100% (1)
Valves Valve Leakage
5 pages
2008 9c 8537 PDF
No ratings yet
2008 9c 8537 PDF
8 pages
10w-70w Q-Switch Pulsed Fiber Laser-Leaflet
No ratings yet
10w-70w Q-Switch Pulsed Fiber Laser-Leaflet
2 pages
Sheet Pile Brochure 2022
100% (1)
Sheet Pile Brochure 2022
8 pages

Code

Uploaded by

Code

Uploaded by

import pandas as pd

from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

# Update preprocessing function to include stop word removal

# Remove stop words

# Apply updated preprocessing

# Download the file

# Add sentiment columns for each facility

# Overall sentiment counts

# Assuming 'processed_data' contains the preprocessed text data

# Create BoW DataFrame

# Create TF-IDF DataFrame

# Merge BoW and TF-IDF for comparison

# Sort by Bag-of-Words Frequency for consistency

# Plot the comparison graph

# Bar width for side-by-side bars

# Add labels and title

# Target variable based on the dynamic column name

# Split for BoW features

# Split for TF-IDF features

from sklearn.linear_model import LogisticRegression

# Train models on BoW features

# Dynamically calculate model accuracies

# Plotting model accuracy comparison

# Add labels and title

# Dynamically calculate metrics for a specific model (e.g., Logistic

# Plot grouped bar chart

# Add labels and title

# Predictions for BoW features

# Predictions for TF-IDF features

cm_log_reg_tfidf = confusion_matrix(y_test, y_pred_log_reg_tfidf)

# Plotting all confusion matrices

# Titles for the matrices

# All confusion matrices

# Plotting each heatmap

from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Use the already defined `processed_data` from your script.

# Step 2: Combine text from all columns to build a unified vocabulary

# Fit the TF-IDF vectorizer on the combined text

# Step 3: Transform each column using the unified vocabulary

# Step 4: Compute Pairwise Cosine Similarity for All Labels

for i, col1 in enumerate(processed_data.columns):

# Step 5: Visualize the Similarity Matrix for All Labels

# Melt the matrix for pairwise comparison

# Remove self-similarity (diagonal values)

# Sort for most and least similar pairs

print("\nLeast Similar Pair:")

# Step 1: Load and Preprocess Dataset

# Apply preprocessing to text column dynamically

# Analyze sentiment dynamically

# Step 2: Split Data

# Step 3: Tokenize Using BERT

def __getitem__(self, idx):

train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Step 4: Define BERT Model

# Step 5: Train Model

for epoch in range(epochs):

outputs = model(input_ids, attention_mask=attention_mask,

# Step 6: Evaluate Model

outputs = model(input_ids, attention_mask=attention_mask)

You might also like

def getitem(self, idx):