0% found this document useful (0 votes)
2 views

code

The document outlines a Python script for analyzing restaurant reviews using sentiment analysis techniques, specifically VADER and RoBERTa. It includes data loading, preprocessing, and visualization steps, as well as functions for recommending restaurants based on user input. The script also incorporates interactive widgets for user engagement in a Jupyter Notebook environment.

Uploaded by

213j1a05f1
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
2 views

code

The document outlines a Python script for analyzing restaurant reviews using sentiment analysis techniques, specifically VADER and RoBERTa. It includes data loading, preprocessing, and visualization steps, as well as functions for recommending restaurants based on user input. The script also incorporates interactive widgets for user engagement in a Jupyter Notebook environment.

Uploaded by

213j1a05f1
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 18

# This Python 3 environment comes with many helpful analytics libraries installed

# It is defined by the kaggle/python Docker image:
# https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra


import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory


# For example, running this (by clicking run or pressing Shift+Enter) will list
# all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets
# preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved
# outside of the current session

#importing all libraries


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('ggplot')
import nltk

# Install ftfy
!pip install ftfy

# Import libraries
import pandas as pd
import ftfy

# Load the dataset
df = pd.read_csv('/kaggle/input/editaa/the editable - Copy.csv')
print(df.shape)

# Reduce the size for smooth programming and testing.
# .copy() makes the 500-row frame independent of the full one, so the column
# assignments that follow do not trigger SettingWithCopyWarning.
df = df.head(500).copy()
print(df.shape)

# Fix encoding issues in the 'Review' column.
# Missing reviews are replaced with '' BEFORE the string conversion: astype(str)
# alone turns NaN into the literal text 'nan', which would then be scored as if
# it were a real review and could no longer be caught by a later fillna.
df['Review'] = df['Review'].fillna('').astype(str).apply(ftfy.fix_text)

# Quick EDA
df['Rating']

df['Rating'].value_counts()

a = df['Restaurant'].value_counts()
a
df.head()

# Convert the rating column to numeric. errors='coerce' turns invalid values
# into NaN, and those are then filled with 3 because it represents neutral.
df["Rating"] = pd.to_numeric(df["Rating"], errors='coerce').fillna(3)

# Bar plot: number of reviews for each rating value, in ascending rating order.
ax = (
    df['Rating']
    .value_counts()
    .sort_index()
    .plot(kind='bar', title='count of ratings', figsize=(10, 5))
)
ax.set_xlabel('Ratings stars')
plt.show()  # must be called; a bare `plt.show` only references the function

# Two sample reviews, selected by row label, used for the experiments below.
example1 = df.loc[89, 'Review']
print(example1)
print(" ")
example2 = df.loc[21, 'Review']
print(example2)

# Split the first sample into word tokens.
tokens = nltk.word_tokenize(example1)
tokens[:15]

# Part-of-speech tag for every token, e.g. "experience" -> NN (singular noun).
tagged = nltk.pos_tag(tokens)
tagged[:10]

# Chunk the tagged tokens into named entities so they are more manageable.
entities = nltk.chunk.ne_chunk(tagged)
entities.pprint()

# VADER approach for the dataset

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='nltk.twitter')

from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

sia = SentimentIntensityAnalyzer()

# Sanity checks on an obviously positive sentence, an obviously negative one,
# and one real review.
sia.polarity_scores('I am so happy!')

sia.polarity_scores('this is the worst thing ever')

sia.polarity_scores(example1)

# Run the polarity score on the entire dataset.
# `res` maps each review id to the dict returned by sia.polarity_scores.
res = {}
df['Review'] = df['Review'].fillna('')  # score missing reviews as empty text
for i, row in tqdm(df.iterrows(), total=len(df)):
    review = row['Review']
    myid = row['id']
    res[myid] = sia.polarity_scores(review)

# Transpose so that each review id is a row and each score is a column,
# then attach the review metadata from df.
vaders = pd.DataFrame(res).T
vaders = vaders.reset_index().rename(columns={'index': 'id'})
vaders = vaders.merge(df, how='left')

vaders

# Sentiment score and metadata
vaders.head()

# Compound VADER score per rating value.
ax = sns.barplot(data=vaders, x='Rating', y='compound')
ax.set_title('compound score by customer reviews')
plt.show()

sns.barplot(data=vaders, x='Rating', y='pos')

# Positive / neutral / negative VADER scores per rating value, side by side.
fig, axs = plt.subplots(1, 3, figsize=(20, 3))
sns.barplot(data=vaders, x='Rating', y='pos', ax=axs[0])
sns.barplot(data=vaders, x='Rating', y='neu', ax=axs[1])
sns.barplot(data=vaders, x='Rating', y='neg', ax=axs[2])
axs[0].set_title('Positive')
axs[1].set_title('Neutral')
axs[2].set_title('Negative')
plt.tight_layout()
plt.show()

# VADER just looked at each word in the sentence of the review and, as the
# words have pre-assigned scores, it added all the scores and assigned a
# sentiment score for that review.
# The bar plot shows promising results.

# RoBERTa

# Hugging Face library | tokenize the words
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
# The function is exported in lower case; `SoftMax` does not exist in
# scipy.special and fails at import time. The code below calls `softmax`.
from scipy.special import softmax

import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

# Pretrained checkpoint used for both the tokenizer and the classifier.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# RoBERTa sentiment analysis on the first sample review
encoded_text = tokenizer(example1, return_tensors='pt')
output = model(**encoded_text)
# Model outputs for the single input, passed through softmax.
scores = softmax(output[0][0].detach().numpy())
rneg, rneu, rpos = scores[0], scores[1], scores[2]
# The model does not report a compound score, so it is calculated manually.
compound_score = (rneg * -1) + (rneu * 0) + (rpos * 1)
scores_dict = dict(
    roberta_neg=scores[0],
    roberta_neu=scores[1],
    roberta_pos=scores[2],
    roberta_compound=compound_score,
)
print(example1)
print(scores_dict)

import matplotlib.pyplot as plt

# Bar chart of the three class scores plus the compound score
labels = ['Negative', 'Neutral', 'Positive', 'Compound']
values = [scores[0], scores[1], scores[2], compound_score]
colors = ['red', 'gray', 'green', 'blue']

plt.bar(labels, values, color=colors)
plt.title('Sentiment Scores with Compound Score')
plt.xlabel('Sentiment')
plt.ylabel('Score')
plt.ylim(-1, 1)  # the compound score can be negative, so include y < 0
plt.show()

# Note: RoBERTa understands the context better than VADER.

# VADER RESULTS ON EXAMPLE 1 where it outperforms roberta ----------------------
print(example1)
# Get sentiment scores
sia_scores = sia.polarity_scores(example1)

# Display the results
print("Sentiment Analysis Results:", sia_scores)

# Extract labels and values for the bar plot
labels = ['Negative', 'Neutral', 'Positive', 'Compound']
values = [sia_scores['neg'], sia_scores['neu'], sia_scores['pos'],
          sia_scores['compound']]

# Create the bar plot
plt.bar(labels, values, color=['red', 'gray', 'green', 'blue'])

# Add titles and labels
plt.title('VADER Sentiment Analysis Results')
plt.xlabel('Sentiment')
plt.ylabel('Scores')
# Baseline at 0 for clarity
plt.axhline(0, color='black', linestyle='--', linewidth=0.7)

# Adjust the y-axis limits for better visualization
plt.ylim(-1, 1)

# Display the plot
plt.show()

# VADER RESULTS ON EXAMPLE 2 where it outperforms roberta ----------------------
print(example2)
# Get sentiment scores
sia_scores = sia.polarity_scores(example2)

# Display the results
print("Sentiment Analysis Results:", sia_scores)

# Extract labels and values for the bar plot
labels = ['Negative', 'Neutral', 'Positive', 'Compound']
values = [sia_scores['neg'], sia_scores['neu'], sia_scores['pos'],
          sia_scores['compound']]

# Create the bar plot
plt.bar(labels, values, color=['red', 'gray', 'green', 'blue'])

# Add titles and labels
plt.title('VADER Sentiment Analysis Results')
plt.xlabel('Sentiment')
plt.ylabel('Scores')
# Baseline at 0 for clarity
plt.axhline(0, color='black', linestyle='--', linewidth=0.7)

# Adjust the y-axis limits for better visualization
plt.ylim(-1, 1)

# Display the plot
plt.show()

# RoBERTa sentiment analysis on the second sample review
encoded_text = tokenizer(example2, return_tensors='pt')
output = model(**encoded_text)
# Model outputs for the single input, passed through softmax.
scores = softmax(output[0][0].detach().numpy())
rneg, rneu, rpos = scores[0], scores[1], scores[2]
# The model does not report a compound score, so it is calculated manually.
compound_score = (rneg * -1) + (rneu * 0) + (rpos * 1)
scores_dict = dict(
    roberta_neg=scores[0],
    roberta_neu=scores[1],
    roberta_pos=scores[2],
    roberta_compound=compound_score,
)
print(example2)
print(scores_dict)

import matplotlib.pyplot as plt

# Bar chart of the three class scores plus the compound score
labels = ['Negative', 'Neutral', 'Positive', 'Compound']
values = [scores[0], scores[1], scores[2], compound_score]
colors = ['red', 'gray', 'green', 'blue']

plt.bar(labels, values, color=colors)
plt.title('Sentiment Scores with Compound Score')
plt.xlabel('Sentiment')
plt.ylabel('Score')
plt.ylim(-1, 1)  # the compound score can be negative, so include y < 0
plt.show()

# Note: RoBERTa understands the context better than VADER.

def polarity_scores_roberta(example):
    """Score one piece of text with the module-level RoBERTa model.

    Args:
        example: Text to score.

    Returns:
        dict with keys 'roberta_neg', 'roberta_neu' and 'roberta_pos' (the
        softmax of the model's first three outputs) and 'roberta_compound'
        (positive score minus negative score).
    """
    # Truncate over-long reviews instead of letting the model fail on them.
    # NOTE(review): 512 is assumed to be this checkpoint's input limit; the
    # caller's `except RuntimeError` suggests long reviews used to overflow it.
    encoded_text = tokenizer(
        example, return_tensors='pt', truncation=True, max_length=512
    )
    output = model(**encoded_text)
    scores = softmax(output[0][0].detach().numpy())
    rneg, rneu, rpos = scores[0], scores[1], scores[2]
    # The compound score is calculated manually; the model does not return one.
    compound_score = (rneg * -1) + (rneu * 0) + (rpos * 1)
    return {
        'roberta_neg': rneg,
        'roberta_neu': rneu,
        'roberta_pos': rpos,
        'roberta_compound': compound_score,
    }

# Score every review with both models; `res` maps review id -> merged scores.
res = {}
for i, row in tqdm(df.iterrows(), total=len(df)):
    # Read the row outside the try block so `myid` is always bound to the
    # current row when the error message below is printed.
    review = row['Review']
    myid = row['id']
    try:
        vader_result = sia.polarity_scores(review)
        # Prefix the VADER keys so they cannot collide with the RoBERTa keys.
        vader_result_rename = {
            f"vader_{key}": value for key, value in vader_result.items()
        }
        roberta_result = polarity_scores_roberta(review)
        both = {**vader_result_rename, **roberta_result}
        res[myid] = both
    except RuntimeError:
        # Best effort: report the review that could not be scored and move on.
        print(f'Broke for id{myid}')

# One row per review id, one column per score, joined with the review metadata.
results_df = pd.DataFrame(res).T
results_df = results_df.reset_index().rename(columns={'index': 'id'})
results_df = results_df.merge(df, how='left')

results_df.head()

# Comparison between models
results_df.columns

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Replace infinities with NaN before plotting.
results_df.replace([np.inf, -np.inf], np.nan, inplace=True)
# NOTE: custom_palette is defined here but the plot below uses 'tab10'.
custom_palette = {
    1: '#E63946',  # Red
    2: '#F4A261',  # Orange
    3: '#E9C46A',  # Yellow
    4: '#2A9D8F',  # Light Green
    5: '#1D3557'   # Dark Blue
}
# Pairwise scatter plots of the VADER and RoBERTa scores, colored by rating.
sns.pairplot(
    data=results_df,
    vars=['vader_neg', 'vader_neu', 'vader_pos',
          'roberta_neg', 'roberta_neu', 'roberta_pos'],
    hue='Rating',
    palette='tab10'
)
plt.show()

# Review examples: the 1-star review each model scores as most positive
results_df.query('Rating==1').sort_values('vader_pos', ascending=False)['Review'].values[0]

results_df.query('Rating==1').sort_values('roberta_pos', ascending=False)['Review'].values[0]

# Negative sentiment 5 star review: the 5-star review each model scores as
# most negative
results_df.query('Rating==5').sort_values('roberta_neg', ascending=False)['Review'].values[0]

results_df.query('Rating==5').sort_values('vader_neg', ascending=False)['Review'].values[0]

results_df

# head must be called; print(df.head) only prints the bound method.
print(df.head())

# Remove leading/trailing whitespace from every column name of results_df


results_df.columns = results_df.columns.str.strip()

# Print the cleaned column names


print(results_df.columns)

# Write the combined scores and review data to food.csv (without the index)
food=results_df
food.to_csv("food.csv", index=False)

import re
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load the dataset (ensure `results_df` is loaded correctly)


# Example:
# results_df = pd.read_csv('/kaggle/input/your-dataset.csv')

# Dynamically extract food menu columns (starting from the 19th column).
# Names are converted to lowercase for consistency.
menu_items1 = [item.lower() for item in results_df.columns[18:]]

# Display the available menu
print("\n📌 **Available Food Menu:**")
for food1 in menu_items1:
    print(f" {food1}")
# Character class of the code-point ranges treated as emojis.
# Compiled once here instead of on every call.
# NOTE(review): the last range (U+24C2..U+1F251) is very wide and also covers
# non-emoji characters in that span (CJK text, for example) — confirm that this
# is acceptable for the review data.
_EMOJI_PATTERN1 = re.compile(
    "["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F680-\U0001F6FF"
    "\U0001F700-\U0001F77F"
    "\U0001F780-\U0001F7FF"
    "\U0001F800-\U0001F8FF"
    "\U0001F900-\U0001F9FF"
    "\U0001FA00-\U0001FA6F"
    "\U0001FA70-\U0001FAFF"
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "]"
)


def contains_emoji1(text1):
    """Return True if *text1* contains a character from the emoji ranges.

    Args:
        text1: String to inspect.

    Returns:
        bool: True when at least one character matches, False otherwise
        (including for the empty string).
    """
    return bool(_EMOJI_PATTERN1.search(text1))

# Function to calculate the average score
def calculate_average_score1(row1):
    """Pick the sentiment score to use for one review row.

    Reviews shorter than 50 characters, or containing emojis, use the VADER
    compound score; every other review uses the RoBERTa compound score.

    Args:
        row1: Mapping (e.g. a DataFrame row) with the keys 'Review',
            'vader_compound' and 'roberta_compound'.

    Returns:
        The selected compound score.
    """
    review1 = row1['Review']
    if not isinstance(review1, str):
        # A missing review (NaN / None) has no length; treat it as empty text,
        # which takes the short-review branch.
        review1 = ''
    if len(review1) < 50 or contains_emoji1(review1):
        return row1['vader_compound']
    return row1['roberta_compound']

# Recommendation function
def recommend_restaurant1(dataframe1, food_name1):
    """Recommend the best-scoring restaurant that offers a food item.

    Args:
        dataframe1: DataFrame with one column per menu item (value 'yes' when
            the item is offered) plus the 'Review', 'Restaurant', 'location',
            'vader_compound' and 'roberta_compound' columns.
        food_name1: Menu item to look for; matched case-insensitively.

    Returns:
        str: A formatted recommendation, or a "Sorry, ..." message when the
        item is unknown or no row offers it.
    """
    food_name1 = food_name1.lower()
    print("Checking for:", food_name1)  # Debugging
    if food_name1 not in menu_items1:
        print(f"'{food_name1}' not found in menu items.")
        return f"Sorry, '{food_name1}' is not available in the dataset."

    # menu_items1 holds lower-cased names while the dataframe keeps the
    # original column spelling, so indexing with the lower-cased name raises
    # KeyError for mixed-case columns. Resolve the real column name first.
    if food_name1 in dataframe1.columns:
        food_column1 = food_name1
    else:
        food_column1 = next(
            (col for col in dataframe1.columns if str(col).lower() == food_name1),
            None,
        )
    if food_column1 is None:
        return f"Sorry, '{food_name1}' is not available in the dataset."

    filtered_rows1 = dataframe1[dataframe1[food_column1] == 'yes']
    print("Filtered rows found:", len(filtered_rows1))

    if filtered_rows1.empty:
        return f"Sorry, no restaurants found offering '{food_name1}'."

    # Work on a copy so adding the score column does not touch dataframe1.
    filtered_rows1 = filtered_rows1.copy()
    filtered_rows1['average_score1'] = filtered_rows1.apply(
        calculate_average_score1, axis=1
    )

    # Row with the highest score among the restaurants offering the item.
    max_average_row1 = filtered_rows1.loc[filtered_rows1['average_score1'].idxmax()]

    return (f"Recommended Restaurant for '{food_name1}':\n"
            f"Restaurant: {max_average_row1['Restaurant']}\n"
            f"Location: {max_average_row1['location']}\n"
            f"Average Score: {max_average_row1['average_score1']:.6f}")

# Create widgets
food_name_input1 = widgets.Combobox(
    placeholder='Type or select food...',
    # Capitalized for UI friendliness
    options=[item.capitalize() for item in menu_items1],
    description='Food:',
    ensure_option=True
)

# Area that receives the recommendation text, plus the two action buttons.
output1 = widgets.Output()
find_button1 = widgets.Button(description="Find Restaurant",
                              button_style="success")
reset_button1 = widgets.Button(description="Reset", button_style="warning")

# Button event handlers
def on_find_button_click1(b1):
    """Handle a click on "Find Restaurant": show a recommendation in output1.

    Args:
        b1: The clicked button (unused).
    """
    print("Button clicked!")  # Check if this prints in output
    food_name1 = food_name_input1.value.strip().lower()
    output1.clear_output()
    with output1:
        if not food_name1:
            print("Please type or select a food item.")
        elif food_name1 in menu_items1:
            result1 = recommend_restaurant1(results_df, food_name1)
            print(result1)
        else:
            print(f"'{food_name1}' is not on the menu. Please select a valid item.")

    # Fix for the input getting stuck: clear it so the next search starts empty.
    food_name_input1.value = ""

def on_reset_button_click1(b1):
    """Handle a click on "Reset": clear the food input and the output area.

    Args:
        b1: The clicked button (unused).
    """
    food_name_input1.value = ""
    output1.clear_output()


# Attach event handlers
find_button1.on_click(on_find_button_click1)
reset_button1.on_click(on_reset_button_click1)

# Display UI
display(food_name_input1, widgets.HBox([find_button1, reset_button1]), output1)

# NOTE(review): the path literal was split across lines in the source; a single
# space between "tourism" and "dataset.csv" is assumed — confirm the file name.
dff2 = pd.read_csv('/kaggle/input/hyderabad-tourisreview/hyderabad tourism dataset.csv')
print(dff2.shape)
# Fix encoding issues in 'Raw Review'. Missing reviews are replaced with ''
# BEFORE the string conversion: astype(str) alone turns NaN into the literal
# text 'nan', which would then be scored as a real review.
dff2['Raw Review'] = dff2['Raw Review'].fillna('').astype(str).apply(ftfy.fix_text)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import warnings
import ipywidgets as widgets
from IPython.display import display

plt.style.use('ggplot')
warnings.filterwarnings("ignore", category=UserWarning, module='nltk.twitter')
warnings.filterwarnings("ignore", category=FutureWarning)

dff2.info()

# Fill missing ratings with neutral value (3); errors='coerce' first turns
# non-numeric ratings into NaN so they are filled as well.
dff2["Ratings of Place"] = pd.to_numeric(dff2["Ratings of Place"],
                                         errors='coerce').fillna(3)
# Keep the first 1000 rows. .copy() makes the frame independent of the full
# one, so later column assignments do not trigger SettingWithCopyWarning.
dff2 = dff2.head(1000).copy()
print(dff2.shape)

dff2.columns

dff2['Ratings of Place']
dff2['Ratings of Place'].value_counts()

dff2['Place of Tourism'].value_counts()

# Plot ratings distribution (ratings are cast to int for the x axis)
plt.figure(figsize=(10, 5))
sns.countplot(x=dff2['Ratings of Place'].astype(int), palette='viridis')
plt.title("Ratings Distribution")
plt.xlabel("Ratings")
plt.ylabel("Count")
plt.show()

# Sentiment analysis initialization for the tourism dataset:
# fetch the VADER lexicon, create the VADER analyzer, then load the tokenizer
# and classifier for the checkpoint named in MODEL2.


nltk.download('vader_lexicon')
sia2 = SentimentIntensityAnalyzer()
MODEL2 = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer2 = AutoTokenizer.from_pretrained(MODEL2)
model2 = AutoModelForSequenceClassification.from_pretrained(MODEL2)

# Function to calculate sentiment scores
def polarity_scores_roberta2(text):
    """Score one piece of text with the module-level RoBERTa model (model2).

    Args:
        text: Text to score.

    Returns:
        dict with keys 'roberta_neg', 'roberta_neu' and 'roberta_pos' (the
        softmax of the model's first three outputs) and 'roberta_compound'
        (positive score minus negative score).
    """
    # Truncate over-long reviews instead of letting the model fail on them.
    # NOTE(review): 512 is assumed to be this checkpoint's input limit —
    # confirm against the model configuration.
    encoded_text2 = tokenizer2(
        text, return_tensors='pt', truncation=True, max_length=512
    )
    output2 = model2(**encoded_text2)
    scores2 = softmax(output2[0][0].detach().numpy())
    compound_score2 = (-scores2[0]) + (scores2[2])
    return {
        'roberta_neg': scores2[0],
        'roberta_neu': scores2[1],
        'roberta_pos': scores2[2],
        'roberta_compound': compound_score2,
    }

# Character class of the code-point ranges treated as emojis.
# Compiled once here instead of on every call.
# NOTE(review): the last range (U+24C2..U+1F251) is very wide and also covers
# non-emoji characters in that span (CJK text, for example) — confirm that this
# is acceptable for the review data.
_EMOJI_PATTERN2 = re.compile(
    "["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F680-\U0001F6FF"
    "\U0001F700-\U0001F77F"
    "\U0001F780-\U0001F7FF"
    "\U0001F800-\U0001F8FF"
    "\U0001F900-\U0001F9FF"
    "\U0001FA00-\U0001FA6F"
    "\U0001FA70-\U0001FAFF"
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "]"
)


def contains_emoji2(text2):
    """Return True if *text2* contains a character from the emoji ranges.

    Args:
        text2: String to inspect.

    Returns:
        bool: True when at least one character matches, False otherwise
        (including for the empty string).
    """
    return bool(_EMOJI_PATTERN2.search(text2))

# Function to calculate the average score (based on review length or emojis)
def calculate_average_score2(row2):
    """Pick the sentiment score to use for one tourism review row.

    Reviews shorter than 50 characters, or containing emojis, use the VADER
    compound score; every other review uses the RoBERTa compound score.

    Args:
        row2: Mapping (e.g. a DataFrame row) with the keys 'Raw Review',
            'vader_compound' and 'roberta_compound'.

    Returns:
        The selected compound score.
    """
    review2 = row2['Raw Review']
    if not isinstance(review2, str):
        # A missing review (NaN / None) has no length; treat it as empty text,
        # which takes the short-review branch.
        review2 = ''
    # Use VADER sentiment if the review is short or has emojis.
    if len(review2) < 50 or contains_emoji2(review2):
        return row2['vader_compound']
    # Use RoBERTa sentiment otherwise.
    return row2['roberta_compound']

# Sentiment analysis on dataset: `res2` maps row index -> merged scores.
res2 = {}
dff2['Raw Review'] = dff2['Raw Review'].fillna('')  # Fill NaNs with empty strings
for i2, row2 in tqdm(dff2.iterrows(), total=len(dff2)):
    review2 = row2['Raw Review']
    place_id2 = i2  # Unique index
    try:
        vader_result2 = sia2.polarity_scores(review2)
        # Prefix the VADER keys so they cannot collide with the RoBERTa keys.
        vader_result2 = {f"vader_{k}": v for k, v in vader_result2.items()}
        roberta_result2 = polarity_scores_roberta2(review2)
        res2[place_id2] = {**vader_result2, **roberta_result2}
    except RuntimeError:
        # Same best-effort handling as the restaurant loop: report the row
        # that could not be scored and continue with the rest.
        print(f'Broke for index {place_id2}')

# Merge results with dataset (scores keyed by row index, joined on dff2's index)
sentiment_df2 = pd.DataFrame(res2).T
sentiment_df2 = sentiment_df2.reset_index().rename(columns={'index': 'Place Index'})
sentiment_df2 = sentiment_df2.merge(dff2, left_on='Place Index', right_index=True)
# Strip spaces from column names
sentiment_df2.columns = sentiment_df2.columns.str.strip()
print(sentiment_df2)

sentiment_df2.columns

# Write the scored tourism data to place.csv (without the index)
place = sentiment_df2
place.to_csv("place.csv", index=False)

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Infinities are swapped for NaN before plotting.
sentiment_df2.replace([np.inf, -np.inf], np.nan, inplace=True)
# Rating -> color table (the plot below is drawn with 'tab10').
custom_palette = {
    1: '#E63946',  # Red
    2: '#F4A261',  # Orange
    3: '#E9C46A',  # Yellow
    4: '#2A9D8F',  # Light Green
    5: '#1D3557',  # Dark Blue
}

# Pairwise scatter plots of the VADER and RoBERTa scores, colored by rating.
sns.pairplot(
    data=sentiment_df2,
    vars=[
        'vader_neg', 'vader_neu', 'vader_pos',
        'roberta_neg', 'roberta_neu', 'roberta_pos',
    ],
    hue='Ratings of Place',
    palette='tab10',
)
plt.show()

# Function to handle recommendations
def recommend_places2(sentiment_df2, place_type2):
    """Return up to ten places of a given type, ordered by sentiment score.

    Args:
        sentiment_df2: DataFrame with the columns 'Type of Place',
            'Place of Tourism', 'Tourism City', 'Tourism State', 'Raw Review',
            'vader_compound' and 'roberta_compound'.
        place_type2: Text to look for in 'Type of Place' (case-insensitive
            substring match).

    Returns:
        A DataFrame with the columns 'Place of Tourism', 'Tourism City',
        'Tourism State' and 'Sentiment score' (at most 10 rows), or a str
        message when no row matches.
    """
    # regex=False matches the typed text literally, so place types containing
    # characters such as '(' or '+' neither raise nor silently miss.
    type_matches2 = sentiment_df2['Type of Place'].str.contains(
        place_type2, case=False, na=False, regex=False
    )
    # Copy the filtered rows to avoid SettingWithCopyWarning below.
    filtered_places2 = sentiment_df2[type_matches2].copy()

    if filtered_places2.empty:
        return f"No places found for '{place_type2}'."

    # Apply sentiment score calculation using the average score function
    filtered_places2['Sentiment score'] = filtered_places2.apply(
        calculate_average_score2, axis=1
    )

    # Remove duplicates based on columns that define uniqueness of a place.
    # NOTE(review): duplicates are dropped before sorting, so each place is
    # represented by its first matching row, not by its best-scoring review —
    # confirm this is intended.
    filtered_places2 = filtered_places2.drop_duplicates(
        subset=['Place of Tourism', 'Tourism City', 'Tourism State']
    )

    # Sort by 'Sentiment score' in descending order to get the most positive places
    filtered_places_sorted2 = filtered_places2.sort_values(
        by='Sentiment score', ascending=False
    )

    # Return the top 10 most positive places
    return filtered_places_sorted2[
        ['Place of Tourism', 'Tourism City', 'Tourism State', 'Sentiment score']
    ].head(10)

# Widgets for the tourism search: a combobox listing every distinct place type
# in dff2, an output area, and the two action buttons.
place_type_input2 = widgets.Combobox(
    placeholder='Type or select a place type...',
    options=sorted(dff2['Type of Place'].dropna().unique()),
    description='Place:',
    ensure_option=True,
)

output2 = widgets.Output()
find_button2 = widgets.Button(
    description="Find Places",
    button_style="success",
)
reset_button2 = widgets.Button(
    description="Reset",
    button_style="warning",
)

# Button event handlers
def on_find_button_click2(b2):
    """Handle a click on "Find Places": show recommendations in output2.

    Args:
        b2: The clicked button (unused).
    """
    place_type2 = place_type_input2.value.strip()
    output2.clear_output()
    with output2:
        if place_type2:
            recommendations2 = recommend_places2(sentiment_df2, place_type2)
            if isinstance(recommendations2, str):
                # A str result is the "nothing found" message.
                print(recommendations2)
            else:
                # Display only the selected columns, without the index.
                print(recommendations2[['Place of Tourism', 'Tourism City',
                                        'Tourism State', 'Sentiment score']].to_string(index=False))
        else:
            print("Please type or select a place type.")

    # Force UI update by clearing and resetting value
    place_type_input2.value = " "
    place_type_input2.value = ""

def on_reset_button_click2(b2):
    """Handle a click on "Reset": clear the place input and the output area.

    Args:
        b2: The clicked button (unused).
    """
    # Set a different value first, then the empty one, to force a UI update.
    place_type_input2.value = " "
    place_type_input2.value = ""
    output2.clear_output()


find_button2.on_click(on_find_button_click2)
reset_button2.on_click(on_reset_button_click2)

# Display UI
display(place_type_input2, widgets.HBox([find_button2, reset_button2]), output2)

import ipywidgets as widgets


from IPython.display import display, HTML

# Custom styling for better UI appearance.
# Defines the CSS classes .title, .output-box and .button-row and injects the
# <style> element into the notebook output via display(HTML(...)).


custom_css = """
<style>
.title {
font-size: 24px;
font-weight: bold;
color: #4CAF50;
margin-bottom: 10px;
}
.output-box {
border: 2px solid #ddd;
padding: 10px;
border-radius: 10px;
background-color: #f9f9f9;
max-height: 300px;
overflow-y: auto;
}
.button-row {
margin-top: 15px;
}
</style>
"""
display(HTML(custom_css))

# Define the widgets for Restaurant and Tourism
food_name_input = widgets.Combobox(
    placeholder='Type or select food...',
    options=menu_items1,  # menu_items1 should be defined earlier
    description=' Food:',
    ensure_option=True,
    style={'description_width': 'initial'}
)

place_type_input = widgets.Combobox(
    placeholder='Type or select a place type...',
    # dff2 is the tourism dataset
    options=sorted(dff2['Type of Place'].dropna().unique()),
    description='📍 Place:',
    ensure_option=True,
    style={'description_width': 'initial'}
)

# Output areas for both restaurant and tourism
output_restaurant = widgets.Output(layout={'width': '100%'})
output_tourism = widgets.Output(layout={'width': '100%'})

# Buttons to trigger recommendations and reset fields
find_button = widgets.Button(
    description="🔍 Get Recommendations",
    button_style="success",
    layout=widgets.Layout(width='200px')  # Adjust the width as needed
)
reset_food_button = widgets.Button(description="❌ Reset Food",
                                   button_style="warning")
reset_place_button = widgets.Button(description="❌ Reset Place",
                                    button_style="warning")
reset_all_button = widgets.Button(description="⚠️ Reset All",
                                  button_style="danger")

# Function to display recommendations in both outputs
def on_find_button_click(b):
    """Handle "Get Recommendations": fill the restaurant and tourism outputs.

    Args:
        b: The clicked button (unused).
    """
    food_name = food_name_input.value.strip().lower()
    place_type = place_type_input.value.strip().lower()

    with output_restaurant:
        output_restaurant.clear_output()
        if not food_name:
            output_restaurant.append_stdout("⚠️ Please type or select a food item.\n\n")
        elif food_name in menu_items1:
            result = recommend_restaurant1(results_df, food_name)
            output_restaurant.append_stdout(f"✅ Recommended Restaurant:\n{result}\n\n")
        else:
            output_restaurant.append_stdout(f"⚠️ '{food_name}' is not on the menu.\n\n")

    with output_tourism:
        output_tourism.clear_output()
        if place_type:
            recommendations = recommend_places2(sentiment_df2, place_type)
            if isinstance(recommendations, str):
                # recommend_places2 returns a plain message when nothing
                # matches; calling .to_string() on it would raise
                # AttributeError.
                output_tourism.append_stdout(f"⚠️ {recommendations}\n\n")
            else:
                output_tourism.append_stdout(
                    f" Recommended Places:\n{recommendations.to_string(index=False)}\n\n"
                )
        else:
            output_tourism.append_stdout("⚠️ Please type or select a place type.\n\n")

# Reset food function
def on_reset_food_button_click(b):
    """Clear the food input and replace the restaurant output with a notice.

    Args:
        b: The clicked button (unused).
    """
    food_name_input.value = ""
    output_restaurant.clear_output()
    output_restaurant.append_stdout("⚠️ Food input cleared.\n\n")


# Reset place function
def on_reset_place_button_click(b):
    """Clear the place input and replace the tourism output with a notice.

    Args:
        b: The clicked button (unused).
    """
    place_type_input.value = ""
    output_tourism.clear_output()
    output_tourism.append_stdout("⚠️ Place input cleared.\n\n")


# Reset all function
def on_reset_all_button_click(b):
    """Clear both inputs and replace both outputs with a notice.

    Args:
        b: The clicked button (unused).
    """
    food_name_input.value = ""
    place_type_input.value = ""
    output_restaurant.clear_output()
    output_tourism.clear_output()
    output_restaurant.append_stdout("⚠️ All inputs cleared.\n\n")
    output_tourism.append_stdout("⚠️ All inputs cleared.\n\n")


# Bind button events
find_button.on_click(on_find_button_click)
reset_food_button.on_click(on_reset_food_button_click)
reset_place_button.on_click(on_reset_place_button_click)
reset_all_button.on_click(on_reset_all_button_click)

# ---------------------------------------------------------------------------
# Menu banner
# ---------------------------------------------------------------------------
from IPython.display import display, HTML

# All menu items joined into one string so they can be shown on a single row.
menu_string = " | ".join(f" {food1}" for food1 in menu_items1)

# Centered title followed by the menu items.
menu_html = f"""
<div style="text-align: center; font-size: 28px; font-weight: bold; color:
red;">
📌 <b>Available Food Menu:</b>
</div>
<div style="text-align: center; font-size: 24px; color: #007BFF;">
{menu_string}
</div>
"""

# Render the styled menu.
display(HTML(menu_html))

# ---------------------------------------------------------------------------
# Widgets, each group under its own section header
# ---------------------------------------------------------------------------
display(
    HTML('<div class="title"> Restaurant Recommendation</div>'),
    food_name_input,
    output_restaurant,
)

display(
    HTML('<div class="title">📍 Tourism Recommendation</div>'),
    place_type_input,
    output_tourism,
)

# Action buttons, centered in their rows.
display(widgets.HBox([find_button], layout={'justify_content': 'center'}))
display(
    widgets.HBox(
        [reset_food_button, reset_place_button, reset_all_button],
        layout={'justify_content': 'center'},
    )
)

from sklearn.metrics import accuracy_score

# Ensure sentiment_label is defined
def sentiment_label(score):
    """Convert a compound sentiment score into a class label.

    Args:
        score: Numeric compound score.

    Returns:
        'positive' when score >= 0.05, 'negative' when score <= -0.05,
        otherwise 'neutral'.
    """
    if score >= 0.05:
        return 'positive'
    if score <= -0.05:
        return 'negative'
    return 'neutral'

# Hybrid Sentiment Calculation (Weighted)
def hybrid_sentiment(row, text_column):
    """Combine the VADER and RoBERTa compound scores for one review row.

    Args:
        row: Mapping (e.g. a DataFrame row) holding the review text plus the
            keys 'vader_compound' and 'roberta_compound'.
        text_column: Key of the review text inside *row*.

    Returns:
        The VADER compound score for reviews shorter than 50 characters or
        containing emojis; otherwise 0.4 * VADER + 0.6 * RoBERTa.
    """
    text = row[text_column]
    if not isinstance(text, str):
        # A missing review (NaN / None) has no length; treat it as empty text,
        # which takes the short-review branch.
        text = ''
    if len(text) < 50 or contains_emoji1(text):
        return row['vader_compound']  # Use VADER for short/emotional reviews
    return (0.4 * row['vader_compound']) + (0.6 * row['roberta_compound'])

# Map Ratings to True Sentiment
def map_sentiment(df, rating_column):
    """Derive a sentiment label from each numeric rating.

    Args:
        df: DataFrame containing *rating_column*.
        rating_column: Name of the numeric rating column.

    Returns:
        A Series of labels: 'positive' for ratings >= 4, 'negative' for
        ratings <= 2, and 'neutral' for everything else.
    """
    def _label(rating):
        if rating >= 4:
            return 'positive'
        if rating <= 2:
            return 'negative'
        return 'neutral'

    return df[rating_column].apply(_label)

# Function to compute hybrid accuracy for any dataset
def compute_accuracy(df, text_column, rating_column):
    """Compare each model's predicted sentiment with the rating-derived label.

    The columns 'true_sentiment', 'hybrid_score', 'vader_prediction',
    'roberta_prediction' and 'hybrid_prediction' are added to *df* in place.

    Args:
        df: DataFrame with the review text, the rating, and the
            'vader_compound' and 'roberta_compound' columns.
        text_column: Name of the review text column.
        rating_column: Name of the numeric rating column.

    Returns:
        tuple: (vader_acc, roberta_acc, hybrid_acc) as returned by
        accuracy_score for each prediction column.
    """
    # Generate True Sentiment
    df['true_sentiment'] = map_sentiment(df, rating_column)

    # Apply Hybrid Sentiment Calculation
    df['hybrid_score'] = df.apply(
        lambda row: hybrid_sentiment(row, text_column), axis=1
    )

    # Generate Sentiment Predictions
    df['vader_prediction'] = df['vader_compound'].apply(sentiment_label)
    df['roberta_prediction'] = df['roberta_compound'].apply(sentiment_label)
    df['hybrid_prediction'] = df['hybrid_score'].apply(sentiment_label)

    # Calculate Accuracy
    vader_acc = accuracy_score(df['true_sentiment'], df['vader_prediction'])
    roberta_acc = accuracy_score(df['true_sentiment'], df['roberta_prediction'])
    hybrid_acc = accuracy_score(df['true_sentiment'], df['hybrid_prediction'])

    return vader_acc, roberta_acc, hybrid_acc

# Calculate Accuracies for Food and Place
food_vader, food_roberta, food_hybrid = compute_accuracy(
    results_df, 'Review', 'Rating'
)
place_vader, place_roberta, place_hybrid = compute_accuracy(
    sentiment_df2, 'Raw Review', 'Ratings of Place'
)

# Unified Display of Accuracy (each value is formatted as a percentage)
print("""
📊 **Recommendation Accuracy Report**
------------------------------------
**Food Recommendation Accuracy**
- ✅ VADER Accuracy: {:.2%}
- ✅ RoBERTa Accuracy: {:.2%}
- ✅ Hybrid Model Accuracy: {:.2%}

**Place Recommendation Accuracy**


- ✅ VADER Accuracy: {:.2%}
- ✅ RoBERTa Accuracy: {:.2%}
- ✅ Hybrid Model Accuracy: {:.2%}
""".format(food_vader, food_roberta, food_hybrid, place_vader, place_roberta,
           place_hybrid))

!pip install gradio --quiet

import gradio as gr
import pandas as pd
import urllib.parse # For URL encoding

# ✅ Load datasets
food_df = pd.read_csv("food.csv")
place_df = pd.read_csv("place.csv")

# ✅ Extract correct menu items (Food starts from 19th column)
menu_items1 = [col.lower() for col in food_df.columns[18:] if col.strip()]

# Distinct place types in sorted order; empty when the column is absent.
place_types = (
    sorted(place_df["Type of Place"].dropna().unique().tolist())
    if "Type of Place" in place_df.columns
    else []
)

# 📌 Generate Proper Google Search Link
def google_search_link(name, category):
    """Creates a fully encoded Google search link for a restaurant or place.

    Args:
        name: Name shown as the link text and used in the search query.
        category: Extra search term, e.g. 'restaurant' or 'tourist place'.

    Returns:
        str: A Markdown link "[name](url)" whose URL searches Google for
        "<name> <category> Hyderabad".
    """
    query = urllib.parse.quote_plus(f"{name} {category} Hyderabad")
    # Link directly to google.com; the previous URL was routed through an
    # unrelated host and did not open a Google search.
    return f"[{name}](https://www.google.com/search?q={query})"

# 📌 Recommend restaurants based on food
def recommend_restaurant(food_name):
    """List up to five restaurants in food_df that offer a food item.

    Args:
        food_name: Menu item to look for; matched case-insensitively against
            the menu columns of food_df (19th column onward).

    Returns:
        list[str]: Numbered Markdown links, one per restaurant, or
        ["No recommendations found."] when nothing matches.
    """
    food_name = food_name.lower().strip()
    matching_cols = [
        col for col in food_df.columns[18:] if col.lower().strip() == food_name
    ]
    if not matching_cols:
        return ["No recommendations found."]

    # astype(str) first: the .str accessor raises on a column that holds no
    # strings at all (for example one read back from the CSV as all-NaN).
    offers_food = food_df[matching_cols[0]].astype(str).str.lower() == "yes"
    unique_restaurants = food_df.loc[offers_food, "Restaurant"].drop_duplicates().tolist()
    if not unique_restaurants:
        return ["No recommendations found."]

    return [
        f"**{i+1}.** {google_search_link(r, 'restaurant')}"
        for i, r in enumerate(unique_restaurants[:5])
    ]

# 📌 Recommend places based on place type
def recommend_places(place_type):
    """List up to five places in place_df of the given type.

    Args:
        place_type: Place type to look for; compared case-insensitively with
            the 'Type of Place' column.

    Returns:
        list[str]: Numbered Markdown links, one per place, or
        ["No recommendations found."] when the column is missing or nothing
        matches.
    """
    if "Type of Place" not in place_df.columns:
        return ["No recommendations found."]

    filtered = place_df[place_df["Type of Place"].str.lower() == place_type.lower()]
    # dict.fromkeys removes duplicate names while keeping first-seen order.
    unique_places = list(dict.fromkeys(filtered["Place of Tourism"].dropna().tolist()))
    if not unique_places:
        return ["No recommendations found."]

    return [
        f"**{i+1}.** {google_search_link(p, 'tourist place')}"
        for i, p in enumerate(unique_places[:5])
    ]

# ✅ Custom Theme & Styling
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="cyan",
    font=["Poppins", "Arial", "sans-serif"]
)

# Everything that belongs to the app — components, callbacks and event wiring —
# is created inside the Blocks context.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("<h1 style='text-align:center; color:#007bff;'>🌍 Tourism & Restaurant Recommendation System</h1>")

    # The first choice of each dropdown is a placeholder, not a real option.
    with gr.Row():
        food = gr.Dropdown(
            choices=["Please select a food"] + menu_items1,
            label=" Select Food",
            interactive=True,
            value="Please select a food"
        )
        place = gr.Dropdown(
            choices=["Please select a type of tourism"] + place_types,
            label=" Select Place Type",
            interactive=True,
            value="Please select a type of tourism"
        )

    with gr.Row():
        btn = gr.Button("🔍 Get Recommendations", elem_id="recommend_btn")
        reset_btn = gr.Button("🔄 Reset", elem_id="reset_btn")

    result = gr.Markdown("### Results will appear here", elem_id="result_output")

    def recommend(food, place):
        """Build the Markdown result for the selected food and/or place type.

        Args:
            food: Value of the food dropdown.
            place: Value of the place-type dropdown.

        Returns:
            str: Markdown with one section per real selection, or a prompt
            when both dropdowns still show their placeholder.
        """
        response = ""

        if food and food != "Please select a food":
            res1 = recommend_restaurant(food)
            response += "### Recommended Restaurants:\n" + "\n".join(res1) + "\n\n"

        if place and place != "Please select a type of tourism":
            res2 = recommend_places(place)
            response += "### Recommended Places:\n" + "\n".join(res2)

        return response.strip() if response else "Please select a valid option."

    def reset():
        """Return the initial values for both dropdowns and the result area."""
        return "Please select a food", "Please select a type of tourism", "### Results will appear here"

    btn.click(recommend, inputs=[food, place], outputs=[result])
    reset_btn.click(reset, inputs=[], outputs=[food, place, result])

demo.launch(share=True)

You might also like