code
code
import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets
# preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved
# outside of the current session
plt.style.use('ggplot')
import nltk
# Install ftfy
!pip install ftfy
# Import libraries
import pandas as pd
import ftfy
# Print the frame's shape, keep only the first 500 rows, then print the
# shape again.
print(df.shape)
df=df.head(500)
print(df.shape)
# reducing the size for smooth programming and testing
# Quick eda
# NOTE(review): the bare expressions below presumably each ended a notebook
# cell (only a cell's last expression is displayed); cell boundaries are not
# visible in this export.
df['Rating']
df['Rating'].value_counts()
# Number of rows per restaurant.
a=df['Restaurant'].value_counts()
a
df.head()
# Bar plot of how many reviews received each rating, ordered by rating value.
ax = (
    df['Rating']
    .value_counts()
    .sort_index()
    .plot(kind='bar', title='count of ratings', figsize=(10, 5))
)
ax.set_xlabel('Ratings stars')
# The original had `plt.show` without parentheses, a bare attribute reference
# that never calls the function.
plt.show()
# Pull two sample reviews (index labels 89 and 21) and print them with a
# separator line in between.
example1 = df['Review'][89]
print(example1)
print(" ")
example2 = df['Review'][21]
print(example2)

# Split the first sample into word tokens and peek at the first 15.
tokens = nltk.word_tokenize(example1)
tokens[:15]

# Part-of-speech tag every token (e.g. "experience" -> NN, singular noun).
tagged = nltk.pos_tag(tokens)
tagged[:10]

# Group the tagged tokens into named-entity chunks and pretty-print the tree.
entities = nltk.chunk.ne_chunk(tagged)
entities.pprint()
import warnings
# Ignore UserWarning messages attributed to the nltk.twitter module.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module='nltk.twitter',
)

# One VADER analyzer instance, reused for the scoring below.
sia = SentimentIntensityAnalyzer()

# Try it on a fixed sentence and on the first sample review.
sia.polarity_scores('I am so happy!')
sia.polarity_scores(example1)
# Show the raw score table: `res` is transposed so each key becomes a row.
pd.DataFrame(res).T

# Same table with the index exposed as an 'id' column, left-joined onto df.
vaders = (
    pd.DataFrame(res).T
    .reset_index()
    .rename(columns={'index': 'id'})
    .merge(df, how='left')
)
vaders
# One bar plot per VADER score column, all against Rating, side by side.
fig, axs = plt.subplots(1, 3, figsize=(20, 3))
for axis, column, title in zip(
    axs,
    ('pos', 'neu', 'neg'),
    ('Positive', 'Neutral', 'Negative'),
):
    sns.barplot(data=vaders, x='Rating', y=column, ax=axis)
    axis.set_title(title)
plt.tight_layout()
plt.show()
# VADER looks at each word in a review; because every word has a
# pre-assigned score, it adds up those scores to produce a sentiment score for
# that review.
# The bar plots show promising results.
#roberta
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Name of the pretrained checkpoint used for both the tokenizer and the model.
# Plain string literal: the original used an f-string with no placeholders.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# NOTE(review): the two plt.show() calls below have no plotting code before
# them in this section; kept as in the original export.
plt.show()
plt.show()
def polarity_scores_roberta(example):
    """Score one text with the module-level RoBERTa ``tokenizer`` and ``model``.

    The text is tokenized into PyTorch tensors, passed through the model, and
    the first row of the model's first output is softmax-normalised.

    Args:
        example: Text to score.

    Returns:
        dict with ``roberta_neg``, ``roberta_neu`` and ``roberta_pos`` (the
        softmax values at positions 0, 1 and 2) plus ``roberta_compound``,
        which weights those three values by -1, 0 and +1 and sums them.
    """
    inputs = tokenizer(example, return_tensors='pt')
    raw_scores = model(**inputs)[0][0].detach().numpy()
    probs = softmax(raw_scores)

    negative, neutral, positive = probs[0], probs[1], probs[2]
    return {
        'roberta_neg': negative,
        'roberta_neu': neutral,
        'roberta_pos': positive,
        # Signed combination: negative counts -1, neutral 0, positive +1.
        'roberta_compound': (negative * -1) + (neutral * 0) + (positive * 1),
    }
# Score every review with both VADER and RoBERTa, keyed by the row's 'id'.
res = {}
for _, row in tqdm(df.iterrows(), total=len(df)):
    try:
        review = row['Review']
        myid = row['id']
        # Prefix the VADER keys so they cannot clash with the roberta_* keys.
        vader_scores = {
            f"vader_{key}": value
            for key, value in sia.polarity_scores(review).items()
        }
        res[myid] = {**vader_scores, **polarity_scores_roberta(review)}
    except RuntimeError:
        # A RuntimeError skips this row; report its id and carry on.
        print(f'Broke for id{myid}')
# Turn the per-id score dicts into rows, expose the index as an 'id' column,
# and left-join the result onto the review frame.
results_df = (
    pd.DataFrame(res).T
    .reset_index()
    .rename(columns={'index': 'id'})
    .merge(df, how='left')
)
results_df.head()
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Replace +/-inf with NaN in place before plotting.
results_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# NOTE(review): custom_palette is defined here but the pairplot below passes
# palette='tab10'; confirm which palette is intended.
custom_palette = {
    1: '#E63946',  # Red
    2: '#F4A261',  # Orange
    3: '#E9C46A',  # Yellow
    4: '#2A9D8F',  # Light Green
    5: '#1D3557',  # Dark Blue
}

# Pairwise scatter plots of the six sentiment score columns, coloured by
# Rating. The 'roberta_neg' column name was split across two lines by line
# wrapping in the original; it is rejoined here.
sns.pairplot(
    data=results_df,
    vars=[
        'vader_neg', 'vader_neu', 'vader_pos',
        'roberta_neg', 'roberta_neu', 'roberta_pos',
    ],
    hue='Rating',
    palette='tab10',
)
plt.show()
# Review examples where the model score disagrees with the rating.
# The `['Review'].values[0]` subscript had been wrapped onto its own line,
# where it is parsed as a separate list literal; each expression is rejoined.

# Rating-1 review with the highest VADER positive score.
results_df.query('Rating==1').sort_values('vader_pos', ascending=False)['Review'].values[0]

# Rating-1 review with the highest RoBERTa positive score.
results_df.query('Rating==1').sort_values('roberta_pos', ascending=False)['Review'].values[0]

# Rating-5 review with the highest VADER negative score.
results_df.query('Rating==5').sort_values('vader_neg', ascending=False)['Review'].values[0]
results_df
# Call head(): the original `print(df.head)` printed the bound method object
# instead of the first rows.
print(df.head())
# `food` is another name for results_df (no copy is made); write it to
# food.csv without the index column.
food = results_df
food.to_csv("food.csv", index=False)
import re
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
# Dynamically extract food menu columns (starting from the 19th column),
# converted to lowercase for consistency.
menu_items1 = [item.lower() for item in results_df.columns[18:]]
#################################
# Function to check if a review contains emojis
# Character class of the code-point ranges treated as emoji. Compiled once at
# import time instead of on every call.
# Each "\U" escape is written whole: in the wrapped original, two escapes were
# split after the backslash, which turned them into literal characters and an
# accidental "0-\U0001F77F" range that matched ordinary letters.
# NOTE(review): the last range (U+24C2..U+1F251) is kept from the original but
# is very wide and also covers many non-emoji characters (e.g. CJK text);
# confirm whether that is intended.
_EMOJI_PATTERN1 = re.compile(
    "["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F680-\U0001F6FF"
    "\U0001F700-\U0001F77F"
    "\U0001F780-\U0001F7FF"
    "\U0001F800-\U0001F8FF"
    "\U0001F900-\U0001F9FF"
    "\U0001FA00-\U0001FA6F"
    "\U0001FA70-\U0001FAFF"
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "]"
)


def contains_emoji1(text1):
    """Return True when *text1* contains at least one character in the emoji ranges.

    Args:
        text1: Text to inspect. Non-string values (e.g. ``None`` or a NaN
            cell from a DataFrame) are treated as containing no emoji.

    Returns:
        bool: ``True`` if any character of ``text1`` falls in one of the
        ranges of ``_EMOJI_PATTERN1``, otherwise ``False``.
    """
    if not isinstance(text1, str):
        return False
    return bool(_EMOJI_PATTERN1.search(text1))
# Recommendation function
# Look up a restaurant recommendation for the food named `food_name1`.
# NOTE(review): this export lost its indentation and appears to be missing
# statements (see the notes below), so the function is incomplete as shown.
def recommend_restaurant1(dataframe1, food_name1):
# Lowercase the query; it is compared against menu_items1 below.
food_name1 = food_name1.lower()
print("Checking for:", food_name1) # Debugging
# Unknown food: print a message and return a "not available" string.
if food_name1 not in menu_items1:
print(f"'{food_name1}' not found in menu items.")
return f"Sorry, '{food_name1}' is not available in the dataset."
# NOTE(review): filtered_rows1 is read here but never assigned in the visible
# code; the statement that filters `dataframe1` is presumably missing.
if filtered_rows1.empty:
return f"Sorry, no restaurants found offering '{food_name1}'."
# Work on a copy before adding a column.
filtered_rows1 = filtered_rows1.copy()
# Row-wise score via calculate_average_score1 (not defined in this section).
# NOTE(review): this assignment and the next one are each split across two
# lines by line wrapping.
filtered_rows1.loc[:, 'average_score1'] =
filtered_rows1.apply(calculate_average_score1, axis=1)
# Row with the largest 'average_score1'.
# NOTE(review): no return statement follows in the visible code.
max_average_row1 =
filtered_rows1.loc[filtered_rows1['average_score1'].idxmax()]
# Create widgets
food_name_input1 = widgets.Combobox(
    placeholder='Type or select food...',
    # Capitalized for UI friendliness (this comment had been wrapped onto a
    # bare line inside the call, which is a syntax error).
    options=[item.capitalize() for item in menu_items1],
    description='Food:',
    ensure_option=True
)
output1 = widgets.Output()
find_button1 = widgets.Button(description="Find Restaurant", button_style="success")
reset_button1 = widgets.Button(description="Reset", button_style="warning")


def on_reset_button_click1(b1):
    """Reset-button handler: blank the food input and clear the output area.

    Args:
        b1: The clicked button, as passed by the click callback (unused).
    """
    food_name_input1.value = ""
    output1.clear_output()


# Register the reset handler, matching how reset_button2 is wired later in
# this file; without this the Reset button does nothing.
reset_button1.on_click(on_reset_button_click1)
# NOTE(review): no click handler for find_button1 is visible in this section.

# Display UI
display(food_name_input1, widgets.HBox([find_button1, reset_button1]), output1)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import warnings
import ipywidgets as widgets
from IPython.display import display
plt.style.use('ggplot')
warnings.filterwarnings("ignore", category=UserWarning, module='nltk.twitter')
warnings.filterwarnings("ignore", category=FutureWarning)
# Quick look at the tourism frame dff2: summary info, column names, and
# value counts for the rating and place-name columns.
# NOTE(review): the bare expressions presumably each ended a notebook cell;
# cell boundaries are not visible in this export.
dff2.info()
dff2.columns
dff2['Ratings of Place']
dff2['Ratings of Place'].value_counts()
dff2['Place of Tourism'].value_counts()
sentiment_df2.columns
# `place` is another name for sentiment_df2 (no copy is made); write it to
# place.csv without the index column.
place=sentiment_df2
place.to_csv("place.csv", index=False)
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Swap +/-inf for NaN in place before plotting.
sentiment_df2.replace([np.inf, -np.inf], np.nan, inplace=True)

# Rating -> colour lookup (the plot below passes palette='tab10' instead).
custom_palette = {
    1: '#E63946',  # Red
    2: '#F4A261',  # Orange
    3: '#E9C46A',  # Yellow
    4: '#2A9D8F',  # Light Green
    5: '#1D3557',  # Dark Blue
}

# Pairwise scatter plots of the six sentiment columns, coloured by the
# place rating.
sns.pairplot(
    data=sentiment_df2,
    vars=[
        'vader_neg', 'vader_neu', 'vader_pos',
        'roberta_neg', 'roberta_neu', 'roberta_pos',
    ],
    hue='Ratings of Place',
    palette='tab10',
)
plt.show()
if filtered_places2.empty:
return f"No places found for '{place_type2}'."
# Sort by 'Sentiment score' in descending order to get the most positive places
filtered_places_sorted2 = filtered_places2.sort_values(by='Sentiment score',
ascending=False)
# Output area and the two action buttons for the place finder.
output2 = widgets.Output()
find_button2 = widgets.Button(
    description="Find Places",
    button_style="success",
)
reset_button2 = widgets.Button(
    description="Reset",
    button_style="warning",
)


def on_reset_button_click2(b2):
    """Reset-button handler: blank the place-type input and clear the output.

    Args:
        b2: The clicked button, as passed by the click callback (unused).
    """
    # The value is assigned " " and then "", in that order, as in the original.
    for blank in (" ", ""):
        place_type_input2.value = blank
    output2.clear_output()


# Hook each button up to its handler.
find_button2.on_click(on_find_button_click2)
reset_button2.on_click(on_reset_button_click2)

# Display UI
display(place_type_input2, widgets.HBox([find_button2, reset_button2]), output2)
# Combobox listing the distinct, non-null 'Type of Place' values of dff2 in
# sorted order (assuming dff2 is your tourism dataset). The trailing comment
# had been wrapped onto a bare line inside the call, which is a syntax error.
place_type_input = widgets.Combobox(
    placeholder='Type or select a place type...',
    options=sorted(dff2['Type of Place'].dropna().unique()),
    description='📍 Place:',
    ensure_option=True,
    style={'description_width': 'initial'}
)
# Render the restaurant recommendation (or a warning) into output_restaurant.
# The "\n" escapes and one string literal had been split by line wrapping in
# the original; they are rejoined here.
with output_restaurant:
    output_restaurant.clear_output()
    if food_name:
        # menu_items1 holds lowercase names, so compare case-insensitively;
        # recommend_restaurant1 lowercases its argument as well.
        if food_name.lower() in menu_items1:
            # Assuming recommend_restaurant1 is defined
            result = recommend_restaurant1(results_df, food_name)
            output_restaurant.append_stdout(
                f"✅ Recommended Restaurant:\n{result}\n\n"
            )
        else:
            output_restaurant.append_stdout(
                f"⚠️ '{food_name}' is not on the menu.\n\n"
            )
    else:
        output_restaurant.append_stdout(
            "⚠️ Please type or select a food item.\n\n"
        )
# Render the place recommendations (or a warning) into output_tourism.
# The "\n" escapes had been split by line wrapping in the original; they are
# rejoined here.
with output_tourism:
    output_tourism.clear_output()
    if place_type:
        # Assuming recommend_places2 is defined
        recommendations = recommend_places2(sentiment_df2, place_type)
        if isinstance(recommendations, str):
            # A plain message (e.g. "No places found for ...") has no
            # .to_string(); print it as-is instead of raising AttributeError.
            output_tourism.append_stdout(f"{recommendations}\n\n")
        else:
            output_tourism.append_stdout(
                f" Recommended Places:\n{recommendations.to_string(index=False)}\n\n"
            )
    else:
        output_tourism.append_stdout(
            "⚠️ Please type or select a place type.\n\n"
        )
######################################
# Store menu items in a single variable for horizontal display
from IPython.display import display, HTML
# Format the menu with larger, centered, red-colored title and blue items
# Builds an HTML snippet: a bold red title block followed by a blue block
# holding the value of `menu_string`.
# NOTE(review): `menu_string` is not assigned anywhere in the visible code;
# the statement that builds it appears to be missing from this export.
menu_html = f"""
<div style="text-align: center; font-size: 28px; font-weight: bold; color:
red;">
📌 <b>Available Food Menu:</b>
</div>
<div style="text-align: center; font-size: 24px; color: #007BFF;">
{menu_string}
</div>
"""
###############################################
# Show the restaurant section: header, food input, then the output area.
display(
    HTML('<div class="title"> Restaurant Recommendation</div>'),
    food_name_input,
    output_restaurant,
)
# Accuracy of each prediction column against the 'true_sentiment' column.
vader_acc, roberta_acc, hybrid_acc = (
    accuracy_score(df['true_sentiment'], df[column])
    for column in ('vader_prediction', 'roberta_prediction', 'hybrid_prediction')
)
import gradio as gr
import pandas as pd
import urllib.parse # For URL encoding
# ✅ Load datasets
# Reads "food.csv" and "place.csv" from the current working directory; files
# with these names are written by the to_csv calls earlier in this file.
food_df = pd.read_csv("food.csv")
place_df = pd.read_csv("place.csv")
# First row: the two dropdowns, each starting on its placeholder choice.
with gr.Row():
    food = gr.Dropdown(
        label=" Select Food",
        choices=["Please select a food"] + menu_items1,
        value="Please select a food",
        interactive=True,
    )
    place = gr.Dropdown(
        label=" Select Place Type",
        choices=["Please select a type of tourism"] + place_types,
        value="Please select a type of tourism",
        interactive=True,
    )

# Second row: the action buttons.
with gr.Row():
    btn = gr.Button("🔍 Get Recommendations", elem_id="recommend_btn")
    reset_btn = gr.Button("🔄 Reset", elem_id="reset_btn")
def reset():
    """Return the default values used to reset the interface.

    Returns:
        tuple[str, str, str]: the placeholder choice of the food dropdown,
        the placeholder choice of the place-type dropdown, and the
        placeholder text for the results area. The third string had been
        split across two lines in the original and is rejoined here.
    """
    return (
        "Please select a food",
        "Please select a type of tourism",
        "### Results will appear here",
    )
demo.launch(share=True)