Mlinternal - Ipynb - Colab
Mlinternal - Ipynb - Colab
# Load the pizza order data from an Excel workbook.
# NOTE(review): the path is absolute ("/content/..."), so the file must exist at
# exactly that location for this cell to run.
df=pd.read_excel("/content/dataeda.xlsx")
# Preview the first rows of the frame.
df.head()
# Print column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48620 entries, 0 to 48619
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 order_details_id 48620 non-null int64
1 order_id 48620 non-null int64
2 pizza_id 48620 non-null object
3 quantity 48620 non-null int64
4 order_date 48620 non-null datetime64[ns]
5 order_time 48620 non-null object
6 unit_price 48620 non-null float64
7 total_price 48620 non-null float64
8 pizza_size 48620 non-null object
9 pizza_category 48620 non-null object
10 pizza_ingredients 48620 non-null object
11 pizza_name 48620 non-null object
dtypes: datetime64[ns](1), float64(2), int64(3), object(6)
memory usage: 4.5+ MB
# Descriptive statistics (count, mean, quantiles, ...) for the frame's columns.
df.describe()
2015-06-29
mean 24310.500000 10701.479761 1.019622 16.494132
11:03:43.611682560
# (number of rows, number of columns) of the loaded frame.
df.shape
(48620, 12)
# Number of missing values in each column.
df.isna().sum()
order_details_id 0
order_id 0
pizza_id 0
quantity 0
order_date 0
order_time 0
unit_price 0
total_price 0
pizza_size 0
pizza_category 0
pizza_ingredients 0
pizza_name 0
dtype: int64
# List the column labels.
df.columns
# Store the order time as text, then break "HH:MM:SS" into three text columns.
order_time_text = df["order_time"].astype("string")
df["order_time"] = order_time_text
hms_parts = order_time_text.str.split(":", expand=True)
df[["Hour", "Minute", "Second"]] = hms_parts
# How many order lines fall into each hour.
df["Hour"].value_counts()
Hour
12 6543
13 6203
18 5359
17 5143
19 4350
16 4185
14 3521
20 3487
15 3170
11 2672
21 2528
22 1370
23 68
10 17
09 4
Name: count, dtype: Int64
# Bar chart of order lines per hour of day.
# `Hour` holds zero-padded strings ("09", "10", ...), so a plain sort is chronological;
# without an explicit order the bars follow order of appearance in the data.
hour_order = sorted(df["Hour"].dropna().unique())
sns.countplot(
    data=df,
    x="Hour",
    hue="Hour",            # palette without hue is deprecated in seaborn >= 0.13
    order=hour_order,
    hue_order=hour_order,  # keeps the palette gradient aligned with the bar order
    palette="plasma",
    legend=False,          # hue duplicates x, so a legend would be redundant
)
plt.xticks(rotation=90)
plt.xlabel("Hour",fontsize=10,color="purple")
plt.ylabel("Frequency",fontsize=10,color="purple")
plt.title("HOUR",color="purple")
plt.show()
<ipython-input-9-6afbf4e71693>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="Hour",palette="plasma")
# Calendar year of each order, taken from the datetime column.
df['order_year'] = df['order_date'].dt.year
# Order lines per year.
df['order_year'].value_counts()
order_year
2015 48620
Name: count, dtype: int64
# Weekday name (Monday ... Sunday) of each order; note the column is called `order_dates`.
weekday_names = df['order_date'].dt.day_name()
df['order_dates'] = weekday_names
# Order lines per weekday.
df['order_dates'].value_counts()
order_dates
Friday 8106
Saturday 7355
Thursday 7323
Wednesday 6797
Tuesday 6753
Monday 6369
Sunday 5917
Name: count, dtype: int64
# Bar chart of order lines per weekday (`order_dates` holds day names).
# Bars are put in calendar order instead of order of appearance in the data.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
sns.countplot(
    data=df,
    x="order_dates",
    hue="order_dates",        # palette without hue is deprecated in seaborn >= 0.13
    order=weekday_order,
    hue_order=weekday_order,  # keeps the palette aligned with the bar order
    palette="viridis",
    legend=False,             # hue duplicates x, so a legend would be redundant
)
plt.xticks(rotation=90)
plt.xlabel("Date",fontsize=10,color="green")
plt.ylabel("Frequency",fontsize=10,color="green")
plt.title("DATES",color="green")
plt.show()
<ipython-input-12-1fd42b1415ea>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="order_dates",palette="viridis")
order_month
July 4301
May 4239
March 4186
November 4185
January 4156
August 4094
April 4067
June 4025
February 3892
December 3859
September 3819
October 3797
Name: count, dtype: int64
# Bar chart of order lines per month (`order_month` holds English month names).
# Bars are put in calendar order instead of order of appearance in the data.
month_order = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
sns.countplot(
    data=df,
    x="order_month",
    hue="order_month",      # palette without hue is deprecated in seaborn >= 0.13
    order=month_order,
    hue_order=month_order,  # keeps the palette aligned with the bar order
    palette="CMRmap",
    legend=False,           # hue duplicates x, so a legend would be redundant
)
plt.xticks(rotation=90)
plt.xlabel("Months",fontsize=10,color="black")
plt.ylabel("Frequency",fontsize=10,color="black")
plt.title("MONTHS",color="black")
plt.show()
<ipython-input-14-b059dd219588>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="order_month",palette="CMRmap")
# Order lines per pizza_id, most frequent first.
df["pizza_id"].value_counts()
pizza_id
big_meat_s 1811
thai_ckn_l 1365
five_cheese_l 1359
four_cheese_l 1273
classic_dlx_m 1159
...
mexicana_s 160
calabrese_s 99
ckn_alfredo_s 96
green_garden_l 94
the_greek_xxl 28
Name: count, Length: 91, dtype: int64
<ipython-input-16-b984abe1a923>:2: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="pizza_id",palette="tab20b_r",ax=ax)
# Order lines per ordered quantity, most frequent first.
df["quantity"].value_counts()
quantity
1 47693
2 903
3 21
4 3
Name: count, dtype: int64
# Bar chart of order lines per ordered quantity.
# `quantity` is numeric, so the colours are passed as an explicit {level: colour} dict:
# that keeps one discrete Set1 colour per bar even though hue is a numeric variable.
quantity_levels = sorted(df["quantity"].unique())
quantity_palette = dict(zip(quantity_levels, sns.color_palette("Set1", len(quantity_levels))))
sns.countplot(
    data=df,
    x="quantity",
    hue="quantity",   # palette without hue is deprecated in seaborn >= 0.13
    palette=quantity_palette,
    legend=False,     # hue duplicates x, so a legend would be redundant
)
plt.xticks(rotation=90)
plt.xlabel("quantity",fontsize=10,color="red")
plt.title("quantity",color="red")
plt.show()
<ipython-input-18-2293a15c8caa>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="quantity",palette="Set1")
# Order lines per pizza size, most frequent first.
df["pizza_size"].value_counts()
pizza_size
L 18526
M 15385
S 14137
XL 544
XXL 28
Name: count, dtype: int64
# Bar chart of order lines per pizza size.
sns.countplot(
    data=df,
    x="pizza_size",
    hue="pizza_size",  # palette without hue is deprecated in seaborn >= 0.13
    palette="coolwarm_r",
    legend=False,      # hue duplicates x, so a legend would be redundant
)
plt.xticks(rotation=90)
plt.xlabel("pizza Size",fontsize=10,color="b")
plt.title("SİZE",color="b")
plt.show()
<ipython-input-20-53b823131db9>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="pizza_size",palette="coolwarm_r")
# Pie chart: share of order lines per pizza size.
# value_counts() is computed once and reused for both the wedge sizes and their labels.
sizes = df["pizza_size"].value_counts()
labels = sizes.index
plt.figure(figsize = (8,10))
colors = ['#ff9749','#66b3f1','#999f99','#ffcc09',"RED","cyan"]
# Pull out only the last wedge (value_counts sorts descending, so it is the rarest size).
# Building the list from len(sizes) keeps it valid if the number of sizes changes;
# a fixed 5-tuple makes plt.pie raise when the lengths differ.
explode = [0.0] * len(sizes)
if explode:
    explode[-1] = 0.8
plt.pie(sizes, labels=labels, rotatelabels=False, autopct='%1.1f%%',colors=colors,shadow=True, startangle=60,explode=explode)
plt.show()
# Order lines per pizza category, most frequent first.
df["pizza_category"].value_counts()
pizza_category
Classic 14579
Supreme 11777
Veggie 11449
Chicken 10815
Name: count, dtype: int64
# Bar chart of order lines per pizza category.
sns.countplot(
    data=df,
    x="pizza_category",
    hue="pizza_category",  # palette without hue is deprecated in seaborn >= 0.13
    palette="Oranges",
    legend=False,          # hue duplicates x, so a legend would be redundant
)
plt.xticks(rotation=90)
plt.xlabel("Category",fontsize=10,color="orange")
plt.ylabel("Frequency",fontsize=10,color="orange")
plt.title("PİZZA CATEGORY",color="orange")
plt.show()
<ipython-input-23-b9d257fe9f01>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="pizza_category",palette="Oranges")
# Pie chart: share of order lines per pizza category.
sizes = df["pizza_category"].value_counts()
labels = sizes.index
colors = ['#ff9999','#66b3ff','#990f99','#ffcc89',"pink","yellow"]
plt.pie(
    sizes,
    labels=labels,
    rotatelabels=False,
    autopct='%1.1f%%',
    colors=colors,
    shadow=True,
    startangle=45,
)
plt.show()
Start coding or generate with AI.
# Order lines per hour of day, most frequent first.
df["Hour"].value_counts()
Hour
12 6543
13 6203
18 5359
17 5143
19 4350
16 4185
14 3521
20 3487
15 3170
11 2672
21 2528
22 1370
23 68
10 17
09 4
Name: count, dtype: Int64
<ipython-input-26-7f310977841f>:1: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.
sns.countplot(data=df,x="Hour",palette="inferno")
# Order lines per distinct ingredient list, most frequent first.
df["pizza_ingredients"].value_counts()
pizza_ingredients
Pepperoni, Mushrooms, Red Onions, Red Peppers, Bacon 2416
Barbecued Chicken, Red Peppers, Green Peppers, Tomatoes, Red Onions, Barbecue Sauce 2372
Sliced Ham, Pineapple, Mozzarella Cheese 2370
Mozzarella Cheese, Pepperoni 2369
Chicken, Pineapple, Tomatoes, Red Peppers, Thai Sweet Chilli Sauce 2315
Chicken, Artichoke, Spinach, Garlic, Jalapeno Peppers, Fontina Cheese, Gouda Cheese 2302
Coarse Sicilian Salami, Tomatoes, Green Olives, Luganega Sausage, Onions, Garlic 1887
Capocollo, Tomatoes, Goat Cheese, Artichokes, Peperoncini verdi, Garlic 1887
Chicken, Tomatoes, Red Peppers, Red Onions, Jalapeno Peppers, Corn, Cilantro, Chipotle Sauce 1885
Ricotta Cheese, Gorgonzola Piccante Cheese, Mozzarella Cheese, Parmigiano Reggiano Cheese, Garlic 1850
Calabrese Salami, Capocollo, Tomatoes, Red Onions, Green Olives, Garlic 1849
Bacon, Pepperoni, Italian Sausage, Chorizo Sausage 1811
Mushrooms, Tomatoes, Red Peppers, Green Peppers, Red Onions, Zucchini, Spinach, Garlic 1510
Tomatoes, Red Peppers, Jalapeno Peppers, Red Onions, Cilantro, Corn, Chipotle Sauce, Garlic 1456
Tomatoes, Anchovies, Green Olives, Red Onions, Garlic 1451
Spinach, Mushrooms, Red Onions, Feta Cheese, Garlic 1432
Prosciutto di San Daniele, Arugula, Mozzarella Cheese 1428
Genoa Salami, Capocollo, Pepperoni, Tomatoes, Asiago Cheese, Garlic 1422
Capocollo, Red Peppers, Tomatoes, Goat Cheese, Garlic, Oregano 1414
Kalamata Olives, Feta Cheese, Tomatoes, Garlic, Beef Chuck Roast, Red Onions 1406
Mozzarella Cheese, Provolone Cheese, Smoked Gouda Cheese, Romano Cheese, Blue Cheese, Garlic 1359
Pepperoni, Mushrooms, Green Peppers 1342
Spinach, Mushrooms, Tomatoes, Green Olives, Feta Cheese 987
Chicken, Red Onions, Red Peppers, Mushrooms, Asiago Cheese, Alfredo Sauce 980
Eggplant, Artichokes, Tomatoes, Zucchini, Red Peppers, Garlic, Pesto Sauce 975
Chicken, Tomatoes, Red Peppers, Spinach, Garlic, Pesto Sauce 961
Spinach, Artichokes, Tomatoes, Sun-dried Tomatoes, Garlic, Pesto Sauce 957
Soppressata Salami, Fontina Cheese, Mozzarella Cheese, Mushrooms, Garlic 957
Spinach, Red Onions, Pepperoni, Tomatoes, Artichokes, Kalamata Olives, Garlic, Asiago Cheese 940
慛duja Salami, Pancetta, Tomatoes, Red Onions, Friggitello Peppers, Garlic 927
Spinach, Artichokes, Kalamata Olives, Sun-dried Tomatoes, Feta Cheese, Plum Tomatoes, Red Onions 923
Brie Carre Cheese, Prosciutto, Caramelized Onions, Pears, Thyme, Garlic 480
Name: count, dtype: int64
# Order lines per pizza name, most frequent first.
df["pizza_name"].value_counts()
pizza_name
The Classic Deluxe Pizza 2416
The Barbecue Chicken Pizza 2372
The Hawaiian Pizza 2370
The Pepperoni Pizza 2369
The Thai Chicken Pizza 2315
The California Chicken Pizza 2302
The Sicilian Pizza 1887
The Spicy Italian Pizza 1887
The Southwest Chicken Pizza 1885
The Four Cheese Pizza 1850
The Italian Supreme Pizza 1849
The Big Meat Pizza 1811
The Vegetables + Vegetables Pizza 1510
The Mexicana Pizza 1456
The Napolitana Pizza 1451
The Spinach and Feta Pizza 1432
The Prosciutto and Arugula Pizza 1428
The Pepper Salami Pizza 1422
The Italian Capocollo Pizza 1414
The Greek Pizza 1406
The Five Cheese Pizza 1359
The Pepperoni, Mushroom, and Peppers Pizza 1342
The Green Garden Pizza 987
The Chicken Alfredo Pizza 980
The Italian Vegetables Pizza 975
The Chicken Pesto Pizza 961
The Spinach Pesto Pizza 957
The Soppressata Pizza 957
The Spinach Supreme Pizza 940
The Calabrese Pizza 927
The Mediterranean Pizza 923
The Brie Carre Pizza 480
Name: count, dtype: int64
# Interactive bar chart of order lines per weekday.
# `order_dates` holds weekday names, so its axis is labelled "Day of week";
# mapping it to 'count' gave both axes the same "count" title.
px.bar(data_frame=df, x='order_dates',color_discrete_sequence=['green'],
       labels={'order_dates':'Day of week'},
       template='plotly_white',title='<b> Which day is the most ordered?')
8000
7000
6000
5000
count
4000
3000
2000
1000
0
Thursday Friday Saturday Sunday Monday Tuesday
count
Start coding or generate with AI.
# Box plot of the line total (`total_price`) for every pizza name, one colour per pizza.
price_axis_labels = {'pizza_name':'Name','total_price':'Price'}
px.box(
    df,
    x='pizza_name',
    y='total_price',
    color='pizza_name',
    template='seaborn',
    labels=price_axis_labels,
    title='<b>Pizza Names and Prices',
)
80 Name
60 The Hawaiian Pizza
Price
Name
# Donut chart: share of order lines per pizza size.
# After groupby(...).count() the `total_price` column holds the number of rows per size,
# not money, so it is labelled "count" instead of "price" in the hover text.
size_counts = (
    df.groupby('pizza_size',as_index=False)['total_price']
    .count()
    .sort_values(by='total_price',ascending=False)
)
fig=px.pie(size_counts,names='pizza_size',
           values='total_price',color='pizza_size',hole=0.7,
           labels={'pizza_size':'pizza_size','total_price':'count'},
           template='ggplot2',title='<b> Pizza Sizes')
# Centre the title and lay the legend out horizontally above the plot.
fig.update_layout(title_x=0.5,legend=dict(orientation='h',yanchor='bottom',y=1.02,xanchor='right',x=1))
Pizza Sizes
L M S XL
31.6%
38.1%
0.0576%
29.1% 1.12%
14k 14579
12k
11777
11449
10815
10k
total_price
8k
6k
4k
2k
0
Classic Supreme Veggie Chicken
pizza_category
24.2%
30%
23.5%
22.2%
# Order lines per month, split into one bar per weekday (`order_dates`).
# The body of the `with` block must be indented; the plot is built under the
# temporary 'white' axes style and shown afterwards.
with sns.axes_style('white'):
    g = sns.catplot(x="order_month", data=df, aspect=4.0, kind='count',hue='order_dates',palette="pastel")
    g.set_ylabels('Frequency')
    g.set_xlabels("Months")
plt.show()
Start coding or generate with AI.
09
10
23
22
21
20
19
Hour
18
17
16
15
14
13
12
11
order_dates
# Sunburst of total_price nested by category (inner ring) and size (outer ring).
# `top_rated` is a full slice of the sorted frame, i.e. every row is included.
joint_data = df.sort_values('total_price', ascending=False)
top_rated = joint_data[:]
fig = px.sunburst(
    top_rated,
    path=['pizza_category', "pizza_size"],
    values='total_price',
    color='total_price',
)
fig.show()
XXL
XL
M
L
S
Classic L
Supreme
S
Chicken Veggie
L M
L
M
# Median line price for every (pizza_size, pizza_category) pair,
# drawn as a treemap nested all -> size -> category and coloured by size.
df1 = (
    df.groupby(["pizza_size","pizza_category"],as_index=False)[['total_price']]
    .median()
    .rename(columns={'total_price':'price'})
)
# Only the '(?)' entry is kept in color_discrete_map: the former 'Lunch'/'Dinner' keys
# never match a pizza_size value (L, M, S, XL, XXL), so they had no effect.
# NOTE(review): '(?)' looks like plotly's key for nodes without a single colour value
# (here the "all" root) -- confirm against the plotly docs.
fig = px.treemap(df1, path = [px.Constant("all"), "pizza_size","pizza_category"],
                 values ='price', color='pizza_size',
                 color_discrete_map={'(?)':'lightgrey'})
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig.show()
all
L M XXL
Chicken Supreme Chicken Classic Clas
Supreme Veggie
Classic Veggie
S
XL
Chicken Supreme Classic Veggie
Clas
# Word cloud built from `text`.
# NOTE(review): `text` is not defined in the visible cells -- confirm the cell that
# builds it runs before this one.
# The rcParams change is global: it also sets the default size of later matplotlib figures.
plt.rcParams['figure.figsize'] = (12,12)
wordcloud = WordCloud(background_color = 'black',colormap='vlag', width = 1200, height = 1200, max_words = 121).generate(text)
plt.imshow(wordcloud)
plt.axis('off')
# A single show() is enough; the duplicated second call was removed.
plt.show()
Start coding or generate with AI.
70
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Preprocess data: every column except the target becomes a feature.
X = df.drop(["total_price"], axis=1)
y = df["total_price"]

# One-hot encode categorical variables
# NOTE(review): the encoder is fitted on ALL remaining columns, including the
# identifiers (order_details_id, order_id), order_date and the numeric
# quantity / unit_price, so every distinct value gets its own indicator column.
# Consider restricting it to the categorical columns.
ohe = OneHotEncoder()
X_encoded = ohe.fit_transform(X)

# Scale numerical variables
# NOTE(review): encoder and scaler are both fitted before the train/test split that
# follows, so test rows influence the fitted transforms -- confirm this is intended.
scaler = StandardScaler(with_mean=False)  # Set with_mean=False to disable centering
X_scaled = scaler.fit_transform(X_encoded)
# Split data