0% found this document useful (0 votes)

30 views9 pages

DST Python Code With Explanation

Python Code for simple TikTok Game

Uploaded by

Vedant Gade

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

30 views9 pages

DST Python Code With Explanation

Python Code for simple TikTok Game

Uploaded by

Vedant Gade

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 9

# Import necessary libraries

import pandas as pd
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import seaborn as sns
from sklearn.cluster import KMeans
# Load the nutrition workbook into a DataFrame.
raw_data = pd.read_excel("C:/Users/Bobby/Documents/Food and nutrition.xlsx")
# Preview the first few rows.
print(raw_data.head())
# Statistical summary of the data.
print(raw_data.describe())
# Column names, dtypes and non-null counts.  DataFrame.info() writes its
# report to stdout itself and returns None, so it is called directly instead
# of printing the bound method object (which only showed its repr).
raw_data.info()
# Column labels of the sheet as a plain list (taking head() first does not
# change the labels, so they are read straight from the frame).
X = list(raw_data.columns)
# The first 100 rows, materialised as (index, Series) pairs.
y = (*raw_data.head(100).iterrows(),)
# Show the labels, then the row pairs at positions 1 through 9.
print(X)
print(y[1:10])
# Pair the i-th column label with the i-th (index, row) pair; zip stops at
# the shorter of the two sequences.
# NOTE(review): this maps column names onto whole rows, which is probably not
# the intended column -> values mapping -- confirm before relying on `data`.
data = {label: row_pair for label, row_pair in zip(X, y)}
print(data.values())
print(data.keys())
# Build one dictionary per row held in `y`, keyed by the column labels in `X`.
food_data = []
# Each item of `y` is an (index, Series) pair produced by iterrows(), so the
# pair is unpacked and the Series values are zipped with the labels directly
# (the rows are not tab-separated strings and cannot be split).
for _, row in y:
    food_dict = dict(zip(X, row))
    # Wrap values in a single-element list where applicable: every int, and
    # every string except the "name" and "serving_size" columns.
    for key, value in food_dict.items():
        wrap_text = isinstance(value, str) and key not in ("name", "serving_size")
        if wrap_text or isinstance(value, int):
            food_dict[key] = [value]
    # Collect the finished dictionary.
    food_data.append(food_dict)
# `food_data` now holds one dictionary for every row in `y`.
# One {column label: value} dictionary per row of the sheet.
# The rows themselves are iterated: the previous bound, len(X), is the number
# of *columns*, so it silently limited the result to that many rows and would
# raise IndexError on a sheet with fewer rows than columns.
list_of_dicts = [row.to_dict() for _, row in raw_data.iterrows()]
# Flag missing cells: isnull() yields a boolean frame, True where a value is absent.
missing_values = raw_data.isnull()
# Short alias bound to the very same mask object.
ms = missing_values
# Disabled imputation step, kept for reference:
# raw_data.fillna(raw_data.sum(), inplace=True)
# Column labels selected for analysis, in sheet order.
# NOTE(review): spellings such as "lucopene", "irom" and "zink" are kept
# verbatim -- presumably they mirror the spreadsheet headers; verify against
# raw_data.columns before "correcting" them.
subset_columns = [
    # identification and headline values
    "name", "serving_size", "calories",
    "total_fat", "saturated_fat", "cholesterol", "sodium", "choline",
    # vitamins and related compounds
    "folate", "folic_acid", "niacin", "pantothenic_acid", "riboflavin", "thiamin",
    "vitamin_a", "vitamin_a_rae",
    "carotene_alpha", "carotene_beta", "cryptoxanthin_beta", "lutein_zeaxanthin", "lucopene",
    "vitamin_b12", "vitamin_b6", "vitamin_c", "vitamin_d", "vitamin_e",
    "tocopherol_alpha", "vitamin_k",
    # minerals
    "calcium", "copper", "irom", "magnesium", "manganese",
    "phosphorous", "potassium", "selenium", "zink",
    # protein and amino acids
    "protein",
    "alanine", "arginine", "aspartic_acid", "cystine", "glutamic_acid",
    "glycine", "histidine", "hydroxyproline", "isoleucine", "leucine",
    "lysine", "methionine", "phenylalanine", "proline", "serine",
    "threonine", "tryptophan", "tyrosine", "valine",
    # carbohydrates and sugars
    "carbohydrate", "fiber", "sugars",
    "fructose", "galactose", "glucose", "lactose", "maltose", "sucrose",
    # fats
    "fat", "saturated_fatty_acids", "monounsaturated_fatty_acids",
    "polyunsaturated_fatty_acids", "fatty_acids_total_trans",
    # remaining columns
    "alcohol", "ash", "caffeine", "theobromine", "water",
]
# Bivariate scatter plot: serving size against calories.
# Each point is coloured by its calorie value so that `cmap` and the colour
# bar actually encode data; with a single fixed colour (c='g') matplotlib
# ignores `cmap` and the colour bar has nothing to show.
# NOTE(review): assumes the 'calories' column is numeric -- confirm.
plt.scatter(raw_data['serving_size'], raw_data['calories'],
            c=raw_data['calories'], cmap='inferno', linewidths=0.56)
plt.colorbar(label='Calories')
plt.xlabel('Serving Size')
plt.ylabel('Calories')
plt.title('Scatter Plot for Bivariate Analysis')
plt.show()
# Drop the text label column before any numeric analysis.
raw_data1 = raw_data.drop(columns='name')
# Multivariate analysis: keep numeric columns only.  Correlation and PCA are
# defined on numbers; any remaining text column would make them fail.
numerical_data = raw_data1.select_dtypes(include='number')
# Pairwise correlation matrix of the numeric columns.
correlation_matrix = numerical_data.corr()
# PCA cannot handle missing values, so rows containing any NaN are removed.
filled_values = numerical_data.dropna()
# Project the complete rows onto the first three principal components.
# NOTE(review): the features are not standardised first, so columns with
# large magnitudes will dominate the components -- confirm this is intended.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(filled_values)
# Scatter plot of the PCA result: first component against the second.
plt.figure(figsize=(8, 6))
# Shade each point by the third component (pca_result has three columns) so
# that `cmap` and the colour bar carry information; a fixed c='gray' makes
# matplotlib ignore `cmap` and leaves the colour bar without data.
plt.scatter(pca_result[:, 0], pca_result[:, 1], alpha=0.6, linewidths=(3, 4),
            c=pca_result[:, 2], cmap='gray')
plt.colorbar(label='Principal Component 3')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
# No plt.legend() here: no artist carries a label, so the call only produced
# a "no artists with labels" warning and an empty legend.
plt.title('PCA Result: 2D Scatterplot')
plt.grid(True)
plt.show()
# Optional pair plot over `subset_columns`, currently disabled.  It first
# checks that every requested column exists in the DataFrame.
# missing_columns = [col for col in subset_columns if col not in raw_data.columns]
# if not missing_columns:
#     sns.pairplot(raw_data[subset_columns])
#     plt.show()
# else:
#     print("Columns not found in DataFrame:", missing_columns)
#     print("Available columns:", raw_data.columns)
#     print("Column data types:", raw_data.dtypes)
print(filled_values)
# Perform K-Means clustering.  `n_clusters` is the fitted estimator;
# `no_clusters` is the output of fit_transform(), i.e. each sample's distance
# to every cluster centre (one column per centre), not the cluster labels.
n_clusters = KMeans(n_clusters=50, n_init="auto", algorithm='lloyd', max_iter=1000)
no_clusters = n_clusters.fit_transform(filled_values)
# Scatter plot of the samples in cluster-distance space (first two centres).
plt.figure(figsize=(5, 8))
plt.subplot(1, 1, 1)
# Colour each point by its assigned cluster so that `cmap` and the colour bar
# are meaningful; a fixed c='b' makes matplotlib ignore `cmap`.
plt.scatter(no_clusters[:, 0], no_clusters[:, 1], linewidths=(2, 3),
            c=n_clusters.labels_, cmap='inferno')
plt.colorbar(label='Cluster label')
plt.xlabel('Distance to cluster centre 0')
plt.ylabel('Distance to cluster centre 1')
plt.grid(True)
plt.show()
# Bivariate analysis: two-dimensional histogram of serving size vs. calories.
# The options are gathered in one mapping and unpacked into the call.
# NOTE(review): per the seaborn documentation `binwidth` takes precedence over
# `bins`, `element` applies to univariate plots, and `palette` needs a `hue`
# variable -- confirm which of these options are actually meant to take effect.
hist_options = {
    "x": 'serving_size',
    "y": 'calories',
    "stat": "count",
    "binwidth": 0.56,
    "bins": "auto",
    "element": 'step',
    "palette": 'colorblind',
    "binrange": (100, 200),
}
sns.histplot(filled_values, **hist_options)
plt.show()
1)pandas:

Description: Pandas is a popular Python library for data manipulation and
analysis. It provides data structures such as DataFrame and Series for working
with structured (tabular) data.

2)matplotlib.pyplot:
Description: Matplotlib is a data visualization library for creating static,
animated, or interactive plots in Python. pyplot is a collection of functions
that provide a simple interface for creating various types of plots.

3) sklearn.decomposition.PCA:

Description: This is part of the scikit-learn library (sklearn) and provides

Principal Component Analysis (PCA) for dimensionality reduction and
feature extraction.

4)sklearn.svm.SVC:

Description: This is also part of scikit-learn and stands for Support Vector
Classification. It is used for classification tasks using Support Vector
Machines (SVM). Note that SVC is imported in the code above but never used.

5)seaborn:

Description: Seaborn is a data visualization library based on Matplotlib. It

provides a high-level interface for creating informative and attractive
statistical graphics.

6)sklearn.cluster.KMeans:
Description: Another part of scikit-learn, KMeans is an unsupervised
machine learning algorithm used for clustering data into groups based on
similarity.

a)Import necessary libraries:

import pandas as pd
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import seaborn as sns
from sklearn.cluster import KMeans

b)Read the data from an Excel file using Pandas:

raw_data = pd.read_excel("C:/Users/Bobby/Documents/Food and

nutrition.xlsx")

c) Print the first few rows and a statistical summary of the data:

print(raw_data.head())
print(raw_data.describe())

d)Extract column names and a subset of rows:

X = list(raw_data.head(76).columns)
y = tuple(raw_data.head(100).iterrows())
e)Convert data into a dictionary and print values and keys:

data = dict(zip(X, y))

print(data.values())
print(data.keys())

f)Create an empty list to store dictionaries and convert rows to dictionaries:

food_data = []
for row in y:
row_data = row.split("\t")
food_dict = dict(zip(columns, row_data))
# Convert values to lists where applicable
and append the dictionary
food_data.append(food_dict)

g)Create a list of dictionaries and check for missing values:

list_of_dicts = []
for x in range(0, len(X)):
row_dict = raw_data.iloc[x].to_dict()
list_of_dicts.append(row_dict)

ms = missing_values = raw_data.isnull()

h)Define a subset of columns for analysis:

subset_columns = [list of column names]

i)Create a scatter plot between two numeric columns:

plt.scatter(raw_data['serving_size'], raw_data['calories'], cmap='inferno',

linewidths=0.56, c='g')
plt.colorbar()
plt.xlabel('Serving Size')
plt.ylabel('Calories')
plt.title('Scatter Plot for Bivariate Analysis')
plt.show()

j)Calculate the correlation matrix and perform PCA:

raw_data1 = raw_data.drop(columns='name')
correlation_matrix = raw_data1.corr()
filled_values = raw_data1.dropna()

pca = PCA(n_components=3)
pca_result = pca.fit_transform(filled_values)

k)Create a scatter plot of PCA result:

plt.figure(figsize=(8, 6))
plt.scatter(pca_result[:, 0], pca_result[:, 1], alpha=0.6, linewidths=(3, 4),
cmap='gray', c='gray')
plt.colorbar()
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.title('PCA Result: 2D Scatterplot')
plt.grid(True)
plt.show()

l)Perform K-Means clustering and create a scatter plot of clusters:

n_clusters = KMeans(n_clusters=50, n_init="auto", algorithm='lloyd',

max_iter=1000)
no_clusters = n_clusters.fit_transform(filled_values)

plt.figure(figsize=(5, 8))
plt.subplot(1, 1, 1)
plt.scatter(no_clusters[:, 0], no_clusters[:, 1], linewidths=(2, 3), c='b',
cmap='inferno')
plt.colorbar()
plt.grid(True)
plt.show()

m)Perform bivariate analysis using a histogram:

sns.histplot(filled_values, x='serving_size', y='calories', stat="count",

binwidth=0.56, bins="auto",
element='step', palette='colorblind', binrange=(100, 200))
plt.show()

Lesson Plan in Water A.N.M 1ST Year (Environmaental Sanitation
100% (3)
Lesson Plan in Water A.N.M 1ST Year (Environmaental Sanitation
9 pages
Pandas
No ratings yet
Pandas
43 pages
Biol 224 Lab Manual
No ratings yet
Biol 224 Lab Manual
92 pages
Kobelev Vladimir Durability of Springs
100% (1)
Kobelev Vladimir Durability of Springs
291 pages
Data Dictionary Data Dictionary: Set The Working Directory Set The Working Directory
No ratings yet
Data Dictionary Data Dictionary: Set The Working Directory Set The Working Directory
15 pages
Matplotlib Inline PD Set - Option (, X: X) : Import As Import As Import As Import As Lambda Import As Import
No ratings yet
Matplotlib Inline PD Set - Option (, X: X) : Import As Import As Import As Import As Lambda Import As Import
14 pages
PES University, Bangalore: UE21CS342AA2 - Data Analytics - Worksheet 4B
No ratings yet
PES University, Bangalore: UE21CS342AA2 - Data Analytics - Worksheet 4B
1 page
Food Recommendation System
No ratings yet
Food Recommendation System
13 pages
Assignment Food Nutrition 2019
No ratings yet
Assignment Food Nutrition 2019
10 pages
Empirical Crop Suitability Model 1694688954
No ratings yet
Empirical Crop Suitability Model 1694688954
24 pages
Assignment Food and Nutrition
No ratings yet
Assignment Food and Nutrition
3 pages
McDonald Dataset Analysis Project
No ratings yet
McDonald Dataset Analysis Project
10 pages
Cereal Test
No ratings yet
Cereal Test
17 pages
KDD Lab 7 2214
No ratings yet
KDD Lab 7 2214
6 pages
SAS Program For Processing NHANES Data From "Dietary Interview, Individual Foods - First Day" File (DR1IFF)
No ratings yet
SAS Program For Processing NHANES Data From "Dietary Interview, Individual Foods - First Day" File (DR1IFF)
5 pages
App
No ratings yet
App
4 pages
Project 16 Calories Burnt Prediction
No ratings yet
Project 16 Calories Burnt Prediction
10 pages
Practical 7
No ratings yet
Practical 7
21 pages
FFST 224 Ood Composition 2
No ratings yet
FFST 224 Ood Composition 2
16 pages
Experiment No. 9
No ratings yet
Experiment No. 9
9 pages
Mokhless Hajji Project
No ratings yet
Mokhless Hajji Project
5 pages
Mcdonald Assignment - Bindu Sagar A-19!09!2021
No ratings yet
Mcdonald Assignment - Bindu Sagar A-19!09!2021
22 pages
Data Cleaning
No ratings yet
Data Cleaning
22 pages
Chennai Map Analysis
No ratings yet
Chennai Map Analysis
16 pages
File Code BTL
No ratings yet
File Code BTL
2 pages
Indian Food Analysis 1
No ratings yet
Indian Food Analysis 1
22 pages
Message
No ratings yet
Message
3 pages
Data Science Libraries
No ratings yet
Data Science Libraries
4 pages
Fds Mannual
No ratings yet
Fds Mannual
39 pages
Vinay Kumar Kannegala Siddalingappa HW4D
No ratings yet
Vinay Kumar Kannegala Siddalingappa HW4D
1 page
Assignment 6
No ratings yet
Assignment 6
7 pages
AS Notebook - PCA - Wine Data-4
100% (1)
AS Notebook - PCA - Wine Data-4
1 page
Import As From Import From Import Import As
No ratings yet
Import As From Import From Import Import As
5 pages
INF2008 Lecture09
No ratings yet
INF2008 Lecture09
46 pages
Main2 Py
No ratings yet
Main2 Py
2 pages
Assignment
No ratings yet
Assignment
17 pages
Diet Problems
No ratings yet
Diet Problems
5 pages
Dietr
No ratings yet
Dietr
21 pages
Dsa 1
No ratings yet
Dsa 1
8 pages
Personalized Diet Recommendation System in Healthcare
No ratings yet
Personalized Diet Recommendation System in Healthcare
31 pages
Coding
No ratings yet
Coding
6 pages
Understanding Indian Cuisine: A Data-Driven Study
No ratings yet
Understanding Indian Cuisine: A Data-Driven Study
15 pages
KNN - Jupyter Notebook
No ratings yet
KNN - Jupyter Notebook
7 pages
22AD004 - DVE - Assignment 3
No ratings yet
22AD004 - DVE - Assignment 3
13 pages
Cardio Screen RF
100% (1)
Cardio Screen RF
27 pages
Health App Data 5
No ratings yet
Health App Data 5
10 pages
Smart Cropping
No ratings yet
Smart Cropping
28 pages
DS Food
No ratings yet
DS Food
18 pages
Ex 1
No ratings yet
Ex 1
8 pages
Tamur Khan: Import As From Import Import As
No ratings yet
Tamur Khan: Import As From Import Import As
7 pages
Department of Statistics: COURSE STATS 330/762
No ratings yet
Department of Statistics: COURSE STATS 330/762
8 pages
UNIT 3 4 Feature Relevance Marginal Entropy
No ratings yet
UNIT 3 4 Feature Relevance Marginal Entropy
4 pages
Pandas Notes
No ratings yet
Pandas Notes
5 pages
5be8701130c118257ddc838b10d8fe12
No ratings yet
5be8701130c118257ddc838b10d8fe12
6 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
42 pages
Batch1 Ds
No ratings yet
Batch1 Ds
15 pages
Pandas Correlation, Visualization 5
No ratings yet
Pandas Correlation, Visualization 5
8 pages
IP Project
No ratings yet
IP Project
31 pages
Python Calorie Counting Script
No ratings yet
Python Calorie Counting Script
21 pages
Simplifying Data Science With Python
From Everand
Simplifying Data Science With Python
Billy David millican
No ratings yet
Advanced C Concepts and Programming: First Edition
From Everand
Advanced C Concepts and Programming: First Edition
Gayatri
3/5 (1)
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Python for Data Science: Data Science Mastery by Nikhil Khan, #1
From Everand
Python for Data Science: Data Science Mastery by Nikhil Khan, #1
Nikhil Khan
No ratings yet
Assignment 2
No ratings yet
Assignment 2
19 pages
Determinants How To Solve 3 3 Determants 1 2 - 1 0 4 1 1 3 4
No ratings yet
Determinants How To Solve 3 3 Determants 1 2 - 1 0 4 1 1 3 4
2 pages
1
No ratings yet
1
3 pages
1.write The Structure Declaration in C With Suitable Example
No ratings yet
1.write The Structure Declaration in C With Suitable Example
2 pages
An If
No ratings yet
An If
9 pages
MML-2 Practicals
No ratings yet
MML-2 Practicals
14 pages
Library Management System Using C
No ratings yet
Library Management System Using C
5 pages
MML Chinmay
No ratings yet
MML Chinmay
10 pages
Micro Project: Ajeenkya D. Y. Patil School of Engineering (Second Shift Polytechnic)
No ratings yet
Micro Project: Ajeenkya D. Y. Patil School of Engineering (Second Shift Polytechnic)
10 pages
Micro Project: Dr. D. Y. Patil School of Engineering (Second Shift Polytechnic)
No ratings yet
Micro Project: Dr. D. Y. Patil School of Engineering (Second Shift Polytechnic)
9 pages
Contractor Monthly Performance KPI Report
No ratings yet
Contractor Monthly Performance KPI Report
1 page
Present Perfect Tense
No ratings yet
Present Perfect Tense
2 pages
BEMEF Orientation Material
No ratings yet
BEMEF Orientation Material
189 pages
G2000 Ambient Oil Mist Detector
No ratings yet
G2000 Ambient Oil Mist Detector
4 pages
DSM V Adhd
No ratings yet
DSM V Adhd
1 page
What Is An Arithmetic Sequence?: Arithmetic Sequences and Series
No ratings yet
What Is An Arithmetic Sequence?: Arithmetic Sequences and Series
34 pages
Biological Science Major Part 8
No ratings yet
Biological Science Major Part 8
10 pages
Berger and Luckman Sociology of Knowledge
100% (1)
Berger and Luckman Sociology of Knowledge
7 pages
Project 1 Brief AVA313 Fall24
No ratings yet
Project 1 Brief AVA313 Fall24
4 pages
Guitar Rig 4 Getting Started English
No ratings yet
Guitar Rig 4 Getting Started English
29 pages
ACI Materials Journal July 2023 v.120 No.4
No ratings yet
ACI Materials Journal July 2023 v.120 No.4
106 pages
Introduction To Social Representation Theory
No ratings yet
Introduction To Social Representation Theory
8 pages
Co - Ownership
100% (1)
Co - Ownership
7 pages
12 Capital Budgeting Version 2 Key
No ratings yet
12 Capital Budgeting Version 2 Key
10 pages
Max and Big-E Part 1
No ratings yet
Max and Big-E Part 1
3 pages
Activins in Adipogenesis and Obesity: Review
No ratings yet
Activins in Adipogenesis and Obesity: Review
4 pages
08.25.17 Game Notes PDF
No ratings yet
08.25.17 Game Notes PDF
8 pages
Normandy vs. Duque
No ratings yet
Normandy vs. Duque
2 pages
2015 - AutoCAD Tutorial Architecture Imperial Version
67% (6)
2015 - AutoCAD Tutorial Architecture Imperial Version
44 pages
162 CÂU TỪ VỰNG TỪ ĐỀ CÁC TRƯỜNG CHUYÊN
No ratings yet
162 CÂU TỪ VỰNG TỪ ĐỀ CÁC TRƯỜNG CHUYÊN
14 pages
Section 1 - With Answers
No ratings yet
Section 1 - With Answers
2 pages
HASYTEC DBPi Brochure
No ratings yet
HASYTEC DBPi Brochure
4 pages
Pat B.ing Kls 3
No ratings yet
Pat B.ing Kls 3
5 pages
Mariel Sofia S. Pulbosa 8-Gauss
No ratings yet
Mariel Sofia S. Pulbosa 8-Gauss
8 pages
Case Study of CPTED & Defensible Space Theory On Malaysia Low Cost Housing
100% (1)
Case Study of CPTED & Defensible Space Theory On Malaysia Low Cost Housing
31 pages
IP Monographs Development by IPC
No ratings yet
IP Monographs Development by IPC
86 pages
RP Lab File
No ratings yet
RP Lab File
20 pages

DST Python Code With Explanation

Uploaded by

DST Python Code With Explanation

Uploaded by

# Import necessary libraries

Description: Pandas is a popular Python library for data manipulation and

Description: This is part of the scikit-learn library (sklearn) and provides

Description: Seaborn is a data visualization library based on Matplotlib. It

a)Import necessary libraries:

b)Read the data from an Excel file using Pandas:

raw_data = pd.read_excel("C:/Users/Bobby/Documents/Food and

d)Extract column names and a subset of rows:

data = dict(zip(X, y))

f)Create an empty list to store dictionaries and convert rows to dictionaries:

g)Create a list of dictionaries and check for missing values:

h)Define a subset of columns for analysis:

subset_columns = [list of column names]

plt.scatter(raw_data['serving_size'], raw_data['calories'], cmap='inferno',

j)Calculate the correlation matrix and perform PCA:

k)Create a scatter plot of PCA result:

l)Perform K-Means clustering and create a scatter plot of clusters:

n_clusters = KMeans(n_clusters=50, n_init="auto", algorithm='lloyd',

m)Perform bivariate analysis using a histogram:

sns.histplot(filled_values, x='serving_size', y='calories', stat="count",

You might also like