0% found this document useful (0 votes)

8 views4 pages

Coding Notes Data Science

All coding related to data science

Uploaded by

Saman

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

8 views4 pages

Coding Notes Data Science

All coding related to data science

Uploaded by

Saman

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 4

Coding Notes Data Science

pd.read_csv
import pandas as pd

# Read the CSV file "people.csv" (a path relative to the working directory)
# into a DataFrame.

df = pd.read_csv("people.csv")
# In a notebook cell a trailing bare expression displays its value; in a plain
# script this line has no visible effect.
df
pd.read_excel
# Install the dependencies once from a terminal (or prefix each command with
# % inside a notebook cell); these are shell commands, not Python statements:
#   pip install pandas
#   pip install openpyxl
# openpyxl is the engine pandas uses for .xlsx workbooks; xlrd 2.0+ only reads
# the legacy .xls format, so it cannot open sample.xlsx.
import pandas as pd

# Load the first worksheet of the workbook into a DataFrame and show it.
df = pd.read_excel("sample.xlsx")
print(df)
pd.read_sql
# import the modules
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy engine for the local SQLite database. The URL must not carry
# trailing whitespace, otherwise the database path becomes "contacts.db "
# (with a space) instead of the intended file.
engine = create_engine('sqlite:///contacts.db')

# table named 'contacts' will be returned as a dataframe.
# The context manager closes the connection as soon as the table is read.
with engine.connect() as cnx:
    df = pd.read_sql_table('contacts', cnx)
print(df)
pd.read_table
# importing pandas
import pandas as pd

# Parse people.csv as comma-separated text; the delimiter is given explicitly
# because read_table does not split on commas by default.
pd.read_table('people.csv', delimiter=',')
Clean a real world messy dataset (eg: Kaggle)
# modules we'll use
import pandas as pd
import numpy as np

# Load the complete NFL play-by-play dataset.
nfl_data = pd.read_csv(
    "../input/nflplaybyplay2009to2016/NFL Play by Play 2009-2017 (v4).csv"
)

# Fix NumPy's global random seed so reruns are reproducible.
np.random.seed(0)

# Preview the first five rows of nfl_data; some missing values are
# already visible here.
nfl_data.head(5)

# Number of missing entries in each column.
missing_values_count = nfl_data.isna().sum()

# Missing-entry counts for the first ten columns.
missing_values_count[:10]
# how many total missing values do we have?
# np.prod multiplies rows x columns to get the total cell count. It replaces
# np.product, which was deprecated in NumPy 1.25 and removed in NumPy 2.0.
total_cells = np.prod(nfl_data.shape)
total_missing = missing_values_count.sum()

# percent of data that is missing

percent_missing = (total_missing/total_cells) * 100
print(percent_missing)
# look at the # of missing points in the first ten columns
missing_values_count[0:10]
# Drop every row containing at least one missing value (the result is only
# displayed, not stored).
nfl_data.dropna(axis=0)

# Drop every column containing at least one missing value.
columns_with_na_dropped = nfl_data.dropna(axis="columns")
columns_with_na_dropped.head()

# Compare the column counts before and after to see how much data was lost.
print(f"Columns in original dataset: {nfl_data.shape[1]:d} \n")
print(f"Columns with na's dropped: {columns_with_na_dropped.shape[1]:d}")
# get a small subset of the NFL dataset
subset_nfl_data = nfl_data.loc[:, 'EPA':'Season'].head()
subset_nfl_data
# replace all NA's with 0
subset_nfl_data.fillna(0)
# replace each NA with the value that comes directly after it in the same
# column, then replace all the remaining NA's with 0.
# DataFrame.bfill() is used because fillna(method='bfill') is deprecated
# since pandas 2.1.
subset_nfl_data.bfill(axis=0).fillna(0)
Apply EDA on a student performance dataset
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

import numpy as np # linear algebra

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# Load the student performance CSV. The path has to be one unbroken string
# literal: a line break inside the quotes is a syntax error.
df = pd.read_csv('/kaggle/input/students-performance-in-exams/StudentsPerformance.csv')
df.info()      # column names, dtypes and non-null counts
df.describe()  # summary statistics
df.shape       # (rows, columns)
df.isnull().sum()  # checks if there are any missing values
Let's start by plotting graphs
# Default figure size (width, height) for the plots that follow.
plt.rcParams['figure.figsize'] = (20, 10)
# Pass the column through the keyword x: in current seaborn the first
# positional parameter of countplot is `data`, so a Series passed
# positionally is not treated as the variable to count.
sns.countplot(x=df['math score'], palette='dark')
plt.title('Math Score', fontsize=20)
plt.show()
To analyse the data more deeply, let's add a few new columns: total marks, percentage and grades.

# Total marks: the three subject scores added together for each row.
df['total marks'] = (
    df['math score']
    + df['reading score']
    + df['writing score']
)

# Total expressed as a percentage of 300 (presumably each subject is scored
# out of 100 -- confirm against the dataset).
df['percentage'] = df['total marks'] / 300 * 100
#Assigning the grades

def determine_grade(scores):
    """Map a percentage score to a letter-grade label.

    Args:
        scores: Percentage value, expected to lie in the range 0-100 inclusive.

    Returns:
        'Grade A' for 85-100, 'Grade B' for 70 up to (not including) 85,
        'Grade C' for 55 up to 70, 'Grade D' for 35 up to 55 and 'Grade E'
        for 0 up to 35. None when the value falls outside 0-100 (this
        includes NaN, since every comparison with NaN is false).
    """
    # Reject out-of-range input once, so each band below needs only a
    # lower-bound check.
    if not 0 <= scores <= 100:
        return None
    if scores >= 85:
        return 'Grade A'
    if scores >= 70:
        return 'Grade B'
    if scores >= 55:
        return 'Grade C'
    if scores >= 35:
        return 'Grade D'
    return 'Grade E'

# Label every row with the grade derived from its percentage.
df['grades'] = df['percentage'].apply(determine_grade)
df.info()

# Pie chart of how many rows fall into each grade, with slice labels
# formatted as one-decimal percentages.
grade_counts = df['grades'].value_counts()
grade_counts.plot.pie(autopct="%1.1f%%")
plt.show()
Implementation of Linear Regression Model
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Toy data -- replace with a real dataset.
X = np.array([[1], [2], [3], [4], [5]])  # independent variable (one feature column)
y = np.array([2, 4, 5, 4, 5])  # dependent variable (target)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the linear regression estimator and fit it on the training split
# (fit returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and score the predictions with MSE.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Report the fitted intercept and coefficient(s).
print(f"Intercept: {model.intercept_}")
print(f"Coefficient: {model.coef_}")

Implementation of Random Forest Model

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

# Read the dataset (point 'your_data.csv' at the actual file).
data = pd.read_csv('your_data.csv')

# The target is one named column; the features are every other column.
y = data['target_column_name']  # Replace 'target_column_name'
X = data.drop(columns='target_column_name')

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest of 100 trees (n_estimators), fitted on the training split
# (fit returns the estimator itself).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows and measure the fraction predicted
# correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Equipment Manual PDF
No ratings yet
Equipment Manual PDF
170 pages
Talent Release Form
No ratings yet
Talent Release Form
2 pages
BioCellar Phase I Report
No ratings yet
BioCellar Phase I Report
10 pages
Machine File
No ratings yet
Machine File
27 pages
ml file syllabus
No ratings yet
ml file syllabus
43 pages
PW2 DataCleaning
No ratings yet
PW2 DataCleaning
6 pages
AIDS - DM Using Python - Lab Programs
No ratings yet
AIDS - DM Using Python - Lab Programs
19 pages
DataAnalytics Lab Manual (1)
No ratings yet
DataAnalytics Lab Manual (1)
35 pages
1
No ratings yet
1
3 pages
Python practice questions (1)
No ratings yet
Python practice questions (1)
5 pages
External
No ratings yet
External
11 pages
Datascience
No ratings yet
Datascience
8 pages
Data Analytics lab manual
No ratings yet
Data Analytics lab manual
47 pages
Stat Lab
No ratings yet
Stat Lab
24 pages
DA_Programs
No ratings yet
DA_Programs
44 pages
Day-4 DS Practicals
No ratings yet
Day-4 DS Practicals
5 pages
DA lab
No ratings yet
DA lab
27 pages
DATA_SCIENCE_MANAUL (TE) (1)
No ratings yet
DATA_SCIENCE_MANAUL (TE) (1)
78 pages
Data Science
No ratings yet
Data Science
18 pages
Some Exercises
No ratings yet
Some Exercises
9 pages
Modelling and Simmulation Assignment - Ipynb - Colab
No ratings yet
Modelling and Simmulation Assignment - Ipynb - Colab
7 pages
Lab 13
No ratings yet
Lab 13
5 pages
Exp 01-B Feature Selection and Extraction
No ratings yet
Exp 01-B Feature Selection and Extraction
12 pages
Lab 08 - Data Preprocessing
No ratings yet
Lab 08 - Data Preprocessing
9 pages
IS5312 Mini Project-2
No ratings yet
IS5312 Mini Project-2
5 pages
DSBDA Lab Plan
No ratings yet
DSBDA Lab Plan
5 pages
Syllabus AIML
No ratings yet
Syllabus AIML
14 pages
PDA_Assignment
No ratings yet
PDA_Assignment
6 pages
DSBDA LAB_1_1736243987425
No ratings yet
DSBDA LAB_1_1736243987425
10 pages
Python Report Ritik
No ratings yet
Python Report Ritik
15 pages
Ethics And Ai Exp-2
No ratings yet
Ethics And Ai Exp-2
5 pages
ELC Assignment
No ratings yet
ELC Assignment
4 pages
DSBDA Lab Manual24-25
No ratings yet
DSBDA Lab Manual24-25
58 pages
Lab 3 & 4
No ratings yet
Lab 3 & 4
10 pages
Data Science in Society Cat
No ratings yet
Data Science in Society Cat
5 pages
Data Analysis by Using Python
No ratings yet
Data Analysis by Using Python
15 pages
Monika Sree 11-07-2024
No ratings yet
Monika Sree 11-07-2024
36 pages
IDS-1
No ratings yet
IDS-1
30 pages
Machine Learning Project Roadmap
No ratings yet
Machine Learning Project Roadmap
4 pages
Bussiness Report PM
No ratings yet
Bussiness Report PM
44 pages
DS-DS Lab-1
No ratings yet
DS-DS Lab-1
4 pages
04 DS 2023
No ratings yet
04 DS 2023
63 pages
DA PROGRAM UPTO 6 (1)
No ratings yet
DA PROGRAM UPTO 6 (1)
20 pages
Introduction To Python and Computer Programming 1704298503
No ratings yet
Introduction To Python and Computer Programming 1704298503
44 pages
Activity 4 CGPA Vs Placement Package Program
No ratings yet
Activity 4 CGPA Vs Placement Package Program
4 pages
MACHINE LEARNING manual
No ratings yet
MACHINE LEARNING manual
36 pages
Assignment-2 IDS
No ratings yet
Assignment-2 IDS
2 pages
Practical File Question 28.09.2022
No ratings yet
Practical File Question 28.09.2022
15 pages
List of Programs For Informatics - XII - IP
No ratings yet
List of Programs For Informatics - XII - IP
26 pages
data analytics lab manual
No ratings yet
data analytics lab manual
26 pages
Data Analysis Lab - Final - 23-24
No ratings yet
Data Analysis Lab - Final - 23-24
11 pages
index
No ratings yet
index
4 pages
Pattern Recognition
No ratings yet
Pattern Recognition
26 pages
Student Performance Analysis and Prediction
No ratings yet
Student Performance Analysis and Prediction
19 pages
Data Analytics Lab Manual_250402_095326
No ratings yet
Data Analytics Lab Manual_250402_095326
58 pages
1152CS239-Intro. To Data Science-Syllabus
No ratings yet
1152CS239-Intro. To Data Science-Syllabus
6 pages
Data Preparation Basics#
No ratings yet
Data Preparation Basics#
2 pages
ML Lab Manual (1-10) FINAL
No ratings yet
ML Lab Manual (1-10) FINAL
34 pages
Group A Assignment No2 Writeup
No ratings yet
Group A Assignment No2 Writeup
9 pages
DVA Lab Manual
No ratings yet
DVA Lab Manual
20 pages
featureselection
No ratings yet
featureselection
11 pages
Assignment 2 Oops
No ratings yet
Assignment 2 Oops
10 pages
Python For Beginners
From Everand
Python For Beginners
Célio Azevedo
No ratings yet
Time Series Analysis and Forecasting
No ratings yet
Time Series Analysis and Forecasting
7 pages
Natural Language Processing
No ratings yet
Natural Language Processing
6 pages
Normalization
No ratings yet
Normalization
2 pages
Introduction to Data Science Lecture 1
No ratings yet
Introduction to Data Science Lecture 1
4 pages
Case Study Normalization
No ratings yet
Case Study Normalization
1 page
Tcs Employment Application Form
No ratings yet
Tcs Employment Application Form
5 pages
Terms and Conditions Customer Acknowledgement 12082016
No ratings yet
Terms and Conditions Customer Acknowledgement 12082016
1 page
Remedial Classes For Slow Learners-2nd Year
No ratings yet
Remedial Classes For Slow Learners-2nd Year
2 pages
Council Meeting July 5, 2016 PDF
No ratings yet
Council Meeting July 5, 2016 PDF
47 pages
Interim Rates For Wholesale Residential and Business High Speed Access Services
No ratings yet
Interim Rates For Wholesale Residential and Business High Speed Access Services
4 pages
RAP Answers
No ratings yet
RAP Answers
7 pages
PSW
No ratings yet
PSW
2 pages
Tle10 Cookery10 Q3 M1
No ratings yet
Tle10 Cookery10 Q3 M1
14 pages
The Historical Foundations of The Duty of Care
No ratings yet
The Historical Foundations of The Duty of Care
29 pages
Performa Invoice: State Name: Haryana, Code: 06
No ratings yet
Performa Invoice: State Name: Haryana, Code: 06
2 pages
Labor Standards Reviewer
No ratings yet
Labor Standards Reviewer
7 pages
India Latest Market Info 2010
No ratings yet
India Latest Market Info 2010
2 pages
BK Sons Company Profile
No ratings yet
BK Sons Company Profile
8 pages
Notice 1 - 241017 - 230812
No ratings yet
Notice 1 - 241017 - 230812
1 page
INC 12-Systematic Review Nurul Arifah
No ratings yet
INC 12-Systematic Review Nurul Arifah
8 pages
QUESTER COMPACTOR Quester - E44 - Spec - Sheet
No ratings yet
QUESTER COMPACTOR Quester - E44 - Spec - Sheet
2 pages
TrakCare Credibility Brochure
No ratings yet
TrakCare Credibility Brochure
24 pages
Executive Summary
No ratings yet
Executive Summary
84 pages
CV Muhammad Dio Ariqsyah
No ratings yet
CV Muhammad Dio Ariqsyah
1 page
A Knowledge-Based SWOT-analysis System As An Instrument For Strategic Planning in Small and Medium Sized Enterprises
No ratings yet
A Knowledge-Based SWOT-analysis System As An Instrument For Strategic Planning in Small and Medium Sized Enterprises
11 pages
PM Wani Applicant
No ratings yet
PM Wani Applicant
1 page
Bill Ackman A Behavioral Finance Case Study
No ratings yet
Bill Ackman A Behavioral Finance Case Study
8 pages
Dawn Editorials 23 Oct PDF
No ratings yet
Dawn Editorials 23 Oct PDF
18 pages
Collateral Warranties
No ratings yet
Collateral Warranties
2 pages
Personal Monthly Budget
No ratings yet
Personal Monthly Budget
4 pages
MSBM Programmes Brochure (2015)
No ratings yet
MSBM Programmes Brochure (2015)
49 pages
Incident
No ratings yet
Incident
13 pages

Coding Notes Data Science

Uploaded by

Coding Notes Data Science

Uploaded by

Coding Notes Data Science

# reading csv file

# table named 'contacts' will be returned as a dataframe.

# read in all our data

# set seed for reproducibility

# look at the # of missing points in the first ten columns

# percent of data that is missing

import numpy as np # linear algebra

df['total marks']=df['math score']+df['reading score']+df['writing score']

# Sample data (replace with your actual data)

# Split data into training and testing sets

# Create a linear regression model

# Train the model using the training data

# Make predictions on the test data

# Evaluate the model

# Print the coefficients

Implementation of Random Forest Model

# Load the data (replace 'your_data.csv' with your actual file)

# Separate features (X) and target (y)

# Split data into training and testing sets

# Create a Random Forest Classifier model

# Train the model

# Make predictions on the test set

# Evaluate the model

You might also like