Coding Notes Data Science
Coding Notes Data Science
pd.read_csv
import pandas as pd
# SQLAlchemy connectable.
# NOTE(review): create_engine is not imported anywhere in these notes --
# presumably `from sqlalchemy import create_engine`; confirm before running.
# The URL must not end in whitespace: the text after 'sqlite:///' is taken
# as the database file path, so a trailing space would point at a different
# file ('contacts.db ' instead of 'contacts.db').
cnx = create_engine('sqlite:///contacts.db').connect()
# With delimiter=',' read_table parses the file as comma-separated values,
# i.e. the same result as pd.read_csv('people.csv').
pd.read_table('people.csv', delimiter=',')
Clean a real-world messy dataset (e.g., from Kaggle)
# modules we'll use
import pandas as pd
import numpy as np
def determine_grade(scores):
    """Map a single percentage score to its letter-grade label.

    Grade bands (lower bound inclusive, upper bound exclusive except for A):
        85 <= score <= 100  -> 'Grade A'
        70 <= score <  85   -> 'Grade B'
        55 <= score <  70   -> 'Grade C'
        35 <= score <  55   -> 'Grade D'
         0 <= score <  35   -> 'Grade E'

    Args:
        scores: One numeric score (despite the plural name, this is a
            scalar; the function is meant to be applied element-wise,
            e.g. via ``Series.apply``).

    Returns:
        The grade label as a string, or ``None`` when the score lies
        outside 0-100 or is NaN (every comparison with NaN is false).
    """
    if 85 <= scores <= 100:
        return 'Grade A'
    if 70 <= scores < 85:
        return 'Grade B'
    if 55 <= scores < 70:
        return 'Grade C'
    if 35 <= scores < 55:
        return 'Grade D'
    if 0 <= scores < 35:
        return 'Grade E'
    # Out-of-range or NaN input: keep the historical "no grade" result,
    # but make it explicit instead of falling off the end of the function.
    return None
# Derive a 'grades' column by applying determine_grade to every value in
# the 'percentage' column.
df['grades']=df['percentage'].apply(determine_grade)
# Print a summary of the DataFrame (columns, non-null counts, dtypes).
df.info()
# Pie chart of how many rows fall into each grade; autopct labels every
# slice with its share formatted to one decimal place followed by '%'.
df['grades'].value_counts().plot.pie(autopct="%1.1f%%")
# NOTE(review): plt is not imported in the visible notes -- presumably
# `import matplotlib.pyplot as plt`; confirm before running.
plt.show()
Implementation of a Linear Regression Model
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error