AI Practical
AI Practical
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split
# Load the student hours/scores data from a CSV file in the working directory.
dataset = pd.read_csv("student_scores.csv")
# Display the loaded DataFrame (columns: Hours and Scores).
dataset
Hours Scores
0 2.5 21
1 5.1 47
2 3.2 27
3 8.5 75
4 3.5 30
5 1.5 20
6 9.2 88
7 5.5 60
8 8.3 81
9 2.7 25
10 7.7 85
11 5.9 62
12 4.5 41
13 3.3 42
14 1.1 17
15 8.9 95
16 2.5 30
17 1.9 24
18 6.1 67
19 7.4 69
20 2.7 30
21 4.8 54
22 3.8 35
23 6.9 76
24 7.8 86
500
# Two-dimensional (nested) list: the first index selects the inner list (row),
# the second index selects the element within it (column).
# Named `nested_list` rather than `list` so the builtin `list` type is not
# shadowed for the rest of the session.
nested_list = [[200, 300, 400, 500], [200, 300, 400, 500]]
nested_list[0][2]
400
# Check the shape of the data set as (rows, columns).
dataset.shape
(25, 2)
# Extract X and Y from the data set.
# X (the feature) is the 'Hours' column, taken as a NumPy array via .values.
X_feature=dataset['Hours'].values
X_feature
array([2.5, 5.1, 3.2, 8.5, 3.5, 1.5, 9.2, 5.5, 8.3, 2.7, 7.7, 5.9, 4.5,
3.3, 1.1, 8.9, 2.5, 1.9, 6.1, 7.4, 2.7, 4.8, 3.8, 6.9, 7.8])
# At this point X is one-dimensional: a single axis with one entry per row.
X_feature.shape
(25,)
# Y (the target) is the 'Scores' column, also taken as a NumPy array via .values.
Y_feature=dataset['Scores'].values
Y_feature  # a plain 1-D array, comparable to np.array(dataset['Scores'])
array([21, 47, 27, 75, 30, 20, 88, 60, 81, 25, 85, 62, 41, 42, 17, 95, 30,
# Scatter plot of scores against hours to inspect the relationship visually.
plt.scatter(X_feature,Y_feature)
plt.show()
# Bundle both arrays into a dictionary keyed by descriptive names.
hour_score_dict={'NumOfHours':X_feature,'NumOfScores':Y_feature}
hour_score_dict
{'NumOfHours': array([2.5, 5.1, 3.2, 8.5, 3.5, 1.5, 9.2, 5.5, 8.3, 2.7, 7.7, 5.9, 4.5,
3.3, 1.1, 8.9, 2.5, 1.9, 6.1, 7.4, 2.7, 4.8, 3.8, 6.9, 7.8]),
'NumOfScores': array([21, 47, 27, 75, 30, 20, 88, 60, 81, 25, 85, 62, 41, 42, 17, 95, 30,
# Pairwise correlation between the columns of the data set.
dataset.corr()
Hours Scores
#TRAINING A MODEL
# Create an (unfitted) LinearRegression estimator instance.
model=lm.LinearRegression()
# scikit-learn estimators expect the features as a two-dimensional array of
# shape (n_samples, n_features). reshape(-1, 1) converts the one-dimensional
# array of hours into a single-column two-dimensional array; the result is
# assigned back to the same variable that held the one-dimensional values.
X_feature = X_feature.reshape(-1,1)
X_feature
array([[2.5],
[5.1],
[3.2],
[8.5],
[3.5],
[1.5],
[9.2],
[5.5],
[8.3],
[2.7],
[7.7],
[5.9],
[4.5],
[3.3],
[1.1],
[8.9],
[2.5],
[1.9],
[6.1],
[7.4],
[2.7],
[4.8],
[3.8],
[6.9],
[7.8]])
# After the reshape X is two-dimensional: one row per sample, one column.
X_feature.shape
(25, 1)
# Reshaping Y is not required (a one-dimensional target is accepted), although
# the procedure would be the same as for X.
# Split the data set into a training set and a validation set (20% held out).
X_train, X_valid, Y_train, Y_valid = train_test_split(X_feature, Y_feature, test_size=0.2)
# The split is random on every run; pass random_state=<int> (for example
# random_state=3) to train_test_split to make it reproducible.
# Inspect the training targets produced by the split.
Y_train
array([75, 81, 30, 62, 21, 41, 17, 85, 86, 25, 35, 88, 47, 60, 67, 20, 30,
# Train (fit) the linear model on the training split only.
model.fit(X_train,Y_train)
LinearRegression
LinearRegression()
# R^2 (coefficient of determination) of the fitted model.
# NOTE(review): this is scored on the full data set, training rows included;
# X_valid/Y_valid are not used anywhere in the visible code. Consider
# model.score(X_valid, Y_valid) for a held-out estimate.
rsquare=model.score(X_feature,Y_feature)
rsquare
0.9516080519783816
# Intercept of the fitted line, i.e. the predicted score at zero hours.
model.intercept_  # y-intercept
2.749137028752024
# Slope of the fitted line: change in predicted score per additional hour.
model.coef_
array([9.57231104])
array([50.61069222])
# Two sample hour values (3 and 5) to predict scores for.
# np is NumPy; it must be imported (import numpy as np) for this cell to run.
five = np.array([3, 5])
five
array([3, 5])
# Reshape the sample values into a single-column two-dimensional array, the
# same layout the model was trained on.
many=five.reshape(-1,1)
many
array([[3],
[5]])
# reshape returned a new array object; `five` itself is still one-dimensional.
five.shape
(2,)
# Predict the scores for the sample hour values.
y_pred=model.predict(many)
y_pred
array([31.46607015, 50.61069222])
# Plot the training points (red) together with the fitted regression line (blue).
label_size = "12"
fitted_scores = model.predict(X_train)
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, fitted_scores, color='blue')
plt.title("model of scores vs hours", size=label_size)
plt.xlabel("Hours", size=label_size)
plt.ylabel("Scores", size=label_size)
plt.show()