Big Data Assignment - 4
Big Data Assignment - 4
[138…
#1. Test Points
import numpy as np
import pandas as pd
x_list = np.sort(x_list, 0)
y_list = []
#generate y values
for i in x_list:
print(df)
x y
0 -1.616408 -5.322508
1 -1.253063 -1.743096
2 -1.090656 -2.131412
3 -1.007017 -1.976215
4 -0.732967 0.004277
5 -0.668744 -0.161225
6 -0.435562 0.208111
7 -0.232641 0.532104
8 0.393235 0.006126
9 0.668950 0.516830
10 0.691024 1.021336
11 0.705019 0.836321
12 1.189462 1.788390
13 1.767211 5.752160
14 1.795525 5.621833
In [139…
#Linear Regression
lin_reg = LinearRegression()
x_list = x_list.reshape(-1, 1)
y_list = y_list.reshape(-1, 1)
lin_reg.fit(x_list, y_list)
plt.plot(x_list, lin_reg.predict(x_list))
plt.title('Linear Regression')
plt.xlabel('X values')
plt.ylabel('Y values')
plt.show()
In [150…
#Polynomial Regression with Degree 2
poly_reg = PolynomialFeatures(degree = 2)
x_poly_2 = poly_reg.fit_transform(x_list)
linreg = LinearRegression()
linreg.fit(x_poly_2, y_list)
lin_reg.fit(x_list, y_list)
plt.plot(x_list, linreg.predict(x_poly_2))
plt.xlabel('X values')
plt.ylabel('Y values')
plt.show()
In [151…
#Polynomial Regression with Degree 3
poly_reg = PolynomialFeatures(degree = 3)
x_poly_3 = poly_reg.fit_transform(x_list)
linreg = LinearRegression()
linreg.fit(x_poly_3, y_list)
lin_reg.fit(x_list, y_list)
plt.plot(x_list, linreg.predict(x_poly_3))
plt.xlabel('X values')
plt.ylabel('Y values')
plt.show()
In [152…
#Polynomial Regression with Degree 4
poly_reg = PolynomialFeatures(degree = 4)
x_poly_4 = poly_reg.fit_transform(x_list)
linreg = LinearRegression()
linreg.fit(x_poly_4, y_list)
lin_reg.fit(x_list, y_list)
plt.plot(x_list, linreg.predict(x_poly_4))
plt.xlabel('X values')
plt.ylabel('Y values')
plt.show()
In [184…
#Combine all models
fig, ax = plt.subplots()
ax = fig.add_subplot(1,1,1)
ax.scatter(x_list, y_list)
ax.plot(x_list, lin_reg.predict(x_list))
j = 0
poly = PolynomialFeatures(degree = i)
x_poly_i = poly_reg.fit_transform(x_list)
linear_reg = LinearRegression()
linear_reg.fit(x_poly_i, y_list)
j += 1
ax.set_title('Regression Models')
ax.legend(['quatric', 'linear', 'quadratic', 'cubic'],loc = 'upper left')
ax.set_xlabel('X values')
ax.set_ylabel('Y values')
plt.show()
In [166…
#Test the models
rng = np.random.default_rng(12345)
x1 = rng.uniform(-2, 2, 100)
x1 = np.sort(x1, 0)
y1 = []
#generate y values
for i in x1:
y1 = np.append(y1, y, axis=0)
print(df.head(10))
x y
0 -1.979911 -8.547646
1 -1.961151 -7.002029
2 -1.767190 -5.984456
3 -1.727649 -4.418418
4 -1.676513 -4.710488
5 -1.673622 -5.103340
6 -1.657713 -5.133752
7 -1.633341 -4.693403
8 -1.616408 -5.052430
9 -1.482124 -3.340472
In [168…
plt.scatter(x1, y1)
plt.title('Scatter Plot')
plt.xlabel('X values')
plt.ylabel('Y values')
plt.show()
In [177…
#Root Mean Squared Error
lin_reg = LinearRegression()
lin_reg.fit(x_list, y_list)
y_poly_pred_1 = lin_reg.predict(x_list)
rmse1 = np.sqrt(mean_squared_error(y_list,y_poly_pred_1))
poly_reg = PolynomialFeatures(degree = 2)
x_poly_2 = poly_reg.fit_transform(x_list)
linreg.fit(x_poly_2, y_list)
y_poly_pred_2 = linreg.predict(x_poly_2)
rmse2 = np.sqrt(mean_squared_error(y_list,y_poly_pred_2))
poly_reg = PolynomialFeatures(degree = 3)
x_poly_3 = poly_reg.fit_transform(x_list)
linreg.fit(x_poly_3, y_list)
y_poly_pred_3 = linreg.predict(x_poly_3)
rmse3 = np.sqrt(mean_squared_error(y_list,y_poly_pred_3))
poly_reg = PolynomialFeatures(degree = 4)
x_poly_4 = poly_reg.fit_transform(x_list)
linreg.fit(x_poly_4, y_list)
y_poly_pred_4 = linreg.predict(x_poly_4)
rmse4 = np.sqrt(mean_squared_error(y_list,y_poly_pred_4))
In [ ]:
#Polynomial with Degree 4 has the lowest RMS error. So, it's the best model among al