# Linear regression program
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error
# Fit a simple linear regression of Salary on Years of Experience and report
# the coefficient of determination, MAE and MSE on a held-out test split.

# Load the dataset; the script reads the 'Years of Experience' and 'Salary'
# columns from it.
df = pd.read_csv('SALARY2.csv')
df.head()  # only displays something in a notebook cell; no effect in a script

# Take an explicit copy of the two columns so that filling missing values
# below modifies this frame rather than a view of `df`
# (avoids pandas' SettingWithCopyWarning).
df_binary = df[['Years of Experience', 'Salary']].copy()

# Exploratory plot: scatter with a second-order polynomial fit (order=2)
# and no confidence band (ci=None).
sns.lmplot(x="Years of Experience", y="Salary", data=df_binary, order=2,
           ci=None)
plt.show()

# Forward-fill missing values. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling.
df_binary = df_binary.ffill()

# scikit-learn expects 2-D arrays of shape (n_samples, n_features).
# Series.to_numpy() is used so no separate numpy import is required.
X = df_binary['Years of Experience'].to_numpy().reshape(-1, 1)
y = df_binary['Salary'].to_numpy().reshape(-1, 1)

# Hold out a quarter of the rows for evaluation. A fixed random_state keeps
# the split, and therefore the reported metrics, reproducible between runs.
# NOTE(review): test_size and random_state are assumptions; adjust to match
# the intended experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Fit ordinary least squares on the training split and score it (R^2) on
# the held-out split.
regr = LinearRegression()
regr.fit(X_train, y_train)
print("Coefficient of determination:", regr.score(X_test, y_test))

# Predict on the held-out split and plot the predictions against the
# observed values.
y_pred = regr.predict(X_test)
plt.scatter(X_test, y_test, color='b')
plt.plot(X_test, y_pred, color='k')
plt.show()

mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
# mean_squared_error returns the MSE; take its square root if the RMSE is
# needed (the `squared=False` switch is deprecated in recent scikit-learn).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MAE:", mae)
print("MSE:", mse)
# Output:
# Coefficient of determination:0.27853954081632637
# MAE: 1484.71615720524
# MSE: 2808565.8168227146
# Dataset:
# Years of Experience    Salary
# 5                      5000
# 3                      6000
# 15                     4000
# 7                      3000
# 20                     2000