ML Assignment1
ML Assignment1
ipynb - Colaboratory
Anil Kumar
(0901CS223D04)
# Imports for the California-housing linear-regression notebook.
# Several statements had been fused or split across lines; each import is
# restored to its own line with every originally imported name kept.
import numpy as np
from sklearn.datasets import fetch_california_housing
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import mean_squared_error
# Load the California housing data and keep only the `.frame` attribute of
# the returned object (requested via as_frame=True).
housing = fetch_california_housing(as_frame=True).frame
housing.head()

# One histogram per column, 50 bins each.
housing.hist(figsize=(12, 8), bins=50)
plt.show()
1/4
2/11/24, 6:25 PM linear regression.ipynb - Colaboratory
# Scatter plot of Latitude against Longitude, coloured by MedHouseVal.
# NOTE(review): the original call was cut off after `legend=True, s`, so any
# further keyword arguments (possibly a marker-size `s=...`) were lost and are
# not reconstructed here -- restore them from the notebook if available.
housing.plot(
    kind="scatter",
    x="Longitude",
    y="Latitude",
    c="MedHouseVal",
    cmap="jet",
    colorbar=True,
    legend=True,
)
plt.show()
Longitude
2/4
2/11/24, 6:25 PM linear regression.ipynb - Colaboratory
# Pairwise correlation matrix of all columns.
corr = housing.corr()
# Correlation of each column with MedHouseVal, smallest first
# (ascending order is the sort_values default).
corr["MedHouseVal"].sort_values()
Latitude -0.144160
AveBedrms -0.046701
Longitude -0.045967
Population -0.024650
AveOccup -0.023737
HouseAge 0.105623
AveRooms 0.151948
MedInc 0.688075
MedHouseVal 1.000000
Name: MedHouseVal, dtype: float64
# Features are every column except the last; the target is the last column.
# NOTE(review): this assumes MedHouseVal is the final column of `housing` --
# confirm against the frame's column order.
X = housing.iloc[:, :-1]
y = housing.iloc[:, -1]

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# `regression_pipeline` was used without being defined anywhere in the
# visible file. It is rebuilt here from the imported classes and the
# estimator display that follows the fit (StandardScaler -> LinearRegression).
# NOTE(review): the step names are an assumption; the original ones were lost.
regression_pipeline = Pipeline(
    [
        ("scaler", StandardScaler()),
        ("regressor", LinearRegression()),
    ]
)
regression_pipeline.fit(X_train, y_train)
Pipeline
• StandardScaler
• LinearRegression
# Predict on the held-out split and compute R^2 against the true targets.
y_pred = regression_pipeline.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)
0.575787706032451
3/4
2/11/24, 6:25 PM linear regression.ipynb - Colaboratory
# Show the dtypes of the true and predicted targets. The underscores in the
# variable names had been lost (`y test`, `y pred`), which is a syntax error.
print(y_test.dtype)
print(y_pred.dtype)
float64
float64
4/4