Code 1:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
# Load the raw purchase records (path is relative to the working directory).
dataset = pd.read_csv('customer_purchases.csv')

# Fill missing Customer_ID values with the column mean.
# NOTE(review): a mean is not a meaningful identifier and the later cast to
# int truncates it -- dropping rows without an ID may be the better choice;
# confirm against the task description before changing.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
dataset[['Customer_ID']] = imputer.fit_transform(dataset[['Customer_ID']])

# Fill missing ages with the median.
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
dataset[['Age']] = imputer.fit_transform(dataset[['Age']])
Bahria University Islamabad Campus Department of Computer
Engineering
# Drop repeated (customer, purchase date) records; pandas keeps the first
# occurrence by default.
dataset = dataset.drop_duplicates(subset=['Customer_ID', 'Purchase_Date'])

# Lower-case the text columns so values differing only in case compare equal.
dataset['Gender'] = dataset['Gender'].str.lower()
dataset['Category'] = dataset['Category'].str.lower()

# Parse purchase dates; the explicit format makes any value that is not
# month/day/four-digit-year raise instead of being guessed.
dataset['Purchase_Date'] = pd.to_datetime(
    dataset['Purchase_Date'], format='%m/%d/%Y'
)

# Store age and ID as integers (astype(int) raises if any NaN is left).
dataset['Age'] = dataset['Age'].astype(int)
dataset['Customer_ID'] = dataset['Customer_ID'].astype(int)
# Rescale Purchase_Amount with MinMaxScaler (default output range is [0, 1]).
# ravel() flattens the (n, 1) result so it is assigned as a plain column.
scaler = MinMaxScaler()
dataset['Purchase_Amount'] = scaler.fit_transform(
    dataset[['Purchase_Amount']]
).ravel()

# y holds the object-dtype (string) columns, x holds every remaining column.
# NOTE(review): x therefore also contains any non-object, non-numeric column
# (e.g. a parsed datetime column), despite the "Numerical" label below.
y = dataset.select_dtypes(include=['object'])
x = dataset.drop(columns=y.columns)

print("x (Numerical Columns):")
print(x.head())
print("\ny (String Columns):")
print(y.head())
print("\nFinal Dataset after Preprocessing:")
print(dataset)
Code 2:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the housing data and draw one histogram per numeric column.
dataset = pd.read_csv('rida.csv')
dataset.hist()

# Mask non-positive entries (they become NaN), then fill every gap with the
# mean of its column.
dataset = dataset[dataset > 0]
dataset = dataset.fillna(dataset.mean())

# Open a new figure so the scatter is not drawn on top of the last histogram.
plt.figure()
plt.scatter(
    dataset['House size (sq. ft)'],
    dataset['Price (Rs. 1000)'],
    color='red',
)
plt.xlabel('House size')
plt.ylabel('Price')
# Every column except the last is a feature; the last column is the target.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit an ordinary least-squares line on the training rows only.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)
print('Slope: ', regressor.coef_)
print('intercept: ', regressor.intercept_)
# Predict the price of a 2000 sq. ft house. predict() expects a 2-D
# (samples, features) array, hence the reshape.
x200 = np.array([2000]).reshape(-1, 1)
y200 = regressor.predict(x200)
print('Price of house with 2000 square feet:', y200)

# Plot the training points with the fitted line and mark the 2000 sq. ft
# prediction. A new figure keeps this separate from earlier plots.
# NOTE(review): assumes X has a single feature column (house size) -- confirm.
plt.figure()
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.scatter(x200, y200, color='green', marker='x')
plt.title('Price (Rs. 1000) vs House size (sq. ft) (training set)')
plt.xlabel('House size (sq. ft)')
plt.ylabel('Price')
plt.show()
# Evaluate on the held-out rows. The predictions and the r2_score import were
# missing, so these names are defined here before use.
from sklearn.metrics import r2_score

y_pred = regressor.predict(X_test)

R2 = r2_score(y_test, y_pred)
print('R^2: {0}'.format(R2))

# Root-mean-squared error: square root of the mean squared residual.
residuals = abs(y_test - y_pred)
ss = (residuals**2).sum()
RMSE = np.sqrt(ss / len(y_test))
print('RMSE: ', RMSE)
Code 3:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # alias must be `plt`: every plotting call uses it

# Load the data and show the first rows as a quick sanity check.
dataset = pd.read_csv('rida5.csv')
print(dataset.head())

# Every column except the last is a feature; the last column is the target.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Raw data overview.
plt.scatter(X, y, color='red')
plt.title('House Size vs Price')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()
# Fit a straight line to the full data set (no train/test split is made here).
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Data points with the fitted line on top.
plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('Linear Regression (House Size vs Price)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()
# Expand X into degree-2 polynomial terms and fit a linear model on them.
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Draw the curve in ascending-x order; plotting in row order would zig-zag
# whenever the rows are not already sorted by house size.
# NOTE(review): assumes X has a single feature column -- confirm.
sort_idx = X[:, 0].argsort()
plt.scatter(X, y, color='red')
# Reuse the already-transformed X_poly instead of re-fitting the transformer.
plt.plot(X[sort_idx], lin_reg_2.predict(X_poly[sort_idx]), color='blue')
plt.title('Polynomial Regression (House Size vs Price)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()
# Dense, evenly spaced x values (step 0.1) spanning the observed range, shaped
# (n, 1) as predict() requires. X.min()/X.max() return scalars, whereas the
# builtin min()/max() on a 2-D array return one-element arrays.
x_grid = np.arange(X.min(), X.max(), 0.1).reshape(-1, 1)

# Linear model evaluated on the dense grid.
plt.scatter(X, y, color='red')
plt.plot(x_grid, lin_reg.predict(x_grid), color='blue')
plt.title('Linear Regression (Higher Resolution)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()

# Polynomial model evaluated on the dense grid. transform() reuses the
# already-fitted feature expansion; re-fitting at predict time is unnecessary.
plt.scatter(X, y, color='red')
plt.plot(x_grid, lin_reg_2.predict(poly_reg.transform(x_grid)), color='blue')
plt.title('Polynomial Regression (Higher Resolution)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()
# Predict the price for a house size of 350 with both models.
# NOTE(review): the * 1000 presumably converts a target stored in thousands of
# rupees into rupees -- confirm against the data set's price column.
y_pred_lin = lin_reg.predict([[350]]) * 1000
print(f'Predicted Price using Linear Regression: Rs. {y_pred_lin[0]:.2f}')

# transform() applies the already-fitted polynomial expansion to the new input.
y_pred_poly = lin_reg_2.predict(poly_reg.transform([[350]])) * 1000
print(f'Predicted Price using Polynomial Regression: Rs. {y_pred_poly[0]:.2f}')
# --- Linear model: RMSE and R^2 on the data it was fitted to ---
residuals_lin = abs(y - lin_reg.predict(X))
ss_lin = (residuals_lin**2).sum()
RMSE_lin = np.sqrt(ss_lin / len(y))
print(f'Linear Regression RMSE: {RMSE_lin:.2f}')

# score() must be given the true targets; scoring the model against its own
# predictions always yields 1.0.
linear_r2 = lin_reg.score(X, y)
print(f'Linear Regression R^2: {linear_r2:.2f}')

# --- Polynomial model: same metrics on the expanded features ---
# transform() reuses the fitted expansion; compute it once for both metrics.
X_poly_eval = poly_reg.transform(X)
residuals_poly = abs(y - lin_reg_2.predict(X_poly_eval))
ss_poly = (residuals_poly**2).sum()
RMSE_poly = np.sqrt(ss_poly / len(y))
print(f'Polynomial Regression RMSE: {RMSE_poly:.2f}')

poly_r2 = lin_reg_2.score(X_poly_eval, y)
print(f'Polynomial Regression R^2: {poly_r2:.2f}')
Based on the given code, please provide the Python code for each of the
following tasks, one by one, in the order given. Remember that the file name
of the dataset (if any) is ridasaman.csv.