0% found this document useful (0 votes)
2 views

Code 1

The document contains three Python code snippets for data preprocessing and regression analysis using datasets related to customer purchases and house prices. The first code snippet focuses on cleaning and transforming a customer purchase dataset, while the second and third snippets perform linear and polynomial regression on house price data. The final task requests the creation of a new Python code based on the provided examples, using a dataset named 'ridasaman.csv'.

Uploaded by

ridasaman47
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

Code 1

The document contains three Python code snippets for data preprocessing and regression analysis using datasets related to customer purchases and house prices. The first code snippet focuses on cleaning and transforming a customer purchase dataset, while the second and third snippets perform linear and polynomial regression on house price data. The final task requests the creation of a new Python code based on the provided examples, using a dataset named 'ridasaman.csv'.

Uploaded by

ridasaman47
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 3

Code 1:

# Code 1: clean and normalise the customer purchase dataset.
#
# Steps, in order: impute missing values, drop duplicate purchases,
# normalise text/date/integer columns, min-max scale the purchase amount,
# then split the frame into string and non-string columns for inspection.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

dataset = pd.read_csv('customer_purchases.csv')

# Fill missing Customer_ID values with the column mean.
# NOTE(review): mean-imputing an identifier column can produce IDs that
# belong to no real customer -- confirm this is what the task intends.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
dataset[['Customer_ID']] = imputer.fit_transform(dataset[['Customer_ID']])

# Fill missing Age values with the column median.
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
dataset[['Age']] = imputer.fit_transform(dataset[['Age']])

# Keep only the first row for each (customer, purchase date) pair.
dataset = dataset.drop_duplicates(subset=['Customer_ID', 'Purchase_Date'])

# Lower-case the text columns so differently-cased spellings compare equal.
dataset['Gender'] = dataset['Gender'].str.lower()
dataset['Category'] = dataset['Category'].str.lower()

# Parse dates written as month/day/year and cast the imputed float
# columns back to integers.
dataset['Purchase_Date'] = pd.to_datetime(
    dataset['Purchase_Date'], format='%m/%d/%Y'
)
dataset['Age'] = dataset['Age'].astype(int)
dataset['Customer_ID'] = dataset['Customer_ID'].astype(int)

# Rescale Purchase_Amount to the [0, 1] range.
scaler = MinMaxScaler()
dataset['Purchase_Amount'] = scaler.fit_transform(dataset[['Purchase_Amount']])

# y holds the object-dtype (string) columns; x holds every other column.
y = dataset.select_dtypes(include=['object'])
x = dataset.drop(columns=y.columns)

print("x (Numerical Columns):")
print(x.head())
print("\ny (String Columns):")
print(y.head())
print("\nFinal Dataset after Preprocessing:")
print(dataset)

Code 2:
# Code 2: simple linear regression of house price on house size.
#
# Loads the data, replaces non-positive/missing values with column means,
# fits a LinearRegression on an 80/20 train/test split, predicts the price
# of a 2000 sq. ft house, and reports R^2 and RMSE on the test split.
import math

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

dataset = pd.read_csv('rida.csv')
dataset.hist()

# Mask non-positive entries (they become NaN), then fill every NaN with
# the mean of its column.
dataset = dataset[dataset > 0]
dataset = dataset.fillna(dataset.mean())

# Open a fresh figure so the scatter is not drawn on top of the last
# histogram subplot created by dataset.hist().
plt.figure()
plt.scatter(
    dataset['House size (sq. ft)'],
    dataset['Price (Rs. 1000)'],
    color='red',
)
plt.xlabel('House size')
plt.ylabel('Price')

# Last column is the target; everything before it is the feature matrix.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

regressor = LinearRegression()
regressor.fit(X_train, y_train)
print('Slope: ', regressor.coef_)
print('intercept: ', regressor.intercept_)

# Predict the price of a single 2000 sq. ft house (shape (1, 1) input).
x200 = np.array([2000]).reshape(-1, 1)
y200 = regressor.predict(x200)

# Mark the prediction and draw the fitted line over the training inputs;
# plotting a "line" through the single point x200 would show nothing.
plt.scatter(x200, y200, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Price (Rs. 1000) vs House size (sq. ft) (training set)')
plt.xlabel('Price of House size')
plt.ylabel('Price')
plt.show()

print('Price of house with 2000 square feet:', y200)

# Evaluate on the held-out split; y_pred must be computed before scoring.
y_pred = regressor.predict(X_test)
R2 = r2_score(y_test, y_pred)
print('R^2: {0}'.format(R2))

residuals = abs(y_test - y_pred)
ss = (residuals ** 2).sum()
RMSE = math.sqrt(ss / len(y_test))
print('RMSE: ', RMSE)

Code 3:
# Code 3: compare linear and degree-2 polynomial regression of house price
# on house size, plot both fits, predict the price at size 350, and report
# RMSE and R^2 for each model on the full dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # alias must be `plt`; the script uses plt.*
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

dataset = pd.read_csv('rida5.csv')
print(dataset.head())

# Last column is the target; everything before it is the feature matrix.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Raw data.
plt.scatter(X, y, color='red')
plt.title('House Size vs Price')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()

# Linear fit.
lin_reg = LinearRegression()
lin_reg.fit(X, y)

plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('Linear Regression (House Size vs Price)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()

# Degree-2 polynomial fit: expand the features once with fit_transform,
# then reuse the fitted expander via transform for all later inputs.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

plt.scatter(X, y, color='red')
plt.plot(X, lin_reg_2.predict(poly_reg.transform(X)), color='blue')
plt.title('Polynomial Regression (House Size vs Price)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()

# Dense grid over the observed feature range for a smoother curve.
# X.min()/X.max() return scalars; the builtin min()/max() on a 2-D array
# would compare whole rows instead.
x_grid = np.arange(X.min(), X.max(), 0.1)
x_grid = x_grid.reshape(len(x_grid), 1)

plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('Linear Regression (Higher Resolution)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()

plt.scatter(X, y, color='red')
plt.plot(x_grid, lin_reg_2.predict(poly_reg.transform(x_grid)), color='blue')
plt.title('Polynomial Regression (Higher Resolution)')
plt.xlabel('House Size (sq. ft)')
plt.ylabel('Price')
plt.show()

# Predictions at size 350, multiplied by 1000 before printing as Rs.
y_pred_lin = lin_reg.predict([[350]]) * 1000
print(f'Predicted Price using Linear Regression: Rs. {y_pred_lin[0]:.2f}')
y_pred_poly = lin_reg_2.predict(poly_reg.transform([[350]])) * 1000
print(f'Predicted Price using Polynomial Regression: Rs. {y_pred_poly[0]:.2f}')

# Linear model metrics.
residuals_lin = abs(y - lin_reg.predict(X))
ss_lin = (residuals_lin ** 2).sum()
RMSE_lin = np.sqrt(ss_lin / len(y))
print(f'Linear Regression RMSE: {RMSE_lin:.2f}')
# Score against the true targets; scoring the model against its own
# predictions always yields R^2 == 1.0.
linear_r2 = lin_reg.score(X, y)
print(f'Linear Regression R^2: {linear_r2:.2f}')

# Polynomial model metrics.
residuals_poly = abs(y - lin_reg_2.predict(poly_reg.transform(X)))
ss_poly = (residuals_poly ** 2).sum()
RMSE_poly = np.sqrt(ss_poly / len(y))
print(f'Polynomial Regression RMSE: {RMSE_poly:.2f}')
poly_r2 = lin_reg_2.score(poly_reg.transform(X), y)
print(f'Polynomial Regression R^2: {poly_r2:.2f}')
Based on the given codes, please give the Python code for the following
tasks, one by one, as given. Remember that the file name of the dataset
(if any) will be ridasaman.csv.

You might also like