0% found this document useful (0 votes)
8 views

ML Regression

Uploaded by

Dev Khatanhar
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views

ML Regression

Uploaded by

Dev Khatanhar
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

ml-regression

October 23, 2024

[3]: # Load the car dataset and inspect it (no column is dropped in this cell)


import pandas as pd
data = pd.read_csv('/content/car data.csv')  # load the car dataset into a DataFrame

# Check for missing values or inconsistencies:
# count the null entries in each column and compute describe() summary statistics


missing_values = data.isnull().sum()
data_summary = data.describe()

# Bare tuple as the cell's last expression so the notebook displays both results
missing_values, data_summary

[3]: (Car_Name 0
Year 0
Selling_Price 0
Present_Price 0
Kms_Driven 0
Fuel_Type 0
Seller_Type 0
Transmission 0
Owner 0
dtype: int64,
Year Selling_Price Present_Price Kms_Driven Owner
count 301.000000 301.000000 301.000000 301.000000 301.000000
mean 2013.627907 4.661296 7.628472 36947.205980 0.043189
std 2.891554 5.082812 8.644115 38886.883882 0.247915
min 2003.000000 0.100000 0.320000 500.000000 0.000000
25% 2012.000000 0.900000 1.200000 15000.000000 0.000000
50% 2014.000000 3.600000 6.400000 32000.000000 0.000000
75% 2016.000000 6.000000 9.900000 48767.000000 0.000000
max 2018.000000 35.000000 92.600000 500000.000000 3.000000)

[8]: import numpy as np

# Select the feature columns and the regression target
# (the actual train/test split happens further down)

# NOTE(review): Owner and Kms_Driven both appear in the numeric describe() output,
# so get_dummies presumably passes them through unchanged — confirm
X_car = pd.get_dummies(data[['Owner','Kms_Driven']], drop_first=True)
y = data['Selling_Price']

1
def train_test_split_manual(X, y, test_size=0.2, random_state=None):
    """Randomly split features and target into train and test subsets.

    Args:
        X: pandas DataFrame (or Series) of features; rows are selected
            positionally with ``.iloc``.
        y: pandas Series of targets with one entry per row of ``X``.
        test_size: Fraction of rows placed in the test subset. The number of
            test rows is ``int(len(X) * test_size)``, i.e. rounded down.
        random_state: Optional seed. When given, the shuffle is reproducible;
            when ``None``, NumPy's global random state is used.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or ``test_size`` is
            outside ``[0, 1]``.
    """
    n_rows = len(X)
    if len(y) != n_rows:
        raise ValueError(
            f"X and y must have the same length, got {n_rows} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be within [0, 1], got {test_size}")

    if random_state is None:
        indices = np.random.permutation(n_rows)
    else:
        # A private legacy generator produces the same permutation as
        # np.random.seed(random_state) + np.random.permutation(...), but does
        # not reseed the process-wide RNG as a side effect.
        indices = np.random.RandomState(random_state).permutation(n_rows)

    test_set_size = int(n_rows * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]
    return (
        X.iloc[train_indices],
        X.iloc[test_indices],
        y.iloc[train_indices],
        y.iloc[test_indices],
    )

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split_manual(
    X_car, y, test_size=0.2, random_state=42
)

# Implementing a simple linear regression model


class SimpleLinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit``, ``coefficients`` holds ``[intercept, w_1, ..., w_k]`` for
    the ``k`` feature columns, in input order.
    """

    def __init__(self):
        # Learned weights (intercept first); None until fit() has been called.
        self.coefficients = None

    @staticmethod
    def _add_bias(X):
        """Return ``X`` as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.c_[np.ones(X.shape[0]), X]

    def fit(self, X, y):
        """Estimate the coefficients that minimise squared error on ``(X, y)``.

        Args:
            X: Array-like of shape (n_samples, n_features) or (n_samples,).
            y: Array-like of targets with n_samples entries.
        """
        design = self._add_bias(X)
        targets = np.asarray(y, dtype=float)
        # lstsq solves the same least-squares problem as the normal equation
        # inv(X'X) X'y, but stays well-defined when X'X is singular or
        # ill-conditioned (e.g. collinear features), returning the
        # minimum-norm solution instead of failing or amplifying round-off.
        self.coefficients = np.linalg.lstsq(design, targets, rcond=None)[0]

    def predict(self, X):
        """Return predictions for ``X`` using the fitted coefficients.

        Args:
            X: Array-like with the same feature columns used in ``fit``.

        Returns:
            NumPy array with one prediction per row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.coefficients is None:
            raise RuntimeError("predict() called before fit()")
        return self._add_bias(X).dot(self.coefficients)

# Train the linear regression model on the training split


model_car = SimpleLinearRegression()
model_car.fit(X_train, y_train)

# Predict selling prices for the held-out test rows
y_car = model_car.predict(X_test)

def calculate_rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values with the same shape.

    Returns:
        The square root of the mean squared difference, as a NumPy float.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Convert to plain arrays so elements are compared by position: subtracting
    # two pandas Series would align on index labels and silently introduce NaNs
    # or pair up the wrong rows.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.shape != predicted.shape:
        # Refuse rather than let NumPy broadcast e.g. (n, 1) against (n,).
        raise ValueError(
            f"shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
        )
    return np.sqrt(np.mean((actual - predicted) ** 2))

# Calculate RMSE on the held-out test set
rmse_car = calculate_rmse(y_test, y_car)

# Put actual and predicted prices side by side (rows keep y_test's index labels)
predicted_sales = pd.DataFrame({
    'Actual Selling Price': y_test,
    'Predicted Price': y_car,
})

# Display the first few rows of the predictions
print(predicted_sales.head())

# Label corrected from the "RSME" typo: the value is the root-mean-squared error
print(f'RMSE score: {rmse_car}')

Actual Selling Price Predicted Price


177 0.35 4.638736
289 10.11 4.570413
228 4.95 4.827647
198 0.15 2.985661
60 6.95 4.722701
RSME score: 4.786033522433056

You might also like