0% found this document useful (0 votes)
10 views3 pages

ML Regression

Uploaded by

Dev Khatanhar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views3 pages

ML Regression

Uploaded by

Dev Khatanhar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

ml-regression

October 23, 2024

[3]: # Load the car dataset and inspect it


import pandas as pd
# Read the CSV into a DataFrame.
data = pd.read_csv('/content/car data.csv')

# Count the missing entries in each column and compute summary statistics
# (describe() covers the numeric columns).
data_summary = data.describe()
missing_values = data.isna().sum()

# Notebook display expression: show both results as the cell output.
missing_values, data_summary

[3]: (Car_Name 0
Year 0
Selling_Price 0
Present_Price 0
Kms_Driven 0
Fuel_Type 0
Seller_Type 0
Transmission 0
Owner 0
dtype: int64,
Year Selling_Price Present_Price Kms_Driven Owner
count 301.000000 301.000000 301.000000 301.000000 301.000000
mean 2013.627907 4.661296 7.628472 36947.205980 0.043189
std 2.891554 5.082812 8.644115 38886.883882 0.247915
min 2003.000000 0.100000 0.320000 500.000000 0.000000
25% 2012.000000 0.900000 1.200000 15000.000000 0.000000
50% 2014.000000 3.600000 6.400000 32000.000000 0.000000
75% 2016.000000 6.000000 9.900000 48767.000000 0.000000
max 2018.000000 35.000000 92.600000 500000.000000 3.000000)

[8]: import numpy as np

# Regression target: the Selling_Price column.
y = data.loc[:, 'Selling_Price']

# Feature matrix built from the Owner and Kms_Driven columns; get_dummies
# one-hot encodes any categorical column among them (dropping the first level).
X_car = pd.get_dummies(data.loc[:, ['Owner', 'Kms_Driven']], drop_first=True)

1
def train_test_split_manual(X, y, test_size=0.2, random_state=None):
    """Randomly partition ``X`` and ``y`` into train and test subsets.

    Args:
        X: pandas DataFrame (or Series) of features; rows are picked with
            ``.iloc``.
        y: pandas Series of targets with the same number of rows as ``X``.
        test_size: Fraction of rows assigned to the test subset. The count is
            ``int(len(X) * test_size)``, i.e. rounded down.
        random_state: Optional seed. When given, the shuffle is reproducible.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same number of rows.
    """
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    # A private RandomState produces the same permutation as seeding the
    # global generator with this value, but leaves NumPy's global random
    # state untouched for the rest of the program.
    if random_state is None:
        rng = np.random
    else:
        rng = np.random.RandomState(random_state)

    indices = rng.permutation(len(X))
    test_set_size = int(len(X) * test_size)

    # The first `test_set_size` shuffled positions form the test set.
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]

    return (
        X.iloc[train_indices],
        X.iloc[test_indices],
        y.iloc[train_indices],
        y.iloc[test_indices],
    )

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split_manual(
    X_car, y, test_size=0.2, random_state=42
)

# Implementing a simple linear regression model


class SimpleLinearRegression:
    """Ordinary least-squares linear regression with an intercept term."""

    def __init__(self):
        # Fitted weights with the intercept first; None until fit() is called.
        self.coefficients = None

    @staticmethod
    def _design_matrix(X):
        """Return ``X`` as a 2-D float array with a leading column of ones."""
        features = np.asarray(X, dtype=float)
        if features.ndim == 1:
            # Treat a flat vector as a single feature column.
            features = features.reshape(-1, 1)
        return np.column_stack([np.ones(features.shape[0]), features])

    def fit(self, X, y):
        """Estimate the coefficients that minimise the squared error.

        Args:
            X: Feature matrix (array-like or DataFrame), one row per sample.
            y: Target values, one per row of ``X``.
        """
        design = self._design_matrix(X)
        targets = np.asarray(y, dtype=float)
        # lstsq solves the same least-squares problem as the normal equation
        # but does not invert X^T X, so collinear features yield the
        # minimum-norm solution instead of a LinAlgError or unstable weights.
        self.coefficients, *_ = np.linalg.lstsq(design, targets, rcond=None)

    def predict(self, X):
        """Return predictions for ``X`` as a NumPy array.

        Args:
            X: Feature matrix with the same columns used in ``fit``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.coefficients is None:
            raise RuntimeError("fit() must be called before predict()")
        return self._design_matrix(X).dot(self.coefficients)

# Train the linear regression model on the training split


model_car = SimpleLinearRegression()
model_car.fit(X_train, y_train)

# Predict on the held-out test rows; y_car is read by the evaluation code below.
y_car = model_car.predict(X_test)

def calculate_rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Both inputs are converted to NumPy arrays first, so values are compared
    by position. Without this, two pandas Series with different indexes would
    be aligned by label and yield NaN, and plain lists could not be subtracted.

    Args:
        y_true: Observed values (array-like or Series).
        y_pred: Predicted values (array-like or Series), in the same order.

    Returns:
        The RMSE as a NumPy float.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((actual - predicted) ** 2))

# Calculate RMSE on the held-out test set
rmse_car = calculate_rmse(y_test, y_car)

# Pair each actual selling price with the model's prediction for that row.
predicted_sales = pd.DataFrame(
    {'Actual Selling Price': y_test, 'Predicted Price': y_car}
)

# Display the first few rows of the predictions
print(predicted_sales.head())

# NOTE: the label reads "RSME" (sic, for RMSE); it is left unchanged so the
# printed line still matches the recorded notebook output.
print(f'RSME score: {rmse_car}')

Actual Selling Price Predicted Price


177 0.35 4.638736
289 10.11 4.570413
228 4.95 4.827647
198 0.15 2.985661
60 6.95 4.722701
RSME score: 4.786033522433056

You might also like