0% found this document useful (0 votes)
2 views

PythonFile[1]

The document contains Python code for three statistical analyses: Normal Curves, correlation coefficient with scatter points, and various regression techniques (linear, multi-linear, and polynomial). Each section includes data loading, model training, and visualization using libraries like NumPy, pandas, and Matplotlib. The outputs provide key metrics such as correlation coefficients, mean squared errors, and R² scores for the regression models.

Uploaded by

Aditi Sharma
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

PythonFile[1]

The document contains Python code for three statistical analyses: Normal Curves, correlation coefficient with scatter points, and various regression techniques (linear, multi-linear, and polynomial). Each section includes data loading, model training, and visualization using libraries like NumPy, pandas, and Matplotlib. The outputs provide key metrics such as correlation coefficients, mean squared errors, and R² scores for the regression models.

Uploaded by

Aditi Sharma
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

9. Develop a program to plot the Normal Curve.

import numpy as np

import matplotlib.pyplot as plt

def normal_pdf(x, mean=0.0, sd=1.0):
    """Return the normal (Gaussian) probability density evaluated at ``x``.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the density.
        mean: Centre of the distribution.
        sd: Standard deviation; must be strictly positive.

    Returns:
        The density at each point of ``x`` (NumPy scalar or array).

    Raises:
        ValueError: If ``sd`` is not strictly positive.
    """
    if sd <= 0:
        raise ValueError("sd must be positive")
    z = (x - mean) / sd
    return (1 / (sd * np.sqrt(2 * np.pi))) * np.exp(-0.5 * z ** 2)


def normal_curve(mean=0.0, sd=1.0, span=2.0, num=10000):
    """Return ``(x, y)`` samples of the normal curve.

    The x-window is ``mean - span * sd`` to ``mean + span * sd`` so the curve
    stays centred and equally wide (in standard deviations) whatever the
    parameters are, instead of being tied to a fixed interval.

    Args:
        mean: Centre of the distribution.
        sd: Standard deviation; must be strictly positive.
        span: Half-width of the window, in standard deviations.
        num: Number of evenly spaced sample points.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays of length ``num``.

    Raises:
        ValueError: If ``sd`` is not strictly positive.
    """
    x = np.linspace(mean - span * sd, mean + span * sd, num)
    return x, normal_pdf(x, mean, sd)


if __name__ == "__main__":
    sd = 1
    m = 0

    # With m=0, sd=1 and the default span this is the interval [-2, 2].
    x, y = normal_curve(m, sd)

    plt.plot(x, y)
    plt.grid(True)
    plt.show()

Output:

10. Develop a program to compute the correlation coefficient and plot the scatter points.


import pandas as pd

import matplotlib.pyplot as plt

from math import sqrt

def pearson_r(x, y):
    """Return the Pearson correlation coefficient of two equal-length series.

    Computed from first principles as
    ``sum(dx * dy) / sqrt(sum(dx**2) * sum(dy**2))`` where ``dx`` and ``dy``
    are the deviations of ``x`` and ``y`` from their means.

    Args:
        x: pandas Series (or NumPy array) of numeric values.
        y: pandas Series (or NumPy array) of numeric values, aligned with ``x``.

    Returns:
        The correlation coefficient.

    Raises:
        ValueError: If either input has zero variance (r is undefined).
    """
    dx = x - x.mean()
    dy = y - y.mean()
    denom = sqrt((dy ** 2).sum() * (dx ** 2).sum())
    if denom == 0:
        raise ValueError("correlation is undefined for constant data")
    return (dy * dx).sum() / denom


def least_squares_slope(x, y):
    """Return the slope of the least-squares line of ``y`` on ``x``.

    The slope is ``sum(dx * dy) / sum(dx**2)``. It carries the units of
    ``y / x`` and is NOT the same number as the (unitless) correlation
    coefficient unless both variables have equal spread.

    Args:
        x: pandas Series (or NumPy array) of the independent variable.
        y: pandas Series (or NumPy array) of the dependent variable.

    Returns:
        The slope of the best-fit line.

    Raises:
        ValueError: If ``x`` has zero variance (the slope is undefined).
    """
    dx = x - x.mean()
    dy = y - y.mean()
    sxx = (dx ** 2).sum()
    if sxx == 0:
        raise ValueError("slope is undefined when x is constant")
    return (dx * dy).sum() / sxx


if __name__ == "__main__":
    df = pd.read_csv("/content/Housing.csv", usecols=['price', 'area'])

    area = df['area']
    price = df['price']

    p_mean = price.mean()
    a_mean = area.mean()

    ans = pearson_r(area, price)
    print(f"Correlation Coefficient: {ans}")

    # The line through the point of means must use the least-squares slope;
    # using r itself as the slope draws an essentially flat line whenever the
    # two axes are on different scales.
    slope = least_squares_slope(area, price)

    plt.figure(figsize=(8, 6))
    plt.scatter(area, price, label='Data Points')
    plt.axline((a_mean, p_mean), slope=slope, color='red', label=f'Correlation Line (r = {ans:.2f})')

    plt.xlabel("Area")
    plt.ylabel("Price")
    plt.title("Area vs. Price with Correlation")
    plt.legend()
    plt.grid(True)
    plt.show()

Output :
Correlation Coefficient: 0.5359973457780801

11. Develop programs for Regression Techniques.


(a) Linear Regression
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

# Read the salary dataset and preview its first rows.
salary_df = pd.read_csv("Salary_Data.csv")
print(salary_df.head())

# The predictor stays a one-column DataFrame (2-D); the target is a Series.
experience = salary_df[['YearsExperience']]
salary = salary_df['Salary']

# Keep 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    experience, salary, test_size=0.2, random_state=42
)

# Fit on the training rows, then predict the held-out ones.
regressor = LinearRegression().fit(X_train, y_train)
predicted = regressor.predict(X_test)

# Error metrics are computed on the held-out rows only.
test_mse = mean_squared_error(y_test, predicted)
test_r2 = r2_score(y_test, predicted)

# Report the fitted line first, then the test-set metrics.
print(f"Intercept: {regressor.intercept_:.2f}")
print(f"Coefficient: {regressor.coef_[0]:.2f}")
print(f"Mean Squared Error: {test_mse:.2f}")
print(f"R² Score: {test_r2:.2f}")

# Held-out observations in blue, the model's predictions for them in red.
plt.scatter(X_test, y_test, color="blue", label="Actual Data")
plt.plot(X_test, predicted, color="red", linewidth=2, label="Predicted Line")
plt.title("Linear Regression - Salary vs. Experience")
plt.xlabel("Experience (Years)")
plt.ylabel("Salary")
plt.legend()
plt.show()

Output :
Intercept: 25321.58

Coefficient: 9423.82

Mean Squared Error: 49830096.86

R² Score: 0.90

(b) Multi-Linear Regression


import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

# Load the housing dataset.
df = pd.read_csv("Housing.csv")

# Four numeric predictors and the price target.
X = df[['area', 'bathrooms','bedrooms','stories']]
Y = df['price']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Metrics are computed on the held-out rows only.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Intercept: {model.intercept_:.2f}")
print(f"Coefficients: {model.coef_}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Each prediction depends on all four features, so against 'area' alone the
# predictions are a cloud of points, not a line. Joining them with plt.plot
# would connect the rows in shuffled order and draw a zig-zag, so they are
# shown as markers instead.
plt.scatter(X_test['area'], y_test, color="blue", label="Actual Data")
plt.scatter(X_test['area'], y_pred, color="red", marker="x", label="Predicted Data")

plt.xlabel("Area")
plt.ylabel("Price")
plt.title("Multi-Linear Regression - Price vs. Area")
plt.legend()
plt.show()

Output :
Intercept: -64342.42

Coefficients: [3.49009738e+02 1.25815095e+06 1.74685138e+05 4.83859660e+05]

Mean Squared Error: 2457741642022.09

R² Score: 0.51

(c) Polynomial Regression


import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

from sklearn.preprocessing import PolynomialFeatures

# Load the salary dataset: one predictor column and the salary target.
df = pd.read_csv("Salary_Data.csv")
X = df[['YearsExperience']]
Y = df['Salary']

# Split once. A second split of the expanded features is unnecessary: the
# same rows can be expanded after splitting.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Expand the single feature into polynomial terms up to degree 4. The
# transformer is fitted on the training rows and reused for every other input.
poly_reg = PolynomialFeatures(degree=4)
X_train_poly = poly_reg.fit_transform(X_train)
X_test_poly = poly_reg.transform(X_test)

# A linear model on the polynomial terms gives the polynomial regression.
model = LinearRegression()
model.fit(X_train_poly, y_train)

y_pred = model.predict(X_test_poly)

# Metrics are computed on the held-out rows only.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Draw the fitted curve over x sorted ascending; plotting in raw row order
# only gives a clean curve if the file happens to be sorted already.
X_sorted = X.sort_values('YearsExperience')
curve = model.predict(poly_reg.transform(X_sorted))

plt.scatter(X, Y, color='blue', label="Actual Data")
plt.plot(X_sorted['YearsExperience'], curve, color='red', linestyle='dashed', label="Polynomial Fit")

plt.xlabel("YearsExperience")
plt.ylabel("Salary")
plt.title("Polynomial Regression Curve")
plt.legend()
plt.grid(True)
plt.show()

Output :
Intercept: 25321.58

Coefficient: 9423.82

Mean Squared Error: 49830096.86

R² Score: 0.90

You might also like