9. Develop a program for Normal Curves.
import numpy as np
import matplotlib.pyplot as plt

sd = 1   # standard deviation
m = 0    # mean
x = np.linspace(-2, 2, 10000)
# Normal (Gaussian) probability density function
y = (1/(sd*np.sqrt(2*np.pi)))*np.exp(-0.5*((x-m)/sd)**2)
plt.plot(x, y)
plt.grid(True)
plt.show()
Output:
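The hand-written density can be cross-checked against SciPy's built-in normal distribution (a minimal sketch, assuming SciPy is installed; it is not part of the recorded program). Plotting a few standard deviations on one figure also shows how the curve widens:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm  # assumption: SciPy is available

m = 0
x = np.linspace(-4, 4, 1000)
for sd in (0.5, 1, 2):  # illustrative standard deviations
    manual = (1/(sd*np.sqrt(2*np.pi)))*np.exp(-0.5*((x-m)/sd)**2)
    assert np.allclose(manual, norm.pdf(x, loc=m, scale=sd))  # matches the formula above
    plt.plot(x, manual, label=f"sd = {sd}")
plt.legend()
plt.grid(True)
plt.show()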
10. Develop a program for the correlation coefficient and scatter plot.
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt

df = pd.read_csv("/content/Housing.csv", usecols=['price', 'area'])

# Pearson correlation coefficient from its definition:
# r = sum((x - x_mean)(y - y_mean)) / sqrt(sum((x - x_mean)^2) * sum((y - y_mean)^2))
p_mean = df['price'].mean()
a_mean = df['area'].mean()
n = len(df)
sum_n = ((df['price'] - p_mean) * (df['area'] - a_mean)).sum()
sum_p = ((df['price'] - p_mean)**2).sum()
sum_a = ((df['area'] - a_mean)**2).sum()
ans = sum_n / sqrt(sum_p * sum_a)
print(f"Correlation Coefficient: {ans}")

plt.figure(figsize=(8, 6))
plt.scatter(df['area'], df['price'], label='Data Points')
# Reference line through the point of means; its slope is r expressed in raw data units
plt.axline((a_mean, p_mean), slope=ans, color='red', label=f'Correlation Line (r = {ans:.2f})')
plt.xlabel("Area")
plt.ylabel("Price")
plt.title("Area vs. Price with Correlation")
plt.legend()
plt.grid(True)
plt.show()
Output :
Correlation Coefficient: 0.5359973457780801
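As a quick sanity check (a sketch, not part of the recorded program, assuming the same /content/Housing.csv file), pandas and NumPy both compute the Pearson coefficient directly:

import pandas as pd
import numpy as np

df = pd.read_csv("/content/Housing.csv", usecols=['price', 'area'])
print(df['price'].corr(df['area']))                 # pandas built-in Pearson correlation
print(np.corrcoef(df['price'], df['area'])[0, 1])   # NumPy equivalent

Both should print the same value as the hand-computed coefficient above.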
11. Develop a program for Regression Techniques.
(a) Linear Regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
df = pd.read_csv("Salary_Data.csv")
print(df.head())
X = df[['YearsExperience']]  # Independent variable: years of experience
y = df['Salary']             # Dependent variable: salary
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"Intercept: {model.intercept_:.2f}")
print(f"Coefficient: {model.coef_[0]:.2f}")
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")
plt.scatter(X_test, y_test, color="blue", label="Actual Data")
plt.plot(X_test, y_pred, color="red", linewidth=2, label="Predicted Line")
plt.xlabel("Experience (Years)")
plt.ylabel("Salary")
plt.title("Linear Regression - Salary vs. Experience")
plt.legend()
plt.show()
Output :
Intercept: 25321.58
Coefficient: 9423.82
Mean Squared Error: 49830096.86
R² Score: 0.90
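Continuing from the fitted model above (a sketch; the 5-year figure is only an illustrative input, not a value from the dataset), a prediction for new data is made by passing a DataFrame with the same column name:

new_X = pd.DataFrame({'YearsExperience': [5.0]})    # hypothetical experience value
predicted_salary = model.predict(new_X)[0]
print(f"Predicted salary for 5 years of experience: {predicted_salary:.2f}")

Using a DataFrame (rather than a bare list) keeps the feature name consistent with the training data, so scikit-learn does not warn about missing feature names.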
(b) Multiple Linear Regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
df = pd.read_csv("Housing.csv")
X = df[['area', 'bathrooms', 'bedrooms', 'stories']]   # predictor features
Y = df['price']                                        # target variable
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Intercept: {model.intercept_:.2f}")
print(f"Coefficients: {model.coef_}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")
plt.scatter(X_test['area'], y_test, color="blue", label="Actual Data")
# Predictions depend on all four features, so plotted against area alone they do not
# lie on a single straight line; show them as points rather than a connected line.
plt.scatter(X_test['area'], y_pred, color="red", label="Predicted Values")
plt.xlabel("Area")
plt.ylabel("Price")
plt.title("Multiple Linear Regression - Price vs. Area")
plt.legend()
plt.show()
Output :
Intercept: -64342.42
Coefficients: [3.49009738e+02 1.25815095e+06 1.74685138e+05 4.83859660e+05]
Mean Squared Error: 2457741642022.09
R² Score: 0.51
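Continuing from the fitted model above (a sketch, not part of the recorded program), pairing each coefficient with its column name makes the printed array easier to read, and a hypothetical house shows how the model is applied:

coef_table = pd.Series(model.coef_, index=X.columns)   # one coefficient per feature
print(coef_table)

new_house = pd.DataFrame({'area': [3000], 'bathrooms': [2],
                          'bedrooms': [3], 'stories': [2]})  # hypothetical inputs
print(f"Predicted price: {model.predict(new_house)[0]:.2f}")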
(c) Polynomial Regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
df = pd.read_csv("Salary_Data.csv")
X = df[['YearsExperience']]
Y = df['Salary']

# Expand the single feature into polynomial terms (1, x, x^2, x^3, x^4)
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

# Split the polynomial features and target into training and test sets
X_train_poly, X_test_poly, y_train, y_test = train_test_split(X_poly, Y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train_poly, y_train)
y_pred = model.predict(X_test_poly)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")
plt.scatter(X, Y, color='blue', label="Actual Data")
# Draw the fitted curve over all samples (assumes the CSV rows are ordered by
# YearsExperience; if not, sort before plotting so the dashed curve is smooth)
plt.plot(X, model.predict(X_poly), color='red', linestyle='dashed', label="Polynomial Fit")
plt.xlabel("YearsExperience")
plt.ylabel("Salary")
plt.title("Polynomial Regression Curve")
plt.legend()
plt.grid(True)
plt.show()
Output :
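A tidier way to build the same kind of model (a sketch, assuming Salary_Data.csv is in the working directory; not the recorded program above) is a scikit-learn Pipeline, which chains the polynomial expansion and the linear fit and makes it easy to compare degrees:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

df = pd.read_csv("Salary_Data.csv")
X, y = df[['YearsExperience']], df['Salary']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for degree in (1, 2, 3, 4):   # compare several polynomial degrees on the same split
    pipe = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
    pipe.fit(X_train, y_train)
    print(degree, round(r2_score(y_test, pipe.predict(X_test)), 3))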