#EXP2 EDA ON 2 VARIABLE DATASET

This document outlines an exploratory data analysis (EDA) of the Iris dataset, focusing on the relationship between sepal length and petal length using linear regression. It includes visualizations such as boxplots and a correlation heatmap, and compares regression models fitted with and without a bias (intercept) term. It also demonstrates classification of a synthetic dataset using a perceptron with and without bias, reporting the accuracy of both models.

Uploaded by

22b137
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views4 pages

#Exp2 Eda On 2 Variable Dataset

The document outlines an exploratory data analysis (EDA) on the Iris dataset, focusing on the relationship between sepal length and petal length using linear regression. It includes visualizations such as boxplots and heatmaps, and compares regression models with and without bias. Additionally, it demonstrates the classification of a synthetic dataset using a perceptron model with and without bias, reporting the accuracy for both models.

Uploaded by

22b137
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

#EXP2 EDA ON 2 VARIABLE DATASET

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

df = sns.load_dataset("iris")

# ---------- 1. Check Categorical Variable (Species) ----------


print("\nUnique Species in Dataset:", df['species'].unique())

# Boxplot: Species vs Sepal Length


plt.figure(figsize=(10, 5))
sns.boxplot(x="species", y="sepal_length", data=df, palette="coolwarm")
plt.title("Sepal Length Distribution Across Species")
plt.show()
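
The boxplot can be complemented with per-species summary statistics; the snippet below is an optional sketch, not part of the original script, and uses the same df loaded above.

# Optional: numeric summary of sepal length per species (complements the boxplot)
print(df.groupby('species')['sepal_length'].describe())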

# ---------- 2. Correlation Heatmap ----------


plt.figure(figsize=(8, 6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Feature Correlation Heatmap")
plt.show()
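
Before fitting the regression below, it helps to confirm that the chosen pair of variables is strongly correlated. This is a minimal sketch that simply reads off the same value shown in the heatmap.

# Optional check: correlation between the two variables used in the regression below
corr = df['sepal_length'].corr(df['petal_length'])
print(f"Correlation (sepal_length vs petal_length): {corr:.2f}")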

# ---------- 3. Simple Linear Regression ----------


X = df[['sepal_length']] # Independent variable
y = df['petal_length'] # Dependent variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model


model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Model evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print results
print(f"\nModel Coefficient: {model.coef_[0]:.2f}")
print(f"Model Intercept: {model.intercept_:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R2 Score: {r2:.2f}")

plt.figure(figsize=(8, 6))
sns.scatterplot(x=X_test['sepal_length'], y=y_test, hue=df.loc[X_test.index, 'species'],
                palette="viridis", legend=False)
sns.lineplot(x=X_test['sepal_length'], y=y_pred, color='red', label='Regression Line')

plt.xlabel("Sepal Length")
plt.ylabel("Petal Length")
plt.title("Linear Regression: Sepal Length vs Petal Length")
plt.legend()

plt.show()
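
A small usage example (not in the original script) shows how the fitted model can be applied to a new measurement; the 6.0 cm sepal length below is purely illustrative.

# Example: predict petal length for a hypothetical flower with a 6.0 cm sepal
new_flower = pd.DataFrame({'sepal_length': [6.0]})
print(f"Predicted petal length for sepal length 6.0 cm: {model.predict(new_flower)[0]:.2f} cm")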

#REGRESSION MODEL WITH AND WITHOUT BIAS


import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load dataset
data = sns.load_dataset('iris')

# Feature matrix X (all four measurements) and petal length as target y;
# only the sepal_length column of X is actually used by the models below
X = data[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values
y = data['petal_length'].values
species = data['species']

# Split into train/test


X_train, X_test, y_train, y_test, species_train, species_test = train_test_split(
    X, y, species, test_size=0.2, random_state=42
)

# ----------- Model WITH Bias (fit_intercept=True) -----------


model1 = LinearRegression()
model1.fit(X_train[:, 0:1], y_train) # Use only sepal_length
pred1 = model1.predict(X_test[:, 0:1])
mse1 = mean_squared_error(y_test, pred1)
print("MSE with Bias:", round(mse1, 2))
# ----------- Model WITHOUT Bias (fit_intercept=False) -----------
model2 = LinearRegression(fit_intercept=False)
model2.fit(X_train[:, 0:1], y_train)
pred2 = model2.predict(X_test[:, 0:1])
mse2 = mean_squared_error(y_test, pred2)
print("MSE without Bias:", round(mse2, 2))

# ----------- Comparison Plot -----------


plt.figure(figsize=(12, 5))

# Plot 1: With Bias


plt.subplot(1, 2, 1)
sns.scatterplot(x=X_test[:, 0], y=y_test, hue=species_test, palette='viridis', alpha=0.7)
plt.plot(X_test[:, 0], pred1, color='red', label='With Bias')
plt.title('With Bias')
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.legend()

# Plot 2: Without Bias


plt.subplot(1, 2, 2)
sns.scatterplot(x=X_test[:, 0], y=y_test, hue=species_test, palette='viridis', alpha=0.7)
plt.plot(X_test[:, 0], pred2, color='blue', label='Without Bias')
plt.title('Without Bias')
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.legend()

plt.tight_layout()
plt.show()

#EXP 4
Classification of a dataset using a perceptron with and without bias (a synthetic dataset generated with make_classification stands in for a UCI repository dataset here).
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

# Generate a synthetic dataset


X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=0)
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fitting a perceptron model without bias


model_no_bias = Perceptron(fit_intercept=False)
model_no_bias.fit(X_train, y_train)
y_pred_no_bias = model_no_bias.predict(X_test)
accuracy_no_bias = accuracy_score(y_test, y_pred_no_bias)
print("Accuracy of perceptron without bias:", accuracy_no_bias)

# Fitting a perceptron model with bias


model_with_bias = Perceptron(fit_intercept=True)
model_with_bias.fit(X_train, y_train)
y_pred_with_bias = model_with_bias.predict(X_test)
accuracy_with_bias = accuracy_score(y_test, y_pred_with_bias)
print("Accuracy of perceptron with bias:", accuracy_with_bias)
