Bi Practical
Bi Practical
Code:
# Random Forest classification on the Iris dataset, followed by evaluation
# (accuracy, classification report and a confusion-matrix heatmap).
#
# Prerequisite (run in a shell, not inside the Python interpreter):
#   pip install pandas numpy scikit-learn matplotlib seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.datasets import load_iris

# NOTE(review): the data-loading, splitting, scaling and training steps were
# absent from the extracted text even though their imports are present and the
# evaluation code below needs `model`, `X_test` and `y_test`. They are
# reconstructed here from those imports; the 80/20 split, the seed and the
# number of trees are assumptions -- confirm against the original practical.

# Load the Iris dataset into a feature table and a target vector
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

# Split data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features; fit on the training split only to avoid leakage
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification report
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, cmap="Blues", fmt="d")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
PRACTICAL 4
Practical 4 - Perform data clustering using a clustering
algorithm in R/Python.
Code:
# K-Means clustering on synthetic blob data: fit a 3-cluster model, plot the
# clusters with their centroids, then compute inertia for k = 1..9 (elbow
# method).
#
# Prerequisite (run in a shell, not inside the Python interpreter):
#   pip install pandas numpy scikit-learn matplotlib seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Generate sample data with 3 clusters
X, y = make_blobs(n_samples=300, centers=3, random_state=42, cluster_std=1.0)

# Convert to DataFrame
df = pd.DataFrame(X, columns=['Feature1', 'Feature2'])

# Standardize the features; the clustering and elbow loop below both operate
# on X_scaled, which the extracted text used without ever defining.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit the model before reading its attributes. n_init is set explicitly so
# the result does not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(X_scaled)

# Cluster centers (in the standardized feature space)
centers = kmeans.cluster_centers_

plt.figure(figsize=(8, 6))
plt.scatter(
    X_scaled[:, 0],
    X_scaled[:, 1],
    c=df['Cluster'],
    cmap='viridis',
    s=50,
    label='Data points',
)
plt.scatter(
    centers[:, 0],
    centers[:, 1],
    c='red',
    marker='X',
    s=200,
    label='Centroids',
)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('K-Means Clustering Visualization')
plt.legend()
plt.show()

# Elbow method: record the inertia (within-cluster sum of squares) per k.
# A separate variable is used so the fitted 3-cluster `kmeans` is not clobbered.
inertia = []
K_range = range(1, 10)
for k in K_range:
    elbow_model = KMeans(n_clusters=k, n_init=10, random_state=42)
    elbow_model.fit(X_scaled)
    inertia.append(elbow_model.inertia_)

# NOTE(review): the extracted text computed `inertia` but never displayed it;
# this elbow plot is the presumed intent -- confirm against the original.
plt.figure(figsize=(8, 6))
plt.plot(list(K_range), inertia, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()
Code:
plot (var_1, var_2,
col="color for the points",
main="title of our graph",
abline(relation_between_the_variables),
cex = size of the point,
pch = style of the point (from 0-25),
xlab = "label for x axis",
ylab = "label for y axis")
# Simple linear regression of weight on height, a single prediction, and a
# scatter plot with the fitted regression line.

# x - represents height (in cm)
# y - represents weight (in kg)
x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)

# Perform a linear regression where we specify the dependent and
# the independent variable in the following manner:
# Syntax: lm(dependent_var ~ independent_var)
# In our case y is dependent and x is independent.
relation <- lm(y ~ x)

# Predict the weight (y) for a given height (x) = 170.
# The new value is wrapped in a data frame whose column name matches the
# predictor name used in the model formula.
a <- data.frame(x = 170)

# To find the result of our prediction we use the predict function
# with the relation and the data frame.
# Syntax: predict(relation, data.frame)
result <- predict(relation, a)
print(result)

# Plot the data on a graph.
# Syntax: plot(var_1, var_2, col = "point_color", main = "title",
#              cex = point_size, pch = shape_of_point,
#              xlab = "label for x axis", ylab = "label for y axis")
plot(x, y,
     col = "blue",
     main = "Height and Weight Regression",
     cex = 1.3,
     pch = 16,
     xlab = "Height in cm",
     ylab = "Weight in kg")

# Draw the fitted line as a separate call once the plot exists. Passing
# abline(...) as an unnamed argument inside plot() is fragile: it is matched
# positionally to one of plot()'s own parameters. The already-fitted model is
# reused instead of refitting lm(y ~ x).
abline(relation)
# pch symbols reference image (link):
# https://r-charts.com/en/tags/base-r/pch-symbols_files/figure-html/pch-symbols.png
PRACTICAL 6
Practical 6 - Perform logistic regression on the given data
warehouse data using R/Python.
Code:
# Logistic regression on a small simulated purchase dataset: train/test
# split, scaling, training, evaluation and two heatmap visualizations.
#
# Prerequisite (run in a shell, not inside the Python interpreter):
#   pip install pandas numpy scikit-learn matplotlib seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Simulated dataset
data = {
    "Age": [25, 45, 35, 50, 23, 40, 30, 60, 27, 55],
    "Income": [30000, 80000, 50000, 90000, 25000, 70000, 45000,
               100000, 32000, 85000],
    # Target variable (1 = Purchased, 0 = Not Purchased)
    "Purchased": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Display first five rows
print(df.head())

# Define independent (X) and dependent (y) variables
X = df[['Age', 'Income']]  # Features
y = df['Purchased']  # Target variable

# Split data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the data (important for Logistic Regression).
# The scaler is fitted on the training split only and reused for the test
# split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Get predictions
y_pred = model.predict(X_test)

# Model performance metrics
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification report
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion matrix. With only two test samples a class may be absent from
# both y_test and y_pred; passing labels keeps the matrix 2x2 regardless.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
sns.heatmap(conf_matrix, annot=True, cmap="Blues", fmt="d")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# Data Visualization: pairwise correlation between all columns
plt.figure(figsize=(8, 6))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()
Code:
# Data visualization on a small sample sales dataset (monthly sales trend).
#
# Prerequisite (run in a shell, not inside the Python interpreter):
#   pip install pandas numpy matplotlib seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sample Sales Data
data = {
    # "MS" (month start) yields one timestamp per month beginning at the
    # start date and is accepted by both old and new pandas releases; the
    # bare "M" alias is deprecated in newer pandas.
    "Date": pd.date_range(start="2023-01-01", periods=12, freq="MS"),
    "Sales": [5000, 7000, 8000, 6500, 7200, 9000, 11000, 10500,
              9500, 9800, 12000, 13000],
    "Profit": [800, 1200, 1500, 1000, 1300, 1700, 2200, 2100, 1900,
               2000, 2500, 2700],
    "Category": ["Electronics", "Clothing", "Electronics",
                 "Furniture", "Clothing", "Electronics",
                 "Furniture", "Clothing", "Electronics", "Furniture",
                 "Clothing", "Electronics"],
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Display first five rows
print(df.head())

# 1. Line Chart - Monthly Sales Trend
plt.figure(figsize=(10, 5))
plt.plot(df["Date"], df["Sales"], marker='o', linestyle='-',
         color='blue', label="Sales")
plt.xlabel("Month")
plt.ylabel("Sales ($)")
plt.title("Monthly Sales Trend")
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)  # tilt date labels so they do not overlap
plt.show()