ML Complete Notes Hridoy
ML Complete Notes Hridoy
# Mount Google Drive under /content/drive/.
# NOTE(review): `drive` is presumably `google.colab.drive`; its import is not shown in these notes — confirm.
drive.mount('/content/drive/')
import pandas as pd
import numpy as np
# Assuming the file is in the 'My Drive' folder of your Google Drive.
# The original snippet passed an undefined, truncated name (`file_pa`) to
# read_csv; the path is now defined explicitly before it is used.
# TODO: replace the placeholder below with the real CSV location
# (Colab usually exposes 'My Drive' as /content/drive/MyDrive — verify).
file_path = '/content/drive/MyDrive/your_file.csv'
df = pd.read_csv(file_path)  # Load the CSV into a DataFrame
Part 1: Data Preprocessing
data.fillna(0, inplace=True) # Replaces all NaN with 0 (or any chosen constant); modifies `data` in place
# Create a label encoder instance; it is not fitted in this snippet.
# NOTE(review): presumably sklearn.preprocessing.LabelEncoder — the import is not shown, confirm.
le = LabelEncoder()
# Create a scaler instance; no fit/transform call is visible in this section.
# NOTE(review): presumably sklearn.preprocessing.StandardScaler — confirm.
scaler = StandardScaler()
# Replace missing values in the 'age' column with the column mean.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on `df2['age']`: that form is chained assignment on a
# temporary selection, which raises a FutureWarning in pandas 2.x and leaves
# `df2` unchanged once Copy-on-Write is enabled.
df2['age'] = df2['age'].fillna(df2['age'].mean())
df2['age']  # Display the column to check that the nulls were replaced
Part 2: Data Visualization
✅ 1. Correlation Heatmap
# Draw the correlation matrix as an annotated heatmap. The original snippet
# only set a title and called show(), which renders an empty figure.
# numeric_only=True keeps non-numeric columns out of the correlation
# computation so corr() does not fail on them.
sns.heatmap(data.corr(numeric_only=True), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()
✅ 2. Pairplot
# Plot every pair of features against each other, coloured by the "Purchased" column.
sns.pairplot(data=data, hue="Purchased")
✅ 3. Boxplot
# Box plots of the Age and Salary columns: shows each distribution's spread and flags outliers.
sns.boxplot(data=data.loc[:, ["Age", "Salary"]])
✅ 4. Histogram
# Histogram of the Age column, split into 20 bins.
data["Age"].hist(bins=20)
# Label both axes, add the title, then render the figure.
plt.ylabel("Frequency")
plt.xlabel("Age")
plt.title("Distribution of Age")
plt.show()
✅ 5. Scatter Plot
# Scatter of Salary against Age, with points coloured by the "Purchased" column.
sns.scatterplot(
    data=data,
    x="Age",
    y="Salary",
    hue="Purchased",
)
plt.title("Age vs Salary")
plt.show()
✅ 6. Count Plot
# Count the rows in each category of the "Purchased" column.
sns.countplot(x="Purchased", data=data)
# The title now names the plotted column; the original said "Gender Distribution"
# although the x-axis variable is "Purchased".
plt.title("Purchased Distribution")
plt.show()
✅ 8. Bar Plot
# Compare average Salary by Gender (one bar per Gender value).
sns.barplot(x="Gender", y="Salary", data=data)
# The original title, "Correlation Heatmap", was copied from the heatmap
# section; it now describes this bar plot.
plt.title("Average Salary by Gender")
plt.show()
# Select the "Purchased" column as the prediction target.
# NOTE(review): the feature matrix and the X_train / X_test / y_train split used by the
# model snippets below are not defined anywhere in the visible notes — confirm they exist.
y = data["Purchased"] # Target
# Model examples. Every snippet follows the same pattern: create an estimator,
# fit it on X_train / y_train, then predict on X_test. Each one rebinds `model`
# and `y_pred`, so when run in sequence only the last snippet's results remain.
# NOTE(review): the estimator classes are presumably from scikit-learn; their
# imports are not shown in these notes — confirm.

# Linear regression.
# NOTE(review): `y` above is the "Purchased" column; if that is a class label,
# this regressor will predict continuous values — confirm the intended target.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Logistic regression with default settings.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Decision tree classifier with default settings.
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Random forest classifier with default settings.
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Support vector classifier with default settings.
model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# K-nearest neighbours classifier using 3 neighbours.
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Gaussian naive Bayes classifier.
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)