Abhiml ML File
Abhiml ML File
# Display the last 5 rows of the DataFrame (notebook-style output).
# NOTE(review): df is loaded in lines lost during extraction — presumably
# pd.read_csv of the "data csv" file named in the caption below; confirm.
df.tail()
Output:-
FIG. 2.1 Display the data of the file "data.csv"
# Print column names, non-null counts and dtypes of the DataFrame.
df.info()
import pandas as pd

# Build a small employee table from parallel column lists.
# BUG FIX: the original had "} df =" and "pd.DataFrame(data)" split across
# lines by extraction, which is a SyntaxError — reformatted one statement
# per line.
data = {
    'EmpID': ['1021', '1057', '1147', '1272', '1523', '1445', '1663', '1747'],
    'FName': ['Ravi', 'Aman', 'Krishan', 'Priya', 'Navjot', 'Abhishek', 'Jay', 'Ritik'],
    'LName': ['Kumar', 'Kumr', 'Yadav', 'Sharma', 'Singh', 'Gupta', 'Malhotra', 'Patel'],
    'Salary': ['22000', '25000', '28000', '20000', '35000', '18000', '32000', '40000'],
}
df = pd.DataFrame(data)
print(df)
Rename Column
print(df)
FIG.2.3 Drop Column Name 'Salary'
# Drop the Salary column and show the result.
# BUG FIX: two statements were merged onto one line (SyntaxError).
df = df.drop(columns=['Salary'])
print(df)
ISNA FUNCTION
CODE:-
# Boolean mask marking each missing (NaN) cell (notebook-style output).
df.isna()
Output: FIG. 3.1 Display all the data using various encoding methods
FIG. 3.2 Display The Label Encoder 'Gender_LabelEncoded'
1. Label Encoder
from sklearn.preprocessing import LabelEncoder lblenc =
LabelEncoder() df['Gender_LabelEncoded'] =
lblenc.fit_transform(df['Gender']) print("Label Encoded DataFrame: ")
print(df)
DataFrame: ")
print(df)
3. Ordinal Encoding
# 3. Ordinal encoding: map each Color category to its integer rank.
# NOTE(review): color_mapping is defined in lines lost during extraction —
# presumably a dict such as {'Red': 0, 'Green': 1, ...}; confirm against
# the original program.
df['Color_OrdinalEncoded'] = df['Color'].map(color_mapping)
OUTPUT:-
FIG. 3.6 Display The Data Binary Encoding
4. Binary Encoding
# 4. Binary Encoding (uses the category_encoders package, imported as ce).
# BUG FIX: the assignment target of the first line and the tail of the
# pd.concat call were lost during extraction — reconstructed; axis=1
# appends the encoded columns alongside the original frame. Confirm
# against the original program.
binary_encoder = ce.BinaryEncoder(cols=['Color'])
binary_encoded = binary_encoder.fit_transform(df['Color'])
df = pd.concat([df, binary_encoded], axis=1)
print(df)
4. Write a program to scale the features using various
scalers: Min-Max, Z-score, Robust, Max-Absolute,
and Min-Max within a custom range.
import numpy as np

# NOTE(review): extraction truncated "import numpy as np" to "as np" and
# lost the code between the Min-Max and Max-Absolute scalers (Z-score and
# Robust scaling per the question text) — reconstructed conservatively;
# confirm against the original program.

# Random demo features on three very different scales.
data = {
    'Feature1': np.random.randint(1, 100, 10),
    'Feature2': np.random.randint(100, 1000, 10),
    'Feature3': np.random.randint(1000, 10000, 10)}
df = pd.DataFrame(data)
print(df)

# 1. Min-Max Scaling: rescale each feature to [0, 1].
from sklearn.preprocessing import MinMaxScaler
minmax_scaler = MinMaxScaler()
minmax_scaled = minmax_scaler.fit_transform(df)
minmax_df = pd.DataFrame(minmax_scaled, columns=df.columns)
print(minmax_df)

# Max-Absolute Scaling: divide each feature by its maximum absolute value.
from sklearn.preprocessing import MaxAbsScaler
maxabs_scaler = MaxAbsScaler()
maxabs_scaled = maxabs_scaler.fit_transform(df)
maxabs_df = pd.DataFrame(maxabs_scaled, columns=df.columns)
print(maxabs_df)
2.Missing Values
# Show every row that contains at least one missing value.
row_has_nan = df.isnull().any(axis=1)
missing_records = df[row_has_nan]
print("\nRecords with Missing Values:")
print(missing_records)
FIG:5.3. Display The Data Count of Missing Values:-
4. Filled mean:-
# Fill numeric NaNs with the per-column mean (notebook-style display).
# BUG FIX: df.mean() without numeric_only raises TypeError on non-numeric
# columns in pandas >= 2.0.
df_filled_mean = df.fillna(df.mean(numeric_only=True))
df_filled_mean
# Reattach saved columns to the frame (notebook-style display at the end).
# NOTE(review): age, salary and exp are defined in lines lost during
# extraction — presumably copies of the original columns taken before
# imputation; confirm against the original program.
df["Age"]= age
df["Salary"]= salary
df["Exp"]= exp
df
FIG:7.1.Display The Data
# Pick the split feature with the lowest Gini impurity, then score the
# fitted classifier on the test set.
# NOTE(review): gini_impurities, clf, x_test and y_test come from lines
# lost during extraction.
best_feature_index = np.argmin(gini_impurities)
# BUG FIX: the "\n" escape was garbled to a literal "n" in the source.
print("\nBest Feature(lowest Gini Impurity):Feature", best_feature_index)
y_pred = clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)
FIG:8.1.Display The Data
8. Write a program to print the information gain of the same
dataset using a decision tree classifier.
Sol:-
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import mutual_info_classif
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Compute the information gain (mutual information) of each iris feature
# with respect to the class label.
iris = load_iris()
x = iris.data
y = iris.target
information_gains = mutual_info_classif(x, y)

# BUG FIX: the original loop variable was also named information_gains,
# clobbering the array with a scalar and breaking np.argsort below.
for feature_index, gain in enumerate(information_gains):
    print(f"Feature{feature_index}: Information_gains ={gain:.4f}")

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=42)

# Keep the two features with the highest information gain and train a
# decision tree on just those columns.
num_selected_features = 2
selected_feature_indices = np.argsort(information_gains)[-num_selected_features:]
clf = DecisionTreeClassifier(random_state=42)
clf.fit(x_train[:, selected_feature_indices], y_train)
y_pred = clf.predict(x_test[:, selected_feature_indices])
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)

# Side-by-side comparison of actual vs predicted labels.
print("\nOriginal Predicted")
for i in range(len(y_test)):
    print(f"{y_test[i]} {y_pred[i]}")
FIG:9.1.Display The Data
9. Write a program to print whether a person survived or not
in the Titanic accident using a decision tree classifier.
Sol:-
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Predict Titanic survival with a decision tree classifier.
titanic_data = pd.read_csv('D:\\titanic.csv')

features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
target = 'Survived'

# Keep only the needed columns and drop rows with missing values.
titanic_data = titanic_data[features + [target]].dropna()
print(titanic_data)

# Encode categorical columns as integers.
# BUG FIX: the Titanic dataset stores 'female' in lowercase; mapping
# 'Female' turned every female passenger's Sex into NaN (after the dropna
# above), which breaks the classifier fit.
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})
titanic_data['Embarked'] = titanic_data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

x = titanic_data[features]
y = titanic_data[target]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=42)
clf = DecisionTreeClassifier(random_state=42)
clf.fit(x_train, y_train)
FIG:9.2.Display The Data
# Predict survival for the first 10 passengers of the test split.
# BUG FIX: the original referenced X_test (undefined — train_test_split
# assigns x_test) and misspelled first_10_records in the loop (NameError).
first_10_records = x_test.head(10)
predictions = clf.predict(first_10_records)
for i, prediction in enumerate(predictions):
    passenger_data = first_10_records.iloc[i]
    survived = "Survived" if prediction == 1 else "Not Survived"
    print(f"Passenger {i + 1}: {survived} - {passenger_data}")
ANSWER: -
import numpy as np
import matplotlib.pyplot as plt
# BUG FIX: the scikit-learn class is KMeans, not Kmeans (ImportError);
# import lines were also merged by extraction.
from sklearn.cluster import KMeans

# NOTE(review): the code that builds X, fits KMeans and produces
# labels/centroids was lost during extraction — restore it before running.

# Plot the scatter plot with the points and their assigned clusters
plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=300, c='red')
plt.title('Partitioned Clustering with Two Clusters')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
FIG:13.Display The Data
Q13.Write a program to predict the category of iris flower
using SVC with accuracy.
ANSWER: -
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Q13: predict the iris species with a linear SVC and report accuracy.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.3,
                                                    random_state=42)
# BUG FIX: iris targets are 0=setosa, 1=versicolor, 2=virginica; the
# original list ["Setosa","Verginica","Versicolor"] both misspelled
# Virginica and swapped classes 1 and 2, mislabelling every prediction.
lst = ["Setosa", "Versicolor", "Virginica"]

clf = SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
for i in range(5):
    print("Predicted value:")
    print(lst[y_pred[i]])
    print("Actual value:")
    print(lst[y_test[i]])
    print()
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)
FIG:14.Display The Data
Q14.Write a program to predict the category of iris flower
using Decision Tree Classifier with accuracy.
ANSWER: -
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Q14: predict the iris species with a decision tree and report accuracy.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.3,
                                                    random_state=42)
# BUG FIX: class names must follow the target order (setosa, versicolor,
# virginica); the original defined lst twice, both times misordered and
# once misspelled "Verginica".
lst = ["Setosa", "Versicolor", "Virginica"]

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
for i in range(5):
    print("Actual value: ", end="")
    print(lst[y_test[i]])
    print("Predicted value: ", end="")
    print(lst[y_pred[i]])
    print()
# Q14 asks for accuracy; accuracy_score was imported but never used.
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Plot one full period of the sine curve.
x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x)
plt.plot(x, y)
plt.xlabel('x values between 0 and 2pi', fontsize=18)
plt.ylabel('sin(x) values', fontsize=18)
# BUG FIX: "codor" is not a keyword argument (TypeError) — plt.title takes
# color=; the title literal was also split across lines by extraction.
plt.title('sine curve plot', fontweight="bold", color="r", fontsize=20)
plt.show()
FIG:17.Display The Data
Q17: - Write a program to Create scatter plot.
# Q17: scatter plot of 50 uniformly random points, drawn as diamonds.
n_points = 50
x = np.random.rand(n_points)
y = np.random.rand(n_points)
plt.scatter(x, y, marker="D")
plt.xlabel('x values', fontsize=18)
plt.ylabel('y values', fontsize=18)
plt.title('scatter plot', fontsize=18)
plt.show()
FIG:18.Display The Data
Q18: - Write a program to Create a bar graph
# Q18: bar graph of four categories.
x = ['A', 'B', 'C', 'D']
y = [10, 20, 15, 25]
plt.bar(x, y)
plt.xlabel('Categories', fontsize=18)
plt.ylabel('Values', fontsize=18)
# BUG FIX: fontsize was passed twice (SyntaxError: duplicate keyword
# argument); the second one was meant to be fontweight="bold".
plt.title('Bar plot', fontsize=20, fontweight="bold", color="g")
plt.show()
FIG:19.Display The Data
Q19: - Write a program to create a histogram.
# Q19: histogram of 1000 standard-normal samples, saved to disk.
data = np.random.randn(1000)
plt.hist(data, bins=30)
plt.xlabel('Values', fontsize=18, fontweight="bold", color="r")
plt.ylabel('Frequency', fontsize=18, fontweight="bold", color="r")
plt.title('Histogram', fontsize=20, fontweight="bold", color="g")
plt.grid(True, color="r", alpha=0.2)
# BUG FIX: use a raw string so "\e" in the Windows path is not treated as
# a (deprecated, future SyntaxError) escape sequence.
plt.savefig(r'K:\example_plot.png')
plt.show()
FIG:20.Display The Data
Q20: - Write a program to create a subplots.
# Q20: two labelled subplots laid out side by side.
# NOTE(review): the setup lines creating fig/axes (plt.subplots) and the
# data x, y1, y2 were lost during extraction — restore them before running.
axes[0].plot(x,y1)
axes[0].set_title('Plot 1')
axes[0].set_xlabel('x-axis')
axes[0].set_ylabel('y-axis')
axes[1].plot(x,y2)
axes[1].set_title('Plot 2')
axes[1].set_xlabel('x-axis')
axes[1].set_ylabel('y-axis')
# Avoid label overlap between the two axes.
plt.tight_layout()
plt.show()
FIG:21.1Display The Data
# Scatter plot of student ages.
x = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
# BUG FIX: the ages were quoted strings, which makes matplotlib treat the
# y-axis as categorical (plotted in list order rather than by value).
y = [15, 20, 16, 18, 17, 25, 22, 14, 23, 27]
plt.scatter(x, y, marker="d")
plt.xlabel('Students', fontsize=18)
plt.ylabel('Ages of students', fontsize=18)
plt.title('Scatter Plot', fontsize=18)
plt.show()
FIG:21.3Display The Data
import numpy as np
import matplotlib.pyplot as plt

# Bar chart of student counts per group.
# NOTE(review): X (the group labels) and the plt.bar(...) calls that draw
# the bars were lost during extraction — restore them before running;
# plt.legend() requires labelled artists.
X_axis = np.arange(len(X))
# BUG FIX: two statements were merged onto one line (SyntaxError).
plt.xticks(X_axis, X)
plt.xlabel("Groups")
plt.ylabel("Number of Students")
plt.title("Number of Students in each group")
plt.legend()
plt.show()
FIG:21.5Display The Data
21.5:- Plotting a grouped bar graph of students' Maths and
English marks using the matplotlib module of Python:-
import numpy as np
import matplotlib.pyplot as plt

# Marks of four students in two subjects.
Maths = [15, 25, 20, 40]
English = [14, 23, 51, 37]
n = 4
r = np.arange(n)
width = 0.25

# BUG FIX: the original plotted undefined names Women/Men (NameError,
# copied from a different example); the data lists here are Maths and
# English, and the second bar's label said 'Men' instead of 'English'.
plt.bar(r, Maths, color='b',
        width=width, edgecolor='black',
        label='Maths')
plt.bar(r + width, English, color='g',
        width=width, edgecolor='black',
        label='English')

# BUG FIX: the axis labels and title were scrambled ("Math Marks" /
# "English Marks") — the y-axis shows marks and the title describes both.
plt.xlabel("Students")
plt.ylabel("Marks")
plt.title("Maths and English Marks of Students")
plt.grid(linestyle='--')
# One tick per student, centred between the paired bars.
# NOTE(review): the original tick labels alternated 'Maths'/'English',
# mislabelling the four x positions — using student indices instead;
# confirm the intended group names.
plt.xticks(r + width / 2, ['S1', 'S2', 'S3', 'S4'])
plt.legend()
plt.show()
FIG:22Display The Data
# Compare actual vs predicted labels for the first five test samples.
# NOTE(review): clf, X_test and y_test come from lines lost in extraction.
# BUG FIX: "dieases" was misspelled in the user-facing output labels.
lst = ["crop", "diseases", "non-diseases"]
y_pred = clf.predict(X_test)
for i in range(5):
    print("Actual value: ", end="")
    print(lst[y_test[i]])
    print("Predicted value: ", end="")
    print(lst[y_pred[i]])
    print()
# Repeat of the comparison loop, followed by the overall accuracy.
# NOTE(review): clf, X_test, y_test and lst come from earlier lines.
# BUG FIX: statements were merged/split across lines by extraction
# (SyntaxError); reformatted one statement per line.
y_pred = clf.predict(X_test)
for i in range(5):
    print("Actual value: ", end="")
    print(lst[y_test[i]])
    # Consistency fix: match the "Predicted value: " label used in the
    # sibling loop (the original printed "Predictedvalue:").
    print("Predicted value: ", end="")
    print(lst[y_pred[i]])
    print()
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)
print(y_pred)