ML Journal
ML Journal
In [2]: a=np.array([1,2,3,4,5,6,7,8,9,10])
print(a)
[ 1 2 3 4 5 6 7 8 9 10]
In [3]: a1=np.array([(1,2,3,5,6),[3,4,5,6,7]])
print(a1)
[[1 2 3 5 6]
[3 4 5 6 7]]
In [4]: l=[1,2,3,4,5,6]
print("list",l)
arr=np.array(l)
print("array:",arr)
list [1, 2, 3, 4, 5, 6]
array: [1 2 3 4 5 6]
In [5]: f=np.linspace(1,10,50)
print(f)
In [6]: b=np.random.random((5,5))
print(b)
print("length :",len(b))
print("max :",np.max(b))
print("min:",np.min(b))
In [7]: c=np.random.randint(1,100,20)
print(c)
[50 43 72 85 95 79 33 35 40 18 72 49 85 36 63 42 37 56 37 92]
given the numpy array arr ,reverse its elements and find its size
In [8]: arr=np.array([1,2,3,4,6,8,9])
print(arr)
#print(np.flip(arr))
print(np.flip(arr,0))
print("size is :",np.size(arr))
[1 2 3 4 6 8 9]
[9 8 6 4 3 2 1]
size is : 7
In [9]: array=np.array([1,2,4,5,6])
print("mean: ",np.mean(array))
print("median: ",np.median(array))
print("standard deviation :",np.std(array))
mean: 3.6
median: 4.0
standard deviation : 1.8547236990991407
2
create 3by 3 matrix with all values set to 1
In [10]: a=np.ones((3,3))
print(a)
[[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]]
In [11]: b=np.zeros((3,3))
print(b)
[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
given two numpy arrays arr1 and arr2 ,concatenate them horizontally
In [12]: arr1=np.array([(1,3,4,7),(3,5,6,7)])
arr2=np.array([[9,10,4,5],[7,9,0,5]])
print("horizontal")
c=np.concatenate((arr1,arr2),axis=None) #horizontal
print(c)
print("vertical")
c1=np.concatenate((arr1,arr2),axis=1) #vertical
print(c1)
print("using hstack")
d=np.hstack((arr1,arr2)) #horizontal
print(d)
print("using vstack")
d1=np.vstack((arr1,arr2)) #vertical
print(d1)
horizontal
[ 1 3 4 7 3 5 6 7 9 10 4 5 7 9 0 5]
vertical
[[ 1 3 4 7 9 10 4 5]
[ 3 5 6 7 7 9 0 5]]
using hstack
[[ 1 3 4 7 9 10 4 5]
[ 3 5 6 7 7 9 0 5]]
using vstack
[[ 1 3 4 7]
[ 3 5 6 7]
[ 9 10 4 5]
[ 7 9 0 5]]
3
In [13]: arr1=np.array([1,3,4,7])
arr2=np.array([9,10,4,5])
print("horizontal")
c=np.concatenate((arr1,arr2),axis=None) #horizontal
print(c)
#print("vertical")
#c1=np.concatenate((arr1,arr2),axis=1) # not possible to print vertical
#print(c1)
horizontal
[ 1 3 4 7 9 10 4 5]
create a numpy array containing all even and odd numbers from 0 to 20
In [14]: print(np.arange(0,20,2))
print(np.arange(1,20,2))
[ 0 2 4 6 8 10 12 14 16 18]
[ 1 3 5 7 9 11 13 15 17 19]
In [15]: a=np.array([1,2,3,4])
b=np.array([5,6,7,8])
#c=np.matmul(a,b) #total multiplication
#print(c)
#print(np.dot(a,b)) #dot product
print(np.multiply(a,b))
[ 5 12 21 32]
In [16]: a=np.array([1,3,4,56,7,8,9,9,1])
newa=a.reshape(3,3)
print(newa)
[[ 1 3 4]
[56 7 8]
[ 9 9 1]]
4
In [17]: a=np.arange(0,30)
print(a)
print("max:", np.max(a))
print("min : ",min(a))
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29]
max: 29
min : 0
In [18]: x=np.array([1,2,3,4])
y=np.array([5,6,7,8])
print(np.dot(x,y))
70
5
2) Assignment on Practice of Pandas Library
In [1]: import pandas as pd
import numpy as np
Out[3]: W X Y Z
In [4]: df['W']
Out[4]: A 2.706850
B 0.651118
C -2.018168
D 0.188695
E 0.190794
Name: W, dtype: float64
In [5]: df[['W','Z']]
Out[5]: W Z
A 2.706850 0.503826
B 0.651118 0.605965
C -2.018168 -0.589001
D 0.188695 0.955057
E 0.190794 0.683509
In [6]: df.W
Out[6]: A 2.706850
B 0.651118
C -2.018168
D 0.188695
E 0.190794
Name: W, dtype: float64
In [7]: type(df['W'])
Out[7]: pandas.core.series.Series
6
In [8]: df['new']=df['W']+df['Y']
df
Out[8]: W X Y Z new
In [9]: df.drop('new',axis=1)
Out[9]: W X Y Z
In [10]: df
Out[10]: W X Y Z new
In [11]: df.drop('new',axis=1,inplace=True)
df
Out[11]: W X Y Z
In [12]: df.drop('E',axis=0)
Out[12]: W X Y Z
Out[13]: W 2.706850
X 0.628133
Y 0.907969
Z 0.503826
Name: A, dtype: float64
In [14]: df.iloc[2]
Out[14]: W -2.018168
X 0.740122
Y 0.528813
Z -0.589001
Name: C, dtype: float64
In [15]: df.loc['B','Y']
Out[15]: -0.8480769834036315
In [16]: df.loc[['A','B'],['W','Y']]
Out[16]: W Y
A 2.706850 0.907969
B 0.651118 -0.848077
In [17]: df
Out[17]: W X Y Z
In [18]: df>0
Out[18]: W X Y Z
8
In [19]: df[df>0]
Out[19]: W X Y Z
In [20]: df[df['W']>0]
Out[20]: W X Y Z
In [21]: df[df['W']>0]['Y']
Out[21]: A 0.907969
B -0.848077
D -0.933237
E 2.605967
Name: Y, dtype: float64
In [22]: df[df['W']>0][['Y','X']]
Out[22]: Y X
A 0.907969 0.628133
B -0.848077 -0.319318
D -0.933237 -0.758872
E 2.605967 1.978757
In [23]: df[(df['W']>0)&(df['Y']>1)]
Out[23]: W X Y Z
In [24]: df
Out[24]: W X Y Z
Out[25]: index W X Y Z
In [27]: df['States']=newind
df
Out[27]: W X Y Z States
In [28]: df.set_index('States')
Out[28]: W X Y Z
States
In [29]: df
Out[29]: W X Y Z States
10
In [30]: df.set_index('States',inplace=True)
df
Out[30]: W X Y Z
States
In [31]: outside=['G1','G1','G1','G2','G2','G2']
inside=[1,2,3,1,2,3]
hier_index=list(zip(outside,inside))
hier_index=pd.MultiIndex.from_tuples(hier_index)
In [32]: hier_index
In [33]: df=pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])
df
Out[33]: A B
1 0.302665 1.693723
G1 2 -1.706086 -1.159119
3 -0.134841 0.390528
1 0.166905 0.184502
G2 2 0.807706 0.072960
3 0.638787 0.329646
In [34]: df.loc['G1']
Out[34]: A B
1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
In [35]: df.loc['G1'].loc[1]
Out[35]: A 0.302665
B 1.693723
Name: 1, dtype: float64
11
In [36]: df.index.names
In [37]: df.index.names=['Group','Num']
df
Out[37]: A B
Group Num
1 0.302665 1.693723
G1 2 -1.706086 -1.159119
3 -0.134841 0.390528
1 0.166905 0.184502
G2 2 0.807706 0.072960
3 0.638787 0.329646
In [38]: df.xs('G1')
Out[38]: A B
Num
1 0.302665 1.693723
2 -1.706086 -1.159119
3 -0.134841 0.390528
In [40]: df.xs(('G1',1))
Out[40]: A 0.302665
B 1.693723
Name: (G1, 1), dtype: float64
In [41]: df.xs(1,level='Num')
Out[41]: A B
Group
G1 0.302665 1.693723
G2 0.166905 0.184502
12
3) Assignment on finds Algorithm. Apply on 'Enjoy
Sport Data to find Specific hypothesis for it.
In [1]: import pandas as pd
import numpy as np
Loading Dataset
In [2]: data=pd.read_csv("tennis.csv")
print(data)
In [3]: d=np.array(data)[:,:-1]
print("the attributes are: ",d)
the target is : ['no' 'no' 'yes' 'yes' 'yes' 'no' 'yes' 'no' 'yes' 'yes' 'yes' 'ye
s' 'yes'
'no']
Find-s Algorithm
13
In [5]: def train(d, t):
specific_hypothesis = None # Initialize specific_hypothesis within the function
if specific_hypothesis is None:
return "No positive example found in the target"
return specific_hypothesis
14
4) Assignment on Candidate Elimination Algorithm.
Apply it on Dataset to Enjoy Sport find Version
Space for it.
In [1]: import pandas as pd
import numpy as np
Loading Dataset
In [2]: data=pd.read_csv("tennis.csv")
print(data)
In [3]: d=np.array(data)[:,:-1]
print("the attributes are: ",d)
the target is : ['no' 'no' 'yes' 'yes' 'yes' 'no' 'yes' 'no' 'yes' 'yes' 'yes' 'ye
s' 'yes'
'no']
Candidate Elimination Algorithm
15
In [5]: def learn(d, target):
specific_h = d[0].copy()
print("initialization of specific_h and general_h")
print(specific_h)
for i, h in enumerate(d):
if target[i] == "yes":
for x in range(len(specific_h)):
if h[x]!= specific_h[x]:
specific_h[x] ='?'
general_h[x][x] ='?'
if target[i] == "no":
for x in range(len(specific_h)):
if h[x]!= specific_h[x]:
general_h[x][x] = specific_h[x]
else:
general_h[x][x] = '?'
print(" steps of Candidate Elimination Algorithm",i+1)
print(specific_h)
print(general_h)
indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?
for i in indices:
general_h.remove(['?', '?', '?', '?', '?', '?'])
return specific_h, general_h
s_final, g_final = learn(d, target)
print("Final Specific_h:", s_final, sep="\n")
print("Final General_h:", g_final, sep="\n")
16
initialization of specific_h and general_h
['sunny' 'hot' 'high' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 1
['sunny' 'hot' 'high' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 2
['sunny' 'hot' 'high' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
False]]
steps of Candidate Elimination Algorithm 3
['?' 'hot' 'high' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
False]]
steps of Candidate Elimination Algorithm 4
['?' '?' 'high' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
False]]
steps of Candidate Elimination Algorithm 5
['?' '?' '?' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
False]]
steps of Candidate Elimination Algorithm 6
['?' '?' '?' False]
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
False]]
steps of Candidate Elimination Algorithm 7
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 8
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 9
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 10
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 11
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 12
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 13
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
steps of Candidate Elimination Algorithm 14
['?' '?' '?' '?']
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
Final Specific_h:
17
['?' '?' '?' '?']
Final General_h:
[['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?', '?'], ['?', '?', '?',
'?']]
18
5) Assignment on Simple Regression. Build an
application where it can predict based on year of
Experience a salary using single Variable Linear
Regression (use dataset from the Kaggle) . Display
co-efficient and intercept. Also Display MSE. Plot
model on Testing data.
In [1]: import pandas as pd
Loading Dataset
In [2]: data=pd.read_csv('salary_data.csv')
In [3]: data.head()
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
In [4]: data.tail()
25 9.0 105582.0
26 9.5 116969.0
27 9.6 112635.0
28 10.3 122391.0
29 10.5 121872.0
In [5]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
# Column Non-Null Count Dtype
19
In [6]: data.describe()
In [8]: x_train=train.drop('Salary',axis=1)
y_train=train['Salary']
In [9]: x_test=test.drop('Salary',axis=1)
y_test=test['Salary']
In [10]: x_test
Out[10]: YearsExperience
4 2.2
26 9.5
22 7.9
2 1.5
16 5.1
10 3.9
20 6.8
9 3.7
3 2.0
In [11]: y_test.head()
Out[11]: 4 39891.0
26 116969.0
22 101302.0
2 37731.0
16 66029.0
Name: Salary, dtype: float64
20
In [18]: from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
In [25]: model.fit(x_train,y_train)
Out[25]: LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the
notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with
nbviewer.org.
In [26]: pred=model.predict(x_test)
pred
In [27]: error=sqrt(mean_squared_error(y_test,pred))
error
Out[27]: 4882.248392297978
21
In [31]: plt.scatter(x_test, y_test, color='green', label='Testing data')
plt.plot(x_train, model.predict(x_train), color='red', label='Regression line')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.title('Salary vs. Years of Experience (Testing data)')
plt.legend()
plt.show()
22
6) Assignment on Multi Regression: Build an
application where it can predict price of a house
using a multiple variable Linear regression (use
Housing dataset from Kaggle). Display all the co-
efficients and MSE.
In [1]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Loading Dataset ¶
In [2]: data=pd.read_csv("USA_housing.csv")
In [3]: data.head()
Out[3]:
Avg.
Avg. Area Avg. Area
Avg. Area Area Area
Number of Number of Price Address
Income House Population
Rooms Bedrooms
Age
9127 Elizabeth
2 61287.067179 5.865890 8.512727 5.13 36882.159400 1.058988e+06 Stravenue\nDanieltown,
WI 06482...
USS Barnett\nFPO AP
3 63345.240046 7.188236 5.586729 3.26 34310.242831 1.260617e+06
44820
USNS Raymond\nFPO
4 59982.197226 5.040555 7.839388 4.23 26354.109472 6.309435e+05
AE 0938
In [4]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
# Column Non-Null Count Dtype
23
In [5]: data.describe()
Out[5]:
Avg. Area Avg. Area Area
Avg. Area Avg. Area Price
Number of Number of
Income House Age Population
Rooms Bedrooms
In [6]: data.columns
Out[6]: Index(['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
'Avg. Area Number of Bedrooms', 'Area Population', 'Price', 'Address'],
dtype='object')
In [7]: x = data[['Avg. Area Income','Avg. Area House Age','Avg. Area Number of Rooms','Avg.
In [8]: y=data['Price']
In [11]: lm.fit(X_train,Y_train)
Out[11]: ▾ LinearRegression
LinearRegression()
In [12]: pred=lm.predict(X_test)
In [13]: pred
24
In [15]: print("MAE : ",metrics.mean_absolute_error(Y_test,pred))
MAE : 81550.25106016382
MSE : 10144286208.90111
RMSE : 100718.84733703574
print("Coefficients:")
for feature, coef in zip(x.columns, coefficients):
print(f"{feature}: {coef}")
Coefficients:
Avg. Area Income: 21.350766991704706
Avg. Area House Age: 167276.28633397297
Avg. Area Number of Rooms: 121482.62475230212
Avg. Area Number of Bedrooms: 1178.4271356234713
Area Population: 15.063238730521304
25
7) Assignment on Binary classification: Build
application tennis to decide on whether to play
Decision Tree classifier. Do the required data
preprocessing. Display Accuracy score, classi fication
report & confusion Matrix.
In [1]: import numpy as np
import pandas as pd
import warnings
import matplotlib as plt
import seaborn as sns
loading dataset
In [2]: data=pd.read_csv("tennis.csv")
data
In [3]: data.info()
26
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 outlook 14 non-null object
1 temp 14 non-null object
2 humidity 14 non-null object
3 windy 14 non-null bool
4 play 14 non-null object
dtypes: bool(1 ), object(4)
memory usage: 594.0+ bytes
In [4]: data.head()
Out[5]: True
0 0
1 1
2 0
3 0
4 0
5 1
6 1
7 0
8 0
9 0
10 1
11 1
12 0
13 1
In [6]: data.head()
27
Out[6]: outlook temp humidity windy play
Out[8]: overcast rainy sunny cool hot mild high normal True no yes
0 0 0 1 0 1 0 1 0 0 1 0
1 0 0 1 0 1 0 1 0 1 1 0
2 1 0 0 0 1 0 1 0 0 0 1
3 0 1 0 0 0 1 1 0 0 0 1
4 0 1 0 1 0 0 0 1 0 0 1
5 0 1 0 1 0 0 0 1 1 1 0
6 1 0 0 1 0 0 0 1 1 0 1
7 0 0 1 0 0 1 1 0 0 1 0
8 0 0 1 1 0 0 0 1 0 0 1
9 0 1 0 0 0 1 0 1 0 0 1
10 0 0 1 0 0 1 0 1 1 0 1
11 1 0 0 0 0 1 1 0 1 0 1
12 1 0 0 0 1 0 0 1 0 0 1
13 0 1 0 0 0 1 1 0 1 1 0
dtc.fit(X_train, y_train)
Out[10]: ▾ DecisionTreeClassifier
DecisionTreeClassifier(criterion='entropy') 28
In [11]: pred=dtc.predict(X_test)
accuracy 0.33 3
macro avg 0.50 0.17 0.25 3
weighted avg 1.00 0.33 0.50 3
[[1 2]
[0 0]]
29
8) Assignment on Binary classification using
Perceptron. Implement Perception model. Use this
model to classify a patient is having cancer or not
(use Breast cancer dataset from sklearn). Display
Accuracy score, classification Report and
confusion matrix.
In [1]: import sklearn.datasets
import numpy as np
Loading dataset
In [2]: cancer=sklearn.datasets.load_breast_cancer()
In [3]: x=cancer.data
y=cancer.target
print(x.shape,y.shape)
In [5]: data['class']=cancer.target
data.head()
Out[5]: mean me
mean mean mean mean mean mean mean mean
concave frac
radius texture perimeter area smoothness compactness concavity symmetry
points dimensi
0 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.078
1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.056
2 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.059
3 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.097
4 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.058
5 rows × 31 columns
In [6]: print(data['class'].value_counts())
1 357
0 212
Name: class, dtype: int64
In [7]: print(cancer.target_names)
['malignant' 'benign']
30
In [8]: data.groupby('class').mean()
Out[8]: mean
mean mean mean mean mean mean
mean area concave
radius texture perimeter smoothness compactness concavity
sy
points
class
2 rows × 30 columns
Train-test Split
In [9]: from sklearn.model_selection import train_test_split
x=data.drop('class',axis=1)
y=data['class']
In [10]: type(x)
Out[10]: pandas.core.frame.DataFrame
In [11]: x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.1)
x_train=x_train.values
x_test=x_test.values
Perceptron Class
31
In [12]: from sklearn.metrics import accuracy_score
class Perceptron:
def init (self):
self.w=None
self.b=None
def model(self,X):
return 1 if(np.dot(self.w,X) >= self.b) else 0
def predict(self,X):
Y=[]
for x in X:
result=self.model(x)
Y.append(result)
return np.array(Y)
def fit(self,X,Y,epochs=1,lr=1):
self.w=np.ones(X.shape[1])
self.b=0
accuracy={}
max_accuracy=0
wt_matrix=[]
for i in range(epochs):
for x,y in zip(X,Y):
y_pred=self.model(x)
if y==1 and y_pred==0:
self.w=self.w+lr*x
self.b=self.b-lr*1
elif y==0 and y_pred==1:
self.w=self.w-lr*x
self.b=self.b+lr*1
wt_matrix.append(self.w)
accuracy[i]=accuracy_score(self.predict(X),Y)
if (accuracy[i] >=max_accuracy):
max_accuracy=accuracy[i]
chkptw=self.w
chkptb=self.b
self.w=chkptw
self.b=chkptb
print(max_accuracy)
In [13]: percept=Perceptron()
32
In [14]: wt_matrix=percept.fit(x_train,y_train,10000,0.5)
0.947265625
In [15]: y_predict=percept.predict(x_test)
accuracy 0.98 57
macro avg 0.98 0.98 0.98 57
weighted avg 0.98 0.98 0.98 57
Accuracy: 0.9824561403508771
Confusion_Matrix : [[24 0]
[ 1 32]]
33
9) Assignment on Multiclassification using MLP
(Multilayer Perception). Build an application to
classify give iris flower into its Specie using MLP
Cuse iris data set Kaggle / sklearn). Display
Accuracy Score, classification report and
Confusion matrix.
In [1]: import pandas as pd
url="https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names=['sepal-length','sepal-width','petal-length','petal-width','Class']
data=pd.read_csv(url,names=names)
In [2]: data.head()
Out[2]:
sepal-length sepal-width petal-length petal-width Class
In [3]: X=data.iloc[:,0:4]
y=data.select_dtypes(include=[object])
y.head()
X.head()
Out[3]:
sepal-length sepal-width petal-length petal-width
In [4]: y.Class.unique()
34
In [5]: from sklearn import preprocessing
le=preprocessing.LabelEncoder()
y=y.apply(le.fit_transform)
y
Out[5]:
Class
0 0
1 0
2 0
3 0
4 0
... ...
145 2
146 2
147 2
148 2
149 2
Out[6]:
Class
40 0
144 2
129 2
82 1
59 1
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)
Out[8]: ▾ MLPClassifier
MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
35
In [9]: pred=mlp.predict(X_test)
accuracy 0.96 45
macro avg 0.97 0.94 0.95 45
weighted avg 0.96 0.96 0.95 45
[[14 0 0]
[ 0 9 2]
[ 0 0 20]]
36
10) Assignment on Regression using KNN. Build an
application where it can predict Salary based on of
experience using KNN (use salary dataset from
Kaggle). Display MSE.
In [1]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
loading dataset
In [2]: data=pd.read_csv("salary_data.csv")
In [3]: data.head()
Out[3]:
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
In [4]: data.tail()
Out[4]:
YearsExperience Salary
25 9.0 105582.0
26 9.5 116969.0
27 9.6 112635.0
28 10.3 122391.0
29 10.5 121872.0
In [5]: data.describe()
Out[5]:
YearsExperience Salary
count 30.000000 30.000000
mean 5.313333 76003.000000
std 2.837888 27414.429785
min 1.100000 37731.000000
25% 3.200000 56720.750000
50% 4.700000 65237.000000
75% 7.700000 100544.750000
max 10.500000 122391.000000
37
Train test Split
In [6]: from sklearn.model_selection import train_test_split
train , test = train_test_split(data, test_size = 0.3)
In [9]: x_test.head()
Out[9]:
YearsExperience
29 10.5
7 3.2
22 7.9
11 4.0
10 3.9
In [10]: y_test.head()
Out[10]: 29 121872.0
7 54445.0
22 101302.0
11 55794.0
10 63218.0
Name: Salary, dtype: float64
In [13]: pred
In [14]: error
Out[14]: 4194.927232595636
38
11) Assignment on Classification using KNN. an
application classify a iris flower into its specie
using KNN (use Iris dataset from Sklearn). Display
Accuracy score, classification Report & confusion
Matrix.
In [1]: import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
Loading dataset
In [8]: data=load_iris()
In [5]: X=data.data
y=data.target
y
Out[5]: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
In [12]: model.fit(X_train,y_train)
Out[12]: KNeighborsClassifier(n_neighbors=7)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the
notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with
nbviewer.org.
In [13]: pred=model.predict(X_test)
39
In [15]: pred
Out[15]: array([1, 0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, 1, 0, 1, 2, 2, 2, 1,
0, 0, 1, 2, 1, 2, 0, 0, 1, 0, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 0, 2,
0])
Accuracy : 0.9555555555555556
accuracy 0.96 45
macro avg 0.96 0.95 0.95 45
weighted avg 0.96 0.96 0.96 45
40
12) Assignment on Naive Bayes Classifier. Build an
application to classify a given text using a Naive
classifier. Use data from sklearn. Display Accuracy
score, Classification Report, confusion matrix.
In [1]: import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
data = fetch_20newsgroups()
data.target_names
Out[2]: ['alt.atheism',
'comp.graphics',
'comp.os.ms-windows.misc',
'comp.sys.ibm.pc.hardware',
'comp.sys.mac.hardware',
'comp.windows.x',
'misc.forsale',
'rec.autos',
'rec.motorcycles',
'rec.sport.baseball',
'rec.sport.hockey',
'sci.crypt',
'sci.electronics',
'sci.med',
'sci.space',
'soc.religion.christian',
'talk.politics.guns',
'talk.politics.mideast',
'talk.politics.misc',
'talk.religion.misc']
41
In [4]: print(train.data[5])
2493.
42
In [7]: from sklearn.metrics import confusion_matrix
mat = confusion_matrix(test.target, labels)
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
xticklabels=train.target_names, yticklabels=train.target_names)
plt.xlabel('true label')
plt.ylabel('predicted label');
In [8]: mat
Out[10]: 'sci.space'
Out[11]: 'soc.religion.christian
Out[12]: 'comp.graphics'
43
13) Assignment on K-mean clusting. Apply K-mean
clustering on Income data set to form 3 clusters and
display there clusters using scatter graph.
In [1]: import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
Loading dataset
In [2]: df=pd.read_csv("income.csv")
df.head()
0 Rob 27 70000
1 Michael 29 90000
2 Mohan 29 61000
3 Ismail 28 60000
4 Kory 42 150000
In [3]: plt.scatter(df.Age,df['Income($)'])
plt.xlabel('Age')
plt.ylabel('Income($)')
44
In [4]: km=KMeans(n_clusters=3)
y_predicted=km.fit_predict(df[['Age','Income($)']])
y_predicted
array([0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2])
Out[4]:
In [5]: df['cluster']=y_predicted
df.head()
df1 = df[df.cluster==0]
In [6]: df2 = df[df.cluster==1]
df3 = df[df.cluster==2]
plt.scatter(df1.Age,df1['Income($)'],color='green')
plt.scatter(df2.Age,df2['Income($)'],color='red')
plt.scatter(df3.Age,df3['Income($)'],color='black')
plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],color='purple',marker='*',
plt.xlabel('Age')
plt.ylabel('Income ($)')
plt.legend()
45
In [7]: scaler=MinMaxScaler()
scaler.fit(df[['Income($)']])
df['Income($)']=scaler.transform(df[['Income($)']])
scaler.fit(df[["Age"]])
df['Age']=scaler.transform(df[['Age']])
In [8]: df.head()
In [9]: plt.scatter(df.Age,df['Income($)'])
<matplotlib.collections.PathCollection at 0x150641bae10>
Out[9]:
46
In [10]: km=KMeans(n_clusters=3)
y_predicted=km.fit_predict(df[['Age','Income($)']])
y_predicted
array([0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
Out[10]:
In [11]: df['cluster']=y_predicted
In [12]: df.head()
In [13]: km.cluster_centers_
47
In [15]: sse = []
k_rng = range(1,10)
for k in k_rng:
km = KMeans(n_clusters=k)
km.fit(df[['Age','Income($)']])
sse.append(km.inertia_)
In [16]: sse
[5.434011511988178,
Out[16]:
2.091136388699078,
0.4750783498553096,
0.3491047094419566,
0.2664030124668416,
0.21055478995472493,
0.16869711728567788,
0.13265419827245162,
0.10383752586603562]
In [17]: plt.xlabel('K')
plt.ylabel('sum of squared error')
plt.plot(k_rng,sse)
[<matplotlib.lines.Line2D at 0x1506427acd0>]
Out[17]:
48
49
14) Assignment on Hierarchial clustering,Apply it
on mall_customers to form 5 clusters and display
these clusters using scatter graph and also display
its dendrogram
In [1]: import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [2]: data=pd.read_csv('Mall_Customers.csv')
data.head()
Out[2]: CustomerID Gender Age Annual Income (k$) Spending Score (1-100)
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
In [3]: newdata=data.iloc[:,[3,4]].values
50
In [4]: import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(newdata, method = 'ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()
C:\Users\HP\anaconda3\Lib\site-packages\sklearn\cluster\_agglomerative.py:1005: Fut
ureWarning: Attribute `affinity` was deprecated in version 1.2 and will be removed
in 1.4. Use `metric` instead
warnings.warn(
51
In [6]: plt.scatter(newdata[y_hc == 0, 0], newdata[y_hc == 0, 1], s = 100, c = 'red', label =
plt.scatter(newdata[y_hc == 1, 0], newdata[y_hc == 1, 1], s = 100, c = 'blue', label
plt.scatter(newdata[y_hc == 2, 0], newdata[y_hc == 2, 1], s = 100, c = 'green', label
plt.scatter(newdata[y_hc == 3, 0], newdata[y_hc == 3, 1], s = 100, c = 'cyan', label
plt.scatter(newdata[y_hc == 4, 0], newdata[y_hc == 4, 1], s = 100, c = 'magenta', lab
# plot title addition
plt.title('Clusters of customers')
# labelling the x-axis
plt.xlabel('Annual Income (k$)')
# label of the y-axis
plt.ylabel('Spending Score (1-100)')
# printing the legend
plt.legend()
# show the plot
plt.show()
52
15) Assignment on Dimensionality Reduction
. Apply Principal component Analysis (PCA) on Iris
dataset to reduce its dimensionality into 3 principal
components. Display data before and after reduction
using scatter matrix graph.
In [1]: import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv('Iris.csv')
df.head()
Out[1]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
53
In [2]: fig = plt.figure(figsize = (8,8))
sepal = fig.add_subplot(1,1,1)
sepal.set_xlabel('sepal_length', fontsize = 15)
sepal.set_ylabel('sepal_width', fontsize = 15)
sepal.set_title('Original Data', fontsize = 20)
targets = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
colors = ['r', 'g', 'b']
for target, color in zip(targets,colors):
indicesToKeep = df['Species'] == target
sepal.scatter(df.loc[indicesToKeep, 'SepalLengthCm']
, df.loc[indicesToKeep, 'SepalWidthCm']
, c = color
, s = 50)
sepal.legend(targets)
sepal.grid()
54
In [3]: fig = plt.figure(figsize = (8,8))
petal = fig.add_subplot(1,1,1)
petal.set_xlabel('petal_length', fontsize = 15)
petal.set_ylabel('petal_width', fontsize = 15)
petal.set_title('Original Data', fontsize = 20)
targets = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
colors = ['r', 'g', 'b']
for target, color in zip(targets,colors):
indicesToKeep = df['Species'] == target
petal.scatter(df.loc[indicesToKeep, 'PetalLengthCm']
, df.loc[indicesToKeep, 'PetalWidthCm']
, c = color
, s = 50)
petal.legend(targets)
petal.grid()
55
In [4]: from sklearn.preprocessing import StandardScaler
features = ['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']
# Separating out the features
x = df.loc[:, features].values
# Separating out the target
y = df.loc[:,['Species']].values
# Standardizing the features
x = StandardScaler().fit_transform(x)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state
Out[6]:
principal component 1 principal component 2 principal component 3 Species
56
In [7]: import matplotlib.pyplot as plt
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('Principal Component 1', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)
targets = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
colors = ['r', 'g', 'b']
for target, color in zip(targets,colors):
indicesToKeep = finalDf['Species'] == target
ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
, finalDf.loc[indicesToKeep, 'principal component 2']
, c = color
, s = 50)
ax.legend(targets)
ax.grid()
57
In [8]: from sklearn.neural_network import MLPClassifier
mlp=MLPClassifier(hidden_layer_sizes=(10,10,10),max_iter=200)
mlp.fit(X_train,y_train.ravel())
C:\Users\HP\anaconda3\Lib\site-packages\sklearn\neural_network\_multilayer_perceptr
on.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reach
ed and the optimization hasn't converged yet.
warnings.warn(
Out[8]: ▾ MLPClassifier
MLPClassifier(hidden_layer_sizes=(10, 10, 10))
C:\Users\HP\anaconda3\Lib\site-packages\sklearn\neural_network\_multilayer_perceptr
on.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reach
ed and the optimization hasn't converged yet.
warnings.warn(
Out[10]: 0.9777777777777777
58