Ashwin Report

The document presents worked implementations of common machine learning algorithms: Apriori and FP-Growth for association rule mining, ID3 decision trees, naive Bayesian classification, SVM classification, linear and multiple linear regression, and clustering with K-Means, K-Medoids, Mini-Batch K-Means, agglomerative clustering, and DBSCAN. Each algorithm is demonstrated with sample Python code, built largely on pandas, scikit-learn, and mlxtend, showing how to preprocess data, calculate metrics, and make predictions on small transaction and classification datasets.


2. Apriori algorithm
# 2. Apriori algorithm

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Sample dataset (transactions)
dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper'],
    ['Milk', 'Bread', 'Diaper'],
    ['Milk', 'Bread'],
    ['Bread', 'Butter']
]

# One-hot encode the transactions into a boolean DataFrame
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("One-Hot Encoded DataFrame:")
print(df)

# Mine itemsets that appear in at least 40% of transactions
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
print("\nFrequent Itemsets:")
print(frequent_itemsets)

# Generate rules with confidence >= 0.7
# num_itemsets (the number of transactions) is required by mlxtend >= 0.23.1;
# older versions do not accept it
rules = association_rules(frequent_itemsets, metric="confidence",
                          min_threshold=0.7, num_itemsets=len(dataset))
print("\nAssociation Rules:")
print(rules)

# Rank the rules by lift
rules = rules.sort_values(by='lift', ascending=False)
print("\nSorted Association Rules by Lift:")
print(rules)
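
As a sanity check on the columns in the rules table, the same metrics can be computed by hand. The sketch below (plain Python, reusing the dataset list above) does this for the rule Milk -> Bread, one of the rules that clears the 0.7 confidence threshold:

# Hand computation of support, confidence, and lift for Milk -> Bread
# on the five sample transactions
n = len(dataset)                                                      # 5
support_milk = sum('Milk' in t for t in dataset) / n                  # 3/5 = 0.6
support_bread = sum('Bread' in t for t in dataset) / n                # 5/5 = 1.0
support_both = sum({'Milk', 'Bread'} <= set(t) for t in dataset) / n  # 3/5 = 0.6
confidence = support_both / support_milk                              # 0.6 / 0.6 = 1.0
lift = confidence / support_bread                                     # 1.0 / 1.0 = 1.0
print(f"support={support_both}, confidence={confidence}, lift={lift}")
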
3. FP-Growth algorithm
# 3. FP-Growth algorithm

import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder

dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper'],
    ['Milk', 'Bread', 'Diaper'],
    ['Milk', 'Bread'],
    ['Bread', 'Butter']
]

te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
print("One-Hot Encoded DataFrame:")
print(df)

# FP-Growth finds the same frequent itemsets as Apriori,
# but without generating candidate itemsets
frequent_itemsets = fpgrowth(df, min_support=0.4, use_colnames=True)
print("\nFrequent Itemsets:")
print(frequent_itemsets)

# num_itemsets (the number of transactions) is required by mlxtend >= 0.23.1
rules = association_rules(frequent_itemsets, metric="confidence",
                          min_threshold=0.7, num_itemsets=len(dataset))
print("\nAssociation Rules:")
print(rules)

rules = rules.sort_values(by='lift', ascending=False)
print("\nSorted Association Rules by Lift:")
print(rules)
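
Apriori and FP-Growth should mine exactly the same frequent itemsets; they differ only in how the search is organized. A quick hedged check, reusing df from the listing above (row order may differ between the two miners, so both results are put in a canonical form before comparing):

from mlxtend.frequent_patterns import apriori, fpgrowth

fi_apriori = apriori(df, min_support=0.4, use_colnames=True)
fi_fpgrowth = fpgrowth(df, min_support=0.4, use_colnames=True)

def canon(fi):
    # Canonical form: sorted list of (support, sorted items) pairs
    return sorted((round(s, 6), tuple(sorted(items)))
                  for s, items in zip(fi['support'], fi['itemsets']))

print(canon(fi_apriori) == canon(fi_fpgrowth))  # expected: True
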
4. classification using ID3 algorithm
# 4. classification using ID3 algorithm

import pandas as pd
import numpy as np
from math import log2

def calculate_entropy(data):
    # Entropy of the class column (assumed to be the last column)
    class_counts = data.iloc[:, -1].value_counts()
    total_instances = len(data)
    entropy = 0
    for count in class_counts:
        probability = count / total_instances
        entropy -= probability * log2(probability)
    return entropy

def calculate_information_gain(data, feature):
    # Information gain = entropy(parent) - weighted entropy of the splits
    total_entropy = calculate_entropy(data)
    feature_values = data[feature].value_counts()
    weighted_entropy = 0

    for value, count in feature_values.items():
        subset = data[data[feature] == value]
        weighted_entropy += (count / len(data)) * calculate_entropy(subset)

    return total_entropy - weighted_entropy

def id3(data, features):
    # All instances share one class: return that class as a leaf
    if len(data.iloc[:, -1].value_counts()) == 1:
        return data.iloc[0, -1]

    # No features left to split on: return the majority class
    if len(features) == 0:
        return data.iloc[:, -1].value_counts().idxmax()

    best_feature = max(features, key=lambda feature: calculate_information_gain(data, feature))

    tree = {best_feature: {}}

    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value].drop(columns=[best_feature])
        remaining_features = [f for f in features if f != best_feature]
        tree[best_feature][value] = id3(subset, remaining_features)

    return tree

def predict(tree, instance):
    # Walk down the tree until a leaf (class label) is reached
    if isinstance(tree, dict):
        feature = list(tree.keys())[0]
        feature_value = instance[feature]
        return predict(tree[feature][feature_value], instance)
    else:
        return tree

data = pd.DataFrame({
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Mild', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'High', 'Low', 'Low', 'Low', 'Low', 'High'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No']
})
features = ['Outlook', 'Temperature', 'Humidity', 'Wind']
decision_tree = id3(data, features)
print("Decision Tree:")
print(decision_tree)

instance = {'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'Low', 'Wind': 'Strong'}
prediction = predict(decision_tree, instance)
print(f"\nPrediction for instance {instance}: {prediction}")
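
To make the entropy term concrete: the PlayTennis column above holds 6 'Yes' and 4 'No' labels, so the root entropy is -(0.6*log2(0.6) + 0.4*log2(0.4)), roughly 0.971 bits. A quick check against calculate_entropy, reusing the data frame from the listing:

from math import log2

# Root entropy of PlayTennis (6 Yes, 4 No out of 10 rows), computed by hand
p_yes, p_no = 6 / 10, 4 / 10
entropy_by_hand = -(p_yes * log2(p_yes) + p_no * log2(p_no))
print(round(entropy_by_hand, 3))           # ~0.971
print(round(calculate_entropy(data), 3))   # should match
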
5. classification using Bayesian algorithm
# 5. classification using Bayesian algorithm

import pandas as pd
import numpy as np

data = pd.DataFrame({
    'Age': [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
    'Income': ['Low', 'Low', 'Low', 'Medium', 'Medium', 'Medium', 'High', 'High', 'High', 'High'],
    'Purchase': ['No', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes']
})

# Encode the categorical columns as integers
data['Income'] = data['Income'].map({'Low': 0, 'Medium': 1, 'High': 2})
data['Purchase'] = data['Purchase'].map({'No': 0, 'Yes': 1})

def calculate_prior(data):
    # P(class) from the class frequencies
    total = len(data)
    prior = data['Purchase'].value_counts() / total
    return prior

def calculate_likelihood(data):
    # P(feature value | class) for each feature, treating every feature
    # (including Age) as categorical
    likelihood = {}

    for feature in ['Age', 'Income']:
        feature_likelihood = {}
        for label in data['Purchase'].unique():
            feature_data = data[data['Purchase'] == label]
            feature_likelihood[label] = feature_data[feature].value_counts() / len(feature_data)
        likelihood[feature] = feature_likelihood

    return likelihood

def predict(instance, prior, likelihood):
    posterior = {}

    for label in prior.index:
        # Work in log space to avoid numerical underflow
        posterior[label] = np.log(prior[label])

        for feature in instance.index:
            feature_value = instance[feature]
            # Unseen feature values fall back to a small floor probability
            posterior[label] += np.log(likelihood[feature][label].get(feature_value, 0.0001))

        posterior[label] = np.exp(posterior[label])

    return max(posterior, key=posterior.get)

prior = calculate_prior(data)
likelihood = calculate_likelihood(data)

new_instance = pd.Series({'Age': 50, 'Income': 1})
prediction = predict(new_instance, prior, likelihood)

predicted_class = 'Yes' if prediction == 1 else 'No'
print(f"Prediction for new instance: {predicted_class}")
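
The hand-rolled likelihood above treats Age as categorical, so any age not seen in training falls back to the 0.0001 floor. For a continuous feature, a Gaussian likelihood is the usual choice; below is a minimal sketch using scikit-learn's GaussianNB on the same encoded data, assuming scikit-learn is available:

from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes models P(Age | class) as a normal distribution
# instead of a lookup table of observed ages
X = data[['Age', 'Income']].values
y = data['Purchase'].values
gnb = GaussianNB()
gnb.fit(X, y)
print(gnb.predict([[50, 1]]))   # 1 -> 'Yes', 0 -> 'No'
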
6. classification using SVM algorithm
# 6. classification using SVM algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features before fitting the SVM
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Refit on the first two features only so the decision boundary can be plotted
X_train_2D = X_train[:, :2]
X_test_2D = X_test[:, :2]

svm_model_2D = SVC(kernel='linear', random_state=42)
svm_model_2D.fit(X_train_2D, y_train)

plt.figure(figsize=(8, 6))
plt.title("SVM Decision Boundary (2D)")

x_min, x_max = X_train_2D[:, 0].min() - 1, X_train_2D[:, 0].max() + 1
y_min, y_max = X_train_2D[:, 1].min() - 1, X_train_2D[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# Classify every point on the grid to shade the decision regions
Z = svm_model_2D.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X_train_2D[:, 0], X_train_2D[:, 1], c=y_train, edgecolors='k', marker='o', s=100, label='Training Data')
plt.scatter(X_test_2D[:, 0], X_test_2D[:, 1], c=y_test, edgecolors='r', marker='^', s=100, label='Test Data')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.legend()
plt.show()
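
A single train/test split gives one accuracy number; to see how stable that estimate is, a quick hedged addition is k-fold cross-validation on the full dataset. The pipeline scales inside each fold so the test folds never leak into the scaler:

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=42))
scores = cross_val_score(pipe, X, y, cv=5)
print("Fold accuracies:", scores)
print("Mean accuracy:", scores.mean())
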
7. Linear Regression algorithm
# 7. Linear Regression algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Synthetic data: y = 2.5x + 1 plus Gaussian noise
np.random.seed(42)
X = np.random.rand(100, 1) * 10
y = 2.5 * X + 1 + np.random.randn(100, 1) * 2

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Prepend a column of ones so theta[0] is the intercept
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Normal equation: theta = (X^T X)^-1 X^T y
theta = np.linalg.inv(X_train_bias.T.dot(X_train_bias)).dot(X_train_bias.T).dot(y_train)

b, m = theta[0], theta[1]
print(f"Model coefficients: Intercept (b) = {b[0]:.2f}, Slope (m) = {m[0]:.2f}")

y_pred = X_test_bias.dot(theta)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R-squared (R^2): {r2:.2f}")

plt.figure(figsize=(8, 6))
plt.scatter(X_test, y_test, color='black', label='Test Data', alpha=0.7)
# Sort by X so the regression line is drawn left to right
order = X_test[:, 0].argsort()
plt.plot(X_test[order], y_pred[order], color='black', label='Regression Line', linewidth=2)
plt.title("Linear Regression: Test Data vs Predicted Line")
plt.xlabel("Feature (X)")
plt.ylabel("Target (y)")
plt.legend()
plt.show()
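
The normal equation above inverts X^T X explicitly, which is fine here but can be numerically fragile when features are strongly correlated. A hedged alternative sketch is np.linalg.lstsq, which solves the same least-squares problem through a more stable factorization:

# Same least-squares problem solved without forming an explicit inverse
theta_lstsq, residuals, rank, sv = np.linalg.lstsq(X_train_bias, y_train, rcond=None)
print("theta via normal equation:", theta.ravel())
print("theta via lstsq:", theta_lstsq.ravel())   # should agree closely
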
8. Multiple Linear Regression algorithm
# 8. Multiple Linear Regression algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import make_regression

np.random.seed(42)
X, y = make_regression(n_samples=100, n_features=3, noise=10, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Prepend a bias column; theta holds [intercept, slope_1, slope_2, slope_3]
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Normal equation, as in the single-feature case
theta = np.linalg.inv(X_train_bias.T.dot(X_train_bias)).dot(X_train_bias.T).dot(y_train)

print(f"Model coefficients (Intercept and Slopes): {theta.flatten()}")

y_pred = X_test_bias.dot(theta)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R-squared (R^2): {r2:.2f}")

# Plot actual vs predicted against the first feature only
plt.figure(figsize=(8, 6))
plt.scatter(X_test[:, 0], y_test, color='black', label='Test Data')
plt.scatter(X_test[:, 0], y_pred, color='black', label='Predicted Data', alpha=0.7)
plt.title("Multiple Linear Regression: Test Data vs Predicted Data (Feature 1 vs Target)")
plt.xlabel("Feature 1")
plt.ylabel("Target (y)")
plt.legend()
plt.show()
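
As a cross-check on the hand-rolled normal equation, the same coefficients can be recovered with scikit-learn's LinearRegression; a sketch, with both results expected to agree up to floating-point error:

from sklearn.linear_model import LinearRegression

# scikit-learn fits the intercept itself, so the raw X_train is used here
lr = LinearRegression()
lr.fit(X_train, y_train)
print("Intercept:", lr.intercept_)
print("Slopes:", lr.coef_)   # should match theta from the normal equation
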
9. clustering using K-Means algorithm
# 9. clustering using K-Means algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

np.random.seed(42)

X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# n_init and random_state added for reproducible runs
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', s=30, marker='o', edgecolor='black')
plt.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title("K-Means Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

print(f"Inertia (WCSS): {kmeans.inertia_}")

# Elbow method: inertia for K = 1..10
inertias = []
K_range = range(1, 11)
for k in K_range:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X)
    inertias.append(kmeans.inertia_)

plt.figure(figsize=(8, 6))
plt.plot(K_range, inertias, marker='o', linestyle='-', color='b')
plt.title("Elbow Method for Optimal K")
plt.xlabel("Number of Clusters (K)")
plt.ylabel("Inertia (WCSS)")
plt.show()
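
The elbow plot is read by eye; the silhouette score gives a numeric alternative for picking K (higher is better, defined for K >= 2). A minimal sketch on the same blobs:

from sklearn.metrics import silhouette_score

# Silhouette score for K = 2..10; the best K maximizes the score
for k in range(2, 11):
    model = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X)
    print(k, round(silhouette_score(X, model.labels_), 3))
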
10. clustering using K-Medoids algorithm
# 10. clustering using K-Medoids algorithm

import numpy as np
import matplotlib.pyplot as plt
from pyclustering.cluster.kmedoids import kmedoids
from sklearn.datasets import make_blobs

np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Indices of the data points used as the initial medoids
initial_medoids = [0, 50, 100]

# pyclustering expects a plain list of points
kmedoids_instance = kmedoids(X.tolist(), initial_medoids)
kmedoids_instance.process()

# get_clusters() returns one list of point indices per cluster
clusters = kmedoids_instance.get_clusters()
medoids = kmedoids_instance.get_medoids()

plt.figure(figsize=(8, 6))

for cluster in clusters:
    plt.scatter(X[cluster, 0], X[cluster, 1], s=30, marker='o', edgecolor='black')

for medoid in medoids:
    plt.scatter(X[medoid, 0], X[medoid, 1], s=200, c='red', marker='X', label="Medoid")

plt.title("K-Medoids Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend(["Clusters", "Medoids"])
plt.show()

print(f"Medoids (data point indices): {medoids}")

for i, cluster in enumerate(clusters):
    print(f"Cluster {i}: {len(cluster)} points")
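
pyclustering returns clusters as lists of point indices rather than the flat label array scikit-learn users may expect. The short sketch below converts one form to the other, reusing clusters and X from above:

# Flat label array (one cluster id per point), matching the labels_
# convention of scikit-learn estimators
labels = np.full(len(X), -1, dtype=int)
for cluster_id, point_indices in enumerate(clusters):
    labels[point_indices] = cluster_id
print(labels[:20])
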
11. clustering using Mini Batch K-Means algorithm
# 11. clustering using Mini Batch K-Means algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets import make_blobs

np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Mini-batch variant: updates centroids from random batches of 50 points
minibatch_kmeans = MiniBatchKMeans(n_clusters=3, batch_size=50, random_state=42)
minibatch_kmeans.fit(X)
labels = minibatch_kmeans.labels_
centroids = minibatch_kmeans.cluster_centers_

plt.figure(figsize=(8, 6))

for i in range(3):
    plt.scatter(X[labels == i, 0], X[labels == i, 1], s=30, marker='o', edgecolor='black')

plt.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X', label="Centroids")
plt.title("Mini-Batch K-Means Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

print(f"Centroids:\n{centroids}")
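
Mini-batch K-Means trades a little clustering quality for speed; a quick hedged comparison is to fit full K-Means on the same data and compare the inertia of the two solutions, with the mini-batch result usually landing close to the full-batch one:

from sklearn.cluster import KMeans

full_kmeans = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)
print("Full K-Means inertia:", full_kmeans.inertia_)
print("Mini-batch K-Means inertia:", minibatch_kmeans.inertia_)
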
12. clustering using Agglomerative algorithm
# 12. clustering using Agglomerative algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import dendrogram, linkage

np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Bottom-up clustering with Ward linkage (minimizes within-cluster variance)
agg_clust = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = agg_clust.fit_predict(X)

plt.figure(figsize=(8, 6))

for i in range(3):
    plt.scatter(X[labels == i, 0], X[labels == i, 1], s=30, marker='o', edgecolor='black')

plt.title("Agglomerative Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Dendrogram of the full merge hierarchy
Z = linkage(X, method='ward')

plt.figure(figsize=(10, 7))
dendrogram(Z)
plt.title("Dendrogram")
plt.xlabel("Sample index")
plt.ylabel("Distance")
plt.show()

print("Cluster labels:", labels)
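
The dendrogram also supports choosing the cluster count after the fact: scipy's fcluster cuts the linkage tree at a given number of clusters, and for Ward linkage the result should match AgglomerativeClustering up to a permutation of the labels. A minimal sketch, reusing Z from above:

from scipy.cluster.hierarchy import fcluster

# Cut the Ward linkage tree into 3 flat clusters (labels are 1-based)
flat_labels = fcluster(Z, t=3, criterion='maxclust')
print("Labels from the dendrogram cut:", flat_labels[:20])
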
13. clustering using DBSCAN algorithm
# 13. clustering using DBSCAN algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
X, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

X[150:200] = np.random.uniform(low=-6, high=6, size=(50, 2))  # Random noise points

# Scale the data so eps applies on a comparable scale for both features
X = StandardScaler().fit_transform(X)

# Points with >= 10 neighbours within eps = 0.3 become core points;
# unreachable points are labelled -1 (noise)
dbscan = DBSCAN(eps=0.3, min_samples=10)
labels = dbscan.fit_predict(X)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', s=30, marker='o', edgecolor='k')
plt.scatter(X[labels == -1, 0], X[labels == -1, 1], c='red', s=30, marker='x', label='Outliers')
plt.title("DBSCAN Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

print("Cluster labels:", labels)
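
Unlike K-Means, DBSCAN infers the number of clusters from the data, so it is worth reporting explicitly; the standard idiom below counts the clusters found (excluding the -1 noise label) and the noise points:

# Number of clusters found (excluding the noise label -1) and the noise count
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = list(labels).count(-1)
print(f"Estimated clusters: {n_clusters}")
print(f"Noise points: {n_noise}")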
