Ashwin Report
2. Apriori algorithm
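Apriori mines frequent itemsets level by level, pruning candidates with the property that every subset of a frequent itemset must itself be frequent. The rules derived below are ranked by lift, where lift(A -> B) = confidence(A -> B) / support(B); a lift above 1 indicates a positive association between antecedent and consequent.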
# 2. Apriori algorithm
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Sample dataset (transactions)
dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper'],
    ['Milk', 'Bread', 'Diaper'],
    ['Milk', 'Bread'],
    ['Bread', 'Butter']
]

# One-hot encode the transactions
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine frequent itemsets with minimum support 0.4
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
print("\nFrequent Itemsets:")
print(frequent_itemsets)

# Generate association rules and sort them by lift
# (the rule-generation call was missing in the source; lift >= 1.0 is assumed)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1.0)
rules = rules.sort_values(by='lift', ascending=False)
print("\nSorted Association Rules by Lift:")
print(rules)
3. FP growth algorithm
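FP-growth finds the same frequent itemsets as Apriori but avoids repeated candidate generation: it compresses the transactions into an FP-tree and mines that tree recursively, which is usually faster on large datasets.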
# 3. FP growth algorithm
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Same sample transactions as in the Apriori program
dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper'],
    ['Milk', 'Bread', 'Diaper'],
    ['Milk', 'Bread'],
    ['Bread', 'Butter']
]

te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
print("One-Hot Encoded DataFrame:")
print(df)

# Mine frequent itemsets with FP-growth, then derive rules
# (these two calls were missing in the source; min_support=0.4 is assumed
# to match the Apriori program)
frequent_itemsets = fpgrowth(df, min_support=0.4, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1.0)
rules = rules.sort_values(by='lift', ascending=False)
print("\nSorted Association Rules by Lift:")
print(rules)
4. classification using ID3 algorithm
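ID3 grows a decision tree top-down. At each node it splits on the feature with the highest information gain, Gain(S, A) = Entropy(S) - sum_v (|S_v| / |S|) * Entropy(S_v), and recurses until a node is pure or no features remain.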
# 4. classification using ID3 algorithm
import pandas as pd
import numpy as np
from math import log2

def calculate_entropy(data):
    # Entropy of the class column (assumed to be the last column)
    class_counts = data.iloc[:, -1].value_counts()
    total_instances = len(data)
    entropy = 0
    for count in class_counts:
        probability = count / total_instances
        entropy -= probability * log2(probability)
    return entropy

def calculate_information_gain(data, feature):
    total_entropy = calculate_entropy(data)
    feature_values = data[feature].value_counts()
    weighted_entropy = 0
    for value, count in feature_values.items():
        subset = data[data[feature] == value]
        weighted_entropy += (count / len(data)) * calculate_entropy(subset)
    return total_entropy - weighted_entropy  # return was missing in the source

def id3(data, features):
    # All instances share one class: return it as a leaf
    if len(data.iloc[:, -1].value_counts()) == 1:
        return data.iloc[0, -1]
    # No features left: return the majority class
    if len(features) == 0:
        return data.iloc[:, -1].value_counts().idxmax()
    # Split on the feature with the highest information gain
    # (this selection step was missing in the source)
    best_feature = max(features, key=lambda f: calculate_information_gain(data, f))
    tree = {best_feature: {}}
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value].drop(columns=[best_feature])
        remaining_features = [f for f in features if f != best_feature]
        tree[best_feature][value] = id3(subset, remaining_features)
    return tree

def predict(tree, instance):
    # Walk the nested-dict tree until a leaf (class label) is reached
    # (this helper was called but not defined in the source)
    while isinstance(tree, dict):
        feature = next(iter(tree))
        tree = tree[feature][instance[feature]]
    return tree

data = pd.DataFrame({
    # The last two Outlook values were truncated in the source;
    # 'Sunny' and 'Rainy' are assumed to complete the column
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Mild', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'High', 'Low', 'Low', 'Low', 'Low', 'High'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No']
})

features = ['Outlook', 'Temperature', 'Humidity', 'Wind']
decision_tree = id3(data, features)
print("Decision Tree:")
print(decision_tree)

instance = {'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'Low', 'Wind': 'Strong'}
prediction = predict(decision_tree, instance)
print(f"\nPrediction for instance {instance}: {prediction}")
5. classification using Bayesian algorithm
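Naive Bayes applies Bayes' theorem under a conditional-independence assumption: P(class | features) is proportional to P(class) times the product of P(feature | class). The program estimates the prior and the per-feature likelihoods from frequency counts and accumulates them in log space to avoid numerical underflow.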
# 5. classification using Bayesian algorithm
import pandas as pd
import numpy as np

data = pd.DataFrame({
    'Age': [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
    'Income': ['Low', 'Low', 'Low', 'Medium', 'Medium', 'Medium', 'High', 'High', 'High', 'High'],
    'Purchase': ['No', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes']
})

def calculate_prior(data):
    # P(class) estimated from the class frequencies
    total = len(data)
    prior = data['Purchase'].value_counts() / total
    return prior

def calculate_likelihood(data):
    # P(feature value | class) for every feature/class combination
    likelihood = {}
    for feature in ['Age', 'Income']:
        feature_likelihood = {}
        for label in data['Purchase'].unique():
            feature_data = data[data['Purchase'] == label]
            feature_likelihood[label] = feature_data[feature].value_counts() / len(feature_data)
        likelihood[feature] = feature_likelihood
    return likelihood

def predict(instance, prior, likelihood):
    # (this wrapper was missing in the source; only its body survived)
    posterior = {}
    for label in prior.index:
        posterior[label] = np.log(prior[label])
        for feature in instance.index:
            feature_value = instance[feature]
            # Add log of likelihood; a small default avoids log(0) for unseen values
            posterior[label] += np.log(likelihood[feature][label].get(feature_value, 0.0001))
        posterior[label] = np.exp(posterior[label])
    return max(posterior, key=posterior.get)

prior = calculate_prior(data)
likelihood = calculate_likelihood(data)
# The source's Income value (1) is not a valid category; 'Medium' is assumed
new_instance = pd.Series({'Age': 50, 'Income': 'Medium'})
predicted_class = predict(new_instance, prior, likelihood)
print(f"Prediction for new instance: {predicted_class}")
6. classification using SVM algorithm
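A support vector machine separates classes with the maximum-margin hyperplane. The program trains an SVM on the Iris dataset, reports accuracy on a held-out test set, and plots the decision boundary over the first two standardised features.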
# 6. classification using SVM algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Train/test split (missing in the source; a 70/30 split is assumed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the SVM (the model definition was missing; a linear kernel is assumed)
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Refit on the first two features to draw a 2D decision boundary
X_train_2D = X_train[:, :2]
X_test_2D = X_test[:, :2]
svm_model_2D = SVC(kernel='linear')
svm_model_2D.fit(X_train_2D, y_train)

# Mesh grid over the feature plane (construction was missing in the source)
xx, yy = np.meshgrid(
    np.arange(X_train_2D[:, 0].min() - 1, X_train_2D[:, 0].max() + 1, 0.02),
    np.arange(X_train_2D[:, 1].min() - 1, X_train_2D[:, 1].max() + 1, 0.02))
Z = svm_model_2D.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.figure(figsize=(8, 6))
plt.title("SVM Decision Boundary (2D)")
plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X_train_2D[:, 0], X_train_2D[:, 1], c=y_train, edgecolors='k', marker='o', s=100, label="Train")
plt.scatter(X_test_2D[:, 0], X_test_2D[:, 1], c=y_test, edgecolors='r', marker='^', s=100, label="Test")
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.legend()
plt.show()
7. Linear Regression algorithm
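Simple linear regression fits y = m*x + b to synthetic data. Instead of gradient descent, the program solves the closed-form normal equation theta = (X^T X)^(-1) X^T y on a bias-augmented design matrix, then evaluates MSE, RMSE and R^2 on the test split.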
# 7. Linear Regression algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

np.random.seed(42)
X = np.random.rand(100, 1) * 10
y = 2.5 * X + 1 + np.random.randn(100, 1) * 2

# Train/test split (missing in the source; an 80/20 split is assumed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit via the normal equation on a bias-augmented design matrix
# (this step was missing in the source)
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]
theta = np.linalg.inv(X_train_bias.T.dot(X_train_bias)).dot(X_train_bias.T).dot(y_train)

b, m = theta[0], theta[1]
print(f"Model coefficients: Intercept (b) = {b[0]:.2f}, Slope (m) = {m[0]:.2f}")

y_pred = X_test_bias.dot(theta)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"MSE = {mse:.2f}, RMSE = {rmse:.2f}, R2 = {r2:.2f}")

plt.figure(figsize=(8, 6))
plt.scatter(X_test, y_test, color='black', label='Test Data', alpha=0.7)
plt.plot(X_test, y_pred, color='black', label='Regression Line', linewidth=2)
plt.title("Linear Regression: Test Data vs Predicted Line")
plt.xlabel("Feature (X)")
plt.ylabel("Target (y)")
plt.legend()
plt.show()
8. Multiple Linear Regression algorithm
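Multiple linear regression extends the same normal-equation fit to several features: theta now holds one intercept plus one coefficient per feature.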
# 8. Multiple Linear Regression algorithm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import make_regression

np.random.seed(42)
X, y = make_regression(n_samples=100, n_features=3, noise=10, random_state=42)

# Train/test split (missing in the source; an 80/20 split is assumed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normal equation on bias-augmented matrices, as in the previous program
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]
theta = np.linalg.inv(X_train_bias.T.dot(X_train_bias)).dot(X_train_bias.T).dot(y_train)
print(f"Model coefficients (intercept first): {theta}")

y_pred = X_test_bias.dot(theta)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"MSE = {mse:.2f}, RMSE = {rmse:.2f}, R2 = {r2:.2f}")
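9. clustering using K-Means algorithm
K-Means partitions the data into K clusters by alternately assigning each point to its nearest centroid and recomputing each centroid as the mean of its cluster. The elbow plot of inertia (within-cluster sum of squares) against K helps choose a suitable K.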
# 9. clustering using K-Means algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

np.random.seed(42)
# Synthetic data (generation was missing in the source; the same
# make_blobs settings as the K-Medoids program are assumed)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Fit K-Means with K=3 (this step was missing in the source)
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
labels = kmeans.fit_predict(X)
centroids = kmeans.cluster_centers_

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', s=30, marker='o', edgecolor='black')
plt.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title("K-Means Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

print(f"Inertia (WCSS): {kmeans.inertia_}")

# Elbow method: inertia for K = 1..10
inertias = []
K_range = range(1, 11)
for k in K_range:
    kmeans = KMeans(n_clusters=k, n_init=10)
    kmeans.fit(X)
    inertias.append(kmeans.inertia_)

plt.figure(figsize=(8, 6))
plt.plot(K_range, inertias, marker='o', linestyle='-', color='b')
plt.title("Elbow Method for Optimal K")
plt.xlabel("Number of Clusters (K)")
plt.ylabel("Inertia (WCSS)")
plt.show()
10. clustering using K-Medoids algorithm
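K-Medoids works like K-Means but restricts each cluster centre to an actual data point (a medoid), which makes it more robust to outliers. The program uses the pyclustering implementation, seeded with three hand-picked initial medoid indices.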
# 10. clustering using K-Medoids algorithm
import numpy as np
import matplotlib.pyplot as plt
from pyclustering.cluster.kmedoids import kmedoids
from sklearn.datasets import make_blobs

np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Indices of the initial medoids; pyclustering expects a plain list of points
initial_medoids = [0, 50, 100]
kmedoids_instance = kmedoids(X.tolist(), initial_medoids)
kmedoids_instance.process()
clusters = kmedoids_instance.get_clusters()
medoids = kmedoids_instance.get_medoids()

plt.figure(figsize=(8, 6))
for cluster in clusters:
    plt.scatter(X[cluster, 0], X[cluster, 1], s=30, marker='o', edgecolor='black')
for medoid in medoids:
    plt.scatter(X[medoid, 0], X[medoid, 1], s=200, c='red', marker='X')
plt.title("K-Medoids Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend(["Clusters", "Medoids"])
plt.show()
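11. clustering using Mini-Batch K-Means algorithm
Mini-Batch K-Means updates the centroids from small random batches rather than the full dataset on every iteration, trading a little cluster quality for a large speed-up on big data.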
# 11. clustering using Mini-Batch K-Means algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets import make_blobs

np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Fit Mini-Batch K-Means (this step was missing in the source;
# batch_size=50 is an assumption)
mbk = MiniBatchKMeans(n_clusters=3, batch_size=50, n_init=10, random_state=42)
labels = mbk.fit_predict(X)
centroids = mbk.cluster_centers_

plt.figure(figsize=(8, 6))
for i in range(3):
    plt.scatter(X[labels == i, 0], X[labels == i, 1], s=30, marker='o', edgecolor='black', label=f'Cluster {i}')
plt.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title("Mini-Batch K-Means Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

print(f"Centroids:\n{centroids}")
12. clustering using Agglomerative algorithm
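Agglomerative clustering starts with every point in its own cluster and repeatedly merges the closest pair; with Ward linkage each merge minimises the increase in within-cluster variance. The dendrogram visualises the full merge hierarchy.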
# 12. clustering using Agglomerative algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import dendrogram, linkage

np.random.seed(42)
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c='blue', s=30, marker='o', edgecolor='black')
plt.title("Generated Synthetic Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Bottom-up clustering with Ward linkage
agg_clust = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = agg_clust.fit_predict(X)

plt.figure(figsize=(8, 6))
for i in range(3):
    plt.scatter(X[labels == i, 0], X[labels == i, 1], s=30, marker='o', edgecolor='black')
plt.title("Agglomerative Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Dendrogram of the same data using SciPy's Ward linkage
Z = linkage(X, method='ward')
plt.figure(figsize=(10, 7))
dendrogram(Z)
plt.title("Dendrogram")
plt.xlabel("Sample index")
plt.ylabel("Distance")
plt.show()

print("Cluster labels:", labels)
13. clustering using DBSCAN algorithm
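DBSCAN groups points that are densely packed: a point with at least min_samples neighbours within radius eps is a core point, clusters grow outward from core points, and points reachable from no core point are labelled noise (-1). Unlike K-Means, the number of clusters is not fixed in advance.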
# 13. clustering using DBSCAN algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
X, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=42)
X = StandardScaler().fit_transform(X)

dbscan = DBSCAN(eps=0.3, min_samples=10)
labels = dbscan.fit_predict(X)

# Plot each cluster (the scatter call was missing in the source);
# label -1 marks noise points
plt.figure(figsize=(8, 6))
for label in np.unique(labels):
    name = 'Noise' if label == -1 else f'Cluster {label}'
    plt.scatter(X[labels == label, 0], X[labels == label, 1], s=30, marker='o', edgecolor='black', label=name)
plt.title("DBSCAN Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

print("Cluster labels:", labels)