Lab Manual AIML
# (1). Implement A* Search algorithm.

def aStarAlgo(start_node, stop_node):

    open_set = {start_node}
    closed_set = set()

    g = {}          # cost of the cheapest known path from start_node
    parents = {}    # parent map used to reconstruct the path

    g[start_node] = 0
    parents[start_node] = start_node

    while len(open_set) > 0:
        n = None

        # pick the open node with the lowest f(n) = g(n) + h(n)
        for v in open_set:
            if n is None or g[v] + heuristic(v) < g[n] + heuristic(n):
                n = v

        if n == stop_node or Graph_nodes[n] is None:
            pass
        else:
            for (m, weight) in get_neighbors(n):

                if m not in open_set and m not in closed_set:
                    open_set.add(m)
                    parents[m] = n
                    g[m] = g[n] + weight
                else:
                    # found a cheaper route to m: update its cost and parent
                    if g[m] > g[n] + weight:
                        g[m] = g[n] + weight
                        parents[m] = n

                        if m in closed_set:
                            closed_set.remove(m)
                            open_set.add(m)

        if n is None:
            print('Path does not exist!')
            return None

        if n == stop_node:
            # walk the parent map back to the start and reverse it
            path = []
            while parents[n] != n:
                path.append(n)
                n = parents[n]

            path.append(start_node)
            path.reverse()
            print('Path found: {}'.format(path))
            return path

        open_set.remove(n)
        closed_set.add(n)

    print('Path does not exist!')
    return None


def get_neighbors(v):
    if v in Graph_nodes:
        return Graph_nodes[v]
    else:
        return None

def heuristic(n):
    H_dist = {
        'A': 2,
        'B': 6,
        'C': 2,
        'D': 3,
        'S': 4,
        'G': 0,
    }
    return H_dist[n]


Graph_nodes = {
    'A': [('B', 3), ('C', 1)],
    'B': [('D', 3)],
    'C': [('D', 1), ('G', 5)],
    'D': [('G', 3)],
    'S': [('A', 1)],
    'G': []
}

aStarAlgo('S', 'G')
aStarAlgo('A', 'B')
aStarAlgo('B', 'S')

'''
---Output---
Path found: ['S', 'A', 'C', 'D', 'G']
Path found: ['A', 'B']
Path does not exist!
'''
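
A quick hand trace of aStarAlgo('S', 'G') shows where this output comes from.
With f(n) = g(n) + h(n): expanding S gives A (f = 1 + 2 = 3); expanding A gives
C (f = 2 + 2 = 4) and B (f = 4 + 6 = 10); expanding C gives D (f = 3 + 3 = 6)
and G (f = 7 + 0 = 7); expanding D lowers G to g = 6, f = 6. G is then the
cheapest open node, so the path ['S', 'A', 'C', 'D', 'G'] of total cost 6 is
reconstructed from the parents map.
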
# (2). Implement AO* Search algorithm.


class Graph:

    def __init__(self, graph, heuristicNodeList, startNode):
        self.graph = graph
        self.H = heuristicNodeList    # heuristic value of each node
        self.start = startNode
        self.parent = {}
        self.status = {}              # status -1 marks a solved node
        self.solutionGraph = {}

    def applyAOStar(self):
        self.aoStar(self.start, False)

    def getNeighbors(self, v):
        return self.graph.get(v, '')

    def getStatus(self, v):
        return self.status.get(v, 0)

    def setStatus(self, v, val):
        self.status[v] = val

    def getHeuristicNodeValue(self, n):
        return self.H.get(n, 0)

    def setHeuristicNodeValue(self, n, value):
        self.H[n] = value

    def printSolution(self):
        print("FOR GRAPH SOLUTION, TRAVERSE THE GRAPH FROM THE START NODE:",
              self.start)
        print("===========================================================")
        print(self.solutionGraph)
        print("===========================================================")

    def computeMinimumCostChildNodes(self, v):
        # evaluate every AND/OR child group of v and keep the cheapest one
        minimumCost = 0
        costToChildNodeListDict = {}
        costToChildNodeListDict[minimumCost] = []
        flag = True

        for nodeInfoTupleList in self.getNeighbors(v):
            cost = 0
            nodeList = []

            for c, weight in nodeInfoTupleList:
                cost = cost + self.getHeuristicNodeValue(c) + weight
                nodeList.append(c)

            if flag:
                minimumCost = cost
                costToChildNodeListDict[minimumCost] = nodeList
                flag = False
            elif minimumCost > cost:
                minimumCost = cost
                costToChildNodeListDict[minimumCost] = nodeList

        return minimumCost, costToChildNodeListDict[minimumCost]
    def aoStar(self, v, backTracking):

        print("HEURISTIC VALUES :", self.H)
        print("SOLUTION GRAPH   :", self.solutionGraph)
        print("PROCESSING NODE  :", v)
        print("------------------------------------------------------------")

        if self.getStatus(v) >= 0:    # v is not yet solved

            minimumCost, childNodeList = self.computeMinimumCostChildNodes(v)
            self.setHeuristicNodeValue(v, minimumCost)
            self.setStatus(v, len(childNodeList))
            solved = True

            for childNode in childNodeList:
                self.parent[childNode] = v
                if self.getStatus(childNode) != -1:
                    solved = False

            if solved:
                # all children solved: mark v solved and record its child group
                self.setStatus(v, -1)
                self.solutionGraph[v] = childNodeList

            if v != self.start:
                # propagate the revised cost back towards the start node
                self.aoStar(self.parent[v], True)

            if not backTracking:
                for childNode in childNodeList:
                    self.setStatus(childNode, 0)
                    self.aoStar(childNode, False)


h1 = {'A': 38, 'B': 17, 'C': 9, 'D': 27, 'E': 5, 'F': 10, 'G': 3,
      'H': 4, 'I': 15, 'J': 10}

graph1 = {
    'A': [[('B', 1), ('C', 1)], [('D', 1)]],
    'B': [[('E', 1)], [('F', 1)]],
    'C': [[('G', 1)], [('H', 1)]],
    'D': [[('I', 1), ('J', 1)]]
}

G1 = Graph(graph1, h1, 'A')
G1.applyAOStar()
G1.printSolution()

print("HEURISTIC VALUES :", G1.H)
print("SOLUTION GRAPH   :", G1.solutionGraph)

print('status:', G1.status)
print('parent:', G1.parent)
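
To see how the search gets started, the first call to
computeMinimumCostChildNodes('A') compares A's two child groups: the AND group
[B, C] costs (17 + 1) + (9 + 1) = 28, and the single child [D] costs
27 + 1 = 28. The tie keeps the first group, so the search descends into B and
C, and each backtracking call then revises these estimates until every node on
the chosen branch is marked solved (status -1).
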
(3). For a given set of training data examples stored in a .CSV file, implement
and demonstrate the CANDIDATE-ELIMINATION algorithm to output a description of
the set of all hypotheses consistent with the training examples.


import numpy as np
import pandas as pd

data = pd.read_csv('data1.csv')

concepts = np.array(data.iloc[:, 0:-1])
print(concepts)

target = np.array(data.iloc[:, -1])
print(target)


def learn(concepts, target):

    # start with the first training example as the most specific hypothesis
    specific_h = concepts[0].copy()
    print('initialization of specific_h and general_h')
    print(specific_h)

    # the most general boundary: one maximally general row per attribute
    general_h = [['?' for i in range(len(specific_h))]
                 for i in range(len(specific_h))]
    print(general_h)

    for i, h in enumerate(concepts):
        if target[i] == 'yes':
            # positive example: generalise specific_h where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
            print(specific_h)
            print(general_h)

        if target[i] == 'no':
            # negative example: specialise general_h where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print('steps of Candidate Elimination Algorithm', i + 1)
        print(specific_h)
        print(general_h)

    # drop the rows of general_h that stayed maximally general
    indices = [i for i, val in enumerate(general_h)
               if val == ['?'] * len(specific_h)]
    for i in indices:
        general_h.remove(['?'] * len(specific_h))

    return specific_h, general_h


s_final, g_final = learn(concepts, target)
print('----------------final answer----------------\n')
print('final specific_h: ', s_final, sep='\n')
print('final general_h: ', g_final, sep='\n')
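
The listing assumes data1.csv holds attribute columns followed by a lowercase
yes/no class column. A minimal file in the classic EnjoySport layout (six
attributes), given here only to illustrate the expected shape:

sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes
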
(4). Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply
this knowledge to classify a new sample.


import math
import pandas as pd
from pprint import pprint
from collections import Counter


def entropy(probs):
    # Shannon entropy of a probability distribution
    return sum([-prob * math.log(prob, 2) for prob in probs])


def entropy_list(a_list):
    cnt = Counter(x for x in a_list)
    num_instance = len(a_list) * 1.0
    probs = [x / num_instance for x in cnt.values()]
    return entropy(probs)


def info_gain(df, split, target, trace=0):
    # information gain of splitting df on the attribute `split`
    df_split = df.groupby(split)
    nobs = len(df.index) * 1.0
    df_agg_ent = df_split.agg({target: [entropy_list, lambda x: len(x) / nobs]})
    df_agg_ent.columns = ["entropy", "propObserved"]

    new_entropy = sum(df_agg_ent["entropy"] * df_agg_ent["propObserved"])
    old_entropy = entropy_list(df[target])
    return old_entropy - new_entropy


def id3(df, target, attribute_name, default_class=None):

    cnt = Counter(x for x in df[target])
    if len(cnt) == 1:                        # all examples share one class
        return next(iter(cnt))

    elif df.empty or (not attribute_name):   # nothing left to split on
        return default_class

    else:
        default_class = max(cnt, key=cnt.get)   # majority class
        gains = [info_gain(df, attr, target) for attr in attribute_name]
        index_max = gains.index(max(gains))
        best_attr = attribute_name[index_max]
        tree = {best_attr: {}}
        remaining_attr = [x for x in attribute_name if x != best_attr]

        for attr_val, data_subset in df.groupby(best_attr):
            subtree = id3(data_subset, target, remaining_attr, default_class)
            tree[best_attr][attr_val] = subtree

        return tree


def classify(instance, tree, default=None):
    attribute = next(iter(tree))
    if instance[attribute] in tree[attribute].keys():
        result = tree[attribute][instance[attribute]]
        if isinstance(result, dict):
            return classify(instance, result)
        else:
            return result
    else:
        return default
df_tennis = pd.read_csv('id3.csv')
print(df_tennis)

attribute_names = list(df_tennis.columns)
attribute_names.remove('PlayTennis')

tree = id3(df_tennis, 'PlayTennis', attribute_names)

print('\n\nThe resultant decision tree is:\n\n')
pprint(tree)
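
The exercise also asks that the tree be used to classify a new sample; the
classify function above supports this but is never called in the listing. A
minimal sketch, assuming id3.csv uses the classic PlayTennis columns (Outlook,
Temperature, Humidity, Wind); both the column names and the sample values here
are illustrative:

# hypothetical new sample, keyed by the assumed attribute names
new_sample = {'Outlook': 'Sunny', 'Temperature': 'Cool',
              'Humidity': 'High', 'Wind': 'Strong'}
print('Prediction for the new sample:',
      classify(new_sample, tree, default='No'))
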
(5). Build an Artificial Neural Network by implementing the Backpropagation
algorithm and test the same using appropriate data sets.


import numpy as np

input_neurons = 2
hidden_layer_neurons = 2
output_neurons = 2

input_ = np.random.randint(1, 100, input_neurons)
output = np.array([1.0, 0.0])    # target vector

hidden_bias = np.random.rand(1, hidden_layer_neurons)
output_bias = np.random.rand(1, output_neurons)
hidden_weight = np.random.rand(input_neurons, hidden_layer_neurons)
output_weight = np.random.rand(hidden_layer_neurons, output_neurons)


def sigmoid(layer):
    return 1 / (1 + np.exp(-layer))


def gradient(layer):
    # derivative of the sigmoid, expressed in terms of its output
    return layer * (1 - layer)


for i in range(2000):

    # forward pass
    hidden_layer = np.dot(input_, hidden_weight)
    hidden_layer = sigmoid(hidden_layer + hidden_bias)

    output_layer = np.dot(hidden_layer, output_weight)
    output_layer = sigmoid(output_layer + output_bias)

    # backward pass: error terms for each layer
    error = output - output_layer
    gradient_outputLayer = gradient(output_layer)

    error_terms_output = gradient_outputLayer * error
    error_terms_hidden = (gradient(hidden_layer) *
                          np.dot(error_terms_output, output_weight.T))

    gradient_hidden_weights = np.dot(
        input_.reshape(input_neurons, 1),
        error_terms_hidden.reshape(1, hidden_layer_neurons))
    gradient_output_weights = np.dot(
        hidden_layer.reshape(hidden_layer_neurons, 1),
        error_terms_output.reshape(1, output_neurons))

    # weight and bias updates (learning rate 0.05); the error terms are
    # also the bias gradients
    hidden_weight = hidden_weight + 0.05 * gradient_hidden_weights
    output_weight = output_weight + 0.05 * gradient_output_weights
    hidden_bias = hidden_bias + 0.05 * error_terms_hidden
    output_bias = output_bias + 0.05 * error_terms_output

    print('***********************************')
    print('Iteration: ', i, ':::', error)
    print('####- output - #####', output_layer)
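
A minimal post-training check (an addition, not part of the original listing)
that reuses the names defined above to confirm the output has moved towards
the target:

# forward pass with the trained weights and biases
hidden = sigmoid(np.dot(input_, hidden_weight) + hidden_bias)
final = sigmoid(np.dot(hidden, output_weight) + output_bias)
print('Trained output:', final, 'target:', output)
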
(6). Write a program to implement the naïve Bayesian classifier for a sample
training data set stored as a .CSV file. Compute the accuracy of the
classifier, considering a few test data sets.


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

df = pd.read_csv("pima_indian.csv")
feature_col_names = ['num_preg', 'glucose_conc', 'diastolic_bp', 'thickness',
                     'insulin', 'bmi', 'diab_pred', 'age']
predicted_class_names = ['diabetes']

X = df[feature_col_names].values
y = df[predicted_class_names].values

xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.33)

print('\n the total number of Training Data :', ytrain.shape)
print('\n the total number of Test Data :', ytest.shape)

clf = GaussianNB().fit(xtrain, ytrain.ravel())
predicted = clf.predict(xtest)

# one hand-made record, in feature_col_names order
predictTestData = clf.predict([[1, 189, 60, 23, 846, 30.1, 0.398, 59]])

print('\n Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))

print('Accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('The value of Precision', metrics.precision_score(ytest, predicted))
print('The value of Recall', metrics.recall_score(ytest, predicted))

print("Predicted Value for individual Test Data:", predictTestData)
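
For reference, GaussianNB scores each class c by Bayes' rule with an
independent Gaussian per feature: P(c | x) is proportional to
P(c) * prod_i N(x_i; mu_ci, sigma_ci^2), where the per-class means and
variances are estimated from the training split; the predicted class is the
one with the highest posterior.
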
(7). Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the
same data set for clustering using the k-Means algorithm. Compare the results
of these two algorithms and comment on the quality of clustering. You can add
Java/Python ML library classes/API in the program.


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

names = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Class']

dataset = pd.read_csv("Kdataset.csv", names=names)

X = dataset.iloc[:, :-1]

label = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y = [label[c] for c in dataset.iloc[:, -1]]

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.title('Real')
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y])

# K-MEANS PLOT
model = KMeans(n_clusters=3, random_state=0).fit(X)
plt.subplot(1, 3, 2)
plt.title('KMeans')
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_])

print('The accuracy score K-Means: ', metrics.accuracy_score(y, model.labels_))
print('The Confusion matrix K-Means:\n', metrics.confusion_matrix(y, model.labels_))

# GMM (EM) PLOT
gmm = GaussianMixture(n_components=3, random_state=0).fit(X)
y_cluster_gmm = gmm.predict(X)

plt.subplot(1, 3, 3)
plt.title('GMM Classification')
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm])

print('The accuracy score of EM: ', metrics.accuracy_score(y, y_cluster_gmm))
print('The Confusion matrix of EM:\n ', metrics.confusion_matrix(y, y_cluster_gmm))

plt.show()
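
One caveat for the comparison asked for above: the labels produced by KMeans
and GaussianMixture are arbitrary cluster ids, so accuracy_score against the
true labels is only meaningful when the numbering happens to line up. A small
sketch (an addition to the exercise, not part of the original listing) that
remaps each cluster to its majority true class before scoring:

def remap_clusters(y_true, y_clust):
    # map every cluster id to the most common true class inside it
    y_true, y_clust = np.asarray(y_true), np.asarray(y_clust)
    mapping = {c: np.bincount(y_true[y_clust == c]).argmax()
               for c in np.unique(y_clust)}
    return np.array([mapping[c] for c in y_clust])

print('Remapped K-Means accuracy:',
      metrics.accuracy_score(y, remap_clusters(y, model.labels_)))
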
(8). Write a program to implement the k-Nearest Neighbour algorithm to classify
the iris data set. Print both correct and wrong predictions. Java/Python ML
library classes can be used for this problem.


import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

dataset = pd.read_csv('Kdataset.csv', names=names)
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

print('sepal-length', 'sepal-width', 'petal-length', 'petal-width')
print(X.head())
print('Target value')
print(y.head())

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.10)
classifier = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)

ypred = classifier.predict(Xtest)

print("\n-----------------------------------------------------------------------")
print('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print("-------------------------------------------------------------------------")

i = 0
for label in ytest:
    print('%-25s %-25s' % (label, ypred[i]), end="")
    if label == ypred[i]:
        print(' %-25s' % 'Correct')
    else:
        print(' %-25s' % 'Wrong')
    i = i + 1

print("-------------------------------------------------------------------------")
print("\nConfusion Matrix:\n", metrics.confusion_matrix(ytest, ypred))
print("-------------------------------------------------------------------------")
print("\nClassification Report:\n", metrics.classification_report(ytest, ypred))
print("-------------------------------------------------------------------------")
print('Accuracy of the classifier is %0.2f' % metrics.accuracy_score(ytest, ypred))
print("-------------------------------------------------------------------------")
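
As a quick extension (not part of the original exercise), the same split can
be reused to see how the choice of k affects accuracy:

for k in (1, 3, 5, 7, 9):
    knn = KNeighborsClassifier(n_neighbors=k).fit(Xtrain, ytrain)
    print('k = %d, accuracy = %.2f'
          % (k, metrics.accuracy_score(ytest, knn.predict(Xtest))))
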
(9). Implement the non-parametric Locally Weighted Regression algorithm in
order to fit data points. Select an appropriate data set for your experiment
and draw graphs.


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def kernel(point, xmat, k):
    # Gaussian weights: points near `point` get weights close to 1
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights


def localWeight(point, xmat, ymat, k):
    # weighted normal equation, solved separately at each query point
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W


def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred


# load data points
data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# prepare the design matrix: prepend a column of ones to bill
mbill = np.mat(bill)
mtip = np.mat(tip)

m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))

# set the bandwidth k here
ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)

plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
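
In effect, every prediction solves its own weighted least-squares problem: for
a query row x, the code computes beta(x) = (X^T W X)^(-1) X^T W y, where W is
the diagonal kernel matrix with W_jj = exp(-||x - x_j||^2 / (2 k^2)), and then
predicts ypred = x * beta(x). Smaller bandwidths k follow the data more
closely but produce noisier fits.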