Machine Learning Through Python Lab Manual
Experiment-1: Implement and demonstrate the FIND-S algorithm for finding the most
specific hypothesis based on a given set of training data samples. Read the training data
from a .CSV file.
import csv
num_attributes = 6  # number of attribute columns; the last CSV column is the target
a = []              # holds the training examples read from the file
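Only these set-up lines of the FIND-S program survive in the source; a minimal self-contained sketch of the rest is given below, assuming the training file is the same ML1&2.csv used in Experiment-2, with a header row and a yes/no target in the last column:

import csv

num_attributes = 6
a = []

# Read the training examples, skipping the assumed header row
with open('ML1&2.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)
    for row in reader:
        a.append(row)

# Start from the most specific hypothesis and generalise it on every
# positive example, as FIND-S prescribes; negative examples are ignored
hypothesis = ['0'] * num_attributes
for i in range(len(a)):
    if a[i][num_attributes].lower() == 'yes':
        for j in range(num_attributes):
            if hypothesis[j] == '0':
                hypothesis[j] = a[i][j]
            elif a[i][j] != hypothesis[j]:
                hypothesis[j] = '?'
        print("Hypothesis after example", i + 1, ":", hypothesis)

print("Final maximally specific hypothesis:", hypothesis)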
Experiment-2: For a given set of training data examples stored in a .CSV file, implement
and demonstrate the Candidate Elimination algorithm to output a description of the set
of all hypotheses consistent with the training examples.
import numpy as np
import pandas as pd

data = pd.read_csv('ML1&2.csv')
concepts = np.array(data.iloc[:, 0:-1])
target = np.array(data.iloc[:, -1])  # the last column holds the yes/no label
print(concepts)

# Initialise the two boundaries: the most specific and the most general hypotheses
specific_h = concepts[0].copy()
general_h = [['?' for _ in range(len(specific_h))]
             for _ in range(len(specific_h))]

for i, h in enumerate(concepts):
    if target[i] == "yes":
        # positive example: generalise specific_h wherever it disagrees
        for x in range(len(specific_h)):
            if h[x] != specific_h[x]:
                specific_h[x] = '?'
                general_h[x][x] = '?'
        print(specific_h)
        print(general_h)
    if target[i] == "no":
        # negative example: specialise general_h wherever specific_h disagrees
        for x in range(len(specific_h)):
            if h[x] != specific_h[x]:
                general_h[x][x] = specific_h[x]
            else:
                general_h[x][x] = '?'
    print("Steps of Candidate Elimination Algorithm", i + 1)
    print(specific_h)
    print(general_h)

# Drop the rows of general_h that stayed fully general
indices = [i for i, val in enumerate(general_h)
           if val == ['?', '?', '?', '?', '?', '?']]
for i in indices:
    general_h.remove(['?', '?', '?', '?', '?', '?'])
print("Final Specific Hypothesis:", specific_h)
print("Final General Hypotheses:", general_h)
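For reference, a training file in the shape these two experiments expect is the classic "enjoy sport" data; the rows below are only an illustration, since the actual contents of ML1&2.csv are not reproduced in the manual:

sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes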
Experiment-3: Write a program to demonstrate the working of the decision tree based
ID3 algorithm. Use an appropriate data set for building the decision tree and apply this
knowledge to classify a new sample.
import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    for x in range(items.shape[0]):
        # count[x, 0] extracts a scalar, avoiding the NumPy >= 1.25
        # "ndim > 0 to a scalar" deprecation warning
        dict[items[x]] = np.empty((int(count[x, 0]), data.shape[1]),
                                  dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict

def entropy(S):
    items = np.unique(S)
    if items.size == 1:  # a pure set has zero entropy
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for x in range(items.shape[0]):
        # counts[x, 0] extracts a scalar for math.log
        sums += -1 * counts[x, 0] * math.log(counts[x, 0], 2)
    return sums

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)
    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv

def create_node(data, metadata):
    # if every remaining example carries the same label, return a leaf
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node
    # otherwise split on the attribute with the highest gain ratio
    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, dict = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    return node

def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s
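The listing stops before the tree is printed; a small driver that would produce the output shown next is sketched here (the file name tennis.csv and the indent width are assumptions):

def print_tree(node, level):
    # leaf nodes carry the class label in node.answer
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennis.csv")  # assumed play-tennis data set
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)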
Output:
Outlook
   Overcast
      b'Yes'
   Rainy
      Windy
         b'False'
            b'Yes'
         b'True'
            b'No'
   Sunny
      Humidity
         b'High'
            b'No'
         b'Normal'
            b'Yes'
Experiment-4: Exercises to solve real-world problems using the following machine learning methods:
a) Linear Regression
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the salary dataset (years of experience vs salary)
dataset = pd.read_csv('ML4.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Split into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)

# Fit simple linear regression on the training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predict the test set results
y_pred = regressor.predict(X_test)

# Visualise the training set results
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualise the test set results
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
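The listing ends with the plots; the fit can also be scored numerically, for example with the following sketch (an addition, not part of the original program):

from sklearn.metrics import mean_squared_error, r2_score

# Compare the held-out test targets with the predictions made above
print("MSE:", mean_squared_error(y_test, y_pred))
print("R^2:", r2_score(y_test, y_pred))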
Experiment-4: Exercises to solve real-world problems using the following machine learning methods:
b) Logistic Regression
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset; the source omits this step, so the file name
# 'ML4b.csv' and the column layout (features first, label last) are assumed
dataset = pd.read_csv('ML4b.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)
LogisticRegression(random_state=0)
# Predicting the Test set results and computing the confusion matrix
from sklearn.metrics import confusion_matrix
y_pred = classifier.predict(X_test)
print(confusion_matrix(y_test, y_pred))
[[65  3]
 [ 8 24]]
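Reading the matrix: 65 and 24 test samples of the two classes are classified correctly, while 3 and 8 are misclassified, so the test accuracy is (65 + 24) / 100 = 0.89.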
# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1,
                               stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1,
                               stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(
    np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75, cmap=ListedColormap(('red', 'green')))
cmap = ListedColormap(('red', 'green'))
for i, j in enumerate(np.unique(y_set)):
    # pass color= rather than c= to avoid the matplotlib warning about
    # RGBA-like values being interpreted as data
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color=cmap(i), label=j)
plt.title('Logistic Regression (Training set)')
plt.legend()
plt.show()
# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1,
                               stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1,
                               stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(
    np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75, cmap=ListedColormap(('red', 'green')))
cmap = ListedColormap(('red', 'green'))
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color=cmap(i), label=j)
plt.title('Logistic Regression (Test set)')
plt.legend()
plt.show()
Experiment-4: Exercises to solve real-world problems using the following machine learning methods:
c) Binary Classifier
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
dataset = load_breast_cancer(as_frame=True)
dataset['data'].head()
[5 rows x 30 columns]
dataset['target'].head()
0 0
1 0
2 0
3 0
4 0
Name: target, dtype: int32
dataset['target'].value_counts()
target
1 357
0 212
Name: count, dtype: int64
X = dataset['data']
y = dataset['target']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
# Transform the test set with the scaler fitted on the training data;
# fitting a second scaler on the test set would leak its statistics
X_test = ss.transform(X_test)
models = {}
# K-Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier
models['K-Nearest Neighbor'] = KNeighborsClassifier()
from sklearn.metrics import accuracy_score, precision_score, recall_score
# Fit the model and measure recall on the test set (the fitting and
# scoring lines are missing from the source and filled in here)
models['K-Nearest Neighbor'].fit(X_train, y_train)
y_pred = models['K-Nearest Neighbor'].predict(X_test)
recall = recall_score(y_test, y_pred)
print(recall)
0.9368421052631579
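The empty models dictionary suggests that several classifiers are meant to be compared on the same split; a sketch of how it might be extended (the extra models and the comparison loop are additions, not part of the original listing):

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

models['Logistic Regression'] = LogisticRegression()
models['Support Vector Machine'] = SVC()
models['Decision Tree'] = DecisionTreeClassifier()

# Fit every model on the scaled training data and compare test metrics
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(name,
          "accuracy:", accuracy_score(y_test, y_pred),
          "precision:", precision_score(y_test, y_pred),
          "recall:", recall_score(y_test, y_pred))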
Experiment-5: Develop a program for Bias, Variance, Remove duplicates and Cross Validation.
import numpy as np
from sklearn.linear_model import LinearRegression
# Model definition; X and y are assumed to be a prepared feature matrix and
# target, since the set-up lines are missing from the source
model_lr = LinearRegression()
model_lr.fit(X, y)
Prediction = model_lr.predict(X)
# Evaluating Variance: the spread of the model's predictions
Variance = np.var(Prediction)
print("Variance:", Variance)
Variance: 0.6317289156999706
# Evaluating SSE: mean squared deviation of the targets from the mean prediction
SSE = np.mean((np.mean(Prediction) - y) ** 2)
SSE
0.666933139470129
# Evaluating Bias: the part of the error not explained by variance
Bias = SSE - Variance
Bias
0.035204223770158416
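The experiment title also lists cross validation, for which no code survives in the source; a minimal sketch using scikit-learn's cross_val_score on the same X and y:

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# 5-fold cross validation; each fold is scored on its held-out part
scores = cross_val_score(LinearRegression(), X, y, cv=5)
print("Fold scores:", scores)
print("Mean score:", scores.mean())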
A sample of the automobile data used here (note the '?' missing-value marker):
   highway-mpg  price
0           27  13495
1           27  16500
2           26  16500
3           30  13950
4           22  17450
5           25  15250
6           25  17710
7           25  18920
8           20  23875
9           22      ?
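The '?' in the price column is this data set's missing-value marker; one way to clean it before modelling is sketched below (an addition, assuming df is the automobile data frame sampled above):

import numpy as np

# Turn the '?' markers into NaN so pandas treats them as missing, then
# drop rows with an unknown price and restore a numeric dtype
df_clean = df.replace('?', np.nan)
df_clean = df_clean.dropna(subset=['price'])
df_clean['price'] = df_clean['price'].astype(int)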
# Keep the first row for each distinct make
df.drop_duplicates(subset=['make'])
# Keep the first row for each (fuel-type, body-style) combination
df.drop_duplicates(subset=['fuel-type', 'body-style'])
[9 rows x 26 columns]
# keep='last' retains the last occurrence of each duplicate instead of the first
df.drop_duplicates(subset=['fuel-type', 'body-style'], keep='last')
[9 rows x 26 columns]
Experiment-6: Write a program to implement Categorical Encoding, One-hot Encoding
import pandas as pd
df = pd.read_csv('homeprices.csv')
df
{"table":{"data":{"area":
[2600,3000,3200,3600,4000,2600,2800,3300,3600,2600,2900,3100,3600],"
index":[0,1,2,3,4,5,6,7,8,9,10,11,12],"price":
[550000,565000,610000,680000,725000,585000,615000,650000,710000,5750
00,600000,620000,695000],"town":["monroe township","monroe
township","monroe township","monroe township","monroe
township","west windsor","west windsor","west windsor","west
windsor","robinsville","robinsville","robinsville","robinsville"]},"
schema":{"fields":[{"name":"index","type":"integer"},
{"name":"town","type":"string"},{"name":"area","type":"integer"},
{"name":"price","type":"integer"}],"pandas_version":"1.4.0","primary
Key":["index"]}},"total_rows":13,"truncation_type":null}
# One-hot encode the town column with get_dummies
dummies = pd.get_dummies(df.town)
dummies
merged=pd.concat([df,dummies],axis='columns')
merged
{"table":{"data":{"area":
[2600,3000,3200,3600,4000,2600,2800,3300,3600,2600,2900,3100,3600],"
index":[0,1,2,3,4,5,6,7,8,9,10,11,12],"monroe township":
[1,1,1,1,1,0,0,0,0,0,0,0,0],"price":
[550000,565000,610000,680000,725000,585000,615000,650000,710000,5750
00,600000,620000,695000],"robinsville":
[0,0,0,0,0,0,0,0,0,1,1,1,1],"town":["monroe township","monroe
township","monroe township","monroe township","monroe
township","west windsor","west windsor","west windsor","west
windsor","robinsville","robinsville","robinsville","robinsville"],"w
est windsor":[0,0,0,0,0,1,1,1,1,0,0,0,0]},"schema":{"fields":
[{"name":"index","type":"integer"},{"name":"town","type":"string"},
{"name":"area","type":"integer"},{"name":"price","type":"integer"},
{"name":"monroe township","type":"integer"},
{"name":"robinsville","type":"integer"},{"name":"west
windsor","type":"integer"}],"pandas_version":"1.4.0","primaryKey":
["index"]}},"total_rows":13,"truncation_type":null}
# Drop the original town column and one dummy column ('west windsor')
# to avoid the dummy variable trap
final = merged.drop(['town', 'west windsor'], axis='columns')
final
{"table":{"data":{"area":
[2600,3000,3200,3600,4000,2600,2800,3300,3600,2600,2900,3100,3600],"
index":[0,1,2,3,4,5,6,7,8,9,10,11,12],"monroe township":
[1,1,1,1,1,0,0,0,0,0,0,0,0],"price":
[550000,565000,610000,680000,725000,585000,615000,650000,710000,5750
00,600000,620000,695000],"robinsville":
[0,0,0,0,0,0,0,0,0,1,1,1,1]},"schema":{"fields":
[{"name":"index","type":"integer"},{"name":"area","type":"integer"},
{"name":"price","type":"integer"},{"name":"monroe
township","type":"integer"},
{"name":"robinsville","type":"integer"}],"pandas_version":"1.4.0","p
rimaryKey":["index"]}},"total_rows":13,"truncation_type":null}
{"table":{"data":{"area":
[2600,3000,3200,3600,4000,2600,2800,3300,3600,2600,2900,3100,3600],"
index":[0,1,2,3,4,5,6,7,8,9,10,11,12],"monroe township":
[1,1,1,1,1,0,0,0,0,0,0,0,0],"robinsville":
[0,0,0,0,0,0,0,0,0,1,1,1,1]},"schema":{"fields":
[{"name":"index","type":"integer"},{"name":"area","type":"integer"},
{"name":"monroe township","type":"integer"},
{"name":"robinsville","type":"integer"}],"pandas_version":"1.4.0","p
rimaryKey":["index"]}},"total_rows":13,"truncation_type":null}
y=final.price
y
0 550000
1 565000
2 610000
3 680000
4 725000
5 585000
6 615000
7 650000
8 710000
9 575000
10 600000
11 620000
12 695000
Name: price, dtype: int64
# Fit a linear regression on the one-hot encoded features
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x, y)
LinearRegression()
# Predictions take [area, monroe township, robinsville] as input
model.predict([[28, 0, 1]])
array([239015.93205768])
model.predict([[3400, 0, 0]])  # 3400 sq ft in west windsor (both dummies 0)
array([681241.66845839])
model.score(x, y)
0.9573929037221872
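The experiment title also names categorical encoding; the same dummy columns can be produced with scikit-learn instead of get_dummies, as in this sketch (an addition to the original listing):

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Label encoding: map each town name to an integer code
le = LabelEncoder()
town_codes = le.fit_transform(df.town)
print(town_codes)

# One-hot encoding: expand the town column into indicator columns
# (sparse_output is the scikit-learn >= 1.2 spelling; older versions
# use sparse=False)
ohe = OneHotEncoder(sparse_output=False)
town_onehot = ohe.fit_transform(df[['town']])
print(town_onehot[:3])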
Experiment-7: Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
import numpy as np

# Training data; the set-up lines are missing from the source, so the
# standard toy example matching the printed output below is assumed
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalise each feature to [0, 1]
y = y / 100                 # normalise the target to [0, 1]

# Sigmoid Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid (x is already a sigmoid activation)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5  # Setting training iterations
lr = 0.1   # Setting learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1       # number of neurons at the output layer

# Random weight and bias initialisation
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hlayer_act = sigmoid(np.dot(X, wh) + bh)
    output = sigmoid(np.dot(hlayer_act, wout) + bout)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much hidden layer weights contributed to the error
    d_hiddenlayer = EH * hiddengrad
    wout += hlayer_act.T.dot(d_output) * lr  # dot product of next layer error and current layer output
    wh += X.T.dot(d_hiddenlayer) * lr

    print("-----------Epoch-", i + 1, "Starts----------")
    print("Input: \n" + str(X))
    print("Actual Output: \n" + str(y))
    print("Predicted Output: \n", output)
    print("-----------Epoch-", i + 1, "Ends----------\n")
    print("Input: \n" + str(X))
    print("Actual Output: \n" + str(y))
    print("Predicted Output: \n", output)
-----------Epoch- 1 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83727262]
[0.81890898]
[0.84136943]]
-----------Epoch- 1 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83727262]
[0.81890898]
[0.84136943]]
-----------Epoch- 2 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83789076]
[0.81951306]
[0.84198315]]
-----------Epoch- 2 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83789076]
[0.81951306]
[0.84198315]]
-----------Epoch- 3 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83849879]
[0.8201075 ]
[0.84258678]]
-----------Epoch- 3 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83849879]
[0.8201075 ]
[0.84258678]]
-----------Epoch- 4 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83909695]
[0.82069254]
[0.84318055]]
-----------Epoch- 4 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83909695]
[0.82069254]
[0.84318055]]
-----------Epoch- 5 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83968547]
[0.82126838]
[0.8437647 ]]
-----------Epoch- 5 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83968547]
[0.82126838]
[0.8437647 ]]
Experiment-8: Write a program to implement the k-Nearest Neighbour algorithm to classify the Iris data set. Print both correct and wrong predictions.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Loading and splitting the iris data set (these set-up lines are assumed;
# the source listing begins at the classifier)
dataset = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    dataset["data"], dataset["target"], random_state=0)

kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)
KNeighborsClassifier(n_neighbors=1)
# Classify each test sample and report the target against the prediction
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]],
          "PREDICTED=", prediction, dataset["target_names"][prediction[0]])
print(kn.score(X_test, y_test))  # mean accuracy on the test set