Machine Learning Lab

Machine Learning Lab # 01

Code:
from pandas import Series

s1 = Series([3,-4.5,7,81.9,13])

s2 = Series([5,7,31,3.5,-8.7],index = ['A','B','C','D','E'])

print(s1)

print(s2)

print('Values',s1.values)

print('Index',s1.index)

print('Values',s2.values)

print('Index',s2.index)

## indexing and slicing of s1

print('indexing of s1')

print('s1[1] = ',s1[1])

print('\nslicing of s1')

print('s1[1:3]\n',s1[1:3])

## indexing and slicing of s2

print('indexing of s2')

print('s2.iloc[3] = ', s2.iloc[3])  # use .iloc for positional access on a labeled index

print('\nslicing of s2')

print('s2[1:4]\n', s2[1:4])

print("s2['D'] = ", s2['D'])

print(s2['B':'E'])  # label-based slicing includes the endpoint 'E'

Output:

Code:
import numpy as np

s3 = Series(np.random.randn(6))

print(s3)

print(s3.shape)

print(s3.size)

Output:

Code:
from pandas import Series

capital = {'MI': 'Multan', 'IS': 'Islamabad', 'KI': 'Karachi', 'TX': 'Austin'}

s4 = Series(capital)

print(s4)

print('Values:', s4.values)

print('Index:', s4.index)

# Slicing of s4

print(s4[1:3], '\n')

print(s4.iloc[1:3])

print(f"{s4['MI']}\n{s4['KI']}\n")

print(s4.iloc[:3], '\n')

# print(s4.iloc[1, 0:3])

# The line above would fail: a Series is one-dimensional, so .iloc takes a
# single positional index or slice, not a row/column pair. Row/column
# indexing such as .iloc[1, 0:3] only applies to a DataFrame.
# For a Series, slice positionally instead:

# print(s4.iloc[1:3])

Output:

Machine Learning Lab # 02

Code:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Define column names based on the dataset description

columns = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class'
]

# Load the dataset from the UCI repository

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data'

df = pd.read_csv(url, header=None, names=columns)

df.head()

Output:

Code:
data = df.drop(['Sample code number','Class'], axis = 1)

data.head()

Output:

Code:
print('Number of instances = %d' % (data.shape[0]))

print('Number of attributes = %d' % (data.shape[1]))

data[130:141]

Output:

Code:
data.replace('?', np.nan, inplace= True)

# data[130:141]

Output:

Code:
for col in data.columns:
    print('Column %s: %d missing values' % (col, data[col].isna().sum()))

Output:

Code:
data['Bare Nuclei'] = pd.to_numeric(data['Bare Nuclei'])  # the column was read as strings because of '?'

data = data.fillna(data.median())

data[130:141]

Output:

Code:
data['Bare Nuclei'] = pd.to_numeric(data['Bare Nuclei'])

data.boxplot(figsize = (20,3))

Output:

Code:
z = (data - data.mean()) / data.std()

# z[130:141]

print('Number of rows before discarding outliers = %d' % (z.shape[0]))

z2 = z.loc[((z > -3).sum(axis=1) == 9) & ((z < 3).sum(axis=1) == 9), :]

print('Number of rows after discarding outliers = %d' % (z2.shape[0]))

Output:

Code:
dups = data.duplicated()

print('Number of Duplicated Row = %d' % (dups.sum()))

Output:

Code:
print('Number of rows before discarding duplicates = %d' % (data.shape[0]))

data2 = data.drop_duplicates()

print('Number of rows after discarding duplicates = %d' % (data2.shape[0]))

Output:

Machine Learning Lab # 03

Code:
import pandas as pd

import numpy as np

# Create a Pandas Series with day labels as the index

s3 = pd.Series([1.2, -0.8, 0, 1.7, -3.2, 2.2], index=['jan1', 'jan2', 'jan3', 'jan4', 'jan5', 'jan6'])

# Display the Series

# print(s3)

print(s3[s3 >= 0])

print('\n',s3 + 4)

print('\n',s3 / 2)

print('\n',np.log(s3 + 4))

Output:

Code:
# Define a dictionary with car data

car = {
    'make': ['Toyota', 'Honda', 'Ford', 'Chevrolet'],
    'model': ['Camry', 'Civic', 'Mustang', 'Malibu'],
    'msrp': [27595, 23570, 23495, 68000]
}

# Create a DataFrame using the dictionary

car_data = pd.DataFrame(car, index=[1, 2, 3, 4])

# Display the DataFrame

print(car_data)

car_data['year'] = 2016

print('Car Data after adding year')

print(car_data)

# Add a 'dealership' column, using np.nan to mark the missing value

car_data['dealership'] = ['Courtesy Ford', 'Capital Honda', 'Spartan Toyota', np.nan]

print('Car Data after adding dealership')

print(car_data)

Output:

Code:
# Define a list of tuples containing data for year, temperature, and precipitation

tuple_list = [
    (2011, 42.5, 32.4),
    (2012, 42.4, 32.5),
    (2013, 47.2, 39.2),
    (2014, 44.2, 31.4),
    (2015, 39.9, 29.8),
    (2016, 41.5, 36.7)
]

# Define a separate list of column names

col_name = ['Year', 'Temperature', 'Precipitation']

# Create a DataFrame using the list of tuples and specify the column names

weather_data = pd.DataFrame(tuple_list, columns=col_name)

# Display the DataFrame

print(weather_data)

Output:

Code:
# create a Data Frame using numpy library

upDate = np.random.randn(5,3)

colName = ['X1','X2','X3']

data = pd.DataFrame(upDate, columns= colName)

print(data)

Output:

Code:
print(data['X2'])

print(' ')

print(data.iloc[2])

print(car_data.iloc[2])

# Display specific elements and slices from the DataFrame

print("Element at row 1, column 2 (using iloc):")

print(car_data.iloc[1, 2])

print("\n" + "-"*30 + "\n")

print("Element at row 1, column 'model' (using loc):")

print(car_data.loc[1, 'model'])

print("\n" + "-"*30 + "\n")

print("Slice of rows 1 to 2 and columns 1 to 2 (using iloc):")

print(car_data.iloc[1:3, 1:3])

print("\n" + "-"*30 + "\n")

print("Rows with MSRP greater than 25000:")

print(car_data[car_data.msrp > 25000])

Output:

Code:
# Perform transformations and display results

print("Transposed Data:")

print(data.T)

print("\n" + "-"*30 + "\n")

print("Data with 4 added to each element:")

print(data + 4)

print("\n" + "-"*30 + "\n")

print("Maximum value in the data (by column):")

print(data.max())

print("\n" + "-"*30 + "\n")

print("Minimum value in each row:")

print(data.min(axis=1))

print("\n" + "-"*30 + "\n")

print("Sum of all elements (by column):")

print(data.sum())

print("\n" + "-"*30 + "\n")

print("Mean value of each row:")

print(data.mean(axis=1))

Output:

Code:

# Create a DataFrame using the NumPy library

upDate2 = np.random.randn(5, 3) # Generate a 5x3 matrix of random numbers

colName2 = ['X1', 'X2', 'X3'] # Define column names

data2 = pd.DataFrame(upDate2, columns=colName2) # Create the DataFrame with the generated data

# Display the first DataFrame

print("Data1 DataFrame:")

print(data)

print("\n" + "-"*30 + "\n")

# Display the second DataFrame

print("Data2 DataFrame:")

print(data2)

print("\n" + "-"*30 + "\n")

# Display the result of adding Data1 and Data2

print("Sum of Data1 and Data2:")

print(data.add(data2))

print("\n" + "-"*30 + "\n")

# Display the result of multiplying Data1 and Data2

print("Product of Data1 and Data2:")

print(data.mul(data2))

Output:

Code:
print('Calculate max-min per column')

f = lambda x : x.max() - x.min()

print(data.apply(f))

print("\n" + "-"*30 + "\n")

print('Calculate max-min per row')

f = lambda x : x.max() - x.min()

print(data.apply(f, axis=1))

Output:

Code:
import matplotlib.pyplot as plt

# Use %matplotlib inline to display plots in the notebook

%matplotlib inline

# Plot the line plot

s3.plot(kind='line', title='Line Plot')

plt.xlabel('Index')

plt.ylabel('Values')

plt.show()

# Plot the bar chart

s3.plot(kind='bar', title='Bar Chart', color='skyblue')

plt.xlabel('Index')

plt.ylabel('Values')

plt.show()

# Plot the histogram

s3.plot(kind='hist', title='Histogram', bins=5, color='lightgreen', edgecolor='black')

plt.xlabel('Values')

plt.ylabel('Frequency')

plt.show()

Output:

Code:
# Plot the box plot for Temperature and Precipitation

weather_data[['Temperature', 'Precipitation']].plot(kind='box', title='Box Plot')

plt.ylabel('Values')

plt.grid(axis='y')

plt.show()

Output:

Code:
daily = pd.read_csv('/content/synthetic_weather_data_multiyear.csv')

daily.index = pd.to_datetime(daily['Date'])

daily = daily["Precipitation"]

ax = daily.plot(kind = 'line', figsize= (15,3))

variance = daily.var()

ax.set_title(f'Daily Precipitation (variance: {variance:.4f})')

Output:

Code:
monthly = daily.groupby(pd.Grouper(freq = 'M')).sum()

ax = monthly.plot(kind = 'line', figsize= (15,3))

variance = monthly.var()

ax.set_title(f'Monthly Precipitation (variance: {variance:.4f})')

Output:

Code:
annual = daily.groupby(pd.Grouper(freq = 'Y')).sum()

ax = annual.plot(kind = 'line', figsize = (15,7))

variance = annual.var()

ax.set_title(f'Annual Precipitation (variance: {variance:.4f})')

Output:

Machine Learning Lab #04

Code:
import matplotlib.image as mpimg

import pandas as pd

from sklearn.decomposition import PCA

import numpy as np

import matplotlib.pyplot as plt

numimages = 16

fig = plt.figure(figsize = (7,7))

imgData = []

for i in range(1, numimages + 1):
    filename = '/content/pics/pics/Picture' + str(i) + '.jpeg'
    img = mpimg.imread(filename)
    ax = fig.add_subplot(4, 4, i)
    plt.imshow(img)
    plt.axis('off')
    ax.set_title(str(i))
    imgData.append(np.array(img).flatten().reshape(1, img.shape[0] * img.shape[1] * img.shape[2]))

imgData = np.concatenate(imgData, axis = 0)

numcomponents = 2

pca = PCA(n_components = numcomponents)

pca.fit(imgData)

projected = pca.transform(imgData)

projected_df = pd.DataFrame(projected, columns = ['PC1', 'PC2'], index = range(1, numimages + 1))

print(projected_df)

Output:

Code:
projected_df['food'] = ['burger', 'burger', 'burger', 'burger',
                        'drink', 'drink', 'drink', 'drink',
                        'pasta', 'pasta', 'pasta', 'pasta',
                        'chicken', 'chicken', 'chicken', 'chicken']

print(projected_df)

color = {'burger' : 'b', 'drink' : 'g', 'pasta' : 'r', 'chicken' : 'c'}

markertypes = {'burger' : '+', 'drink' : 'x', 'pasta' : 'o', 'chicken' : 's'}

for foodtype in markertypes:
    d = projected_df[projected_df['food'] == foodtype]
    plt.scatter(d['PC1'], d['PC2'], c=color[foodtype], marker=markertypes[foodtype], label=foodtype)

plt.title('Scatter Plot between Different Food types and Food markers')

plt.xlabel('PC1')

plt.ylabel('PC2')

plt.legend(loc = 'best')

plt.show()

Output:

Code:
import numpy as np

import matplotlib.pyplot as plt

seed = 1

# Set random seed for reproducibility (optional)

np.random.seed(seed)

# Number of data points

numinstances = 200

# Generate random X values

X = np.random.rand(numinstances, 1).reshape(-1, 1)

# Define the true Y values (Y_true = -3 * X + 1)

Y_true = -3 * X + 1

# Add random noise to the true Y values to create the observed Y values

Y = Y_true + np.random.normal(size=numinstances).reshape(-1, 1)

# Plot the data points (scatter plot)

plt.scatter(X, Y, color='black', label='Observed Y')

plt.scatter(X, Y_true, color='blue', linewidth=3, label='True Y')

# Add title and labels

plt.title('True Function Y = -3X + 1')

plt.xlabel('X')

plt.ylabel('Y')

# Add legend to the plot

plt.legend()

# Display the plot

plt.show()

Output:

Code:
import matplotlib.pyplot as plt

from sklearn import linear_model

from sklearn.metrics import mean_squared_error, r2_score

# Assuming X and Y are already defined

numinstances = len(X) # Total instances

numtrain = 20 # Number of training instances

numtest = numinstances - numtrain # Number of test instances

# Split the data into training and testing sets

X_train = X[:-numtest] # Train set features

X_test = X[-numtest:] # Test set features

Y_train = Y[:-numtest] # Train set labels

Y_test = Y[-numtest:] # Test set labels

# Step 2: Fit the regression model to the training set

# Create the LinearRegression model

regr = linear_model.LinearRegression()

# Fit the model on the training data

regr.fit(X_train, Y_train)

# Step 3: Apply the model to the test data

# Predict the values for the test set

Y_pred_test = regr.predict(X_test)

# Step 4: Evaluate the model performance

# Plotting the variation in Y (True vs Predicted values)

plt.scatter(Y_test, Y_pred_test, color='black') # True vs Predicted scatter plot

# plt.plot([min(Y_test), max(Y_test)], [min(Y_test), max(Y_test)],
#          color='blue', linewidth=2)  # Perfect-prediction reference line

plt.title('Comparing True vs Predicted Y values')

plt.xlabel('True value of Y')

plt.ylabel('Predicted value of Y')

plt.show()

Output:

Code:
# Model Evaluation:

import numpy as np

# Calculate Root Mean Squared Error (RMSE)

rmse = np.sqrt(mean_squared_error(Y_test, Y_pred_test))

# Calculate R^2 score

r2 = r2_score(Y_test, Y_pred_test)

# Output the results with proper formatting

print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')

print(f'R-Squared (R²): {r2:.4f}')

Output:

Code:
# Step #05: Post Processing - Display Model Parameters and Plot

# Display slope (coefficient) and intercept

print('Slope = ', regr.coef_[0][0])  # coef_ is 2-D because Y was fitted as a column vector

print('Intercept = ', regr.intercept_[0])

# Plot the data

plt.scatter(X_test, Y_test, color='black') # Scatter plot of actual values

plt.plot(X_test, Y_pred_test, color='blue', linewidth=3) # Plot regression line with predictions

title_label = 'Predicted Function Y = %.2f X + %.2f' % (regr.coef_[0][0], regr.intercept_[0])

# Set the plot title and labels

plt.title(title_label)

plt.xlabel('X')

plt.ylabel('Y')

plt.show() # Display the plot

Output:

Machine Learning Lab #05

Code:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier

import pydotplus

from IPython.display import Image, display

data = pd.read_csv('/content/updated_animal_classification.csv')

data.head()

Output:

Code:
data['Class'] = data['Class'].replace(['Fishes','Birds','Amphibians','Reptiles'],'Non-Mammal')

pd.crosstab([data['Warm Blooded'],data['Give Birth']],data['Class'])

Output:

Code:
from sklearn import tree

Y = data['Class']

X = data.drop(['Name','Class'], axis = 1)

clf = DecisionTreeClassifier(criterion = 'entropy', max_depth = 3)

clf.fit(X,Y)

dot_data = tree.export_graphviz(clf, feature_names=X.columns,
                                class_names=['Mammal', 'Non-Mammal'],
                                filled=True, rounded=True)

graph = pydotplus.graph_from_dot_data(dot_data)

display(Image(graph.create_png()))

Output:

Code:
test_data = [['gila monster', 0, 0, 0, 0, 1, 1, 'Non-Mammal'],
             ['platypus', 1, 0, 0, 0, 1, 1, 'Mammal'],
             ['owl', 1, 0, 0, 1, 1, 0, 'Non-Mammal'],
             ['dolphin', 1, 1, 1, 0, 0, 0, 'Mammal']]

Test_data = pd.DataFrame(test_data,columns = data.columns)

Test_data.head()

Test_Y = Test_data['Class']

Test_X = Test_data.drop(['Name','Class'], axis = 1)

Y_predTest = clf.predict(Test_X)

predicted = pd.concat([Test_data['Name'],pd.Series(Y_predTest)], axis = 1)

predicted.columns = ['Name', 'Predicted Class']

predicted.head()

Output:

Code:
import numpy as np

import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

n = 1500

mean1 = [6,14]

mean2 = [10,6]

mean3 = [14,14]

cor = [[3.5,0],[0,3.5]]

X = np.random.multivariate_normal(mean1, cor, int(n/6))

X = np.concatenate((X,np.random.multivariate_normal(mean2, cor, int(n/6))))

X = np.concatenate((X,np.random.multivariate_normal(mean3, cor, int(n/6))))

X = np.concatenate((X, 20*np.random.rand(int(n/2),2)))

Y = np.concatenate((np.ones(int(n/2)),np.zeros(int(n/2))))

plt.plot(X[:int(n/2),0],X[:int(n/2),1],'r+',ms = 4, label = 'Class 01')

plt.plot(X[int(n/2):,0],X[int(n/2):,1],'b+',ms = 4, label = 'Class 02')

plt.legend()

plt.show()

Output:

Code:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.8, random_state = 1)

from sklearn import tree

from sklearn.metrics import accuracy_score

max_depth = [2,3,4,5,6,7,8,9,10,15,20,25,30,35,40,45,50]

trainAcc = np.zeros(len(max_depth))

testAcc = np.zeros(len(max_depth))

index = 0

for depth in max_depth:
    clf = tree.DecisionTreeClassifier(max_depth=depth)
    clf = clf.fit(X_train, Y_train)
    Y_predTrain = clf.predict(X_train)
    Y_predTest = clf.predict(X_test)
    trainAcc[index] = accuracy_score(Y_train, Y_predTrain)
    testAcc[index] = accuracy_score(Y_test, Y_predTest)
    index += 1

plt.plot(max_depth, trainAcc, 'r+', max_depth, testAcc, 'b-')  # red crosses: train, blue line: test

plt.legend(['Train Accuracy','Test Accuracy'])

plt.xlabel('Max Depth')

plt.ylabel('Accuracy')

plt.show()

Output:

Machine Learning Lab #06

Code:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

trainAcc = []

testAcc = []

NumNeighbour = [1, 5, 10, 15, 20, 25, 30]

for k in NumNeighbour:
    clf = KNeighborsClassifier(n_neighbors=k, metric='minkowski', p=2)  # p=2 makes Minkowski the Euclidean distance
    clf.fit(X_train, Y_train)
    Y_predtrain = clf.predict(X_train)
    Y_predtest = clf.predict(X_test)
    trainAcc.append(accuracy_score(Y_train, Y_predtrain))
    testAcc.append(accuracy_score(Y_test, Y_predtest))

plt.plot(NumNeighbour, trainAcc, 'r-', NumNeighbour, testAcc, 'b-')

plt.legend(['Train Accuracy', 'Test Accuracy'])

plt.xlabel('Number of Neighbors')

plt.ylabel('Accuracy')

plt.title('KNN Accuracy vs Number of Neighbors')

plt.show() # Display the plot

Output:

Code:
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

c = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]

LRTrainAcc = []

LRTestAcc = []

for parm in c:
    clf = LogisticRegression(C=parm)  # C is the inverse of the regularization strength
    clf.fit(X_train, Y_train)
    Y_Predtrain = clf.predict(X_train)
    Y_Predtest = clf.predict(X_test)
    LRTrainAcc.append(accuracy_score(Y_train, Y_Predtrain))
    LRTestAcc.append(accuracy_score(Y_test, Y_Predtest))

plt.plot(c, LRTrainAcc, 'r-', c, LRTestAcc, 'b-')

plt.legend(['Train Accuracy','Test Accuracy'])

plt.xlabel('C (inverse regularization strength)')

plt.ylabel('Accuracy')

plt.title('Logistic Regression Accuracy vs C')

# plt.xscale('log')  # a log scale suits the wide range of C values

plt.show()

Output:

Code:
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# List of C values

C = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 30]

# Initialize lists for storing accuracies

SVMTrainAcc = []

SVMTestAcc = []

# Loop through different C values

for parm in C:
    clf = SVC(C=parm, kernel='linear')
    clf.fit(X_train, Y_train)  # Train the classifier

    # Predictions
    Y_PredTrain = clf.predict(X_train)
    Y_PredTest = clf.predict(X_test)

    # Calculate and store accuracies
    SVMTrainAcc.append(accuracy_score(Y_train, Y_PredTrain))
    SVMTestAcc.append(accuracy_score(Y_test, Y_PredTest))

# Plot the results

plt.plot(C, SVMTrainAcc, 'r-', C, SVMTestAcc, 'b-')

plt.legend(['Train Accuracy', 'Test Accuracy'])

plt.xlabel('C')

plt.ylabel('Accuracy')

plt.show()

Output:

Code:
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# List of C values

C = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 30]

# Initialize lists for storing accuracies

SVMTrainAcc = []

SVMTestAcc = []

# Loop through different C values

for parm in C:
    clf = SVC(C=parm, kernel='rbf')  # RBF kernel this time, instead of linear
    clf.fit(X_train, Y_train)  # Train the classifier

    # Predictions
    Y_PredTrain = clf.predict(X_train)
    Y_PredTest = clf.predict(X_test)

    # Calculate and store accuracies
    SVMTrainAcc.append(accuracy_score(Y_train, Y_PredTrain))
    SVMTestAcc.append(accuracy_score(Y_test, Y_PredTest))

# Plot the results

plt.plot(C, SVMTrainAcc, 'r-', C, SVMTestAcc, 'b-')

plt.legend(['Train Accuracy', 'Test Accuracy'])

plt.xlabel('C')

plt.ylabel('Accuracy')

plt.show()

Output:

Machine Learning Lab #07

Code:
import numpy as np

import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB

# Features: [Movie Length, Genre Code (0 or 1)]

movie_features = np.array([[120, 0], [15, 1], [99, 0], [140, 1], [100, 0], [80, 1], [10, 0], [130, 1]])

# Likes: 1 = like, 0 = dislike

movie_likes = np.array([1, 1, 0, 1, 0, 1, 0, 1])

# Create Gaussian Naive Bayes model and fit it

model = GaussianNB()

model.fit(movie_features, movie_likes)

# New movie features: [Movie Length, Genre Code]

new_movie = np.array([[100, 1]])

# Predict whether the user will like the new movie

predicted_likes = model.predict(new_movie)

# Plot the existing movie data points

plt.scatter(movie_features[:, 0], movie_features[:, 1], c=movie_likes, cmap='viridis', marker='o')

# Plot the new movie as a red 'x'

plt.scatter(new_movie[:, 0], new_movie[:, 1], color='red', marker='x')

# Plot title and labels

plt.title('Movie Likes based on Length and Genre Code')

plt.xlabel('Movie Length (minutes)')

plt.ylabel('Genre Code')

# Show the plot

plt.show()

# Print the prediction result
print(f"Tom will {'like' if predicted_likes[0] == 1 else 'dislike'} a {new_movie[0, 0]} min long "
      f"{'comedy' if new_movie[0, 1] == 1 else 'action'} movie")

Output:

Code:
# Expanded movie features and likes based on the given pattern

movie_features = np.array([

[120, 0], [15, 1], [99, 0], [140, 1], [100, 0], [80, 1], [10, 0], [130, 1], # original 8

[105, 0], [20, 1], [110, 0], [150, 1], [95, 0], [70, 1], [25, 0], [145, 1], # additional samples

[115, 0], [35, 1], [108, 0], [135, 1], [85, 0], [90, 1], [50, 0], [125, 1], # additional samples

[102, 0], [40, 1], [98, 0], [120, 1], [65, 0], [140, 1] # additional samples

])

# Expanded likes corresponding to the movie features

movie_likes = np.array([

1, 1, 0, 1, 0, 1, 0, 1, # original 8

1, 1, 0, 1, 0, 1, 0, 1, # additional samples

1, 1, 0, 1, 0, 1, 0, 1, # additional samples

1, 1, 0, 1, 0, 1 # additional samples

])

# Create Gaussian Naive Bayes model and fit it

model = GaussianNB()

model.fit(movie_features, movie_likes)

# New movie features: [Movie Length, Genre Code]

new_movie = np.array([[100, 0]])

# Predict whether the user will like the new movie

predicted_likes = model.predict(new_movie)

# Plot the existing movie data points

plt.scatter(movie_features[:, 0], movie_features[:, 1], c=movie_likes, cmap='viridis', marker='o')

# Plot the new movie as a red 'x'

plt.scatter(new_movie[:, 0], new_movie[:, 1], color='red', marker='x')

# Plot title and labels

plt.title('Movie Likes based on Length and Genre Code')

plt.xlabel('Movie Length (minutes)')

plt.ylabel('Genre Code')

# Show the plot

plt.show()

# Print the prediction result
print(f"Tom will {'like' if predicted_likes[0] == 1 else 'dislike'} a {new_movie[0, 0]} min long "
      f"{'comedy' if new_movie[0, 1] == 1 else 'action'} movie")

Output:

Machine Learning Lab # 08

Code:
import numpy as np

import tensorflow as tf

import matplotlib.pyplot as plt

from tensorflow import keras

x = np.array([1.0,2.0,3.0,4.0,5.0], dtype = float)

y = np.array([1.0,1.5,2.0,2.5,3.0], dtype = float)

model = keras.Sequential([keras.layers.Dense(units = 1, input_shape = [1])])

model.compile(optimizer= 'sgd', loss = 'mean_squared_error')

model.fit(x,y, epochs=500)

print(model.predict(np.array([[7.0]])))

test = np.array([[7.0], [9.0], [11.0], [13.0]])  # 2-D (batch, feature) shape, as Keras expects

ground_truth = np.array([4,5,6,7])

prediction = model.predict(test)

prediction_value = [y[0] for y in prediction]

plt.scatter(test, ground_truth, color = 'g', label = 'Ground Truth')

plt.scatter(test, prediction_value, color = 'r', label = 'Prediction')

plt.xlabel('Test Data (x)')

plt.ylabel('Predicted vs Actual (y)')

plt.legend()

plt.show()

Output:

Code:
from sklearn.cluster import KMeans

X = np.array([[1,2],[1.5,1.8],[5,8],[8,8],[1,0.6],[9,11],[8,2],[10,2],[9,3]])

# plt.plot(X[:,0], X[:,1], c = 'red', markersize = 10)

plt.scatter(X[:,0], X[:,1], color = 'red', s = 100)

plt.xlabel('Feature 1')

plt.ylabel('Feature 2')

plt.show()

kmean = KMeans(n_clusters = 3)

kmean.fit(X)

centroids = kmean.cluster_centers_

labels = kmean.labels_

print('Centroids :',centroids)

print('Labels :',labels)

colors = ['g', 'r', 'b']

# Plot each cluster once so the legend has no duplicate entries
for i in range(3):
    cluster_points = X[labels == i]
    plt.scatter(cluster_points[:, 0], cluster_points[:, 1], color=colors[i], s=100, label=f'Cluster {i}')

plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=150, linewidths=5,
            zorder=10, color='black', label='Centroids')

plt.title('KMeans Clusters')

plt.legend(loc = 'best')

plt.show()

Output:

Code:
from sklearn.ensemble import BaggingClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

iris = load_iris()

X, Y = iris.data,iris.target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 42)

bagging = BaggingClassifier(estimator = DecisionTreeClassifier(), n_estimators=10, random_state = 42)

bagging.fit(X_train, Y_train)

Y_pred = bagging.predict(X_test)

print('Bagging Accuracy : %.2f' % (accuracy_score(Y_test,Y_pred)))

Output:

Code:
from sklearn import ensemble

numbaseclassifier = 500
maxdepth = 10

trainAcc = []
testAcc = []

# Random Forest
clf = ensemble.RandomForestClassifier(n_estimators=numbaseclassifier)
clf.fit(X_train, Y_train)
Y_predTrainRF = clf.predict(X_train)
Y_predTestRF = clf.predict(X_test)
trainAcc.append(accuracy_score(Y_train, Y_predTrainRF))
testAcc.append(accuracy_score(Y_test, Y_predTestRF))
print('Random Forest Train Accuracy', trainAcc[0])
print('Random Forest Test Accuracy', testAcc[0])

# Bagging
clf = ensemble.BaggingClassifier(DecisionTreeClassifier(max_depth=maxdepth),
                                 n_estimators=numbaseclassifier)
clf.fit(X_train, Y_train)
Y_predTrainBC = clf.predict(X_train)
Y_predTestBC = clf.predict(X_test)
trainAcc.append(accuracy_score(Y_train, Y_predTrainBC))
testAcc.append(accuracy_score(Y_test, Y_predTestBC))
print('Bagging Classifier Train Accuracy', trainAcc[1])
print('Bagging Classifier Test Accuracy', testAcc[1])

# AdaBoost
clf = ensemble.AdaBoostClassifier(DecisionTreeClassifier(max_depth=maxdepth),
                                  n_estimators=numbaseclassifier)
clf.fit(X_train, Y_train)
Y_predTrainAC = clf.predict(X_train)
Y_predTestAC = clf.predict(X_test)
trainAcc.append(accuracy_score(Y_train, Y_predTrainAC))
testAcc.append(accuracy_score(Y_test, Y_predTestAC))
print('AdaBoost Train Accuracy', trainAcc[2])
print('AdaBoost Test Accuracy', testAcc[2])

methods=['Random Forest','Bagging','Adaboost']

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,6))

ax1.plot(methods, trainAcc, marker='o', linestyle='-', color='blue', label='Training Accuracy')

ax1.set_title('Training Accuracy')

ax1.set_xlabel('Method')

ax1.set_ylabel('Accuracy')

ax1.grid(True) # Add grid lines

ax1.legend() # Add legend

ax2.plot(methods, testAcc, marker='o', linestyle='-', color='green', label='Testing Accuracy')

ax2.set_title('Testing Accuracy')

ax2.set_xlabel('Method')

ax2.set_ylabel('Accuracy')

ax2.grid(True) # Add grid lines

ax2.legend()

plt.show()

Output:
