Machine Learning Lab

Machine Learning Lab # 01

Code:
from pandas import Series

s1 = Series([3,-4.5,7,81.9,13])

s2 = Series([5,7,31,3.5,-8.7],index = ['A','B','C','D','E'])

print(s1)

print(s2)

print('Values',s1.values)

print('Index',s1.index)

print('Values',s2.values)

print('Index',s2.index)

## indexing and slicing of s1

print('indexing of s1')

print('s1[1] = ',s1[1])

print('\nslicing of s1')

print('s1[1:3]\n',s1[1:3])

## indexing and slicing of s2

print('indexing of s2')

print('s2.iloc[3] = ', s2.iloc[3])  # use .iloc for positional access on a labeled index

print('\nslicing of s2')

print('s2[1:4]\n', s2[1:4])

print("s2['D'] = ", s2['D'])

print(s2['B':'E'])  # label-based slicing includes the endpoint 'E'

Output:

Code:
import numpy as np

s3 = Series(np.random.randn(6))

print(s3)

print(s3.shape)

print(s3.size)

Output:

Code:
from pandas import Series

capital = {'MI': 'Multan', 'IS': 'Islamabad', 'KI': 'Karachi', 'TX': 'Austin'}

s4 = Series(capital)

print(s4)

print('Values:', s4.values)

print('Index:', s4.index)

# Slicing of s4

print(s4[1:3], '\n')

print(s4.iloc[1:3])

print(f"{s4['MI']}\n{s4['KI']}\n")

print(s4.iloc[:3], '\n')

# print(s4.iloc[1, 0:3])

# The line above would fail: a Series is one-dimensional, so .iloc takes a
# single positional index or slice, not a row/column pair. Row/column
# indexing such as .iloc[1, 0:3] only applies to a DataFrame.
# For a Series, slice positionally instead:

# print(s4.iloc[1:3])

Output:

Machine Learning Lab # 02

Code:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Define column names based on the dataset description

columns = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class'
]

# Load the dataset from the UCI repository

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data'

df = pd.read_csv(url, header=None, names=columns)

df.head()

Output:

Code:
data = df.drop(['Sample code number','Class'], axis = 1)

data.head()

Output:

Code:
print('Number of instances = %d' % (data.shape[0]))

print('Number of attributes = %d' % (data.shape[1]))

data[130:141]

Output:

Code:
data.replace('?', np.nan, inplace= True)

# data[130:141]

Output:

Code:
for col in data.columns:
    print('Column %s: %d missing values' % (col, data[col].isna().sum()))

Output:

Code:
data['Bare Nuclei'] = pd.to_numeric(data['Bare Nuclei'])  # the column was read as strings because of '?'

data = data.fillna(data.median())

data[130:141]

Output:

Code:
data['Bare Nuclei'] = pd.to_numeric(data['Bare Nuclei'])

data.boxplot(figsize = (20,3))

Output:

Code:
z = (data - data.mean()) / data.std()

# z[130:141]

print('Number of rows before discarding outliers = %d' % (z.shape[0]))

z2 = z.loc[((z > -3).sum(axis=1) == 9) & ((z < 3).sum(axis=1) == 9), :]

print('Number of rows after discarding outliers = %d' % (z2.shape[0]))

Output:

Code:
dups = data.duplicated()

print('Number of Duplicated Row = %d' % (dups.sum()))

Output:

Code:
print('Number of rows before discarding duplicates = %d' % (data.shape[0]))

data2 = data.drop_duplicates()

print('Number of rows after discarding duplicates = %d' % (data2.shape[0]))

Output:

Machine Learning Lab # 03

Code:
import pandas as pd

import numpy as np

# Create a Pandas Series with day labels as the index

s3 = pd.Series([1.2, -0.8, 0, 1.7, -3.2, 2.2], index=['jan1', 'jan2', 'jan3', 'jan4', 'jan5', 'jan6'])

# Display the Series

# print(s3)

print(s3[s3 >= 0])

print('\n',s3 + 4)

print('\n',s3 / 2)

print('\n',np.log(s3 + 4))

Output:

Code:
# Define a dictionary with car data

car = {
    'make': ['Toyota', 'Honda', 'Ford', 'Chevrolet'],
    'model': ['Camry', 'Civic', 'Mustang', 'Malibu'],
    'msrp': [27595, 23570, 23495, 68000]
}

# Create a DataFrame using the dictionary

car_data = pd.DataFrame(car, index=[1, 2, 3, 4])

# Display the DataFrame

print(car_data)

car_data['year'] = 2016

print('Car Data after adding year')

print(car_data)

# Add a 'dealership' column, using np.nan to mark the missing value

car_data['dealership'] = ['Courtesy Ford', 'Capital Honda', 'Spartan Toyota', np.nan]

print('Car Data after adding dealership')

print(car_data)

Output:

Code:
# Define a list of tuples containing data for year, temperature, and precipitation

tuple_list = [
    (2011, 42.5, 32.4),
    (2012, 42.4, 32.5),
    (2013, 47.2, 39.2),
    (2014, 44.2, 31.4),
    (2015, 39.9, 29.8),
    (2016, 41.5, 36.7)
]

# Define a separate list of column names

col_name = ['Year', 'Temperature', 'Precipitation']

# Create a DataFrame using the list of tuples and specify the column names

weather_data = pd.DataFrame(tuple_list, columns=col_name)

# Display the DataFrame

print(weather_data)

Output:

Code:
# create a Data Frame using numpy library

upDate = np.random.randn(5,3)

colName = ['X1','X2','X3']

data = pd.DataFrame(upDate, columns= colName)

print(data)

Output:

Code:
print(data['X2'])

print(' ')

print(data.iloc[2])

print(car_data.iloc[2])

# Display specific elements and slices from the DataFrame

print("Element at row 1, column 2 (using iloc):")

print(car_data.iloc[1, 2])

print("\n" + "-"*30 + "\n")

print("Element at row 1, column 'model' (using loc):")

print(car_data.loc[1, 'model'])

print("\n" + "-"*30 + "\n")

print("Slice of rows 1 to 2 and columns 1 to 2 (using iloc):")

print(car_data.iloc[1:3, 1:3])

print("\n" + "-"*30 + "\n")

print("Rows with MSRP greater than 25000:")

print(car_data[car_data.msrp > 25000])

Output:

Code:
# Perform transformations and display results

print("Transposed Data:")

print(data.T)

print("\n" + "-"*30 + "\n")

print("Data with 4 added to each element:")

print(data + 4)

print("\n" + "-"*30 + "\n")

print("Maximum value in the data (by column):")

print(data.max())

print("\n" + "-"*30 + "\n")

print("Minimum value in each row:")

print(data.min(axis=1))

print("\n" + "-"*30 + "\n")

print("Sum of all elements (by column):")

print(data.sum())

print("\n" + "-"*30 + "\n")

print("Mean value of each row:")

print(data.mean(axis=1))

Output:

Code:

# Create a DataFrame using the NumPy library

upDate2 = np.random.randn(5, 3) # Generate a 5x3 matrix of random numbers

colName2 = ['X1', 'X2', 'X3'] # Define column names

data2 = pd.DataFrame(upDate2, columns=colName2) # Create the DataFrame with the generated data

# Display the first DataFrame

print("Data1 DataFrame:")

print(data)

print("\n" + "-"*30 + "\n")

# Display the second DataFrame

print("Data2 DataFrame:")

print(data2)

print("\n" + "-"*30 + "\n")

# Display the result of adding Data1 and Data2

print("Sum of Data1 and Data2:")

print(data.add(data2))

print("\n" + "-"*30 + "\n")

# Display the result of multiplying Data1 and Data2

print("Product of Data1 and Data2:")

print(data.mul(data2))

Output:

Code:
print('Calculate max-min per column')

f = lambda x : x.max() - x.min()

print(data.apply(f))

print("\n" + "-"*30 + "\n")

print('Calculate max-min per row')

f = lambda x : x.max() - x.min()

print(data.apply(f, axis=1))

Output:

Code:
import matplotlib.pyplot as plt

# Use %matplotlib inline to display plots in the notebook

%matplotlib inline

# Plot the line plot

s3.plot(kind='line', title='Line Plot')

plt.xlabel('Index')

plt.ylabel('Values')

plt.show()

# Plot the bar chart

s3.plot(kind='bar', title='Bar Chart', color='skyblue')

plt.xlabel('Index')

plt.ylabel('Values')

plt.show()

# Plot the histogram

s3.plot(kind='hist', title='Histogram', bins=5, color='lightgreen', edgecolor='black')

plt.xlabel('Values')

plt.ylabel('Frequency')

plt.show()

Output:

Code:
# Plot the box plot for Temperature and Precipitation

weather_data[['Temperature', 'Precipitation']].plot(kind='box', title='Box Plot')

plt.ylabel('Values')

plt.grid(axis='y')

plt.show()

Output:

Code:
daily = pd.read_csv('/content/synthetic_weather_data_multiyear.csv')

daily.index = pd.to_datetime(daily['Date'])

daily = daily["Precipitation"]

ax = daily.plot(kind = 'line', figsize= (15,3))

variance = daily.var()

ax.set_title(f'Daily Precipitation (variance: {variance:.4f})')

Output:

Code:
monthly = daily.groupby(pd.Grouper(freq = 'M')).sum()

ax = monthly.plot(kind = 'line', figsize= (15,3))

variance = monthly.var()

ax.set_title(f'Monthly Precipitation (variance: {variance:.4f})')

Output:

Code:
annual = daily.groupby(pd.Grouper(freq = 'Y')).sum()

ax = annual.plot(kind = 'line', figsize = (15,7))

variance = annual.var()

ax.set_title(f'Annual Precipitation (variance: {variance:.4f})')

Output:

Machine Learning Lab #04

Code:
import matplotlib.image as mpimg

import pandas as pd

from sklearn.decomposition import PCA

import numpy as np

import matplotlib.pyplot as plt

numimages = 16

fig = plt.figure(figsize = (7,7))

imgData = []

for i in range(1, numimages + 1):
    filename = '/content/pics/pics/Picture' + str(i) + '.jpeg'
    img = mpimg.imread(filename)
    ax = fig.add_subplot(4, 4, i)
    plt.imshow(img)
    plt.axis('off')
    ax.set_title(str(i))
    imgData.append(np.array(img).flatten().reshape(1, img.shape[0] * img.shape[1] * img.shape[2]))

imgData = np.concatenate(imgData, axis = 0)

numcomponents = 2

pca = PCA(n_components = numcomponents)

pca.fit(imgData)

projected = pca.transform(imgData)

projected_df = pd.DataFrame(projected, columns = ['PC1', 'PC2'], index = range(1, numimages + 1))

print(projected_df)

Output:

Code:
projected_df['food'] = ['burger', 'burger', 'burger', 'burger',
                        'drink', 'drink', 'drink', 'drink',
                        'pasta', 'pasta', 'pasta', 'pasta',
                        'chicken', 'chicken', 'chicken', 'chicken']

print(projected_df)

color = {'burger' : 'b', 'drink' : 'g', 'pasta' : 'r', 'chicken' : 'c'}

markertypes = {'burger' : '+', 'drink' : 'x', 'pasta' : 'o', 'chicken' : 's'}

for foodtype in markertypes:
    d = projected_df[projected_df['food'] == foodtype]
    plt.scatter(d['PC1'], d['PC2'], c=color[foodtype], marker=markertypes[foodtype], label=foodtype)

plt.title('Scatter Plot between Different Food types and Food markers')

plt.xlabel('PC1')

plt.ylabel('PC2')

plt.legend(loc = 'best')

plt.show()

Output:

Code:
import numpy as np

import matplotlib.pyplot as plt

seed = 1

# Set random seed for reproducibility (optional)

np.random.seed(seed)

# Number of data points

numinstances = 200

# Generate random X values

X = np.random.rand(numinstances, 1).reshape(-1, 1)

# Define the true Y values (Y_true = -3 * X + 1)

Y_true = -3 * X + 1

# Add random noise to the true Y values to create the observed Y values

Y = Y_true + np.random.normal(size=numinstances).reshape(-1, 1)

# Plot the data points (scatter plot)

plt.scatter(X, Y, color='black', label='Observed Y')

plt.scatter(X, Y_true, color='blue', linewidth=3, label='True Y')

# Add title and labels

plt.title('True Function Y = -3X + 1')

plt.xlabel('X')

plt.ylabel('Y')

# Add legend to the plot

plt.legend()

# Display the plot

plt.show()

Output:

Code:
import matplotlib.pyplot as plt

from sklearn import linear_model

from sklearn.metrics import mean_squared_error, r2_score

# Assuming X and Y are already defined

numinstances = len(X) # Total instances

numtrain = 20 # Number of training instances

numtest = numinstances - numtrain # Number of test instances

# Split the data into training and testing sets

X_train = X[:-numtest] # Train set features

X_test = X[-numtest:] # Test set features

Y_train = Y[:-numtest] # Train set labels

Y_test = Y[-numtest:] # Test set labels

# Step 2: Fit the regression model to the training set

# Create the LinearRegression model

regr = linear_model.LinearRegression()

# Fit the model on the training data

regr.fit(X_train, Y_train)

# Step 3: Apply the model to the test data

# Predict the values for the test set

Y_pred_test = regr.predict(X_test)

# Step 4: Evaluate the model performance

# Plotting the variation in Y (True vs Predicted values)

plt.scatter(Y_test, Y_pred_test, color='black') # True vs Predicted scatter plot

# plt.plot([min(Y_test), max(Y_test)], [min(Y_test), max(Y_test)],
#          color='blue', linewidth=2)  # Perfect-prediction reference line

plt.title('Comparing True vs Predicted Y values')

plt.xlabel('True value of Y')

plt.ylabel('Predicted value of Y')

plt.show()

Output:

Code:
# Model Evaluation:

import numpy as np

# Calculate Root Mean Squared Error (RMSE)

rmse = np.sqrt(mean_squared_error(Y_test, Y_pred_test))

# Calculate R^2 score

r2 = r2_score(Y_test, Y_pred_test)

# Output the results with proper formatting

print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')

print(f'R-Squared (R²): {r2:.4f}')

Output:

Code:
# Step #05: Post Processing - Display Model Parameters and Plot

# Display slope (coefficient) and intercept

print('Slope = ', regr.coef_[0][0])  # coef_ is 2-D because Y was fitted as a column vector

print('Intercept = ', regr.intercept_[0])

# Plot the data

plt.scatter(X_test, Y_test, color='black') # Scatter plot of actual values

plt.plot(X_test, Y_pred_test, color='blue', linewidth=3) # Plot regression line with predictions

title_label = 'Predicted Function Y = %.2f X + %.2f' % (regr.coef_[0][0], regr.intercept_[0])

# Set the plot title and labels

plt.title(title_label)

plt.xlabel('X')

plt.ylabel('Y')

plt.show() # Display the plot

Output:

Machine Learning Lab #05

Code:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier

import pydotplus

from IPython.display import Image, display

data = pd.read_csv('/content/updated_animal_classification.csv')

data.head()

Output:

Code:
data['Class'] = data['Class'].replace(['Fishes','Birds','Amphibians','Reptiles'],'Non-Mammal')

pd.crosstab([data['Warm Blooded'],data['Give Birth']],data['Class'])

Output:

Code:
from sklearn import tree

Y = data['Class']

X = data.drop(['Name','Class'], axis = 1)

clf = DecisionTreeClassifier(criterion = 'entropy', max_depth = 3)

clf.fit(X,Y)

dot_data = tree.export_graphviz(clf, feature_names=X.columns,
                                class_names=['Mammal', 'Non-Mammal'],
                                filled=True, rounded=True)

graph = pydotplus.graph_from_dot_data(dot_data)

display(Image(graph.create_png()))

Output:

Code:
test_data = [['gila monster', 0, 0, 0, 0, 1, 1, 'Non-Mammal'],
             ['platypus', 1, 0, 0, 0, 1, 1, 'Mammal'],
             ['owl', 1, 0, 0, 1, 1, 0, 'Non-Mammal'],
             ['dolphin', 1, 1, 1, 0, 0, 0, 'Mammal']]

Test_data = pd.DataFrame(test_data,columns = data.columns)

Test_data.head()

Test_Y = Test_data['Class']

Test_X = Test_data.drop(['Name','Class'], axis = 1)

Y_predTest = clf.predict(Test_X)

predicted = pd.concat([Test_data['Name'],pd.Series(Y_predTest)], axis = 1)

predicted.columns = ['Name', 'Predicted Class']

predicted.head()

Output:

Code:
import numpy as np

import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

n = 1500

mean1 = [6,14]

mean2 = [10,6]

mean3 = [14,14]

cor = [[3.5,0],[0,3.5]]

X = np.random.multivariate_normal(mean1, cor, int(n/6))

X = np.concatenate((X,np.random.multivariate_normal(mean2, cor, int(n/6))))

X = np.concatenate((X,np.random.multivariate_normal(mean3, cor, int(n/6))))

X = np.concatenate((X, 20*np.random.rand(int(n/2),2)))

Y = np.concatenate((np.ones(int(n/2)),np.zeros(int(n/2))))

plt.plot(X[:int(n/2),0],X[:int(n/2),1],'r+',ms = 4, label = 'Class 01')

plt.plot(X[int(n/2):,0],X[int(n/2):,1],'b+',ms = 4, label = 'Class 02')

plt.legend()

plt.show()

Output:

Code:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.8, random_state = 1)

from sklearn import tree

from sklearn.metrics import accuracy_score

max_depth = [2,3,4,5,6,7,8,9,10,15,20,25,30,35,40,45,50]

trainAcc = np.zeros(len(max_depth))

testAcc = np.zeros(len(max_depth))

index = 0

for depth in max_depth:
    clf = tree.DecisionTreeClassifier(max_depth=depth)
    clf = clf.fit(X_train, Y_train)
    Y_predTrain = clf.predict(X_train)
    Y_predTest = clf.predict(X_test)
    trainAcc[index] = accuracy_score(Y_train, Y_predTrain)
    testAcc[index] = accuracy_score(Y_test, Y_predTest)
    index += 1

plt.plot(max_depth, trainAcc, 'r+', max_depth, testAcc, 'b-')  # red crosses: train, blue line: test

plt.legend(['Train Accuracy','Test Accuracy'])

plt.xlabel('Max Depth')

plt.ylabel('Accuracy')

plt.show()

Output:

Machine Learning Lab #06

Code:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

trainAcc = []

testAcc = []

NumNeighbour = [1, 5, 10, 15, 20, 25, 30]

for k in NumNeighbour:
    clf = KNeighborsClassifier(n_neighbors=k, metric='minkowski', p=2)  # p=2 makes Minkowski the Euclidean distance
    clf.fit(X_train, Y_train)
    Y_predtrain = clf.predict(X_train)
    Y_predtest = clf.predict(X_test)
    trainAcc.append(accuracy_score(Y_train, Y_predtrain))
    testAcc.append(accuracy_score(Y_test, Y_predtest))

plt.plot(NumNeighbour, trainAcc, 'r-', NumNeighbour, testAcc, 'b-')

plt.legend(['Train Accuracy', 'Test Accuracy'])

plt.xlabel('Number of Neighbors')

plt.ylabel('Accuracy')

plt.title('KNN Accuracy vs Number of Neighbors')

plt.show() # Display the plot

Output:

Code:
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

c = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]

LRTrainAcc = []

LRTestAcc = []

for parm in c:
    clf = LogisticRegression(C=parm)  # C is the inverse of the regularization strength
    clf.fit(X_train, Y_train)
    Y_Predtrain = clf.predict(X_train)
    Y_Predtest = clf.predict(X_test)
    LRTrainAcc.append(accuracy_score(Y_train, Y_Predtrain))
    LRTestAcc.append(accuracy_score(Y_test, Y_Predtest))

plt.plot(c, LRTrainAcc, 'r-', c, LRTestAcc, 'b-')

plt.legend(['Train Accuracy','Test Accuracy'])

plt.xlabel('C (inverse regularization strength)')

plt.ylabel('Accuracy')

plt.title('Logistic Regression Accuracy vs C')

# plt.xscale('log')  # a log scale suits the wide range of C values

plt.show()

Output:

Code:
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# List of C values

C = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 30]

# Initialize lists for storing accuracies

SVMTrainAcc = []

SVMTestAcc = []

# Loop through different C values

for parm in C:
    clf = SVC(C=parm, kernel='linear')
    clf.fit(X_train, Y_train)  # Train the classifier

    # Predictions
    Y_PredTrain = clf.predict(X_train)
    Y_PredTest = clf.predict(X_test)

    # Calculate and store accuracies
    SVMTrainAcc.append(accuracy_score(Y_train, Y_PredTrain))
    SVMTestAcc.append(accuracy_score(Y_test, Y_PredTest))

# Plot the results

plt.plot(C, SVMTrainAcc, 'r-', C, SVMTestAcc, 'b-')

plt.legend(['Train Accuracy', 'Test Accuracy'])

plt.xlabel('C')

plt.ylabel('Accuracy')

plt.show()

Output:

Code:
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# List of C values

C = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 30]

# Initialize lists for storing accuracies

SVMTrainAcc = []

SVMTestAcc = []

# Loop through different C values

for parm in C:
    clf = SVC(C=parm, kernel='rbf')  # RBF kernel this time, instead of linear
    clf.fit(X_train, Y_train)  # Train the classifier

    # Predictions
    Y_PredTrain = clf.predict(X_train)
    Y_PredTest = clf.predict(X_test)

    # Calculate and store accuracies
    SVMTrainAcc.append(accuracy_score(Y_train, Y_PredTrain))
    SVMTestAcc.append(accuracy_score(Y_test, Y_PredTest))

# Plot the results

plt.plot(C, SVMTrainAcc, 'r-', C, SVMTestAcc, 'b-')

plt.legend(['Train Accuracy', 'Test Accuracy'])

plt.xlabel('C')

plt.ylabel('Accuracy')

plt.show()

Output:

Machine Learning Lab #07

Code:
import numpy as np

import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB

# Features: [Movie Length, Genre Code (0 or 1)]

movie_features = np.array([[120, 0], [15, 1], [99, 0], [140, 1], [100, 0], [80, 1], [10, 0], [130, 1]])

# Likes: 1 = like, 0 = dislike

movie_likes = np.array([1, 1, 0, 1, 0, 1, 0, 1])

# Create Gaussian Naive Bayes model and fit it

model = GaussianNB()

model.fit(movie_features, movie_likes)

# New movie features: [Movie Length, Genre Code]

new_movie = np.array([[100, 1]])

# Predict whether the user will like the new movie

predicted_likes = model.predict(new_movie)

# Plot the existing movie data points

plt.scatter(movie_features[:, 0], movie_features[:, 1], c=movie_likes, cmap='viridis', marker='o')

# Plot the new movie as a red 'x'

plt.scatter(new_movie[:, 0], new_movie[:, 1], color='red', marker='x')

# Plot title and labels

plt.title('Movie Likes based on Length and Genre Code')

plt.xlabel('Movie Length (minutes)')

plt.ylabel('Genre Code')

# Show the plot

plt.show()

# Print the prediction result
print(f"Tom will {'like' if predicted_likes[0] == 1 else 'dislike'} a {new_movie[0, 0]} min long "
      f"{'comedy' if new_movie[0, 1] == 1 else 'action'} movie")

Output:

Code:
# Expanded movie features and likes based on the given pattern

movie_features = np.array([

[120, 0], [15, 1], [99, 0], [140, 1], [100, 0], [80, 1], [10, 0], [130, 1], # original 8

[105, 0], [20, 1], [110, 0], [150, 1], [95, 0], [70, 1], [25, 0], [145, 1], # additional samples

[115, 0], [35, 1], [108, 0], [135, 1], [85, 0], [90, 1], [50, 0], [125, 1], # additional samples

[102, 0], [40, 1], [98, 0], [120, 1], [65, 0], [140, 1] # additional samples

])

# Expanded likes corresponding to the movie features

movie_likes = np.array([

1, 1, 0, 1, 0, 1, 0, 1, # original 8

1, 1, 0, 1, 0, 1, 0, 1, # additional samples

1, 1, 0, 1, 0, 1, 0, 1, # additional samples

1, 1, 0, 1, 0, 1 # additional samples

])

# Create Gaussian Naive Bayes model and fit it

model = GaussianNB()

model.fit(movie_features, movie_likes)

# New movie features: [Movie Length, Genre Code]

new_movie = np.array([[100, 0]])

# Predict whether the user will like the new movie

predicted_likes = model.predict(new_movie)

# Plot the existing movie data points

plt.scatter(movie_features[:, 0], movie_features[:, 1], c=movie_likes, cmap='viridis', marker='o')

# Plot the new movie as a red 'x'

plt.scatter(new_movie[:, 0], new_movie[:, 1], color='red', marker='x')

# Plot title and labels

plt.title('Movie Likes based on Length and Genre Code')

plt.xlabel('Movie Length (minutes)')

plt.ylabel('Genre Code')

# Show the plot

plt.show()

# Print the prediction result
print(f"Tom will {'like' if predicted_likes[0] == 1 else 'dislike'} a {new_movie[0, 0]} min long "
      f"{'comedy' if new_movie[0, 1] == 1 else 'action'} movie")

Output:

Machine Learning Lab # 08

Code:
import numpy as np

import tensorflow as tf

import matplotlib.pyplot as plt

from tensorflow import keras

x = np.array([1.0,2.0,3.0,4.0,5.0], dtype = float)

y = np.array([1.0,1.5,2.0,2.5,3.0], dtype = float)

model = keras.Sequential([keras.layers.Dense(units = 1, input_shape = [1])])

model.compile(optimizer= 'sgd', loss = 'mean_squared_error')

model.fit(x,y, epochs=500)

print(model.predict(np.array([[7.0]])))

test = np.array([[7.0], [9.0], [11.0], [13.0]])  # 2-D (batch, feature) shape, as Keras expects

ground_truth = np.array([4,5,6,7])

prediction = model.predict(test)

prediction_value = [y[0] for y in prediction]

plt.scatter(test, ground_truth, color = 'g', label = 'Ground Truth')

plt.scatter(test, prediction_value, color = 'r', label = 'Prediction')

plt.xlabel('Test Data (x)')

plt.ylabel('Predicted vs Actual (y)')

plt.legend()

plt.show()

Output:

Code:
from sklearn.cluster import KMeans

X = np.array([[1,2],[1.5,1.8],[5,8],[8,8],[1,0.6],[9,11],[8,2],[10,2],[9,3]])

# plt.plot(X[:,0], X[:,1], c = 'red', markersize = 10)

plt.scatter(X[:,0], X[:,1], color = 'red', s = 100)

plt.xlabel('Feature 1')

plt.ylabel('Feature 2')

plt.show()

kmean = KMeans(n_clusters = 3)

kmean.fit(X)

centroids = kmean.cluster_centers_

labels = kmean.labels_

print('Centroids :',centroids)

print('Labels :',labels)

colors = ['g', 'r', 'b']

# Plot each cluster once so the legend has no duplicate entries
for i in range(3):
    cluster_points = X[labels == i]
    plt.scatter(cluster_points[:, 0], cluster_points[:, 1], color=colors[i], s=100, label=f'Cluster {i}')

plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=150, linewidths=5,
            zorder=10, color='black', label='Centroids')

plt.title('KMeans Clusters')

plt.legend(loc = 'best')

plt.show()

Output:

Code:
from sklearn.ensemble import BaggingClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

iris = load_iris()

X, Y = iris.data,iris.target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 42)

bagging = BaggingClassifier(estimator = DecisionTreeClassifier(), n_estimators=10, random_state = 42)

bagging.fit(X_train, Y_train)

Y_pred = bagging.predict(X_test)

print('Bagging Accuracy : %.2f' % (accuracy_score(Y_test,Y_pred)))

Output:

Code:
from sklearn import ensemble

numbaseclassifier = 500
maxdepth = 10

trainAcc = []
testAcc = []

# Random Forest
clf = ensemble.RandomForestClassifier(n_estimators=numbaseclassifier)
clf.fit(X_train, Y_train)
Y_predTrainRF = clf.predict(X_train)
Y_predTestRF = clf.predict(X_test)
trainAcc.append(accuracy_score(Y_train, Y_predTrainRF))
testAcc.append(accuracy_score(Y_test, Y_predTestRF))
print('Random Forest Train Accuracy', trainAcc[0])
print('Random Forest Test Accuracy', testAcc[0])

# Bagging
clf = ensemble.BaggingClassifier(DecisionTreeClassifier(max_depth=maxdepth),
                                 n_estimators=numbaseclassifier)
clf.fit(X_train, Y_train)
Y_predTrainBC = clf.predict(X_train)
Y_predTestBC = clf.predict(X_test)
trainAcc.append(accuracy_score(Y_train, Y_predTrainBC))
testAcc.append(accuracy_score(Y_test, Y_predTestBC))
print('Bagging Classifier Train Accuracy', trainAcc[1])
print('Bagging Classifier Test Accuracy', testAcc[1])

# AdaBoost
clf = ensemble.AdaBoostClassifier(DecisionTreeClassifier(max_depth=maxdepth),
                                  n_estimators=numbaseclassifier)
clf.fit(X_train, Y_train)
Y_predTrainAC = clf.predict(X_train)
Y_predTestAC = clf.predict(X_test)
trainAcc.append(accuracy_score(Y_train, Y_predTrainAC))
testAcc.append(accuracy_score(Y_test, Y_predTestAC))
print('AdaBoost Train Accuracy', trainAcc[2])
print('AdaBoost Test Accuracy', testAcc[2])

methods=['Random Forest','Bagging','Adaboost']

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,6))

ax1.plot(methods, trainAcc, marker='o', linestyle='-', color='blue', label='Training Accuracy')

ax1.set_title('Training Accuracy')

ax1.set_xlabel('Method')

ax1.set_ylabel('Accuracy')

ax1.grid(True) # Add grid lines

ax1.legend() # Add legend

ax2.plot(methods, testAcc, marker='o', linestyle='-', color='green', label='Testing Accuracy')

ax2.set_title('Testing Accuracy')

ax2.set_xlabel('Method')

ax2.set_ylabel('Accuracy')

ax2.grid(True) # Add grid lines

ax2.legend()

plt.show()

Output:
