Practical: 1.
Write a program to draw scatter plots using the Iris
dataset.
import pandas as pd
import matplotlib.pyplot as plt
# Load the Iris measurements and echo the whole table.
df = pd.read_csv("iris.csv")
print(df)

# Each (x, y) column pair gets its own scatter figure, shown in this order.
axis_pairs = (
    ("sepal_length", "sepal_width"),
    ("petal_length", "petal_width"),
    ("sepal_length", "petal_length"),
    ("sepal_width", "petal_width"),
)
for x_column, y_column in axis_pairs:
    df.plot(x=x_column, y=y_column, kind="scatter")
    plt.show()
Practical: 2. Write a Python program to find all
null values in a given dataset and remove them.
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset and show it in full.
df = pd.read_csv("iris.csv")
print(df)

# Report how many missing values each column contains before cleaning.
# (The count has to be printed: a bare expression shows nothing in a script.)
print(df.isnull().sum())

# dropna() returns a new frame without any row that holds a missing value;
# df itself is left unchanged.
removedValue = df.dropna()

# Confirm the cleaned frame: every per-column count should now be zero.
print(removedValue.isnull().sum())
Practical: 3. Write a Python program to convert the
categorical values to numeric format for a given
dataset (Startups Expenses).
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset and show it in full.
df = pd.read_csv("Startups_Expenses.csv")
print(df)

# Arrays taken from the raw frame, before 'State' is encoded: every column
# but the last, and the column at position 4 (presumably the target --
# confirm against the CSV). Neither is used again in this practical.
x = df.iloc[:, :-1].values
y = df.iloc[:, 4].values

# One-hot encode 'State'. drop_first=True omits the first category's column
# (it is implied when all the others are 0); dtype=int makes the indicators
# 0/1 integers, because recent pandas versions produce booleans by default.
df1 = pd.get_dummies(df['State'], drop_first=True, dtype=int)

# Put the indicator columns in front and remove the original text column.
df = pd.concat([df1, df], axis=1)
df.drop('State', axis=1, inplace=True)

# Show the first rows of the encoded frame (printed so a script displays it).
print(df.head())
Practical: 4. Write a Python program to
implement simple Linear Regression for
predicting house prices.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the house-sales data and inspect it.
df = pd.read_csv("kc_house_data.csv")
print(df)
print(df.head())
print(df.tail())

# Drop every row that has a missing value; the two shapes show how many
# rows were removed.
print(df.shape)
df = df.dropna()
print(df.shape)

# Single predictor (living area) and the response (price).
space = df['sqft_living']
price = df['price']

# scikit-learn expects a 2-D feature matrix, hence the (-1, 1) reshape.
x = np.array(space).reshape(-1, 1)
y = np.array(price)

# Hold out one third of the rows for testing; fixed seed for repeatability.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=1/3, random_state=0
)

# Fit the regression on the training split only.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)

# Visualizing the Training Set Results
plt.scatter(x_train, y_train, color='red')
plt.plot(x_train, regressor.predict(x_train), color='blue')
plt.title("Visuals for Training Dataset")
plt.xlabel("Space")
plt.ylabel("Price")
plt.show()

# Visualizing the Test Results
plt.scatter(x_test, y_test, color='red')
# The fitted line is drawn from the training inputs again: with one feature
# the model is a single straight line, so it is the same line either way.
plt.plot(x_train, regressor.predict(x_train), color='blue')
plt.title("Visuals for Test Dataset")
plt.xlabel("Space")
plt.ylabel("Price")
plt.show()
Practical: 5. Write a Python program to implement the k-nearest neighbors ML algorithm to build
a prediction model.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Import necessary modules
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
# Loading data
irisData = load_iris()

# Create feature and target arrays
X = irisData.data
y = irisData.target

# Split into training and test set (20% held out, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline classifier with a fixed neighbourhood size of 7
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, y_train)

# Predict on dataset which model has not seen before
print(knn.predict(X_test))

# Calculate the accuracy of the model
print(knn.score(X_test, y_test))

# Compare accuracy for k = 1 .. 8; one slot per k in each result array
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Loop over K values
for i, k in enumerate(neighbors):
    # Refit from scratch for this k (rebinds knn, replacing the k=7 model)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Compute training and test data accuracy
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Generate plot
plt.plot(neighbors, test_accuracy, label='Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label='Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()