AI ML - Cycle 2 Programs
Ex No: 7a BUILD LINEAR REGRESSION MODELS
Date:
Program:
import pandas as pd
import statsmodels.api as sm
data = pd.read_csv("pima_diabetes.csv")
#create correlation matrix
print(data.corr())
#Bivariate Analysis of Glucose-Insulin features
#define response variable 1
y1 = data['Glucose']
#define explanatory variable 1
x1 = data[['Insulin']]
#add constant to predictor variables
x1 = sm.add_constant(x1)
#fit linear regression model
model1 = sm.OLS(y1, x1).fit()
#view model summary
print(model1.summary())
#Bivariate Analysis of Age-Pregnancies features
#define response variable 2
y2 = data['Age']
#define explanatory variable 2
x2 = data[['Pregnancies']]
#add constant to predictor variables
x2 = sm.add_constant(x2)
#fit linear regression model
model2 = sm.OLS(y2, x2).fit()
#view model summary
print(model2.summary())
#Bivariate Analysis of SkinThickness-BMI features
#define response variable 3
y3 = data['SkinThickness']
#define explanatory variable 3
x3 = data[['BMI']]
#add constant to predictor variables
x3 = sm.add_constant(x3)
#fit linear regression model
model3 = sm.OLS(y3, x3).fit()
#view model summary
print(model3.summary())
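# (optional check, not in the original listing) the fitted intercept and
# slope of each model can also be read off directly, e.g. for model 1:
print(model1.params)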
Output:
a. Correlation Matrix
Result:
Ex No: 7b BUILD LOGISTIC REGRESSION MODELS
Date:
Program:
# importing libraries
import statsmodels.api as sm
import pandas as pd
# loading the training dataset
data = pd.read_csv('pima_diabetes.csv', index_col = 0)
# defining the dependent and independent variables
Xtrain = data[['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
               'DiabetesPedigreeFunction', 'Age']]
ytrain = data[['Outcome']]
# building the model and fitting the data
log_reg = sm.Logit(ytrain, Xtrain).fit()
# printing the summary table
print(log_reg.summary())
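# (supplementary sketch) Logit.predict returns fitted probabilities; a 0.5
# threshold converts them into class predictions:
pred_prob = log_reg.predict(Xtrain)
pred_class = (pred_prob >= 0.5).astype(int)
print(pred_class[:10])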
Output:
Result:
Ex No: 7c BUILD DECISION TREES
Date:
Program:
import pandas
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
df = pandas.read_csv("data.csv")
print("Input:")
print(df.head(5))
d = {'UK':0,'USA':1,'N':2}
df['Nationality'] = df['Nationality'].map(d)
d = {'YES':1, 'NO':0}
df['Go'] = df['Go'].map(d)
print("Transformed Data:")
print(df.head(5))
features = ['Age','Experience','Rank','Nationality']
X = df[features]
y = df['Go']
dtree = DecisionTreeClassifier()
dtree = dtree.fit(X,y)
print(dtree.predict([[40,10,6,1]]))
print("[1]means 'Go'")
print("[0]means 'NO'")
DATA SET : (data.csv)
Age Experience Rank Nationality Go
36 10 9 UK NO
42 12 4 USA NO
23 4 6 N NO
52 4 4 USA NO
43 21 8 USA YES
Output:
Result:
Ex No: 7d BUILD RANDOM FORESTS
Date:
Program:
import numpy as np
# 'features' is assumed to be a pandas DataFrame loaded in an earlier
# (missing) step, with the prediction target in its 'actual' column
# Labels are the values we want to predict
labels = np.array(features['actual'])
# Remove the labels from the features
# axis 1 refers to the columns
features= features.drop('actual', axis = 1)
# Saving feature names for later use
feature_list = list(features.columns)
# Convert to numpy array
features = np.array(features)
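# (sketch of the missing split/train/evaluate steps, assuming a regression
# target; the model settings here are assumptions, not the original ones)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.25, random_state=42)
rf = RandomForestRegressor(n_estimators=1000, random_state=42)
rf.fit(train_features, train_labels)
predictions = rf.predict(test_features)
# report accuracy as 100 - mean absolute percentage error
# (assumes the target values are nonzero)
mape = 100 * np.mean(np.abs(predictions - test_labels) / test_labels)
print('Accuracy:', round(100 - mape, 2), '%.')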
Output:
Accuracy: 93.73 %.
Result:
Ex No: 7e BUILD SVM MODELS
Date:
Program:
import pandas
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
data = pandas.read_csv("vector.csv")
print("Input: ")
print(data.head(10))
# split into training and test sets (this step was missing from the
# listing; a 75/25 split is an assumption)
training_set, test_set = train_test_split(data, test_size=0.25, random_state=1)
x_train = training_set.iloc[:,0:2].values
y_train = training_set.iloc[:,2].values
x_test = test_set.iloc[:,0:2].values
y_test = test_set.iloc[:,2].values
# fit the classifier and predict on the test set (also missing from the
# listing; an RBF kernel is an assumption)
classifier = SVC(kernel='rbf', random_state=1)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
accuracy = float(cm.diagonal().sum()/len(y_test))
print("\nAccuracy of SVM for the given dataset: ", accuracy)
Dataset
Output:
Result:
Ex No: 8 IMPLEMENT ENSEMBLING TECHNIQUES
Date:
Program:
#Implement VotingClassifier
#Importing necessary libraries:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
#Creating dataset:
X, y = make_moons(n_samples=500, noise=0.30)
X_train, X_test, y_train, y_test = train_test_split(X, y)
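#Building and evaluating the voting ensemble (this step was missing from
#the listing; the estimator settings are assumptions). Note that without
#random_state in make_moons/train_test_split the accuracies vary per run.
voting_clf = VotingClassifier(
    estimators=[('lr', LogisticRegression()),
                ('rf', RandomForestClassifier()),
                ('svc', SVC())],
    voting='hard')
for clf in (LogisticRegression(), RandomForestClassifier(), SVC(), voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))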
#Implement BaggingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
bagging_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=250,
    max_samples=100, bootstrap=True, random_state=101)
#Fitting training data:
bagging_clf.fit(X_train, y_train)
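#Evaluating on the test data (this step was missing from the listing):
y_pred_bag = bagging_clf.predict(X_test)
print(accuracy_score(y_test, y_pred_bag))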
#Implement AdaBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
adaboost_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    algorithm="SAMME.R", learning_rate=0.5, random_state=42)
Output:
#For VotingClassifier
LogisticRegression 0.848
RandomForestClassifier 0.88
SVC 0.896
VotingClassifier 0.896
#For BaggingClassifier
0.888
#For AdaBoostClassifier
0.864
Result:
Ex No: 9 IMPLEMENT CLUSTERING ALGORITHMS
Date:
Program:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
data = {'x': [25,34,22,27,33,33,31,22,35,34,67,54,57,43,50,
              57,59,52,65,47,49,48,35,33,44,45,38,43,51,46],
        'y': [79,51,53,78,59,74,73,57,69,75,51,32,40,47,53,
              36,35,58,59,50,25,20,14,12,20,5,29,27,8,7]}
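# (sketch of the missing clustering and plotting steps; k=3 is an assumption)
df = pd.DataFrame(data)
kmeans = KMeans(n_clusters=3)
kmeans.fit(df)
plt.scatter(df['x'], df['y'], c=kmeans.labels_.astype(float), s=50)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            c='red', s=50)
plt.show()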
Output:
Result:
Ex No: 10 IMPLEMENT GMM ALGORITHMS
Date:
Program:
import matplotlib.pyplot as plt
from sklearn import datasets
import sklearn.metrics as sm
import pandas as pd
import numpy as np
%matplotlib inline
# import some data to play with
iris = datasets.load_iris()
# the feature frame X, the target y and the plot colormap were missing from
# this listing; the column names are inferred from the X.Petal_Length /
# X.Petal_Width references further down
X = pd.DataFrame(iris.data,
                 columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])
y = iris.target
colormap = np.array(['red', 'lime', 'black'])
# GMM
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
xs.sample(5)
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
y_cluster_gmm
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')
# Accuracy
sm.accuracy_score(y, y_cluster_gmm)
# Confusion Matrix
sm.confusion_matrix(y, y_cluster_gmm)
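# Note: GMM cluster labels are arbitrary, so the confusion matrix below has
# its columns permuted relative to the true classes (clusters 1 and 2 are
# swapped), and the raw accuracy_score understates the clustering quality;
# remapping labels before scoring, e.g. np.array([0, 2, 1])[y_cluster_gmm]
# for this particular run, would align them.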
Output:
array([[50, 0, 0],
[ 0, 5, 45],
[ 0, 50, 0]], dtype=int64)
Result: