Scikit

This document contains code for common machine learning tasks using scikit-learn: preprocessing data (normalizing, one-hot encoding, imputing missing values), KNN classification, decision tree regression, SVM classification, SVM classification of the digits data (in the section labelled clustering), and ensemble modelling with random forest regression. The code loads sample datasets, splits them into training and test sets, trains models, and evaluates performance on both sets. Simple parameter tuning is done for KNN, decision trees, and random forests.


#1st hands on *Preprocessing*

#Write your code here


from sklearn.datasets import load_iris
import sklearn.preprocessing as preprocessing
from sklearn.impute import SimpleImputer
import numpy as np

iris = load_iris()

# Scale each sample to unit L2 norm
normalizer = preprocessing.Normalizer(norm='l2').fit(iris.data)
iris_normalized = normalizer.transform(iris.data)
print(iris_normalized.mean(axis=0))

# One-hot encode the class labels (the encoder expects a 2-D array, hence the reshape)
enc = preprocessing.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
print(iris_target_onehot.toarray()[[0, 50, 100]])

# Blank out the first 50 rows, then fill the gaps with column means
# (SimpleImputer replaces the Imputer class removed from sklearn.preprocessing)
iris.data[:50, :] = np.nan
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer = imputer.fit(iris.data)
iris_imputed = imputer.transform(iris.data)
print(iris_imputed.mean(axis=0))
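
# Aside (an assumption, not part of the original hands-on): the imputation and
# normalization steps above can be chained into a single transformer with
# sklearn's Pipeline, so one fit_transform call runs both in order.
from sklearn.pipeline import Pipeline

preprocess = Pipeline([
    ("impute", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ("normalize", preprocessing.Normalizer(norm="l2")),
])
iris_preprocessed = preprocess.fit_transform(iris.data)
print(iris_preprocessed.mean(axis=0))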

#2nd hands on *Hands-On KNN*

#Write your code here


import sklearn.datasets as dataset
from sklearn.model_selection import train_test_split
import numpy as np

iris = dataset.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)
print(X_train.shape)
print(X_test.shape)


from sklearn.neighbors import KNeighborsClassifier

# Fit KNN with default parameters and score on the train and test sets
knn_clf = KNeighborsClassifier()
knn_clf = knn_clf.fit(X_train, Y_train)

print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))

# Try n_neighbors from 3 to 10 and keep the value with the best test score
max_score = 0
best_n_neighbour = 0
for cluster in range(3, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=cluster)
    knn_clf = knn_clf.fit(X_train, Y_train)
    score = knn_clf.score(X_test, Y_test)
    if score > max_score:
        max_score = score
        best_n_neighbour = cluster
    print(str(cluster), score)

print(best_n_neighbour)
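
# Aside (an assumption, not part of the original exercise): the same sweep can
# be done with GridSearchCV, which cross-validates each candidate n_neighbors
# on the training data instead of tuning against the test set.
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(KNeighborsClassifier(),
                    param_grid={"n_neighbors": list(range(3, 11))}, cv=5)
grid.fit(X_train, Y_train)
print(grid.best_params_, grid.best_score_)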

#3rd hands on *Decision Tree - Hands-On*

#Write your code here


import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np
from sklearn.tree import DecisionTreeRegressor
np.random.seed(100)
# Note: load_boston was deprecated and removed in scikit-learn 1.2+;
# this exercise assumes an older release (see the alternative sketch below)
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

dt_regressor = DecisionTreeRegressor()
dt_reg = dt_regressor.fit(X_train, Y_train)

print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))

# Predict prices for the first two test samples
y_pred = dt_reg.predict(X_test[:2])
print(y_pred)
# Try max_depth from 2 to 5 and keep the depth with the best test score
maxdepth = 2
maxscore = 0
for i in range(2, 6):
    dt_regressor = DecisionTreeRegressor(max_depth=i)
    dt_reg = dt_regressor.fit(X_train, Y_train)
    score = dt_reg.score(X_test, Y_test)
    if maxscore < score:
        maxdepth = i
        maxscore = score
print(maxdepth)
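
# Aside (an assumption, not in the original): on scikit-learn 1.2+ where
# load_boston is gone, fetch_california_housing is a stand-in regression
# dataset; the rest of the exercise works unchanged.
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
Xh_train, Xh_test, Yh_train, Yh_test = model_selection.train_test_split(
    housing.data, housing.target, random_state=30)
dt_housing = DecisionTreeRegressor(max_depth=5).fit(Xh_train, Yh_train)
print(dt_housing.score(Xh_test, Yh_test))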

# 4th hands on *Hands-On - SVM*

#Write your code here


import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

digits = datasets.load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)

print(X_train.shape)
print(X_test.shape)

from sklearn.svm import SVC


# SVC on the raw pixel values
svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test, y_test))

# Standardize the features and train again -- scaling usually helps SVMs
scaler = StandardScaler()
scaler.fit(X)
digits_standardized = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    digits_standardized, y, random_state=30, stratify=y)

svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf2.score(X_test, y_test))
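
# Aside (an assumption, not part of the hands-on): the scale-then-fit pattern
# can be packaged as one estimator with make_pipeline, so the scaler is fit
# on the training split only and no test data leaks into it.
from sklearn.pipeline import make_pipeline

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=30,
                                                    stratify=y)
svm_pipe = make_pipeline(StandardScaler(), SVC()).fit(X_train, y_train)
print(svm_pipe.score(X_test, y_test))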

#5th hands on *Hands-On - Clustering*

#Write your code here


import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing
digits = datasets.load_digits()
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    digits.data, digits.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

classifier = SVC()
svm_clf = classifier.fit(X_train, Y_train)

print(svm_clf.score(X_test, Y_test))

# Standardize and retrain; scaling typically improves the SVM's accuracy
standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(digits.data)
digits_standardized = standardizer.transform(digits.data)

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    digits_standardized, digits.target, random_state=30)
classifier = SVC()
svm_clf2 = classifier.fit(X_train, Y_train)
print(svm_clf2.score(X_test, Y_test))
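
# Note: despite the section title, the code above classifies digits with an
# SVM. A minimal clustering sketch on the same data (an assumption, not part
# of the original) using KMeans with one cluster per digit class:
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=10, random_state=30, n_init=10)
cluster_labels = kmeans.fit_predict(digits.data)
print(cluster_labels[:10])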

#6th hands on *Hands-On - Ensemble*


from sklearn.ensemble import RandomForestRegressor
import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np

np.random.seed(100)
# load_boston again assumes an older scikit-learn release (removed in 1.2+)
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Random forest with default parameters
rf_regressor = RandomForestRegressor()
rf_reg = rf_regressor.fit(X_train, Y_train)

print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))

# Predict prices for the first two test samples
predicted = rf_reg.predict(X_test[:2])
print(predicted)

# Sweep max_depth from 3 to 5 with 100 trees and report the best depth
depths = []
scores = []
c_estimators = 100
for x in range(3, 6):
    rf_regressor = RandomForestRegressor(n_estimators=c_estimators, max_depth=x)
    rf_reg = rf_regressor.fit(X_train, Y_train)
    score = rf_reg.score(X_test, Y_test)
    depths.append(x)
    scores.append(score)

print((depths[np.argmax(scores)], c_estimators))
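
# Optional follow-up (an assumption, not part of the original exercise):
# a fitted random forest exposes per-feature importances, useful for a quick
# sanity check of which inputs drive the predictions.
importances = rf_reg.feature_importances_
for name, imp in sorted(zip(boston.feature_names, importances),
                        key=lambda t: -t[1])[:5]:
    print(name, round(imp, 3))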
