17.11.24 - Jupyter Notebook - Doc
17.11.24 - Jupyter Notebook - Doc
2024_Jupiter_Notebook_doc#
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
1 of 6 11/17/2024, 4:50 PM
17.11 - Jupyter Notebook http://localhost:8888/notebooks/17.11.2024_Jupiter_Notebook_doc#
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
scaler = Normalizer().fit(X)
normalizedX = scaler.transform(X)
In [10]: # binarization
from pandas import read_csv
from numpy import set_printoptions
from sklearn.preprocessing import Binarizer
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
binarizer = Binarizer(threshold=6.0).fit(X)
binaryX = binarizer.transform(X)
[[0. 1. 1. 1. 0. 1. 0. 1.]
[0. 1. 1. 1. 0. 1. 0. 1.]
[1. 1. 1. 0. 0. 1. 0. 1.]
[0. 1. 1. 1. 1. 1. 0. 1.]
[0. 1. 1. 1. 1. 1. 0. 1.]]
2 of 6 11/17/2024, 4:50 PM
17.11 - Jupyter Notebook http://localhost:8888/notebooks/17.11.2024_Jupiter_Notebook_doc#
In [12]: # Feature Extraction with Univariate Statistical Tests (Chi-squared for classification
from pandas import read_csv
from numpy import set_printoptions
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# load data
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
test = SelectKBest(score_func=chi2, k=3)
fit = test.fit(X, Y)
# summarize scores
set_printoptions(precision=3)
print(fit.scores_)
features = fit.transform(X)
3 of 6 11/17/2024, 4:50 PM
17.11 - Jupyter Notebook http://localhost:8888/notebooks/17.11.2024_Jupiter_Notebook_doc#
# load data
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
rfe = RFE(estimator=LogisticRegression(), n_features_to_select=3)
fit = rfe.fit(X, Y)
print(fit.n_features_)
print(fit.support_)
print(fit.ranking_)
C:\Users\CSE\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:45
8: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html (https://sciki
t-learn.org/stable/modules/preprocessing.html)
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regres
sion (https://scikit-learn.org/stable/modules/linear_model.html#logistic-regr
ession)
n_iter_i = _check_optimize_result(
3
[ True False False False False True True False]
[1 2 4 5 6 1 1 3]
4 of 6 11/17/2024, 4:50 PM
17.11 - Jupyter Notebook http://localhost:8888/notebooks/17.11.2024_Jupiter_Notebook_doc#
# load data
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
pca = PCA(n_components=4)
fit = pca.fit(X)
# summarize components
print(fit.explained_variance_ratio_)
print(fit.components_)
[0.889]
[[-2.022e-03 9.781e-02 1.609e-02 6.076e-02 9.931e-01 1.401e-02
5.372e-04 -3.565e-03]]
5 of 6 11/17/2024, 4:50 PM
17.11 - Jupyter Notebook http://localhost:8888/notebooks/17.11.2024_Jupiter_Notebook_doc#
# load data
filename = 'D:\\Dataset\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# Apply PCA and automatically select the number of components to explain 95% of the va
pca = PCA(n_components=0.75)
X_pca = pca.fit_transform(X_scaled)
In [ ]:
6 of 6 11/17/2024, 4:50 PM