ML Cops
ML Cops
# Notebook-wide imports. The `import` / `from` keywords (and the tail of the
# sklearn.metrics line) were truncated in the exported text and are restored
# here; repeated lines are kept exactly as they appeared.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans,AgglomerativeClustering
from sklearn.metrics import silhouette_score
import scipy.cluster.hierarchy as sch
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm
from imblearn.over_sampling import SMOTE
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Load the semicolon-separated bank CSV into a DataFrame and display it.
df = pd.read_csv(
    '/content/bank.csv',
    sep=';',
)
df
age job marital education default housing loan contact mo
# Print the frame's index range, per-column dtypes and non-null counts.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 21 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 age 41188 non-null int64
1 job 41188 non-null object
2 marital 41188 non-null object
3 education 41188 non-null object
4 default 41188 non-null object
5 housing 41188 non-null object
6 loan 41188 non-null object
7 contact 41188 non-null object
8 month 41188 non-null object
9 day_of_week 41188 non-null object
10 duration 41188 non-null int64
11 campaign 41188 non-null int64
12 pdays 41188 non-null int64
13 previous 41188 non-null int64
14 poutcome 41188 non-null object
15 emp.var.rate 41188 non-null float64
16 cons.price.idx 41188 non-null float64
17 cons.conf.idx 41188 non-null float64
18 euribor3m 41188 non-null float64
19 nr.employed 41188 non-null float64
20 y 41188 non-null object
dtypes: float64(5), int64(5), object(11)
memory usage: 6.6+ MB
# Count missing values in each column.
df.isnull().sum()
age 0
job 0
marital 0
education 0
default 0
housing 0
loan 0
contact 0
month 0
day_of_week 0
duration 0
campaign 0
pdays 0
previous 0
poutcome 0
emp.var.rate 0
cons.price.idx 0
cons.conf.idx 0
euribor3m 0
nr.employed 0
y 0
dtype: int64
# Called with only the frame (no x/y/hue), i.e. seaborn's wide-form input.
# NOTE(review): on a frame of this size the render is likely slow — confirm
# this overview plot is actually wanted.
sns.scatterplot(df)
<Axes: >
/usr/local/lib/python3.10/dist-packages/IPython/core/events.py:89: UserWarning: C
func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
# Draw one count plot per column of the frame on a shared 10x3 subplot grid.
x = df
plt.figure(figsize=(50, 50))
# Iterating a DataFrame yields its column labels; subplot positions are
# 1-based, hence the enumerate start of 1.
for i, col in enumerate(x, start=1):
    plt.subplot(10, 3, i)
    sns.countplot(data=x, x=col)
# For every object-dtype column, plot its value counts split by the target 'y'.
for column in df.select_dtypes(include='object'):
    sns.countplot(x=column, hue='y', data=df)
    # NOTE(review): the export lost indentation; show() is kept inside the
    # loop so each column gets its own figure instead of all plots being
    # drawn onto the same axes — confirm against the original notebook.
    plt.show()
# Numeric-only view of the frame, displayed as the cell result.
df1 = df.select_dtypes(include='number')
df1
age duration campaign pdays previous emp.var.rate cons.price.idx con
# Pairwise correlation of the numeric columns, followed by the same matrix
# rendered as an annotated heat map.
df1.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    data=df1.corr(),
    annot=True,
)
<Axes: >
# Replace every object-dtype column (including the target 'y') with integer
# codes, in place.
le = LabelEncoder()
for column in df.select_dtypes(include='object'):
    # fit_transform refits the shared encoder on each column, so after the
    # loop `le` only holds the classes of the last column processed.
    df[column] = le.fit_transform(df[column])
df.head()
age job marital education default housing loan contact month day_of_wee
0 56 3 1 0 0 0 0 1 6
1 57 7 1 3 1 0 0 1 6
2 37 7 1 3 0 2 0 1 6
3 40 0 1 1 0 0 0 1 6
4 56 7 1 3 0 0 2 1 6
5 rows × 21 columns
# Separate the target column 'y' from the remaining feature columns.
y = df['y']
x = df.drop(columns=['y'])
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=.8,random_
x_train,x_test,y_train,y_test