Aiml
[19]: import random
import pandas as pd
import matplotlib.pyplot as plt

data = {
    'age': [random.randint(20, 60) for _ in range(100)],
    'gender': [random.choice(['Male', 'Female']) for _ in range(100)],
    'income': [random.randint(20000, 100000) for _ in range(100)],
}
df = pd.DataFrame(data)
df.to_csv('data.csv', index=False)
[10]: df.tail()   # data is a plain dict and has no .tail(); use the DataFrame
[15]: plt.figure(figsize=(1,2))
counts = df['gender'].value_counts()   # use the DataFrame; pairing labels with counts keeps the bars aligned
plt.bar(counts.index, counts.values, width=0.4)
plt.xlabel('Gender')
plt.ylabel('Count')
plt.title('Gender Comparison')
plt.show()
[5]: data1 = {
    'Age': [35, 41, 23, 32, 28, 36, 45, 39, 44, 29],
    'Income': [70000, 90000, 50000, 60000, None, 75000, 100000, 80000, 95000, 55000],
}
df = pd.DataFrame(data1)
df.to_csv('data1.csv', index=False)
[7]: df=pd.read_csv('data1.csv')
print(df)
df
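The Income column written above contains a missing value, and the cell that handles it is not in this export. A minimal sketch of one common approach (mean imputation; the original handling method is an assumption):

print(df.isnull().sum())                                   # Income shows a single missing value
df['Income'] = df['Income'].fillna(df['Income'].mean())    # assumed approach: fill with the column mean
print(df)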
3 Linear Regression
[11]: df_sal.describe()
[15]: X=df_sal.iloc[:,1:-1].values
y=df_sal.iloc[:,-1].values
[16]: LinearRegression()
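Only the LinearRegression() repr of cell [16] survives here; the cells that load df_sal, fit the model, and draw the plots are missing. A minimal sketch of the usual steps, assuming df_sal came from a salary CSV read in an earlier cell and that an 80/20 split was used (all split parameters are assumptions):

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
regressor = LinearRegression()                   # matches the repr shown above
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

order = X_test[:, 0].argsort()                   # sort so the fitted line plots cleanly
plt.scatter(X_test[:, 0], y_test)                # actual values
plt.plot(X_test[order, 0], y_pred[order])        # fitted line
plt.box(False)
plt.show()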
4 Multiple Linear Regression
[3]: c1 = "advertising.csv"
df = pd.read_csv(c1)   # read_csv already returns a DataFrame
[5]: df.shape
[5]: (200, 4)
[6]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
[7]: df.isnull().sum()
[7]: TV 0
Radio 0
Newspaper 0
Sales 0
dtype: int64
[8]: X = df.drop('Sales', axis=1)
y = df['Sales']
# the train/test split cell is missing from the export; a 70/30 split matches the shapes printed below
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
(140, 3)
(140,)
(60, 3)
(60,)
[10]: model = LinearRegression()
model.fit(x_train, y_train)
[10]: LinearRegression()
y_pred = model.predict(x_test)   # prediction step implied by y_pred below (not in the export)
plt.scatter(y_test, y_pred)      # actual vs. predicted scatter (assumed; the cell is truncated in the export)
plt.xlabel('Actual Sales')
plt.ylabel('Predicted Sales')
plt.title('Multiple Linear Regression')
lims = [min(min(y_test), min(y_pred)), max(max(y_test), max(y_pred))]
plt.plot(lims, lims, 'k--')
plt.show()
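The evaluation cell's contents are not in this export. A minimal sketch of the usual regression metrics, assuming y_pred from the plotting cell above and that MSE and R^2 were the metrics of interest:

from sklearn.metrics import mean_squared_error, r2_score
print('MSE:', mean_squared_error(y_test, y_pred))
print('R^2:', r2_score(y_test, y_pred))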
5 Logistic Regression
[23]: import numpy as np

np.random.seed(123)
n = 1000
age = np.random.randint(20, 40, n)
gender = np.random.choice(['male', 'female'], n)
education = np.random.choice(['high school', 'college', 'graduate'], n)
job_level = np.random.choice(['junior', 'senior'], n)
last_evaluation = np.random.uniform(0.4, 1, n)
average_monthly_hours = np.random.randint(100, 300, n)
time_spend_company = np.random.randint(1, 10, n)
number_of_projects = np.random.randint(1, 7, n)
work_accident = np.random.randint(0, 2, n)   # upper bound is exclusive; (0, 1, n) would give only zeros
promotion = np.random.choice([0, 1], n)
salary = np.random.choice(['low', 'medium', 'high'], n)
[24]: work_accident.shape
[24]: (1000,)
[25]: df = pd.DataFrame({
    'age': age,
    'gender': gender,
    'education': education,
    'job_level': job_level,
    'last_evaluation': last_evaluation,
    'average_monthly_hours': average_monthly_hours,
    'time_spend_company': time_spend_company,
    'number_of_projects': number_of_projects,
    'work_accident': work_accident,
    'promotion': promotion,
    'salary': salary,
})
[26]: print(df.isnull().sum())
age 0
gender 0
education 0
job_level 0
last_evaluation 0
average_monthly_hours 0
time_spend_company 0
number_of_projects 0
work_accident 0
promotion 0
salary 0
dtype: int64
[27]: df = df.drop(['number_of_projects','gender', 'last_evaluation'],axis=1)
df
[output: the reduced DataFrame, 1000 rows x 8 columns; display truncated in this export]
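The split below yields X with 7 numeric features, so the three remaining text columns (education, job_level, salary) must have been encoded in a cell that is missing from this export. A minimal sketch of one way to do it, assuming simple integer category codes:

for col in ['education', 'job_level', 'salary']:
    df[col] = df[col].astype('category').cat.codes   # assumed encoding; the original method is not shown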
[29]: X = df.drop('promotion', axis=1)
y = df['promotion']
# the split cell is missing from the export; a 70/30 split matches the shapes printed below
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
(700, 7)
(700,)
(300, 7)
(300,)
[31]: from sklearn.linear_model import LogisticRegression
C:\ProgramData\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:460:
ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[32]: LogisticRegression()
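The cell that fit the model and raised the warning above is missing; only its LogisticRegression() repr survives. A minimal sketch, assuming default settings plus the larger max_iter the warning recommends:

model = LogisticRegression(max_iter=1000)   # larger max_iter to address the convergence warning
model.fit(x_train, y_train)
y_pred = model.predict(x_test)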
plt.show()   # plt is already matplotlib.pyplot; the rest of this plotting cell is truncated in the export
[11]: iris=load_iris()
X=iris.data
y=iris.target
[15]: iris=load_iris()
X=iris.data
y=iris.target
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
[24]: clas = classification_report(y_test, y_pred)
print(clas)
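The cells that split the data, trained a classifier, and printed the confusion matrix above are missing from this export, and the classifier used is not recoverable. A minimal sketch of the evaluation flow, with SVC inserted purely as a hypothetical stand-in for whatever model was actually used:

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC                      # hypothetical stand-in; the actual classifier is not shown
from sklearn.metrics import confusion_matrix, classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
clf = SVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))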
[22]: iris=load_iris()
X=iris.data
y=iris.target
[31]: GaussianNB()
[[23 0 0]
[ 0 18 1]
[ 0 1 17]]
[34]: clas = classification_report(y_test, y_pred)
print(clas)
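Only the GaussianNB() repr and its confusion matrix survive here; the fit cells are missing. A minimal sketch of how such outputs are produced, with the 60/40 split inferred from the 60-sample test set and the random_state assumed:

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))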
[14]: iris=load_iris()
X=iris.data
y=iris.target
[60]: iris=load_iris()
X=iris.data
y=iris.target
[64]: KNeighborsClassifier(n_neighbors=7)
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
[67]: clas = classification_report(y_test, y_pred)
print(clas)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30
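As in the previous sections, only the estimator repr and its outputs survive; a minimal sketch of the missing KNN cells, with the split parameters assumed from the 30-sample test set:

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
knn = KNeighborsClassifier(n_neighbors=7)   # matches the repr shown above
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)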
[2]: iris=load_iris()
X=iris.data
y=iris.target
[11]: DecisionTreeClassifier()
[[23 0 0]
[ 0 19 0]
[ 0 0 18]]
[14]: clas = classification_report(y_test, y_pred)
print(clas)
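The DecisionTreeClassifier() repr and confusion matrix above likewise come from cells missing in this export; a minimal sketch, with the 60/40 split inferred from the 60-sample test set and other parameters assumed:

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))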
10 Clustering
[19]: df=pd.read_csv('Mall_Customers.csv')
[20]: df.columns
[21]: df.head()
[21]: CustomerID Genre Age Annual Income (k$) Spending Score (1-100)
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
[23]: df.head()
[plot output: 'Clusters of customers' over 'Annual Income (k$)'; the plotting cell is truncated in this export]
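The clustering itself is missing from this export; only the truncated plot labels above remain. A minimal sketch of a typical K-Means pass over the Mall_Customers data, where the number of clusters and the two features used are assumptions:

from sklearn.cluster import KMeans

X = df[['Annual Income (k$)', 'Spending Score (1-100)']].values   # assumed feature choice
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)          # assumed cluster count
labels = kmeans.fit_predict(X)

plt.scatter(X[:, 0], X[:, 1], c=labels, s=30)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=200, c='red', marker='X')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()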