0% found this document useful (0 votes)
10 views · 3 pages

Lokesh Py

Uploaded by

21491a0773
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views · 3 pages

Lokesh Py

Uploaded by

21491a0773
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 3

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.preprocessing import LabelEncoder as lb

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier

# Location of the employee CSV that the rest of the script analyses.
# NOTE(review): the original call passed no path at all, so this file name is
# a placeholder -- TODO point it at the real dataset before running.
DATA_PATH = "employee_attrition.csv"

# `read_cvs` does not exist on the pandas module (AttributeError); the reader
# is `read_csv`, and it requires the file path as its first argument.
df = pd.read_csv(DATA_PATH)

#df.info()

#print(df.head(10))

# Exploratory charts, shown one after another on a fresh 12x6 figure each:
# a count plot of Attrition, a 20-bin histogram of Age, and a box plot of
# Age grouped by Attrition.
_charts = (
    ('Distribution over Attrition',
     lambda: sns.countplot(data=df, x='Attrition')),
    ("Distribution over Age",
     lambda: sns.histplot(df['Age'], bins=20)),
    ("Age vs Attrition",
     lambda: sns.boxplot(data=df, x='Attrition', y='Age')),
)

for _title, _draw in _charts:
    plt.figure(figsize=(12, 6))
    _draw()
    plt.title(_title)
    plt.show()

# Remove rows with missing values, then exact duplicate rows.
# Both methods return a NEW DataFrame by default; the original statements
# discarded that result, so `df` was never actually cleaned. Rebinding `df`
# makes the clean-up take effect for everything that follows.
df = df.dropna().drop_duplicates()

# Swap each of these columns for the integer codes produced by a fresh
# LabelEncoder (imported under the alias `lb`), one encoder per column.
for _column in ('Attrition', 'BusinessTravel', 'Gender', 'MaritalStatus', 'OverTime'):
    _encoder = lb()
    df[_column] = _encoder.fit_transform(df[_column])

# Quick look at the first rows after encoding.
print(df.head())

#print(df['Department'].unique())
# Hand-written integer codes for the Department column.
# The original dict literal was never closed (missing `}`), which made the
# whole file a SyntaxError.
mapping = {
    'Sales': 1,
    'Research & Development': 2,
    'Human Resources': 3,
}

# Series.map turns any value that is not a key of `mapping` into NaN.
df['Department'] = df['Department'].map(mapping)

print(df.head())

# Columns used as model inputs, in the order the model will see them.
# The hand-written sample passed to `model.predict` later in the script
# lists its 18 values in this same order.
_feature_columns = [
    'Age',
    'BusinessTravel',
    'Department',
    'DistanceFromHome',
    'EnvironmentSatisfaction',
    'Gender',
    'JobInvolvement',
    'JobSatisfaction',
    'MaritalStatus',
    'MonthlyIncome',
    'NumCompaniesWorked',
    'OverTime',
    'PercentSalaryHike',
    'PerformanceRating',
    'RelationshipSatisfaction',
    'StandardHours',
    'WorkLifeBalance',
    'YearsSinceLastPromotion',
]

# Feature matrix and target column.
x = df[_feature_columns]
y = df['Attrition']

# Split the data: 25% of the rows are held out for evaluation.
# No random_state is passed, so the split (and therefore the score printed
# below) changes from run to run.
# `train_test_split` is already imported at the top of the file, so the
# duplicate mid-script import was dropped.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Learning model: a random forest with 100 trees, fitted on the training rows.
model = RandomForestClassifier(n_estimators=100)
model.fit(x_train, y_train)

# Accuracy on the held-out rows, printed as a percentage.
print(model.score(x_test, y_test) * 100)
# Score one hand-written employee record. The 18 values follow the column
# order of `x`; wrapping them in a DataFrame with those column names keeps
# the input consistent with the frame the model was fitted on (a bare
# list/tuple makes scikit-learn warn about missing feature names).
sample = pd.DataFrame(
    [(24, 2, 1, 52, 1, 1, 3, 4, 1, 2000, 1, 1, 5, 4, 3, 8, 1, 1)],
    columns=x.columns,
)
z = model.predict(sample)

print(z)

# `predict` returns an array, so the single prediction is read with z[0];
# the original `z(0)` tried to CALL the array and raised TypeError.
# The script treats class 0 as "no attrition" -- presumably LabelEncoder
# mapped the negative Attrition label to 0; confirm against the raw data.
if z[0] == 0:
    print("No Attrition")
else:
    print("Employee Attrition")

You might also like