# Lokesh Py
# --- Load the dataset and draw a few exploratory plots ----------------------
# NOTE(review): the original call was `pd.read_cvs()` -- a typo for `read_csv`
# and with no file argument, so it could never run. The filename below is a
# placeholder; point DATA_PATH at the actual attrition CSV.
DATA_PATH = "employee_attrition.csv"
df = pd.read_csv(DATA_PATH)
#df.info()
#print(df.head(10))

# Number of rows per Attrition value.
plt.figure(figsize=(12, 6))
sns.countplot(x='Attrition', data=df)
plt.show()

# Histogram of Age using 20 bins.
plt.figure(figsize=(12, 6))
sns.histplot(df['Age'], bins=20)
plt.show()

# Box plot of Age grouped by Attrition value.
plt.figure(figsize=(12, 6))
sns.boxplot(x='Attrition', y='Age', data=df)
plt.title("Age vs Attrition")
plt.show()
# --- Clean the data and turn categorical columns into integers --------------
# dropna() and drop_duplicates() return new frames rather than modifying `df`,
# so the result has to be assigned back; the original calls discarded it and
# therefore cleaned nothing. reset_index keeps the row labels contiguous after
# rows have been removed.
df = df.dropna().drop_duplicates().reset_index(drop=True)

# Replace each categorical column with integer codes, using a fresh encoder
# per column. `lb` is defined outside this view -- presumably scikit-learn's
# LabelEncoder imported under an alias; confirm against the import block.
for column in ('Attrition', 'BusinessTravel', 'Gender', 'MaritalStatus', 'OverTime'):
    df[column] = lb().fit_transform(df[column])
print(df.head())

#print(df['Department'].unique())
# Department is encoded by hand. The original literal was missing its closing
# brace and skipped code 2, so any department other than the two listed became
# NaN after .map().
# NOTE(review): 'Research & Development' -> 2 is inferred from the gap in the
# numbering; verify against df['Department'].unique().
mapping = {
    'Sales': 1,
    'Research & Development': 2,
    'Human Resources': 3,
}
df['Department'] = df['Department'].map(mapping)
print(df.head())
# --- Build the feature matrix / target and split into train and test sets ---
# Columns used as model inputs; the order here is the column order of `x`.
feature_columns = [
    'Age',
    'BusinessTravel',
    'Department',
    'DistanceFromHome',
    'EnvironmentSatisfaction',
    'Gender',
    'JobInvolvement',
    'JobSatisfaction',
    'MaritalStatus',
    'MonthlyIncome',
    'NumCompaniesWorked',
    'OverTime',
    'PercentSalaryHike',
    'PerformanceRating',
    'RelationshipSatisfaction',
    'StandardHours',
    'WorkLifeBalance',
    'YearsSinceLastPromotion',
]
x = df[feature_columns]
y = df['Attrition']

# Hold out a quarter of the rows for evaluation. No random_state is passed,
# so the split differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
)
# --- Learning model: fit, score, and classify one hand-written record -------
model = RandomForestClassifier(n_estimators=100)
model.fit(x_train, y_train)

# Accuracy on the held-out rows, printed as a percentage.
print(model.score(x_test, y_test) * 100)

# One sample employee; the values are positional and must follow the column
# order of `x`. Wrapping the row in a DataFrame with those column names keeps
# the input in the same shape and labelling the model was fitted on.
sample = pd.DataFrame(
    [(24, 2, 1, 52, 1, 1, 3, 4, 1, 2000, 1, 1, 5, 4, 3, 8, 1, 1)],
    columns=x.columns,
)
z = model.predict(sample)
print(z)

# predict() returns an array with one label per input row, so the single
# result is read by indexing. The original `z(0)` tried to call the array.
if z[0] == 0:
    print("No Attrition")
else:
    print("Employee Attrition")