Code PLFS MVPA
Code PLFS MVPA
In [1]: # Step 1
# Load the PLFS 2022-23 workbook into a DataFrame.
# The workbook location can be overridden with the PLFS_XLSX_PATH environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset or empty, the original desktop path is used unchanged.
import os
import pandas as pd
df = pd.read_excel(os.environ.get('PLFS_XLSX_PATH') or 'C:/Users/user/Desktop/PLFS_2022_23.xlsx')
Out[2]:
['Sector',
'State',
'Religion',
'Social Group',
'Sex',
'Age',
'Marital Status',
'General Education',
'Technical Education',
'No of years in formal education',
'Status of Current Attendance in Educational Institution',
'Whether received any Vocational/ Technical Training',
'Duration of Training',
'Status Code',
'Industry Code',
'Whether Engaged in any work in Subsidiary Capacity',
'No of Workers in the Enterprise',
'Type of Job Contract',
'Eligible of Paid Leave',
'Social Security Benefits',
'Earning for Regular Salaried/ Wage Workers',
'Earnings for Self Employed']
In [4]: df['Sector'].value_counts()  # number of rows for each distinct value of 'Sector'
Out[4]:
0 56713
1 31542
Name: Sector, dtype: int64
In [6]: df['Religion'].value_counts()  # number of rows for each distinct value of 'Religion'
Out[6]:
1 72706
0 15549
Name: Religion, dtype: int64
Out[7]:
0 71226
1 17029
Name: Social Group, dtype: int64
Out[8]:
1 45439
0 42816
Name: Sex, dtype: int64
df['Whether received any Vocational/ Technical Training'] = df['Whether received any Vocational/ Technical Training'].apply(lambd
df['Whether Engaged in any work in Subsidiary Capacity'] = df['Whether Engaged in any work in Subsidiary Capacity'].apply(lambda
import numpy as np
# Small positive constant. Its use is not visible in this part of the notebook;
# presumably it guards a later log transform against zero values -- TODO confirm.
epsilon = 1e-7

# Quadratic terms for age and for years of formal education.
df['Age_sq'] = df['Age'].pow(2)
df['Formal_Edu_sq'] = df['No of years in formal education'].pow(2)
import statsmodels.api as sm
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.05e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [20]: # Step - 16
# Report the variance inflation factor (VIF) table for Model 1.
# `vif_data` is built by code that is not visible in this extract; here it is
# only printed, one row per regressor (including the constant).
print(vif_data)
Variable VIF
0 const 145.780813
1 Sector 1.190473
2 Religion 1.066603
3 Sex 1.091667
4 Age 48.178772
5 Social Group 1.086503
6 General Education 5.226346
7 Marital Status 1.666061
8 Technical Education 1.353767
9 No of years in formal education 15.317316
10 Whether received any Vocational/ Technical Tra... 1.045240
11 Whether Engaged in any work in Subsidiary Capa... 1.182732
12 Type of Job Contract 1.218561
13 Age_sq 44.150284
14 Formal_Edu_sq 25.843036
In [21]: # Step - 17
# MODEL No. 2 - Estimating Earnings for Self Employed
# statsmodels is re-imported here although an earlier cell already imports it.
import statsmodels.api as sm
# Response variable for Model 2: the 'log_self' column of `df_self`.
# Both `df_self` and 'log_self' are created in cells not visible in this
# extract; presumably 'log_self' is the log of self-employed earnings -- TODO confirm.
y = df_self['log_self']
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.34e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [22]: # Step - 18
# Report the variance inflation factor (VIF) table for Model 2.
# `vif_data` is built by code that is not visible in this extract; here it is
# only printed, one row per regressor (including the constant).
print(vif_data)
Variable VIF
0 const 263.213006
1 Sector 1.163624
2 Religion 1.074025
3 Sex 1.220613
4 Age 44.292376
5 Social Group 1.096183
6 General Education 4.291929
7 Marital Status 1.206137
8 Technical Education 1.179082
9 No of years in formal education 16.354550
10 Whether received any Vocational/ Technical Tra... 1.109433
11 Whether Engaged in any work in Subsidiary Capa... 1.116591
12 Type of Job Contract 1.026795
13 Age_sq 43.738403
14 Formal_Edu_sq 25.047101