# Predict Inflation Using Random Forest Regression
# Predict Inflation Using Random Forest Regression
import numpy as np
import pandas as pd
# Read the inflation data from a comma-separated, Latin-1 encoded CSV file.
dset = pd.read_csv("/content/Data_Inflation.csv",sep=",", encoding='latin-1')
# NOTE(review): the bare expressions in this section (value_counts(), head(),
# isnull().sum()) compute a value that is discarded when this runs as a plain
# script; they look like leftover notebook inspection cells.
dset.dtypes.value_counts()
print(dset.columns.tolist())
dset['Subregion'].value_counts()
dset.info()
# Drop every row that contains at least one missing value.
dset = dset.dropna(axis=0)
dset.isnull().sum()
dset.info()
# Replace the placeholder string 'not available' in the Inflation column with
# NaN. This happens after the dropna() above, so the rows that become NaN here
# are kept in dset.
dset['Inflation']= dset['Inflation'].replace('not available', np.nan)
dset.head(10)
# NOTE(review): dset_one_hot_encoding is not defined anywhere in the visible
# code -- presumably a one-hot encoded version of dset; confirm where it is
# built before relying on these prints.
print("Dataset dimension",dset_one_hot_encoding.shape)
print(dset_one_hot_encoding)
dset.isnull().sum()
# Divide the data into two groups: rows whose Inflation value is missing (NaN)
# and rows where it is present.
# NOTE(review): dset_one_hot_encoding is not defined in the visible code;
# confirm it is created before this point.
column_with_nan = "Inflation"
# Each right-hand side is wrapped in the indexing brackets so the statement can
# span several lines; a bare line break directly after "=" is a SyntaxError.
data_with_nan = dset_one_hot_encoding[
    dset_one_hot_encoding[column_with_nan].isnull()
]
data_without_nan = dset_one_hot_encoding[
    dset_one_hot_encoding[column_with_nan].notnull()
]
# NOTE(review): X_train and y_train are not defined in the visible code; the
# .iloc/.values usage below assumes they are pandas objects -- confirm.
# Keep a small sample (row positions 1 through 5) of the training data for a
# quick look at the model's predictions further down.
xx_train = X_train.iloc[1:6]
yy_train = y_train.iloc[1:6]
xx_train
# Rebind the training data and the sample to their underlying array values.
# This must come after the .iloc slicing above.
X_train = X_train.values
y_train = y_train.values
xx_train = xx_train.values
yy_train = yy_train.values
X_train
# Fit a regressor constructed with no arguments (no hyperparameters and no
# random_state are passed).
# NOTE(review): RandomForestRegressor is not imported in the visible code --
# presumably sklearn.ensemble.RandomForestRegressor; confirm.
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
# Print the shapes of the training arrays.
Train_x = X_train.shape
Train_y = y_train.shape
print("X_train",Train_x)
print("y_train",Train_y)
# NOTE(review): x_test is not defined in the visible code -- confirm where the
# test split is created.
test_x = x_test.shape
print("X_test",test_x)
# Predict on the sample taken from the training data and print the predictions
# followed by the true targets. These rows were part of the data passed to
# fit(), so this is a sanity check rather than an out-of-sample evaluation.
predicted_values = rf.predict(xx_train)
print(predicted_values)
print(yy_train)
# Bare expression: its result is discarded when run as a script.
type(yy_train)
# NOTE(review): lr is not defined in the visible code -- presumably a second
# model (e.g. a linear regression) created in a missing cell; confirm.
lr.fit(X_train,y_train)