# Data Modeling, Featurization, and Visualization Example
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
# Toy housing dataset: one list per column, five rows in total.
data = dict(
    Area=[1000, 1500, 2000, 2500, 1200],
    Rooms=[2, 3, 4, 4, 2],
    City=['Mumbai', 'Delhi', 'Delhi', 'Mumbai', 'Chennai'],
    Price=[50, 70, 100, 110, 60],  # in Lakhs
)

# Column-oriented dict -> DataFrame (keys become the column names).
df = pd.DataFrame.from_dict(data)

# Show the raw table under a heading, one item per line.
print("Sample Data:", df, sep="\n")
# One-hot encode the categorical 'City' column into a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and later
# removed the old spelling, so try the current keyword first and fall back
# to the legacy one on older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

# Double brackets keep the input two-dimensional, as the encoder requires.
city_encoded = encoder.fit_transform(df[['City']])

# Names of the generated indicator columns, one per encoded category.
city_labels = encoder.get_feature_names_out(['City'])
# Scatter plot of price against area, drawn through the explicit
# Figure/Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=df, x='Area', y='Price', ax=ax)
ax.set_title('Area vs Price')
ax.set_xlabel('Area (sq ft)')
ax.set_ylabel('Price (Lakhs)')
ax.grid(True)
plt.show()
# Data Modeling, Featurization, and Visualization - Python Example
# Assemble the model-ready frame: the numeric columns of `df` plus the
# one-hot city indicator columns. Reusing df's index keeps the rows aligned
# when the two frames are concatenated side by side.
city_frame = pd.DataFrame(city_encoded, columns=city_labels, index=df.index)
df_featurized = pd.concat([df.drop('City', axis=1), city_frame], axis=1)

# Features are every column except the target; the target is 'Price'.
X = df_featurized.drop('Price', axis=1)
y = df_featurized['Price']

# Hold out part of the data for evaluation. The split ratio and seed are
# choices made here (the script did not specify any); the fixed seed keeps
# the printed results reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit ordinary least-squares regression on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict prices for the held-out rows.
predicted = model.predict(X_test)

# Report each held-out price next to the model's estimate.
print("Predicted vs Actual:")
for actual, estimate in zip(y_test, predicted):
    print(f"Actual: {actual} Lakhs, Predicted: {round(estimate, 2)} Lakhs")