House Pricing
Sure! Here's complete project code for predicting house prices with a machine learning model.
This project involves loading the data, preprocessing it, training a model, and evaluating its
performance. We will use the famous Boston Housing dataset. Note that this dataset was removed
from scikit-learn in version 1.2, so the load_boston() call below requires an older scikit-learn release.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston  # removed in scikit-learn 1.2; needs an older release

# Load the dataset into a DataFrame and inspect it
boston = load_boston()
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['MEDV'] = boston.target
print(boston_df.head())
print(boston_df.describe())
# Distribution of the target variable
sns.histplot(boston_df['MEDV'], kde=True)
plt.xlabel('MEDV')
plt.ylabel('Frequency')
plt.show()

# Correlation matrix of all features
plt.figure(figsize=(12, 10))
sns.heatmap(boston_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Average number of rooms vs. price
plt.scatter(boston_df['RM'], boston_df['MEDV'])
plt.title('RM vs MEDV')
plt.show()

# Lower-status population share vs. price
plt.scatter(boston_df['LSTAT'], boston_df['MEDV'])
plt.title('LSTAT vs MEDV')
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features and target
X = boston_df.drop('MEDV', axis=1)
y = boston_df['MEDV']

# Split the data into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.linear_model import LinearRegression

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
from sklearn.metrics import r2_score

# Evaluate the model on the test set
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f'R^2 Score: {r2}')

# Compare actual and predicted prices
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.xlabel('Actual MEDV')
plt.ylabel('Predicted MEDV')
plt.show()
import joblib

# Save the trained model to disk, then reload it and make predictions
joblib.dump(model, 'house_price_model.pkl')
loaded_model = joblib.load('house_price_model.pkl')
loaded_model_predictions = loaded_model.predict(X_test)
Step 8: Conclusion
This project demonstrates a basic end-to-end machine learning pipeline for predicting house prices
using the Boston Housing dataset. You can further improve this project by experimenting with
different machine learning algorithms, feature engineering techniques, and hyperparameter tuning.
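As one concrete way to try a different algorithm with hyperparameter tuning, the sketch below wraps a random forest in a grid search. It assumes the X_train, y_train, X_test, and y_test arrays from the steps above, and the parameter grid values are illustrative rather than recommended settings.

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Illustrative hyperparameter grid (values are assumptions, not tuned recommendations)
param_grid = {'n_estimators': [100, 200], 'max_depth': [None, 10, 20]}
grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)
print('Best parameters:', grid_search.best_params_)
print('Test R^2:', grid_search.best_estimator_.score(X_test, y_test))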
Full Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.datasets import load_boston  # removed in scikit-learn 1.2; needs an older release
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the dataset into a DataFrame and inspect it
boston = load_boston()
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['MEDV'] = boston.target
print(boston_df.head())
print(boston_df.describe())
print(boston_df.isnull().sum())

# Distribution of the target variable
sns.histplot(boston_df['MEDV'], kde=True)
plt.xlabel('MEDV')
plt.ylabel('Frequency')
plt.show()

# Correlation matrix of all features
plt.figure(figsize=(12, 10))
sns.heatmap(boston_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Average number of rooms vs. price
plt.scatter(boston_df['RM'], boston_df['MEDV'])
plt.title('RM vs MEDV')
plt.show()

# Lower-status population share vs. price
plt.scatter(boston_df['LSTAT'], boston_df['MEDV'])
plt.title('LSTAT vs MEDV')
plt.show()

# Split the data and standardize the features
X = boston_df.drop('MEDV', axis=1)
y = boston_df['MEDV']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f'R^2 Score: {r2}')
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.xlabel('Actual MEDV')
plt.ylabel('Predicted MEDV')
plt.show()

# Save the model, then reload it and make predictions
joblib.dump(model, 'house_price_model.pkl')
loaded_model = joblib.load('house_price_model.pkl')
loaded_model_predictions = loaded_model.predict(X_test)
This project provides a solid foundation for building and deploying a house price prediction model
using machine learning. Feel free to expand on this by exploring more advanced models, feature
engineering, and model evaluation techniques.
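For example, one common model evaluation technique beyond a single train/test split is k-fold cross-validation. The sketch below assumes the X and y DataFrames and the scikit-learn imports from the full code above; the choice of 5 folds is arbitrary.

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# Put scaling inside a pipeline so each fold is standardized independently
pipeline = make_pipeline(StandardScaler(), LinearRegression())
scores = cross_val_score(pipeline, X, y, cv=5, scoring='r2')
print('Cross-validated R^2 scores:', scores)
print('Mean R^2:', scores.mean())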
DIABETES DETECTION
Sure! Here's a complete machine learning project for predicting diabetes using the Pima Indians
Diabetes Database. This project involves loading the data, preprocessing it, training a model, and
evaluating its performance.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
You can read the dataset directly from the URL used below, or download it first and save it in your working directory, as sketched next.
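A minimal download step using only Python's standard library is shown here; the local filename pima-indians-diabetes.csv is an assumed choice.

import urllib.request

# Save a local copy of the CSV next to the script (filename is an assumed choice)
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
urllib.request.urlretrieve(url, "pima-indians-diabetes.csv")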
# Load the dataset; the file has no header row, so the standard column names are supplied
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
           'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
diabetes_df = pd.read_csv(url, names=columns)

# Display the first rows, summary statistics, and missing-value counts
print(diabetes_df.head())
print(diabetes_df.describe())
print(diabetes_df.isnull().sum())
# Class balance of the target variable
sns.countplot(x='Outcome', data=diabetes_df)
plt.xlabel('Outcome')
plt.ylabel('Count')
plt.show()

# Correlation matrix
plt.figure(figsize=(12, 10))
sns.heatmap(diabetes_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Pairwise feature relationships colored by outcome
sns.pairplot(diabetes_df, hue='Outcome')
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features and target
X = diabetes_df.drop('Outcome', axis=1)
y = diabetes_df['Outcome']

# Split the data into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.linear_model import LogisticRegression

# Train a classifier; logistic regression is an assumed choice here
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Evaluate the model on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
# Save the trained model, then reload it and make predictions
joblib.dump(model, 'diabetes_model.pkl')
loaded_model = joblib.load('diabetes_model.pkl')
loaded_model_predictions = loaded_model.predict(X_test)
The next project builds a convolutional neural network for handwritten digit classification; the MNIST dataset is assumed below.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
import joblib

# Load the image data (MNIST assumed)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
plt.figure(figsize=(5,5))
plt.imshow(X_train[0], cmap='gray')
plt.title(f"Label: {y_train[0]}")
plt.show()
# Scale pixel values to the [0, 1] range
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# Add a channel dimension so the images match the CNN input shape
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)
# Build the CNN; a typical configuration is assumed for the convolutional layers
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.summary()

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the model (epochs, batch size, and validation split are assumed values)
history = model.fit(X_train, y_train, validation_split=0.1, epochs=10, batch_size=128)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.show()
# Confusion matrix on the test set
y_pred = np.argmax(model.predict(X_test), axis=1)
conf_matrix = tf.math.confusion_matrix(y_test, y_pred).numpy()
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()