
HOUSE PRICING

Here's a complete project for predicting house prices with a machine learning model. It covers loading the data, preprocessing it, training a model, and evaluating its performance, using the classic Boston Housing dataset. Because load_boston was removed from scikit-learn in version 1.2, the code below loads the dataset directly from its original source.

Step 1: Import Libraries

python

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

Step 2: Load and Explore the Data

python

# load_boston was removed from scikit-learn 1.2; load the Boston housing
# data directly from the original CMU source instead.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
boston_df = pd.DataFrame(data, columns=feature_names)
boston_df['MEDV'] = target

# Display the first few rows of the dataset
print(boston_df.head())

# Display summary statistics
print(boston_df.describe())

# Check for missing values
print(boston_df.isnull().sum())

Step 3: Data Visualization

python

# Plot the distribution of the target variable
sns.histplot(boston_df['MEDV'], kde=True)
plt.title('Distribution of MEDV (Median Value of Owner-Occupied Homes)')
plt.xlabel('MEDV')
plt.ylabel('Frequency')
plt.show()

# Plot correlation matrix
plt.figure(figsize=(12, 10))
sns.heatmap(boston_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Scatter plots for important features
sns.scatterplot(data=boston_df, x='RM', y='MEDV')
plt.title('RM vs MEDV')
plt.show()

sns.scatterplot(data=boston_df, x='LSTAT', y='MEDV')
plt.title('LSTAT vs MEDV')
plt.show()

Step 4: Preprocess the Data

python

# Define features and target variable
X = boston_df.drop('MEDV', axis=1)
y = boston_df['MEDV']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (fit on the training set only to avoid leakage)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Step 5: Train the Model

python

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
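
Once the model is fitted, its coefficients show how each standardized feature contributes to the predicted price. A quick, optional way to inspect them (not in the original listing), assuming the X defined in Step 4:

python

# Coefficients are directly comparable because the features were standardized
coefficients = pd.Series(model.coef_, index=X.columns).sort_values()
print(coefficients)
print('Intercept:', model.intercept_)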

Step 6: Evaluate the Model

python

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Plot predicted vs actual values (the red line marks perfect predictions)
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2)
plt.xlabel('Actual MEDV')
plt.ylabel('Predicted MEDV')
plt.title('Actual vs Predicted MEDV')
plt.show()
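
A single train/test split can be noisy on a dataset this small (506 rows). As an optional sanity check (not in the original listing), you can cross-validate the same scaler-plus-regression pipeline:

python

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# 5-fold cross-validated R^2 on the full dataset
pipeline = make_pipeline(StandardScaler(), LinearRegression())
cv_scores = cross_val_score(pipeline, X, y, cv=5, scoring='r2')
print(f'Cross-validated R^2: {cv_scores.mean():.3f} +/- {cv_scores.std():.3f}')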

Step 7: Save and Load the Model

python

import joblib

# Save the model
joblib.dump(model, 'house_price_model.pkl')

# Load the model
loaded_model = joblib.load('house_price_model.pkl')

# Make predictions with the loaded model
loaded_model_predictions = loaded_model.predict(X_test)

Step 8: Conclusion

This project demonstrates a basic end-to-end machine learning pipeline for predicting house prices
using the Boston Housing dataset. You can further improve this project by experimenting with
different machine learning algorithms, feature engineering techniques, and hyperparameter tuning.
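
For example, here is a minimal sketch of the hyperparameter tuning mentioned above, using a RandomForestRegressor as an illustrative alternative model (the model choice and grid are assumptions, not part of the original project):

python

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Small illustrative grid; widen it for a real search
param_grid = {'n_estimators': [100, 300], 'max_depth': [None, 10]}
grid = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=5, scoring='r2')
grid.fit(X_train, y_train)
print('Best parameters:', grid.best_params_)
print('Best cross-validated R^2:', grid.best_score_)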

Full Code

For convenience, here is the full code block:

python

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# load_boston was removed from scikit-learn 1.2; load the Boston housing
# data directly from the original CMU source instead.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
boston_df = pd.DataFrame(data, columns=feature_names)
boston_df['MEDV'] = target

# Display the first few rows of the dataset
print(boston_df.head())

# Display summary statistics
print(boston_df.describe())

# Check for missing values
print(boston_df.isnull().sum())

# Plot the distribution of the target variable
sns.histplot(boston_df['MEDV'], kde=True)
plt.title('Distribution of MEDV (Median Value of Owner-Occupied Homes)')
plt.xlabel('MEDV')
plt.ylabel('Frequency')
plt.show()

# Plot correlation matrix
plt.figure(figsize=(12, 10))
sns.heatmap(boston_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Scatter plots for important features
sns.scatterplot(data=boston_df, x='RM', y='MEDV')
plt.title('RM vs MEDV')
plt.show()

sns.scatterplot(data=boston_df, x='LSTAT', y='MEDV')
plt.title('LSTAT vs MEDV')
plt.show()

# Define features and target variable
X = boston_df.drop('MEDV', axis=1)
y = boston_df['MEDV']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Plot predicted vs actual values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2)
plt.xlabel('Actual MEDV')
plt.ylabel('Predicted MEDV')
plt.title('Actual vs Predicted MEDV')
plt.show()

# Save the model
joblib.dump(model, 'house_price_model.pkl')

# Load the model
loaded_model = joblib.load('house_price_model.pkl')

# Make predictions with the loaded model
loaded_model_predictions = loaded_model.predict(X_test)

This project provides a solid foundation for building and deploying a house price prediction model
using machine learning. Feel free to expand on this by exploring more advanced models, feature
engineering, and model evaluation techniques.

DIABETES DETECTION

Here's a complete machine learning project for predicting diabetes using the Pima Indians Diabetes Database. It covers loading the data, preprocessing it, training a model, and evaluating its performance.

Predicting Diabetes with Machine Learning

Step 1: Import Libraries

python

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

Step 2: Load and Explore the Data

The dataset is loaded directly from the URL in the code below, so there is no need to download it manually.

python

# Load the Pima Indians Diabetes dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
column_names = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
                'DiabetesPedigreeFunction', 'Age', 'Outcome']
diabetes_df = pd.read_csv(url, header=None, names=column_names)

# Display the first few rows of the dataset
print(diabetes_df.head())

# Display summary statistics
print(diabetes_df.describe())

# Check for missing values
print(diabetes_df.isnull().sum())

Step 3: Data Visualization

python

# Plot the distribution of the target variable
sns.countplot(data=diabetes_df, x='Outcome')
plt.title('Distribution of Outcome (0 = No Diabetes, 1 = Diabetes)')
plt.xlabel('Outcome')
plt.ylabel('Count')
plt.show()

# Plot correlation matrix
plt.figure(figsize=(12, 10))
sns.heatmap(diabetes_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Pairplot colored by outcome (slow: it draws a grid of all feature pairs)
sns.pairplot(diabetes_df, hue='Outcome')
plt.show()

Step 4: Preprocess the Data

python

# Define features and target variable
X = diabetes_df.drop('Outcome', axis=1)
y = diabetes_df['Outcome']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Step 5: Train the Model

python

# Initialize and train the RandomForest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
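
The Outcome classes are imbalanced (roughly twice as many non-diabetic rows as diabetic ones), which can bias the classifier toward the majority class. An optional variant (not in the original listing) that weights classes inversely to their frequency:

python

# class_weight='balanced' reweights samples inversely to class frequency
balanced_model = RandomForestClassifier(n_estimators=100, class_weight='balanced',
                                        random_state=42)
balanced_model.fit(X_train, y_train)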

Step 6: Evaluate the Model

python

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Diabetes', 'Diabetes'],
            yticklabels=['No Diabetes', 'Diabetes'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
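
Accuracy alone can be misleading on an imbalanced dataset like this one; a ROC curve and its AUC give a fuller picture. A minimal optional sketch (not in the original listing), assuming the model and test split from above:

python

from sklearn.metrics import roc_curve, roc_auc_score

# Use predicted probabilities for the positive (diabetes) class
y_proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_proba)
auc = roc_auc_score(y_test, y_proba)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC curve (AUC = {auc:.3f})')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()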

Step 7: Save and Load the Model

python

# Save the model
joblib.dump(model, 'diabetes_model.pkl')

# Load the model
loaded_model = joblib.load('diabetes_model.pkl')

# Make predictions with the loaded model
loaded_model_predictions = loaded_model.predict(X_test)

# Verify the predictions
print(f'Accuracy of loaded model: {accuracy_score(y_test, loaded_model_predictions)}')
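
Since the classifier is a random forest, you can also inspect which features drive its predictions. A short optional sketch (not in the original listing) using the fitted model's feature_importances_:

python

# Feature importances from the fitted random forest
importances = pd.Series(model.feature_importances_, index=X.columns).sort_values()

plt.figure(figsize=(8, 6))
importances.plot(kind='barh')
plt.title('Random Forest Feature Importances')
plt.xlabel('Importance')
plt.show()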

HANDWRITTEN DIGITS (MNIST)

This project trains a convolutional neural network on the MNIST handwritten digit dataset, following the same load, preprocess, train, and evaluate flow as the projects above.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical

# Load the MNIST dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Explore the dataset
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)
print("Training labels shape:", y_train.shape)
print("Testing labels shape:", y_test.shape)

# Display the first image in the training dataset
plt.figure(figsize=(5, 5))
plt.imshow(X_train[0], cmap='gray')
plt.title(f"Label: {y_train[0]}")
plt.show()

# Reshape the data to fit the model (add a single grayscale channel)
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

# Normalize pixel values to the range [0, 1]
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# One-hot encode the labels
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

# Initialize the model
model = Sequential()

# Add convolutional layers
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Add fully connected layers
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Display the model architecture
model.summary()

# Train the model, holding out 20% of the training data for validation
history = model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.2, verbose=1)

# Plot training & validation accuracy values
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

# Plot training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()

# Evaluate the model on the test set
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Make predictions on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Print classification report
print("Classification Report:\n", classification_report(y_true, y_pred_classes))

# Plot confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
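
Note that Keras models are not saved with joblib; Keras provides its own serialization. A minimal sketch for persisting and reloading the trained network (the filename is illustrative):

# Save the trained model in the native Keras format
# (use 'mnist_cnn.h5' instead on older TensorFlow versions)
model.save('mnist_cnn.keras')

# Load it back and confirm it scores the same on the test set
from tensorflow.keras.models import load_model
loaded_model = load_model('mnist_cnn.keras')
loaded_score = loaded_model.evaluate(X_test, y_test, verbose=0)
print('Loaded model test accuracy:', loaded_score[1])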
