0% found this document useful (0 votes)
88 views3 pages

Salary Prediction - Ipynb

This document loads and cleans a dataset using Pandas and NumPy. It then splits the data into training and testing sets. A simple linear regression model is fitted to the training set and used to make predictions on the testing set. Various steps are shown, including saving the model, visualizing results and making a new prediction.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
88 views3 pages

Salary Prediction - Ipynb

This document loads and cleans a dataset using Pandas and NumPy. It then splits the data into training and testing sets. A simple linear regression model is fitted to the training set and used to make predictions on the testing set. Various steps are shown, including saving the model, visualizing results and making a new prediction.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 3

{

"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Load data\n",
"# Features are every column except the last; the target is the last column.\n",
"dataset = pd.read_csv('Dataset.csv')\n",
"X = dataset.iloc[:, :-1].values\n",
"# Slice with -1: (not 1:) so the target never overlaps the feature columns\n",
"# when the file has more than two columns; the result stays 2-D (n_rows, 1).\n",
"y = dataset.iloc[:, -1:].values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview the first five rows of the loaded frame\n",
"dataset.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 2: Split data into training and testing sets\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out one third of the rows for testing; the fixed random_state\n",
"# makes the split repeatable across runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=1/3, random_state=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Leave the frame as the cell's last expression so Jupyter renders it\n",
"# with its rich table display instead of the plain-text output of print().\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 3: Fit a simple linear regression model on the training split\n",
"from sklearn.linear_model import LinearRegression\n",
"\n",
"regressor = LinearRegression()\n",
"# fit() is the last expression, so the fitted estimator is displayed\n",
"regressor.fit(X=X_train, y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 4: Predict on the held-out test split\n",
"y_pred = regressor.predict(X=X_test)\n",
"# The last expression is displayed: the model's score on the test split\n",
"regressor.score(X=X_test, y=y_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import joblib"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist the fitted model to disk with joblib\n",
"filename = 'final_model.sav'\n",
"joblib.dump(value=regressor, filename=filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 5: Visualize training set results\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Observed training points in red\n",
"plt.scatter(x=X_train, y=y_train, color='red')\n",
"# Fitted regression line in blue\n",
"plt.plot(X_train, regressor.predict(X=X_train), color='blue')\n",
"# Axis labels and title\n",
"plt.xlabel('Years of Experience')\n",
"plt.ylabel('Salary')\n",
"plt.title('Salary vs Experience (Training set)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 6: Visualize test set results\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Observed test points in red\n",
"plt.scatter(x=X_test, y=y_test, color='red')\n",
"# Regression line in blue, drawn from the training inputs exactly as in Step 5\n",
"plt.plot(X_train, regressor.predict(X=X_train), color='blue')\n",
"# Axis labels and title\n",
"plt.xlabel('Years of Experience')\n",
"plt.ylabel('Salary')\n",
"plt.title('Salary vs Experience (Test set)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step 7: Make a new prediction\n",
"# A single input value is passed as a nested list (one sample, one feature)\n",
"new_salary_pred = regressor.predict([[15]])\n",
"print('The predicted salary of a person with 15 years experience is ', new_salary_pred)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "7925522ca4a6094676ae94dc980a50ecbd104794f80cac50db22517788b123ca"
},
"kernelspec": {
"display_name": "Python 3.10.0 64-bit",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.0"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

You might also like