Simple_and_Multiple_Regression
Simple_and_Multiple_Regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
#Reading the dataset
#dataset = pd.read_csv("https://raw.githubusercontent.com/Satyajeet-IITDelhi/sales/main/SLRSales.csv")
In [3]:
#Reading the dataset
dataset = pd.read_csv("C:/NeuralNetwork/MRMSL861/SLRSales.csv")
In [4]:
dataset.head()
0 43.6 13.9
1 38.0 12.0
2 30.1 9.3
3 35.3 9.7
4 46.4 12.3
In [5]:
#Model Building
#Simple Linear Regresion
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
In [6]:
#Setting the value for X and Y
x = dataset[['Adv_Exp']]
y = dataset['Sales']
In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 100)
In [8]:
slr= LinearRegression()
slr.fit(x_train, y_train)
Out[8]: LinearRegression()
In [9]:
#Printing the model coefficients
print('Intercept: ', slr.intercept_)
print('Coefficient:', slr.coef_)
Intercept: 14.462716405605931
Coefficient: [2.08367683]
In [10]:
print('Regression Equation: Sales = 14.46 + 2.08 * Adv_Exp')
In [11]:
import statsmodels.api as sm
In [12]:
#fit linear regression model
model = sm.OLS(y, x).fit()
In [13]:
#view model summary
print(model.summary())
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\Users\Satyajeet\anaconda3\lib\site-packages\scipy\stats\_stats_py.py:1736: UserWarning: kurtosistest only valid for n>=20 ... c
ontinuing anyway, n=12
warnings.warn("kurtosistest only valid for n>=20 ... continuing "
In [15]:
#Reading the dataset
dataset = pd.read_csv("https://raw.githubusercontent.com/Harshita0109/Sales-Prediction/master/advertising.csv")
In [16]:
dataset.head()
In [17]:
#Exploratory Data Analysis
#Distribution of the target variable
sns.distplot(dataset['Sales']);
In [18]:
#Exploratory Data Analysis
#Distribution of the Independent variable(IV)
sns.distplot(dataset['TV']);
In [21]:
#Heatmap
sns.heatmap(dataset.corr(), annot = True)
plt.show()
In [22]:
#Multiple Linear Regression(MLR)
#Equation: Sales = β0 + (β1 * TV) + (β2 * Radio) + (β3 * Newspaper)
#Setting the value for X and Y
x = dataset[['TV', 'Radio', 'Newspaper']]
y = dataset['Sales']
In [23]:
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.3, random_state=100)
In [24]:
mlr= LinearRegression()
mlr.fit(x_train, y_train)
Out[24]: LinearRegression()
In [25]:
#Printing the model coefficients
print(mlr.intercept_)
# pair the feature names with the coefficients
list(zip(x, mlr.coef_))
4.334595861728431
Out[25]: [('TV', 0.053829108667250075),
('Radio', 0.11001224388558056),
('Newspaper', 0.006289950146130346)]
In [26]:
import statsmodels.api as sm
In [27]:
#fit linear regression model
model = sm.OLS(y, x).fit()
In [28]:
#view model summary
print(model.summary())
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [ ]:
In [ ]: