RainFall - Prediction - Ipynb - Colaboratory
RainFall - Prediction - Ipynb - Colaboratory
ipynb - Colaboratory
import libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import seaborn as sns
load dataset
data=pd.read_csv('/content/weatherAUS.csv')
data.shape
(142193, 24)
data.head()
Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGustSpeed W
2008-
0 Albury 13.4 22.9 0.6 NaN NaN W 44.0
12-01
2008-
1 Albury 7.4 25.1 0.0 NaN NaN WNW 44.0
12-02
2008-
2 Albury 12.9 25.7 0.0 NaN NaN WSW 46.0
12-03
2008-
3 Albury 9.2 28.0 0.0 NaN NaN NE 24.0
12-04
2008-
4 Albury 17.5 32.3 1.0 NaN NaN W 41.0
12-05
5 rows × 24 columns
data.columns
Index(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation',
'Sunshine', 'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
'Temp3pm', 'RainToday', 'RISK_MM', 'RainTomorrow'],
dtype='object')
data.tail()
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 1/7
6/17/23, 7:47 PM RainFall_Prediction.ipynb - Colaboratory
2017-
142188 Uluru 3.5 21.8 0.0 NaN NaN E 3
06-20
2017-
142189 Uluru 2.8 23.4 0.0 NaN NaN E 3
06-21
2017-
142190 Uluru 3.6 25.3 0.0 NaN NaN NNW 2
06-22
2017-
142191 Uluru 5.4 26.9 0.0 NaN NaN N 3
06-23
data.isnull().sum()
2017-
142192 Uluru 7.8 27.0 0.0 NaN NaN SE 2
Date 06-24 0
Location 0
5 rows × 24 columns
MinTemp 637
MaxTemp 322
Rainfall 1406
Evaporation 60843
Sunshine 67816
WindGustDir 9330
WindGustSpeed 9270
WindDir9am 10013
WindDir3pm 3778
WindSpeed9am 1348
WindSpeed3pm 2630
Humidity9am 1774
Humidity3pm 3610
Pressure9am 14014
Pressure3pm 13981
Cloud9am 53657
Cloud3pm 57094
Temp9am 904
Temp3pm 2726
RainToday 1406
RISK_MM 0
RainTomorrow 0
dtype: int64
data.describe()
MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustSpeed WindS
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142193 entries, 0 to 142192
Data columns (total 24 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 142193 non-null object
1 Location 142193 non-null object
2 MinTemp 141556 non-null float64
3 MaxTemp 141871 non-null float64
4 Rainfall 140787 non-null float64
5 Evaporation 81350 non-null float64
6 Sunshine 74377 non-null float64
7 WindGustDir 132863 non-null object
8 WindGustSpeed 132923 non-null float64
9 WindDir9am 132180 non-null object
10 WindDir3pm 138415 non-null object
11 WindSpeed9am 140845 non-null float64
12 WindSpeed3pm 139563 non-null float64
13 Humidity9am 140419 non-null float64
14 Humidity3pm 138583 non-null float64
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 2/7
6/17/23, 7:47 PM RainFall_Prediction.ipynb - Colaboratory
15 Pressure9am 128179 non-null float64
16 Pressure3pm 128212 non-null float64
17 Cloud9am 88536 non-null float64
18 Cloud3pm 85099 non-null float64
19 Temp9am 141289 non-null float64
20 Temp3pm 139467 non-null float64
21 RainToday 140787 non-null object
22 RISK_MM 142193 non-null float64
23 RainTomorrow 142193 non-null object
dtypes: float64(17), object(7)
memory usage: 26.0+ MB
droping columns which has majority of Nan values and useless columns
data=data.drop(columns=['Date','Location','Evaporation','Sunshine','C
data.shape
(142193, 17)
data=data.dropna(axis=0)
data.isnull().sum()
MinTemp 0
MaxTemp 0
Rainfall 0
WindGustDir 0
WindGustSpeed 0
WindDir9am 0
WindDir3pm 0
WindSpeed9am 0
WindSpeed3pm 0
Humidity9am 0
Humidity3pm 0
Pressure9am 0
Pressure3pm 0
Temp9am 0
Temp3pm 0
RainToday 0
RainTomorrow 0
dtype: int64
data.shape
(112925, 17)
data.head()
MinTemp MaxTemp Rainfall WindGustDir WindGustSpeed WindDir9am WindDir3pm WindSpeed9am Wi
label_encode=LabelEncoder()
data['WindGustDir']=label_encode.fit_transform(data['WindGustDir'])
data['WindDir9am']=label_encode.fit_transform(data['WindDir9am'])
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 3/7
6/17/23, 7:47 PM RainFall_Prediction.ipynb - Colaboratory
data['WindDir3pm']=label_encode.fit_transform(data['WindDir3pm'])
data['RainToday']=label_encode.fit_transform(data['RainToday'])
data['RainTomorrow']=label_encode.fit_transform(data['RainTomorrow'])
data
MinTemp MaxTemp Rainfall WindGustDir WindGustSpeed WindDir9am WindDir3pm WindSpeed9a
data.head()
p MaxTemp Rainfall WindGustDir WindGustSpeed WindDir9am WindDir3pm WindSpeed9am WindSpeed3pm
sns.heatmap(data.corr(),annot=True,fmt='.1f')
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 4/7
6/17/23, 7:47 PM RainFall_Prediction.ipynb - Colaboratory
<Axes: >
x=data.drop(columns=['RainTomorrow'],axis=1)
y=data['RainTomorrow']
x.shape
(112925, 16)
x
MinTemp MaxTemp Rainfall WindGustDir WindGustSpeed WindDir9am WindDir3pm
y.shape
(112925,)
x.head()
MinTemp MaxTemp Rainfall WindGustDir WindGustSpeed WindDir9am WindDir3pm Wi
y.head()
0 0
1 0
2 0
3 0
4 0
Name: RainTomorrow, dtype: int64
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,rand
x_train.shape
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 5/7
6/17/23, 7:47 PM RainFall_Prediction.ipynb - Colaboratory
(90340, 16)
y_train.shape
(90340,)
x_test.shape
(22585, 16)
y_test.shape
(22585,)
logistic=LogisticRegression()
logistic.fit(x_train,y_train)
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: Conver
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()
pred=logistic.predict(x_test)
acc=accuracy_score(pred,y_test)
print(acc)
0.8531768873145893
TESTING MANUALLY
input=[12.9,25.7,0.0,15,46.0,13,15,19.0,26.0,38.0,30.0,1007.6,1008.7,
narray=np.asarray(input)
narray2=narray.reshape(1,16)
print(narray2)
narray2.ndim
[[ 12.9 25.7 0. 15. 46. 13. 15. 19. 26. 38.
30. 1007.6 1008.7 21. 23.2 0. ]]
2
Pred2=logistic.predict(narray2)
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegressi
warnings.warn(
if Pred2==0:
print("No Rain Tomorrow")
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 6/7
6/17/23, 7:47 PM RainFall_Prediction.ipynb - Colaboratory
else:
print("There is a Rain Tomorrow")
No Rain Tomorrow
https://colab.research.google.com/drive/1FQVl3HBd8clgw913r-XGG6kZTLIpPH6b#scrollTo=DGwM46xItRK6&printMode=true 7/7