0% found this document useful (0 votes)
31 views8 pages

DMV - 4 - Jupyter Notebook

Uploaded by

Anushka Jadhav
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
31 views8 pages

DMV - 4 - Jupyter Notebook

Uploaded by

Anushka Jadhav
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 8

10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

In [1]: import pandas as pd

In [5]: data = pd.read_csv('city_day.csv')  # load the city/day air-quality table; path is relative to the working directory

In [6]: data.head()  # preview the first five rows

Out[6]:
        City        Date  PM2.5  PM10     NO    NO2    NOx  NH3     CO    SO2      O3  Benzene
0  Ahmedabad  2015-01-01    NaN   NaN   0.92  18.22  17.15  NaN   0.92  27.64  133.36     0.00
1  Ahmedabad  2015-01-02    NaN   NaN   0.97  15.69  16.46  NaN   0.97  24.55   34.06     3.68
2  Ahmedabad  2015-01-03    NaN   NaN  17.40  19.30  29.70  NaN  17.40  29.07   30.70     6.80
3  Ahmedabad  2015-01-04    NaN   NaN   1.70  18.48  17.97  NaN   1.70  18.59   36.08     4.43
4  Ahmedabad  2015-01-05    NaN   NaN  22.10  21.42  37.76  NaN  22.10  39.33   39.31     7.01

In [7]: data.columns  # list the column labels

Out[7]: Index(['City', 'Date', 'PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2',
       'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI', 'AQI_Bucket'],
      dtype='object')

In [8]: data.info()  # dtype and non-null count for every column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29531 entries, 0 to 29530
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 City 29531 non-null object
1 Date 29531 non-null object
2 PM2.5 24933 non-null float64
3 PM10 18391 non-null float64
4 NO 25949 non-null float64
5 NO2 25946 non-null float64
6 NOx 25346 non-null float64
7 NH3 19203 non-null float64
8 CO 27472 non-null float64
9 SO2 25677 non-null float64
10 O3 25509 non-null float64
11 Benzene 23908 non-null float64
12 Toluene 21490 non-null float64
13 Xylene 11422 non-null float64
14 AQI 24850 non-null float64
15 AQI_Bucket 24850 non-null object
dtypes: float64(13), object(3)
memory usage: 3.6+ MB

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 1/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

In [9]: data.describe()  # summary statistics for the numeric columns

Out[9]:
PM2.5 PM10 NO NO2 NOx NH3

count 24933.000000 18391.000000 25949.000000 25946.000000 25346.000000 19203.000000

mean 67.450578 118.127103 17.574730 28.560659 32.309123 23.483476

std 64.661449 90.605110 22.785846 24.474746 31.646011 25.684275

min 0.040000 0.010000 0.020000 0.010000 0.000000 0.010000

25% 28.820000 56.255000 5.630000 11.750000 12.820000 8.580000

50% 48.570000 95.680000 9.890000 21.690000 23.520000 15.850000

75% 80.590000 149.745000 19.950000 37.620000 40.127500 30.020000

max 949.990000 1000.000000 390.680000 362.210000 467.630000 352.890000

In [10]: data.isnull().sum()  # number of missing values in each column

Out[10]: City 0
Date 0
PM2.5 4598
PM10 11140
NO 3582
NO2 3585
NOx 4185
NH3 10328
CO 2059
SO2 3854
O3 4022
Benzene 5623
Toluene 8041
Xylene 18109
AQI 4681
AQI_Bucket 4681
dtype: int64

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 2/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

In [22]: data['PM2.5'].fillna(data['PM2.5'].mean(), inplace=True)


data['PM10'].fillna(data['PM10'].mean(), inplace=True)
data['NO'].fillna(data['NO'].mean(), inplace=True)
data['NO2'].fillna(data['NO2'].mean(), inplace=True)
data['NOx'].fillna(data['NOx'].mean(), inplace=True)
data['NH3'].fillna(data['NH3'].mean(), inplace=True)
data['CO'].fillna(data['CO'].mean(), inplace=True)
data['SO2'].fillna(data['SO2'].mean(), inplace=True)
data['O3'].fillna(data['O3'].mean(), inplace=True)
data['Benzene'].fillna(data['Benzene'].mean(), inplace=True)
data['Toluene'].fillna(data['Toluene'].mean(), inplace=True)
data['Xylene'].fillna(data['Xylene'].mean(), inplace=True)
data['AQI'].fillna(data['AQI'].mean(), inplace=True)
data['AQI_Bucket'].fillna(data['AQI_Bucket'].mean(), inplace=True)

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 3/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[22], line 14
12 data['Xylene'].fillna(data['Xylene'].mean(), inplace=True)
13 data['AQI'].fillna(data['AQI'].mean(), inplace=True)
---> 14 data['AQI_Bucket'].fillna(data['AQI_Bucket'].mean(), inplace=True)

File C:\Python310\lib\site-packages\pandas\core\series.py:6221, in Series.


mean(self, axis, skipna, numeric_only, **kwargs)
6213 @doc(make_doc("mean", ndim=1))
6214 def mean(
6215 self,
(...)
6219 **kwargs,
6220 ):
-> 6221 return NDFrame.mean(self, axis, skipna, numeric_only, **kwarg
s)

File C:\Python310\lib\site-packages\pandas\core\generic.py:11978, in NDFra


me.mean(self, axis, skipna, numeric_only, **kwargs)
11971 def mean(
11972 self,
11973 axis: Axis | None = 0,
(...)
11976 **kwargs,
11977 ) -> Series | float:
> 11978 return self._stat_function(
11979 "mean", nanops.nanmean, axis, skipna, numeric_only, **kwar
gs
11980 )

File C:\Python310\lib\site-packages\pandas\core\generic.py:11935, in NDFra


me._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
11931 nv.validate_func(name, (), kwargs)
11933 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
> 11935 return self._reduce(
11936 func, name=name, axis=axis, skipna=skipna, numeric_only=numeri
c_only
11937 )

File C:\Python310\lib\site-packages\pandas\core\series.py:6129, in Series.


_reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
6124 # GH#47500 - change to TypeError to match other methods
6125 raise TypeError(
6126 f"Series.{name} does not allow {kwd_name}={numeric_only} "
6127 "with non-numeric dtypes."
6128 )
-> 6129 return op(delegate, skipna=skipna, **kwds)

File C:\Python310\lib\site-packages\pandas\core\nanops.py:147, in bottlene


ck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
145 result = alt(values, axis=axis, skipna=skipna, **kwds)
146 else:
--> 147 result = alt(values, axis=axis, skipna=skipna, **kwds)
149 return result

File C:\Python310\lib\site-packages\pandas\core\nanops.py:404, in _datetim


elike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
401 if datetimelike and mask is None:
localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 4/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook
402 mask = isna(values)
--> 404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwarg
s)
406 if datetimelike:
407 result = _wrap_results(result, orig_values.dtype, fill_value=i
NaT)

File C:\Python310\lib\site-packages\pandas\core\nanops.py:719, in nanmean


(values, axis, skipna, mask)
716 dtype_count = dtype
718 count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
--> 719 the_sum = values.sum(axis, dtype=dtype_sum)
720 the_sum = _ensure_numeric(the_sum)
722 if axis is not None and getattr(the_sum, "ndim", False):

File C:\Python310\lib\site-packages\numpy\core\_methods.py:48, in _sum(a,


axis, dtype, out, keepdims, initial, where)
46 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
47 initial=_NoValue, where=True):
---> 48 return umr_sum(a, axis, dtype, out, keepdims, initial, where)

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [ ]: import matplotlib.pyplot as plt

In [23]: data['Date'] = pd.to_datetime(data['Date'])  # convert Date from strings (object dtype) to datetime values for time-axis plots

In [24]: plt.figure(figsize=(12, 6))


plt.plot(data['Date'], data['AQI'], label='AQI', color='blue', linewidth=2)
plt.title('Overall AQI Trend Over Time')
plt.xlabel('Date')
plt.ylabel('AQI Value')
plt.xticks(rotation=45)
plt.legend()
plt.grid()
plt.tight_layout()
plt.show()

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 5/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

In [25]: pollutants = ['PM2.5', 'PM10', 'CO']

# data has many rows per Date (one per city), so a line through the raw rows
# would jump back in time at every city boundary. Reduce to one mean value
# per date for each pollutant, computed once outside the loop.
pollutant_by_date = data.groupby('Date')[pollutants].mean()

# One figure per pollutant; the loop body must be indented under the for.
for pollutant in pollutants:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(pollutant_by_date.index, pollutant_by_date[pollutant],
            label=pollutant, linewidth=2)
    ax.set_title(f'{pollutant} Levels Over Time')
    ax.set_xlabel('Date')
    ax.set_ylabel(f'{pollutant} Concentration')
    ax.tick_params(axis='x', labelrotation=45)
    ax.legend()
    ax.grid()
    fig.tight_layout()
    plt.show()

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 6/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

In [26]: daily_avg_aqi = data.groupby('Date')['AQI'].mean().reset_index()



plt.figure(figsize=(12, 6))
plt.bar(daily_avg_aqi['Date'], daily_avg_aqi['AQI'], color='orange')
plt.title('Average AQI Values Across Dates')
plt.xlabel('Date')
plt.ylabel('Average AQI Value')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 7/8
10/6/24, 7:51 PM DMV_4 - Jupyter Notebook

In [27]: plt.figure(figsize=(12, 6))


plt.scatter(data['PM2.5'], data['AQI'], alpha=0.5)
plt.title('Relationship Between PM2.5 and AQI')
plt.xlabel('PM2.5 Concentration')
plt.ylabel('AQI Value')
plt.grid()
plt.show()

In [ ]: ​

In [ ]: ​

localhost:8888/notebooks/BE_PRACTICALS/DMV_4.ipynb 8/8

You might also like