Working With Dates in Pandas: Prepared by Asif Bhat
Working With Dates in Pandas: Prepared by Asif Bhat
Out[2]:
Name DOB EmpID
Out[3]:
Name DOB EmpID
In [4]: # Extract Month , Day , Year , 'Day of Week' and 'Week of Year' from the Date field
# Break the DOB column into calendar components, one new column per component.
# The `.dt` accessor is only available on datetime-like Series, so DOB must
# already carry a datetime dtype when this cell runs.
dob_dt = person['DOB'].dt

person['Month'] = dob_dt.month
person['Day'] = dob_dt.day
person['Year'] = dob_dt.year
person['Week Number'] = dob_dt.isocalendar().week  # ISO-8601 week of the year
person['Day Of Week'] = dob_dt.dayofweek           # numeric weekday
person['Day Name'] = dob_dt.day_name()
person['Month Name'] = dob_dt.month_name()

person
Out[4]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name
In [5]: # Changing Datetime format to '%d/%m/%Y' using strftime()
# Re-render DOB as 'dd/mm/YYYY' text.
# Note: strftime() produces strings, so the column's dtype goes back to object.
dob_parsed = pd.to_datetime(person['DOB'])
person['DOB'] = dob_parsed.dt.strftime('%d/%m/%Y')

person
person.dtypes
Out[5]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name
Out[6]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name
Out[7]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name
Out[8]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name
Out[9]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name
# Capture the current *local* calendar date exactly once.
# - Evaluating "now" separately for each value could straddle midnight and
#   make yesterday/tomorrow disagree with each other.
# - pd.to_datetime('now') was UTC-based in pandas 1.x, so its .date() could be
#   a day off from the local date; Timestamp.today() uses local time.
today = pd.Timestamp.today().date()

# Yesterday
yesterday = today - pd.Timedelta('1 day')
print('Yesterday: {}'.format(yesterday))

# tomorrow
tomorrow = today + pd.Timedelta('1 day')
print('Tomorrow: {}'.format(tomorrow))

#OR
# DateOffset arithmetic on a plain date returns a pandas Timestamp (midnight),
# so this variant prints with a time component.
tomorrow = today + pd.DateOffset(days=1)
print('Tomorrow: {}'.format(tomorrow))
In [13]: # Age of the person (Extract year from current time and subtract from Year column)
# Age in completed years.
# A bare "current year - birth year" overstates the age by one for anyone whose
# birthday has not yet occurred this year, so one year is subtracted in that case.
# Assumes the Month / Day / Year columns still mirror DOB -- confirm if DOB was
# edited after those columns were derived.
# "Now" is read once, from local time (to_datetime('now') was UTC-based in pandas 1.x).
now = pd.Timestamp.today()

# True where this year's birthday is still ahead of the current date.
birthday_pending = (person['Month'] > now.month) | (
    (person['Month'] == now.month) & (person['Day'] > now.day)
)

person['Age'] = now.year - person['Year'] - birthday_pending.astype(int)
person

# OR (take the birth year straight from DOB instead of the Year column)
# NOTE(review): if DOB still holds 'dd/mm/YYYY' strings at this point, consider
# passing format='%d/%m/%Y' so day and month cannot be swapped -- confirm.
person['Age'] = now.year - pd.to_datetime(person['DOB']).dt.year - birthday_pending.astype(int)
person
Out[13]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name Age
Out[13]:
Name DOB EmpID Month Day Year Week Number Day Of Week Day Name Month Name Age
Out[14]:
Date Births
0 1959-01-01 35
1 1959-01-02 32
2 1959-01-03 30
3 1959-01-04 31
4 1959-01-05 44
5 1959-01-06 29
6 1959-01-07 45
7 1959-01-08 43
8 1959-01-09 38
9 1959-01-10 27
In [15]: # Find min & max date to get the date range
# Span covered by the data: parse the Date column once, then latest - earliest.
# The original note describes this as one year of data.
birth_dates = pd.to_datetime(female['Date'])
birth_dates.max() - birth_dates.min()

female.head()
Out[17]:
Date Births Month Day Year Week Number Day Of Week Day Name Month Name
Out[18]: 1213
Out[20]:
Births
Month Name
April 1195
August 1351
December 1314
February 1148
January 1213
July 1300
June 1212
March 1218
May 1208
November 1350
October 1368
September 1446
Out[21]:
Births
Month Name
April 1195
August 1351
December 1314
February 1148
January 1213
July 1300
June 1212
March 1218
May 1208
November 1350
October 1368
September 1446
In [22]: # We will convert "Month Name" column into Categorical variable and specify the ordering
# Month names in calendar order (January first), one quarter per row.
# Intended as the explicit category ordering for the "Month Name" column, so
# that months stop sorting alphabetically.
order = [
    'January', 'February', 'March',
    'April', 'May', 'June',
    'July', 'August', 'September',
    'October', 'November', 'December',
]
Out[23]:
Births
Month Name
January 1213
February 1148
March 1218
April 1195
May 1208
June 1212
July 1300
August 1351
September 1446
October 1368
November 1350
December 1314
In [24]: # Bar plot to get monthly female births using matplotlib library
# Bar chart of total female births per month (matplotlib).
# Only the 'Births' column is aggregated, and only once: summing the whole
# grouped frame also tries to "sum" the non-numeric columns (e.g. 'Day Name'),
# which newer pandas versions either concatenate or reject.
monthly_births = female.groupby('Month Name')['Births'].sum()

plt.figure(figsize=(14,6))
plt.bar(monthly_births.index, monthly_births)
# Label the figure so it can be read on its own.
plt.xlabel('Month')
plt.ylabel('Births')
plt.title('Female births per month')
plt.show()
Out[26]:
Births
Day Name
Monday 2139
Tuesday 2275
Wednesday 2280
Thursday 2283
Friday 2182
Saturday 2142
Sunday 2022
Out[29]:
Date Births Month Day Year Week Number Day Of Week Day Name Month Name
In [30]: # Convert date column into Datetime index for faster selection.
# Move 'Date' out of the columns and make it the row index, so rows can be
# selected by date label with .loc.
# Note: running this a second time fails, because 'Date' is then no longer a column.
female = female.set_index('Date')

female
female.index # expected to display a DatetimeIndex
Out[30]:
Births Month Day Year Week Number Day Of Week Day Name Month Name
Date
Out[31]:
Births Month Day Year Week Number Day Of Week Day Name Month Name
Date
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: -315619200000000000
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
The above exception was the direct cause of the following exception:
The above exception was the direct cause of the following exception:
KeyError: '1960'
In [33]: # Get all rows for the month of January (1959-01-01 to 1959-01-31).
# Rows labelled 1959-01-01 through 1959-01-31 (label slices include both ends),
# keeping only the 'Births' column as a one-column DataFrame.
female.loc['1959-01-01':'1959-01-31', ['Births']]
Out[33]:
Births
Date
1959-01-01 35
1959-01-02 32
1959-01-03 30
1959-01-04 31
1959-01-05 44
1959-01-06 29
1959-01-07 45
1959-01-08 43
1959-01-09 38
1959-01-10 27
1959-01-11 38
1959-01-12 33
1959-01-13 55
1959-01-14 47
1959-01-15 45
1959-01-16 37
1959-01-17 50
1959-01-18 43
1959-01-19 41
1959-01-20 52
1959-01-21 34
1959-01-22 53
1959-01-23 39
1959-01-24 32
1959-01-25 37
1959-01-26 43
1959-01-27 39
1959-01-28 35
1959-01-29 44
1959-01-30 38
1959-01-31 24
In [35]: # plot line chart for January data using Pandas visualization
# Select the January 1959 births, then draw them as a line chart through
# pandas' built-in plotting wrapper.
january_births = female.loc['1959-01-01':'1959-01-31', ['Births']]
january_births.plot(figsize=(13,5))
Out[35]: <AxesSubplot:xlabel='Date'>
In [36]: # plot line chart for January data using Matplotlib library
# Same January 1959 line chart, drawn with matplotlib's pyplot interface.
plt.figure(figsize=(13,5))
january_births = female.loc['1959-01-01':'1959-01-31', ['Births']]
plt.plot(january_births)
Out[37]:
SampleData
Date
1920-01-31 11
1920-02-29 62
1920-03-31 192
1920-04-30 152
1920-05-31 146
... ...
2019-08-31 198
2019-09-30 83
2019-10-31 29
2019-11-30 194
2019-12-31 51
Out[37]: <AxesSubplot:xlabel='Date'>
End
In [ ]: