Data Wrangling- Jupyter Notebook
Data Wrangling- Jupyter Notebook
In [7]:
# Data exploration, here we assign the data, and then we visualize the data in a tabular format.
# Assign data
data = {'Name': ['Jai', 'Princi', 'Gaurav',
'Anuj', 'Ravi', 'Natasha', 'Riya'],
'Age': [17, 17, 18, 17, 18, 17, 17],
'Gender': ['M', 'F', 'M', 'M', 'M', 'F', 'F'],
'Marks': [90, 76, 'NaN', 74, 65, 'NaN', 71]}
# Display data
print(df)
localhost:8892/notebooks/Untitled10.ipynb?kernel_name=python3 1/5
2/6/23, 5:11 PM Untitled10 - Jupyter Notebook
In [23]:
# Compute average
c = avg = 0
for ele in df["Marks"]:
if str(ele).isnumeric():
c += 1
avg += ele
avg /= c
# Display data
print(df)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3628 try:
-> 3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Marks'
The above exception was the direct cause of the following exception:
KeyError: 'Marks'
In [13]:
# Categorize gender
df['Gender'] = df['Gender'].map({'M': 0,
'F': 1, }).astype(float)
# Display data
print(df)
localhost:8892/notebooks/Untitled10.ipynb?kernel_name=python3 2/5
2/6/23, 5:11 PM Untitled10 - Jupyter Notebook
In [14]:
# Display data
print(df)
In [15]:
# import module
import pandas as pd
# printing details
print(details)
ID NAME BRANCH
0 101 Jagroop CSE
1 102 Praveen CSE
2 103 Harjot CSE
3 104 Pooja CSE
4 105 Rahul CSE
5 106 Nikita CSE
6 107 Saurabh CSE
7 108 Ayush CSE
8 109 Dolly CSE
9 110 Mohit CSE
In [16]:
# Import module
import pandas as pd
# Printing fees_status
print(fees_status)
ID PENDING
0 101 5000
1 102 250
2 103 NIL
3 104 9000
4 105 15000
5 106 NIL
6 107 4500
7 108 1800
8 109 250
9 110 NIL
localhost:8892/notebooks/Untitled10.ipynb?kernel_name=python3 3/5
2/6/23, 5:11 PM Untitled10 - Jupyter Notebook
In [17]:
# Creating Dataframe
fees_status = pd.DataFrame(
{'ID': [101, 102, 103, 104, 105,
106, 107, 108, 109, 110],
'PENDING': ['5000', '250', 'NIL',
'9000', '15000', 'NIL',
'4500', '1800', '250', 'NIL']})
# Merging Dataframe
print(pd.merge(details, fees_status, on='ID'))
In [18]:
In [19]:
localhost:8892/notebooks/Untitled10.ipynb?kernel_name=python3 4/5
2/6/23, 5:11 PM Untitled10 - Jupyter Notebook
In [20]:
# Printing Dataframe
print(df)
In [21]:
In [ ]:
localhost:8892/notebooks/Untitled10.ipynb?kernel_name=python3 5/5