0% found this document useful (0 votes)
11 views6 pages

Week 2

Uploaded by

srujjanbelamgi12
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views6 pages

Week 2

Uploaded by

srujjanbelamgi12
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

import pandas as pd

# Employee records: one row per person with their country and role.
data1 = dict(
    Name=['Pankaj', 'Meghna', 'Lisa'],
    Country=['India', 'India', 'USA'],
    Role=['CEO', 'CTO', 'CTO'],
)
df1 = pd.DataFrame(data=data1)

# ID lookup table; 'Pankaj' is the only name that also appears in data1.
data2 = dict(ID=[1, 2, 3], Name=['Pankaj', 'Anupam', 'Amit'])
df2 = pd.DataFrame(data=data2)

# Show both inputs before any merging is done.
print("DataFrame 1:", df1, sep="\n")
print("\nDataFrame 2:", df2, sep="\n")

DataFrame 1:
Name Country Role
0 Pankaj India CEO
1 Meghna India CTO
2 Lisa USA CTO

DataFrame 2:
ID Name
0 1 Pankaj
1 2 Anupam
2 3 Amit

# Inner join (the default `how`) on the shared 'Name' column: only names
# present in both df1 and df2 end up in the result.
result_row = df1.merge(df2, how='inner', on='Name')
print(result_row)

Name Country Role ID


0 Pankaj India CEO 1

# Compute the three non-inner joins on 'Name' first, then print each one.
#   how='left'  -> keeps every row of df1
#   how='right' -> keeps every row of df2
#   how='outer' -> keeps the rows of both frames
# Cells with no counterpart in the other frame are filled with NaN.
result_left = df1.merge(df2, how='left', on='Name')
result_right = df1.merge(df2, how='right', on='Name')
result_outer = df1.merge(df2, how='outer', on='Name')

print("\nResult Left Join:", result_left, sep="\n")
print("\nResult Right Join:", result_right, sep="\n")
print("\nResult Outer Join:", result_outer, sep="\n")

Result Left Join:


Name Country Role ID
0 Pankaj India CEO 1.0
1 Meghna India CTO NaN
2 Lisa USA CTO NaN

Result Right Join:


Name Country Role ID
0 Pankaj India CEO 1
1 Anupam NaN NaN 2
2 Amit NaN NaN 3

Result Outer Join:


Name Country Role ID
0 Amit NaN NaN 3.0
1 Anupam NaN NaN 2.0
2 Lisa USA CTO NaN
3 Meghna India CTO NaN
4 Pankaj India CEO 1.0

# NOTE: this cell repeats the left/right/outer join cell that appears earlier
# in the document; it recomputes and reprints the same three results.
# Left join: every df1 row, with ID filled in where the name exists in df2.
result_left = pd.merge(left=df1, right=df2, how='left', on='Name')
# Right join: every df2 row, with Country/Role filled in where available.
result_right = pd.merge(left=df1, right=df2, how='right', on='Name')
# Outer join: the union of the names from both frames.
result_outer = pd.merge(left=df1, right=df2, how='outer', on='Name')

print("\nResult Left Join:", result_left, sep="\n")
print("\nResult Right Join:", result_right, sep="\n")
print("\nResult Outer Join:", result_outer, sep="\n")

Result Left Join:


Name Country Role ID
0 Pankaj India CEO 1.0
1 Meghna India CTO NaN
2 Lisa USA CTO NaN

Result Right Join:


Name Country Role ID
0 Pankaj India CEO 1
1 Anupam NaN NaN 2
2 Amit NaN NaN 3

Result Outer Join:


Name Country Role ID
0 Amit NaN NaN 3.0
1 Anupam NaN NaN 2.0
2 Lisa USA CTO NaN
3 Meghna India CTO NaN
4 Pankaj India CEO 1.0

# Raw inputs keyed by ID. IDs 1-3 occur in both dictionaries, 4 only in the
# sales data and 5 only in the region data.
sales_dict = dict(ID=[1, 2, 3, 4], Amount=[100, 200, 300, 400])
region_dict = dict(ID=[1, 2, 3, 5], Region=['East', 'West', 'North', 'South'])

# Build the frames; the dictionary keys become the column names.
sales_df = pd.DataFrame(sales_dict)
region_df = pd.DataFrame(region_dict)

print("Sales DataFrame:", sales_df, sep="\n")
print("\nRegion DataFrame:", region_df, sep="\n")

Sales DataFrame:
ID Amount
0 1 100
1 2 200
2 3 300
3 4 400

Region DataFrame:
ID Region
0 1 East
1 2 West
2 3 North
3 5 South

# Join the sales and region frames on 'ID' in each of the four modes.
# b) inner: only IDs present in both frames
result_inner = sales_df.merge(region_df, how='inner', on='ID')
# c) left: every sales row; Region is NaN where the ID has no region
result_left = sales_df.merge(region_df, how='left', on='ID')
# d) right: every region row; Amount is NaN where the ID has no sale
result_right = sales_df.merge(region_df, how='right', on='ID')
# e) outer: every ID from either frame
result_outer = sales_df.merge(region_df, how='outer', on='ID')

print("\nInner Join:", result_inner, sep="\n")
print("\nLeft Join:", result_left, sep="\n")
print("\nRight Join:", result_right, sep="\n")
print("\nOuter Join:", result_outer, sep="\n")
Inner Join:
ID Amount Region
0 1 100 East
1 2 200 West
2 3 300 North

Left Join:
ID Amount Region
0 1 100 East
1 2 200 West
2 3 300 North
3 4 400 NaN

Right Join:
ID Amount Region
0 1 100.0 East
1 2 200.0 West
2 3 300.0 North
3 5 NaN South

Outer Join:
ID Amount Region
0 1 100.0 East
1 2 200.0 West
2 3 300.0 North
3 4 400.0 NaN
4 5 NaN South

import numpy as np
import pandas as pd
# Small frame with NaNs placed so that every column has at least one gap and
# only the last row is complete.
data = {
    'A': [1, np.nan, 3, 4],
    'B': [5, 6, np.nan, 8],
    'C': [np.nan, np.nan, 9, 10],
}
df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")

# Each entry pairs the heading to print with the frame derived from df.
# None of these calls modifies df; each one returns a new DataFrame.
views = [
    # 1. Keep only the rows that contain no NaN at all.
    ("\nDrop rows with any missing values:", df.dropna()),
    # 2. Keep only the columns that contain no NaN at all.
    ("\nDrop columns with at least one missing value:", df.dropna(axis=1)),
    # 3. With the default axis this drops only ROWS that are entirely NaN
    #    (columns would need axis=1).
    ("\nDrop rows/columns with all missing values:", df.dropna(how='all')),
    # 4. With the default axis this keeps ROWS holding at least 2 non-NaN values.
    ("\nDrop rows/columns based on threshold:", df.dropna(thresh=2)),
    # 5. Forward fill: each NaN takes the last valid value above it in its column.
    ("\nReplace NaN with the previous value:", df.ffill()),
    # 6. Forward fill, but fill at most one consecutive NaN per gap.
    ("\nReplace NaN with the previous value, limit=1:", df.ffill(limit=1)),
    # 7. Backward fill: each NaN takes the next valid value below it in its column.
    ("\nReplace NaN with the forward value:", df.bfill()),
]
for heading, view in views:
    print(heading)
    print(view)

Original DataFrame:
A B C
0 1.0 5.0 NaN
1 NaN 6.0 NaN
2 3.0 NaN 9.0
3 4.0 8.0 10.0

Drop rows with any missing values:


A B C
3 4.0 8.0 10.0

Drop columns with at least one missing value:


Empty DataFrame
Columns: []
Index: [0, 1, 2, 3]
Drop rows/columns with all missing values:
A B C
0 1.0 5.0 NaN
1 NaN 6.0 NaN
2 3.0 NaN 9.0
3 4.0 8.0 10.0

Drop rows/columns based on threshold:


A B C
0 1.0 5.0 NaN
2 3.0 NaN 9.0
3 4.0 8.0 10.0

Replace NaN with the previous value:


A B C
0 1.0 5.0 NaN
1 1.0 6.0 NaN
2 3.0 6.0 9.0
3 4.0 8.0 10.0

Replace NaN with the previous value, limit=1:


A B C
0 1.0 5.0 NaN
1 1.0 6.0 NaN
2 3.0 6.0 9.0
3 4.0 8.0 10.0

Replace NaN with the forward value:


A B C
0 1.0 5.0 9.0
1 3.0 6.0 9.0
2 3.0 8.0 9.0
3 4.0 8.0 10.0

import pandas as pd

# First fruit table: three columns with four counts each.
fruit = {
    'orange': [3, 2, 0, 1],
    'apple': [0, 3, 7, 2],
    'grapes': [7, 14, 6, 15],
}
df1 = pd.DataFrame(data=fruit)
# Bare expression: in a notebook this renders df1 as the cell output.
df1

orange apple grapes

0 3 0 7

1 2 3 14

2 0 7 6

3 1 2 15

Next steps: Generate code with df1


toggle_off View recommended plots New interactive sheet

# Second fruit table. 'banana' ends with two missing values (np.nan), so that
# column is stored as float while 'grapes' and 'mango' stay integer.
fruit = {
    'grapes': [13, 12, 10, 2, 55, 98],
    'mango': [10, 13, 17, 2, 9, 76],
    'banana': [20, 23, 27, 4, np.nan, np.nan],
}
df2 = pd.DataFrame(data=fruit)
# Bare expression: in a notebook this renders df2 as the cell output.
df2

grapes mango banana

0 13 10 20.0

1 12 13 23.0

2 10 17 27.0

3 2 2 4.0

4 55 9 NaN

5 98 76 NaN

Next steps: Generate code with df2


toggle_off View recommended plots New interactive sheet

# Remove the row sitting at the third position (looked up via its index
# label); the remaining rows keep their original labels.
df2 = df2.drop(index=df2.index[2])
df2
grapes mango banana

0 13 10 20.0

1 12 13 23.0

3 2 2 4.0

4 55 9 NaN

5 98 76 NaN

Next steps: Generate code with df2


toggle_off View recommended plots New interactive sheet

# Stack df2 underneath df1 (row-wise is the default axis). Columns are unioned,
# missing cells become NaN, and each frame keeps its own index labels.
pd.concat([df1, df2])

orange apple grapes mango banana

0 3.0 0.0 7 NaN NaN

1 2.0 3.0 14 NaN NaN

2 0.0 7.0 6 NaN NaN

3 1.0 2.0 15 NaN NaN

0 NaN NaN 13 10.0 20.0

1 NaN NaN 12 13.0 23.0

3 NaN NaN 2 2.0 4.0

4 NaN NaN 55 9.0 NaN

5 NaN NaN 98 76.0 NaN

# Re-display df1: the pd.concat call above returned a new frame without
# assigning it, so df1 itself is unchanged.
df1

orange apple grapes

0 3 0 7

1 2 3 14

2 0 7 6

3 1 2 15

Next steps: Generate code with df1


toggle_off View recommended plots New interactive sheet

# Same row-wise stacking, but discard both frames' index labels and number the
# combined rows 0..n-1 instead.
pd.concat((df1, df2), axis=0, ignore_index=True)

orange apple grapes mango banana

0 3.0 0.0 7 NaN NaN

1 2.0 3.0 14 NaN NaN

2 0.0 7.0 6 NaN NaN

3 1.0 2.0 15 NaN NaN

4 NaN NaN 13 10.0 20.0

5 NaN NaN 12 13.0 23.0

6 NaN NaN 2 2.0 4.0

7 NaN NaN 55 9.0 NaN

8 NaN NaN 98 76.0 NaN

%%time
# Start from an empty frame that only declares column 'A'.
df = pd.DataFrame(columns=['A'])
for i in range(30):
    # Instead of append, use concat to add rows. Every pass builds a brand-new
    # DataFrame from the rows collected so far plus one more row, so the frame
    # is re-created 30 times.
    df = pd.concat([df, pd.DataFrame([{'A': i*2}])], ignore_index=True)

CPU times: user 17.4 ms, sys: 0 ns, total: 17.4 ms


Wall time: 16.7 ms

%%time
# Create all thirty one-row frames up front and join them with a single
# concat call, renumbering the rows 0..29.
df = pd.concat(
    (pd.DataFrame({'A': [2 * i]}) for i in range(30)),
    ignore_index=True,
)

CPU times: user 11.4 ms, sys: 1.04 ms, total: 12.5 ms
Wall time: 39.6 ms

Start coding or generate with AI.

You might also like