Acknowledgement
Acknowledgement
Debangshu Karmakar
“XII” – Science
1.
Index
2.
Introduction
What is a DataFrame?
3.
Programs
import pandas as pd
# NOTE(review): `dt` is not assigned anywhere before this line in the visible
# text, so the snippet raises NameError as written. Its definition appears to
# be missing from this copy -- confirm against the original program.
dtf=pd.DataFrame(dt)
# Display the resulting DataFrame.
print(dtf)
4.
# Two years of sales figures, one row per quarter.
import pandas as pd

dt = {
    'Yr1': [34500, 56000, 47000, 49000],
    'Yr2': [44900, 46100, 57000, 59000],
}
# Label the rows Qtr1..Qtr4 instead of the default 0..3 integer index.
dtf = pd.DataFrame(data=dt, index=[f'Qtr{n}' for n in range(1, 5)])
print(dtf)
5.
# Student records: roll number, name and marks, one row per student.
import pandas as pd

# Keyword form of the dict keeps the same keys in the same order.
dt = dict(
    Rollno=[115, 236, 307, 422],
    Name=['Pavni', 'Rishi', 'Preet', 'Parul'],
    Marks=[97.5, 98.0, 98.5, 98.0],
)
dtf = pd.DataFrame(data=dt)
print(dtf)
6.
# Target versus actual sales for four zones; zones are the columns.
import pandas as pd

dt = {
    'Zone1': [56000, 58000],
    'Zone2': [70000, 68000],
    'Zone3': [75000, 78000],
    'Zone4': [60000, 61000],
}
# First value of every zone is the target, second is the achieved sales.
row_labels = ['Target', 'Sales']
dtf = pd.DataFrame(data=dt, index=row_labels)
print(dtf)
7.
# Build a DataFrame from a list of lists: each inner list becomes one row,
# and both rows and columns get default integer labels.
import pandas as pd

r1, r2, r3 = [101, 113, 124], [130, 140, 200], [115, 216, 217]
combine = [r1, r2, r3]
df = pd.DataFrame(data=combine)
print(df)
8.
# City weather table: maximum/minimum temperature and rainfall per city.
import pandas as pd

# The 'Rainfall' key was previously split across two physical lines inside
# its quotes, which is a syntax error; the literal is now kept intact.
df = {
    'city': ['Delhi', 'Bengaluru', 'Chennai', 'Mumbai'],
    'Maxtemp': [40, 31, 35, 29],
    'Mintemp': [32, 25, 27, 21],
    'Rainfall': [24.1, 36.2, 40.8, 35.2],
}
# Note: `df` holds the raw dictionary here; the DataFrame itself is `temp`.
temp = pd.DataFrame(df)
print(temp)
9.
# Demonstrates growing and shrinking a DataFrame: add a column, add a row,
# then remove columns and rows.
import pandas as pd

data = {
    'A': [50, 110],
    'B': [80, 120],
    'C': [120, 130],
    'D': [180, 140],
}
df = pd.DataFrame(data)
print(df)

# Assigning a list to a new label creates the column; one value per row.
df['E'] = [14, 220]
print("DataFrame after adding column E:")
print(df)

# Append a row with the public pd.concat API rather than the private
# DataFrame._append method. ignore_index=True renumbers the rows 0..n-1.
new_row = {'A': 2, 'B': 130, 'C': 140, 'D': 150, 'E': 300}
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
print("DataFrame after adding a new row:")
print(df)

df = df.drop(columns=['A', 'C'])
print("DataFrame after removing columns A and C:")
print(df)

# drop() with a list of labels removes rows by index label (0 and 1 here).
df = df.drop([0, 1])
print("DataFrame after removing the first and second rows:")
print(df)
10.
11.
import pandas as pd
data = {
print(df1)
12.
# NOTE(review): `df1` is not defined in the visible text, and the same frame
# is printed several times with no statements in between -- the modifying
# steps appear to be missing from this copy; confirm against the original.
# Rows at integer positions 0 and 2.
print(df1.iloc[[0, 2]])
# Only the 'qty' and 'Company' columns.
print(df1[['qty', 'Company']])
print(df1)
# The row whose index label is 104.
print(df1.loc[104])
print("Updated DataFrame for company name and quantity of records 101 and 102:")
print(df1)
print(df1)
13.
14.
15.
# Batsman score table: two innings scores per player plus a derived total.
import pandas as pd

data2 = dict(
    Bno=[1, 2, 3, 4],
    name=['Sunil Grover', 'sourav ganguli', 'virat kohli', 'rahul dravid'],
    score1=[60, 65, 70, 80],
    score2=[70, 45, 90, 70],
)
batsman = pd.DataFrame(data=data2)
print(batsman)

# Row-wise sum of the two score columns.
batsman['total'] = batsman[['score1', 'score2']].sum(axis=1)
16.
# Report on the existing `batsman` DataFrame. It must already hold exactly
# five columns, including 'name', 'score1' and 'score2'.
print('Dataframe after adding total column is')
print(batsman)

# Column-wise extremes.
print('lowest score of score 1 is', batsman['score1'].min())
print("Highest score of score2:", batsman['score2'].max())

# Replace the default integer index with player labels.
batsman.index = ['player1', 'player2', 'player3', 'player4']
print('DataFrame with new index:')
print(batsman)

# One boolean mask reused for all three "score1 < 75" selections.
_under_75 = batsman['score1'] < 75
print("Details of batsmen with score1 < 75:")
print(batsman[_under_75])
print("Names of batsmen with score1 < 75:")
print(batsman.loc[_under_75, 'name'])
print("Name and score1 of batsmen with score1 < 75:")
print(batsman.loc[_under_75, ['name', 'score1']])

# sort_values returns a new frame; `batsman` itself keeps its row order.
batsman_sorted = batsman.sort_values('score2', ascending=False)
print("DataFrame in descending order of score2:")
print(batsman_sorted)

# Positional rename: the five new names map onto the columns left to right.
batsman.columns = ['batsmanno', 'bname', 's1', 's2', 'sum']
print("DataFrame after renaming columns:")
print(batsman)

# Add 5 to s1 only on the rows where s2 exceeds 75.
_high_s2 = batsman['s2'] > 75
batsman.loc[_high_s2, 's1'] = batsman.loc[_high_s2, 's1'] + 5
print("DataFrame after adding 5 to s1 where s2 > 75:")
print(batsman)
17.
18.
import pandas as pd
data_df1 = {'mark1': [10, 40, 15, 40, 10], 'mark2': [15, 45, 30, 70, 50]}
# Row labels are given explicitly; df1 uses 0,1,2,3,5 (no label 4).
df1 = pd.DataFrame(data_df1,index=[0,1,2,3,5])
# NOTE(review): `data_df2` is not assigned anywhere in the visible text, so
# this line raises NameError as written -- its definition appears to be
# missing from this copy. df2 uses labels 0,1,2,4,3 (no label 5).
df2 = pd.DataFrame(data_df2,index=[0,1,2,4,3])
print('df1')
print(df1)
print('df2')
print(df2)
# NOTE(review): `df_sum` is never assigned in the visible text; the statement
# that computed it appears to be missing -- confirm against the original.
print(df_sum)
# Add 10 to every value in df1.
df1 += 10
print(df1)
19.
# Add 5 to every value in the 'mark1' column of df1.
df1['mark1'] += 5
print(df1)
# Element-wise sum of df1 and df2, aligned on row and column labels.
# fill_value=0 substitutes 0 where only one of the two frames has a value,
# so labels present in just one frame do not produce NaN.
d2 = df1.add(df2, fill_value=0)
print(d2)
20.
21.
import matplotlib.pyplot as plt
22.
23.
Conclusion
The DataFrame is a highly versatile and widely adopted data structure that serves
as a cornerstone in data manipulation and analysis across various programming
languages and frameworks. In Python, it is a central component of the pandas
library, which is one of the most popular tools for data analysis and manipulation
in the data science ecosystem.
24.
Bibliography
25.