0% found this document useful (0 votes)
8 views · 6 pages

Etl1 6

Uploaded by

23bsds152sanjays
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views · 6 pages

Etl1 6

Uploaded by

23bsds152sanjays
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

EX-1

CODE
import pandas as pd

# EX-1: replace every space in a string with the string's least frequent
# character.
str1 = 'abc def abcdef icd'
print("Original series:")
print(str1)

# One Series element per character so pandas can count occurrences.
ser = pd.Series(list(str1))
element_freq = ser.value_counts()
print(element_freq)

# value_counts() orders labels by descending count, so the last index label
# is the least frequent character.
labels_by_count = element_freq.dropna().index
current_freq = labels_by_count[-1]

# Swap each space for that character, then stitch the characters back together.
filled = ser.replace(' ', current_freq)
result = "".join(filled)
print(result)

OUTPUT

EX-2

CODE
import pandas as pd
# EX-2: build a three-column DataFrame from a dict of equal-length lists
# and display it.
column_values = {
    'X': [78, 85, 96, 80, 86],
    'Y': [84, 94, 89, 83, 86],
    'Z': [86, 97, 96, 72, 83],
}
df = pd.DataFrame(column_values)
print(df)
OUTPUT

EX-3

CODE
import pandas as pd

import numpy as np

# EX-3: build a labelled DataFrame of exam records and print its summary.
# Each dict value is one column; all lists have ten entries, matching `labels`.
exam_data = {
    'name': ['Anasstasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael',
             'Matthew', 'Laura', 'Kelvin', 'Jonas'],
    'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
    'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
                'yes'],
}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

df = pd.DataFrame(exam_data, index=labels)

print("Summary of the basic information about this DataFrame and its data:")

# DataFrame.info() writes the summary to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line.
df.info()

OUTPUT
EX-4

CODE
import pandas as pd

# EX-4: group student records by one column and aggregate the 'age' column.
# Show every row when printing instead of truncating long frames.
pd.set_option('display.max_rows', None)

student_data = pd.DataFrame(
    {
        'name': ['Alberto Franco', 'Gino Mcneill', 'Ryan Parkes',
                 'Eesha Hinton', 'Gino Mcne', 'David Parkes'],
        'date_Of_Birth': ['15/05/2002', '17/05/2002', '16/02/1999',
                          '25/09/1998', '11/05/2002', '15/09/1997'],
        'roll_no': ['s001', 's002', 's003', 's001', 's002', 's004'],
        'class': ['V', 'V', 'VI', 'VI', 'V', 'VI'],
        'age': [12, 12, 13, 13, 14, 12],
        'height': [173, 192, 186, 167, 151, 159],
        'weight': [35, 32, 33, 30, 31, 32],
        'address': ['street1', 'street2', 'street3', 'street', 'street2',
                    'street4'],
    },
    index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
)

print("Original DataFrame:")
print(student_data)

# NOTE(review): the message below mentions "school", but the grouping key is
# 'name' and the frame has no school column -- confirm the intended key.
print('\nMean, min, and max value of age for each value of the school:')

# One row per distinct name; columns are the mean/min/max of 'age'.
grouped_single = student_data.groupby('name').agg(
    {'age': ['mean', 'min', 'max']}
)

print(grouped_single)

OUTPUT
EX-5

CODE

import pandas as pd

import numpy as np

# EX-5: count the missing values in each column of an orders DataFrame.
# Show every row when printing instead of truncating long frames.
pd.set_option('display.max_rows', None)

# Twelve order records; np.nan marks the missing entries.
df = pd.DataFrame({
    'ord_no': [70001, np.nan, 70002, 70004, np.nan, 70005,
               np.nan, 70010, 70003, 70012, np.nan, 70013],
    'purch_amt': [150.5, 270.65, 65.26, 110.5, 948.5, 2400.6,
                  5760, 1983.43, 2480.4, 250.45, 75.29, 3045.6],
    'ord_date': ['2012-10-05', '2012-09-10', np.nan, '2012-08-17',
                 '2012-09-10', '2012-07-27', '2012-09-10', '2012-10-10',
                 '2012-10-10', '2012-06-27', '2012-08-17', '2012-04-25'],
    'customer_id': [3002, 3001, 3001, 3003, 3002, 3001,
                    3001, 3004, 3003, 3002, 3001, 3001],
    'salesman_id': [5002, 5003, 5001, np.nan, 5002, 5001,
                    5001, np.nan, 5003, 5002, 5003, np.nan],
})

print("Original Orders DataFrame:")
print(df)

print("\nNumber of missing values in the DataFrame:")
# isna() yields a boolean frame; summing it counts the True cells per column.
print(df.isna().sum())

OUTPUT
EX-6

CODE

import pandas as pd

import numpy as np

# EX-6 setup: a ten-row frame with a linear 'A' column, four columns of
# seeded random normals, and a few cells blanked out.
np.random.seed(24)  # fixed seed so the random columns are reproducible

linear_part = pd.DataFrame({'A': np.linspace(1, 10, 10)})
random_part = pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))
df = pd.concat([linear_part, random_part], axis=1)

# Blank out four cells, addressed by (row position, column position).
for row_pos, col_pos in [(0, 2), (3, 3), (4, 1), (9, 4)]:
    df.iloc[row_pos, col_pos] = np.nan

print("Original DataFrame:")
print(df)

def highlight_max(s):
    """Highlight the maximum in a Series green.

    Returns a list with one CSS string per element of ``s``:
    ``'background-color: green'`` where the element equals ``s.max()`` and
    an empty string everywhere else.
    """
    # Element-wise comparison; every element tied for the maximum is marked.
    is_max = s == s.max()
    return ['background-color: green' if v else '' for v in is_max]

print("\nHighlight the maximum value in each column:")

# Restrict styling to columns B-E across all rows.
styled_columns = pd.IndexSlice[:, ['B', 'C', 'D', 'E']]
# Kept as a bare final expression: the returned Styler is not assigned or
# printed here (presumably a notebook cell displays it -- confirm).
df.style.apply(highlight_max, subset=styled_columns)


OUTPUT

You might also like