0% found this document useful (0 votes)
90 views

Python Pandas

This document shows how to perform various data manipulation and analysis tasks using the Pandas library in Python. It covers importing and exploring data, indexing, selecting columns, converting data types, input/output functions to read and write CSV files, merging and joining DataFrames, and accessing external data sources using the Quandl API.

Uploaded by

Amit Verma
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
90 views

Python Pandas

This document shows how to perform various data manipulation and analysis tasks using the Pandas library in Python. It covers importing and exploring data, indexing, selecting columns, converting data types, input/output functions to read and write CSV files, merging and joining DataFrames, and accessing external data sources using the Quandl API.

Uploaded by

Amit Verma
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 3

########## Data Manipulation (Pandas)

# Install the third-party packages from a shell before running these notes.
# These are shell commands, not Python statements, so they are kept as comments:
#   pip install pandas
#   pip install numpy
#   pip install matplotlib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The package is spelled "matplotlib"; "mathplotlib" raises ModuleNotFoundError.
from matplotlib import style

# Select the 'ggplot' style sheet for any charts drawn with matplotlib.
style.use('ggplot')

# Small sample dataset: one entry per day, with two measurements per day.
web_stats = {
    'Day': [1, 2, 3, 4, 5, 6],
    'Visitors': [4, 4, 4, 5, 5, 6],
    'Bounce_Rate': [4, 4, 4, 5, 5, 6],
}

df = pd.DataFrame(web_stats)

# Handy ways to inspect a frame (uncomment to try them):
# print(df)          # whole frame
# print(df.head())   # first rows
# print(df.tail())   # last rows
# print(df.tail(2))  # last two rows

# set_index returns a NEW frame indexed by 'Day'; df itself is not changed here.
print(df.set_index('Day'))

# With inplace=True the index is changed on df itself.
df.set_index('Day', inplace=True)

# Two equivalent ways to select a single column.
print(df['Visitors'])
print(df.Visitors)

# Select several columns by passing a list of column names.
print(df[['Visitors', 'Bounce_Rate']])

# Convert a column to a plain Python list.
print(df.Visitors.tolist())

# Convert several columns to a NumPy array.
print(np.array(df[['Visitors', 'Bounce_Rate']]))

###################
################### IO (Converter to anything)

import pandas as pd

# Load a CSV file and use its 'Date' column as the index.
df = pd.read_csv('FileName.csv')
df.set_index('Date', inplace=True)
print(df.head())

# Write the frame out (header row included) ...
df.to_csv('NewFile.csv')

# ... and read it back, treating the first column as the index.
# read_csv is a function of the pandas module (not a DataFrame method) and it
# returns a new frame, so the result has to be assigned.
df = pd.read_csv('NewFile.csv', index_col=0)

# Rename the columns by assigning the full list of labels.
# NOTE(review): this assumes the file has exactly one data column - confirm.
df.columns = ['Austin_HPI']
print(df.head())

# Write the frame without a header row ...
df.to_csv('NewFile2.csv', header=False)

# ... and supply the column names on read, since the headerless file has none.
df = pd.read_csv('NewFile2.csv', names=['Date', 'Austin_HPI'], index_col=0)
print(df.head())

# Export the frame as an HTML table.
df.to_html('eample.html')

# Rename a single column through an {old: new} mapping. The old name must match
# an existing column exactly, otherwise rename silently does nothing.
df.rename(columns={'Austin_HPI': '7th Digit of Autin'}, inplace=True)

######### Quandl
#########
# Install the Quandl client from a shell first (not a Python statement):
#   pip install quandl
#
# Data set used below: Housing Price Index (Freddie Mac).

# The package installed by `pip install quandl` is imported in lowercase.
import quandl
import pandas as pd

# Read the API key from a local file. The context manager closes the file, and
# strip() drops the trailing newline most editors add, which is not part of the key.
with open('quadlkey.text', 'r') as key_file:
    api_key = key_file.read().strip()

df = quandl.get('FMAC/HPI_AK', authtoken=api_key)
print(df.head())

# read_html returns a list of DataFrames, one per <table> found on the page.
# NOTE(review): this URL looks garbled (it contains spaces and a mirror host);
# replace it with the real "list of US states" page before running.
fiddy_states = pd.read_html('http://list of us')

# The whole list of tables.
print(fiddy_states)

# Column 0 of the first table.
print(fiddy_states[0][0])

# Skip the first entry of that column and build one ticker per remaining value.
# The underscore matches the 'FMAC/HPI_AK' code used above.
for abbv in fiddy_states[0][0][1:]:
    print("FMAC/HPI_" + str(abbv))

#Part 5 - Combining Data Frame


##############################

import pandas as pd

# Sample frames so this section runs on its own; nothing earlier in the file
# defines df1, df2 or df3.
df1 = pd.DataFrame({'HPI': [80, 85, 88, 85],
                    'Int_rate': [2, 3, 2, 2],
                    'US_GDP_Thousands': [50, 55, 65, 55]},
                   index=[2001, 2002, 2003, 2004])

df2 = pd.DataFrame({'HPI': [80, 85, 88, 85],
                    'Int_rate': [2, 3, 2, 2],
                    'US_GDP_Thousands': [50, 55, 65, 55]},
                   index=[2005, 2006, 2007, 2008])

df3 = pd.DataFrame({'HPI': [80, 85, 88, 85],
                    'Unemployment': [7, 8, 9, 6],
                    'Low_tier_HPI': [50, 52, 50, 53]},
                   index=[2001, 2002, 2003, 2004])

# Concatenate frames that share the same columns: rows are stacked.
concat = pd.concat([df1, df2])

# Concatenate frames with different columns: the result has the union of the
# columns, and cells a frame does not provide are filled with NaN.
concat = pd.concat([df1, df2, df3])

# Add a Series as one extra row at the end.
# The Series labels must be the frame's column names for the values to line up.
s = pd.Series([80, 20, 50], index=df1.columns)

# DataFrame.append was removed in pandas 2.0; turn the Series into a one-row
# frame and concatenate. ignore_index=True renumbers the rows 0..n-1.
df4 = pd.concat([df1, s.to_frame().T], ignore_index=True)

print(df4)

#Part 6 - Merging & Joining Data Frames


########################################
import pandas as pd

# Three small sample frames. df1 and df2 have the same columns but cover
# different years; df3 covers the same years as df1 with different measurements.
df1 = pd.DataFrame(
    data={
        'HPI': [80, 85, 88, 85],
        'Int_rate': [2, 3, 2, 2],
        'US_GDP_Thousands': [50, 55, 65, 55],
    },
    index=[2001, 2002, 2003, 2004],
)

df2 = pd.DataFrame(
    data={
        'HPI': [80, 85, 88, 85],
        'Int_rate': [2, 3, 2, 2],
        'US_GDP_Thousands': [50, 55, 65, 55],
    },
    index=[2005, 2006, 2007, 2008],
)

df3 = pd.DataFrame(
    data={
        'HPI': [80, 85, 88, 85],
        'Unemployment': [7, 8, 9, 6],
        'Low_tier_HPI': [50, 52, 50, 53],
    },
    index=[2001, 2002, 2003, 2004],
)

# Merge: use when the index does not matter; rows are matched on the key
# column. Non-key columns present in both frames get _x / _y suffixes.
print(df1.merge(df2, on='HPI'))

# Merge on two key columns at once; only the remaining shared column is
# suffixed.
print(df1.merge(df2, on=['HPI', 'Int_rate']))

# Join: use when the index matters; rows are matched on the index. Make 'HPI'
# the index of both frames first.
df1 = df1.set_index('HPI')
df3 = df3.set_index('HPI')

joined = df1.join(df3)
print(joined)

# Outer merge: keep keys found in either frame. 'HPI' is now an index level
# on df1 and still a column on df2.
print(df1.merge(df2, on='HPI', how='outer'))

##### Part - 7 -
################################

You might also like