########## Data Manipulation (Pandas)
# Install the dependencies from a shell first; these are shell commands,
# not Python statements, so they must not appear as bare lines in the script:
#   pip install pandas
#   pip install numpy
#   pip install matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style

# Apply the 'ggplot' style sheet to any matplotlib figures drawn later.
style.use('ggplot')
# Sample data: three equally long columns, one entry per day.
web_stats = {
    'Day': [1, 2, 3, 4, 5, 6],
    'Visitors': [4, 4, 4, 5, 5, 6],
    'Bounce_Rate': [4, 4, 4, 5, 5, 6],
}
df = pd.DataFrame(web_stats)

# Quick ways to inspect the frame (uncomment to try them):
#print(df)
#print(df.head())
#print(df.tail())
#print(df.tail(2))

# set_index returns a new frame indexed by 'Day'; df itself is not changed
# by this line, only the returned copy is printed.
print(df.set_index('Day'))

# With inplace=True the index is set on df itself, so every statement
# below sees 'Day' as the index rather than as a column.
df.set_index('Day', inplace=True)

# Both lines select the same column: by key and by attribute access.
print(df['Visitors'])
print(df.Visitors)

# Select several columns by passing a list of column names.
print(df[['Visitors', 'Bounce_Rate']])

# Convert one column to a plain Python list.
print(df.Visitors.tolist())

# Convert the selected columns to a NumPy array.
print(np.array(df[['Visitors', 'Bounce_Rate']]))
###################
################### IO (Converter to anything)
import pandas as pd

# Load the CSV and use its 'Date' column as the index.
df = pd.read_csv('FileName.csv')
df.set_index('Date', inplace=True)
print(df.head())

# Round-trip through CSV. read_csv is a pandas module-level function (a
# DataFrame has no read_csv method) and it returns a new frame, so the
# result has to be assigned. index_col=0 restores the index from column 0.
df.to_csv('NewFile.csv')
df = pd.read_csv('NewFile.csv', index_col=0)

# Rename the column.
# NOTE(review): assigning a one-element list to df.columns only works when
# the frame has exactly one data column - confirm against FileName.csv.
df.columns = ['Austin_HPI']
print(df.head())

# Write the frame without a header row ...
df.to_csv('NewFile2.csv', header=False)

# ... and define the headers while reading that header-less file back.
# (Reading 'NewFile.csv' here would turn its header line into a data row.)
df = pd.read_csv('NewFile2.csv', names=['Date', 'Austin_HPI'], index_col=0)
print(df.head())

# To HTML
df.to_html('eample.html')

# Column rename: the key must match the existing column name exactly,
# otherwise rename leaves the frame unchanged.
df.rename(columns={'Austin_HPI': '7th Digit of Autin'}, inplace=True)
######### Quandl
#########
# Install from a shell first (not a Python statement):  pip install quandl
# Data set used below: Housing Price Index (Freddie Mac)
import quandl  # `pip install quandl` provides the lowercase module name
import pandas as pd

# Read the API key from a local file. The `with` block closes the file, and
# strip() removes a trailing newline that would otherwise be sent as part
# of the key.
with open('quadlkey.text', 'r') as key_file:
    api_key = key_file.read().strip()

# NOTE(review): current quandl releases document `api_key=`; confirm that
# the older `authtoken=` keyword is still accepted by the installed version.
df = quandl.get('FMAC/HPI_AK', authtoken=api_key)
print(df.head())

# read_html returns a list of DataFrames, one per table found on the page.
# NOTE(review): the URL below looks like a placeholder / mangled link -
# replace it with the real page that lists the US states.
fiddy_states = pd.read_html('http://list of us')
print(fiddy_states)

# Only column 0 of the first table is needed.
# NOTE(review): assumes the table's columns are labelled 0, 1, ... - confirm.
print(fiddy_states[0][0])

# Skip the first entry of that column (presumably the header cell) and
# build one dataset code per remaining entry, in the same 'FMAC/HPI_<abbv>'
# form as the 'FMAC/HPI_AK' code used above.
for abbv in fiddy_states[0][0][1:]:
    print("FMAC/HPI_" + str(abbv))
#Part 5 - Combining Data Frame
##############################
import pandas as pd

# Sample frames for this part, defined here so the snippet runs on its own
# instead of relying on names that are only created further down the file.
# df1 and df2 share the same columns; df3 shares only 'HPI' with them.
df1 = pd.DataFrame({'HPI': [80, 85, 88, 85],
                    'Int_rate': [2, 3, 2, 2],
                    'US_GDP_Thousands': [50, 55, 65, 55]},
                   index=[2001, 2002, 2003, 2004])
df2 = pd.DataFrame({'HPI': [80, 85, 88, 85],
                    'Int_rate': [2, 3, 2, 2],
                    'US_GDP_Thousands': [50, 55, 65, 55]},
                   index=[2005, 2006, 2007, 2008])
df3 = pd.DataFrame({'HPI': [80, 85, 88, 85],
                    'Unemployment': [7, 8, 9, 6],
                    'Low_tier_HPI': [50, 52, 50, 53]},
                   index=[2001, 2002, 2003, 2004])

# Concatenate frames that have the same columns: rows are stacked.
concat = pd.concat([df1, df2])

# Concatenate frames with different columns: the result has the union of
# all columns, and cells a frame did not provide are filled with NaN.
concat = pd.concat([df1, df2, df3])

# Add one row at the end of df1. The Series labels must be df1's column
# names so each value lands in the matching column.
s = pd.Series([80, 20, 50], index=['HPI', 'Int_rate', 'US_GDP_Thousands'])

# DataFrame.append was removed in pandas 2.0; turning the Series into a
# one-row frame and concatenating is the supported equivalent.
# ignore_index=True renumbers the rows 0..n-1.
df4 = pd.concat([df1, s.to_frame().T], ignore_index=True)
print(df4)
#Part 6 - Merging & Joining Data Frames
########################################
import pandas as pd

# df1 and df2 hold identical values and differ only in their year index.
_shared_columns = {
    'HPI': [80, 85, 88, 85],
    'Int_rate': [2, 3, 2, 2],
    'US_GDP_Thousands': [50, 55, 65, 55],
}
df1 = pd.DataFrame(_shared_columns, index=[2001, 2002, 2003, 2004])
df2 = pd.DataFrame(_shared_columns, index=[2005, 2006, 2007, 2008])

# df3 covers the same years as df1 but shares only the 'HPI' column with it.
df3 = pd.DataFrame(
    {
        'HPI': [80, 85, 88, 85],
        'Unemployment': [7, 8, 9, 6],
        'Low_tier_HPI': [50, 52, 50, 53],
    },
    index=[2001, 2002, 2003, 2004],
)

# Merge: rows are matched on the values of the key column(s); use it when
# the index does not matter to you.
print(df1.merge(df2, on='HPI'))

# Merging on two key columns keeps 'Int_rate' as a single shared column
# instead of producing a suffixed copy from each frame.
print(df1.merge(df2, on=['HPI', 'Int_rate']))

# Join: rows are matched on the index, so 'HPI' is made the index of both
# frames before joining.
df1 = df1.set_index('HPI')
df3 = df3.set_index('HPI')
joined = df1.join(df3)
print(joined)

# Outer merge. At this point 'HPI' is the index of df1 but still a regular
# column of df2; how='outer' keeps keys that appear in either frame.
print(df1.merge(df2, on='HPI', how='outer'))
##### Part - 7 -
################################