56 Assignments
56 Assignments
# Solve a system of two linear equations with SciPy:
#      x +  y = 30
#     4x + 9y = 150
# The imports live here because nothing earlier in the file brings `np` or
# `linalg` into scope.
import numpy as np
from scipy import linalg

# Coefficient matrix (one row per equation) and right-hand-side vector.
testQuestionVariable = np.array([[1, 1], [4, 9]])
testQuestionValue = np.array([30, 150])

# linalg.solve returns the vector [x, y] that satisfies both equations.
testQuestionSolution = linalg.solve(testQuestionVariable, testQuestionValue)
testQuestionSolution
# Last five Olympics: host city, year and number of participating countries.
# pandas is imported here because no earlier line in the file imports it.
import pandas as pd

olympic_data_list = {
    'HostCity': ['London', 'Beijing', 'Athens', 'Sydney', 'Atlanta'],
    'Year': [2012, 2008, 2004, 2000, 1996],
    'No. of Participating Countries': [205, 204, 201, 200, 197],
}
# Dict of equal-length lists: each key becomes a column.
df_olympic_data = pd.DataFrame(olympic_data_list)
df_olympic_data

# Dict of dicts: outer keys become columns, inner keys become row labels.
olympic_data_dict = {'London': {2012: 205}, 'Beijing': {2008: 204}}
df_olympic_data_dict = pd.DataFrame(olympic_data_dict)
df_olympic_data_dict

# Select the host-city column by attribute access.
df_olympic_data.HostCity

# Use the describe function to summarise the content. `describe` is a method,
# so it has to be called; without the parentheses the expression only yields
# the bound method object instead of the summary table.
df_olympic_data_summary = df_olympic_data.describe()
df_olympic_data_summary
# Build a DataFrame from a dict of Series that share the same year index.
# Each statement is wrapped in brackets so it can span several lines; the
# original broke an assignment after `=` and split a string literal across
# two lines, neither of which is valid Python.
olympic_series_participation = pd.Series(
    [205, 204, 201, 200, 197],
    index=[2012, 2008, 2004, 2000, 1996],
)
olympic_series_country = pd.Series(
    ['London', 'Beijing', 'Athens', 'Sydney', 'Atlanta'],
    index=[2012, 2008, 2004, 2000, 1996],
)
df_olympic_series = pd.DataFrame({
    'No. of Participating Countries': olympic_series_participation,
    'Host Cities': olympic_series_country,
})
df_olympic_series
import numpy as np
# Wrap a NumPy array of years in a dict and build a one-column DataFrame
# from it; the dict key becomes the column name.
np_array = np.array((2012, 2008, 2004, 2006))
dict_ndarray = dict(year=np_array)
df_ndarray = pd.DataFrame(data=dict_ndarray)
df_ndarray
Create DataFrame from DataFrame object
# Build a new DataFrame from the existing df_olympic_series DataFrame.
df_from_df = pd.DataFrame(data=df_olympic_series)
df_from_df
# Inspect the values held by the new frame.
df_from_df.values
View dataset
Select dataset
#import libraries
import numpy as np
import pandas as pd
# Create a DataFrame from a dict of Series for the Summer Olympics, 1996 to
# 2012. Both Series are indexed by year. Each statement is bracketed so it
# can legally span several lines; the original broke an assignment after `=`
# and split a string literal across two lines.
olympic_series_participation = pd.Series(
    [205, 204, 201, 200, 197],
    index=[2012, 2008, 2004, 2000, 1996],
)
olympic_series_country = pd.Series(
    ['London', 'Beijing', 'Athens', 'Sydney', 'Atlanta'],
    index=[2012, 2008, 2004, 2000, 1996],
)
df_olympic_series = pd.DataFrame({
    'No. of Participating Countries': olympic_series_participation,
    'Host Cities': olympic_series_country,
})
View Data
Select Data
# Select the 'Host Cities' column.
df_olympic_series['Host Cities']
# Select the 'No. of Participating Countries' column.
df_olympic_series['No. of Participating Countries']
# Label-based access: the row whose index label is 2012.
df_olympic_series.loc[2012]
# Integer-position based slicing: rows at positions 0 and 1.
df_olympic_series.iloc[0:2]
# Integer-position based scalar access: row position 3, column position 1.
df_olympic_series.iat[3, 1]
# Select rows by condition: number of participating countries above 200.
# The comparison builds a boolean mask that is used to filter the frame.
# The filtered frame is bound to resultSet, which the next statement reads;
# previously the name was never assigned.
resultSet = df_olympic_series[
    df_olympic_series['No. of Participating Countries'] > 200
]
# View the result.
resultSet
#MISSING VALUES
import pandas as pd
# Add first_series and second_series with `+` and display the sum.
# NOTE(review): neither first_series nor second_series is assigned before
# this point in the visible file — confirm they are defined earlier.
sum_of_series = first_series+second_series
sum_of_series
# NOTE(review): dropna_s and fillna_s are never assigned in the visible file.
# Presumably they were meant to hold dropna()/fillna(0) results derived from
# sum_of_series — confirm and restore the missing assignments.
dropna_s
# The result of this fillna(0) call is not stored anywhere.
dropna_s.fillna(0)
fillna_s
# Fill values with zeroes before performing the addition for missing indices.
fill_NaN_with_zeros_before_sum =first_series.add(second_series,fill_value=0)
fill_NaN_with_zeros_before_sum
import numpy as np
import pandas as pd
# Build a simple Series from the individual characters 'a'..'f' and print it.
first_series = pd.Series([*'abcdef'])
print(first_series)
# Build a Series from np_country and print it.
# NOTE(review): np_country is not assigned in the visible file — confirm it
# is defined earlier.
s_country = pd.Series(data=np_country)
print(s_country)
# Countries and their corresponding GDP per capita, printed as a Series
# indexed by country name. Despite its name, dict_country_gdp is a pd.Series.
# The constructor call is bracketed so it can span several lines; the
# original broke the assignment right after `=`, which is a syntax error.
dict_country_gdp = pd.Series(
    [
        52056.01781, 40258.80862, 40034.85063, 39578.07441, 39170.41371,
        37958.23146, 37691.02733, 36152.66676, 34706.19047, 33630.24604,
        33529.83052, 30860.12808,
    ],
    index=[
        'Luxembourg', 'Macao, China', 'Norway',
        'Japan', 'Switzerland', 'Hong Kong, China', 'United States',
        'Qatar', 'Iceland', 'Sweden',
        'Singapore', 'Denmark',
    ],
)
print(dict_country_gdp)
# Add the two vector series.
# NOTE(review): first_vector_series (and the initial second_vector_series)
# are not assigned in the visible file — confirm they are defined earlier.
first_vector_series + second_vector_series
# Now shuffle the index of the second vector series and add again.
second_vector_series = pd.Series([10, 20, 30, 40], index=list('adbc'))
first_vector_series + second_vector_series
# Now replace a few index labels with new ones in the second vector series
# and add once more.
second_vector_series = pd.Series([10, 20, 30, 40], index=list('abef'))
first_vector_series + second_vector_series
Assignment 01 FAA
Analyse the Federal Aviation Authority Dataset using Pandas
DESCRIPTION
Problem:
Analyze the Federal Aviation Authority (FAA) dataset using Pandas to do the following:
1. View
aircraft make name
state name
aircraft model name
text information
flight phase
event description type
fatal flag
2. Clean the dataset and replace the fatal flag NaN with “No”
3. Find the aircraft types and their occurrences in the dataset
4. Remove all the observations where aircraft names are not available
5. Display the observations where fatal flag is “Yes”
What to:
A dataset in CSV format is given for the Fire Department of New York City. Analyze the dataset to
determine:
1. The total number of fire department facilities in New York City
2. The number of fire department facilities in each borough
3. The facility names in Manhattan
#import libraries
import pandas as pd
# Read the Fire Department of New York City (FDNY) firehouse listing from CSV.
# The Windows path is a raw string so its backslashes are taken literally;
# in a normal string `\d` and `\F` are invalid escape sequences.
df_fdny_csv_data_raw = pd.read_csv(r'C:\dataset\FDNY_Firehouse_Listing.csv')
# View the datatypes of the frame just loaded. The previous code read
# `df_fdny_csv_data`, a name that is never assigned in the visible file.
df_fdny_csv_data_raw.dtypes