We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF or read online on Scribd
You are on page 1/ 6
In
In
In
In
In
In
In
In
In
In
In
In
In
In
import pandas as pd
dataset = pd.read_csv(r"D:\datasets\DemographicData.csv")
dataset
: # Explore data in python
#1, Full dataframe
#2. How many rows & columns. you have to chk the row becuase the no. of raw sh
len(dataset) #195 rows imported (this is for tracking later part )
: #3. see columns
dataset.columns
: #4. Number of columns
len(dataset.columns)
: #5. top rows
dataset.head() # it will print top 5 rows
dataset.head(2)
: #6. Bottom rows
dataset.tail() #last 5 rows
dataset.tail(3)
: #7. information of the column
dataset.info() #strings are called as object
: #8. get stats on the columns
dataset.describe() #it will work Like a statistic fun
dataset.describe().transpose() #transpose convert column into rows
: # Renaming columns of a dataframeIn
In
In
In
In
In
In
In
In
In
In
In
In
In
In
dataset.head()
dataset.columns
dataset.columns
dataset .head()
Cab,
dataset.columns = ['CountryName', 'CountryCode', 'BirthRate', 'InternetUsers',
dataset.head()
: # subsetting a dataframes in pandas
#1. Rows
#2. Columns
#3. combine the two
# Rows:
dataset[21:26] #how python know that only this is rows based on index
dataset[:]
dataset[:10]
dataset .head(10)
# How to reverse the dataframe
dataset[::-1]
dataset[::-1]
dataset[ : -1]
dataset
: # get only every 20th row
dataset[::20]
In
In
In
In
In
In
In
In
In
In
In
In
In
In
In
In
In
: # COLUMNS:
dataset.columns
dataset .head()
dataset['CountryName'].head(10)
['CountryName', 'BirthRate']
dataset[['CountryName', 'BirthRate']].head()
dataset.head()
dataset[ 'BirthRate']
: # combine the two
dataset[4:8][['CountryName', 'BirthRate']]
dataset[['CountryName', 'BirthRate']][4:8]
df1 = dataset[['CountryName', 'BirthRate']]
df1
df2 = dataset[4:8]
df2
dataset[4:8]
af
: # Basic operations of dataframe
dataset.head()
dataset[['CountryCode', 'BirthRate', 'InternetUsers']][:10] #subset dataframe
In
In
In
In
In
In
In
In
In
In
In
In
In
In
In
In
In
dataset. head()
# mathematical operation
dataset.BirthRate * dataset.InternetUsers
# Add a column
dataset['myCalc'] = dataset.BirthRate * dataset.InternetUsers
dataset.head()
# Remove a column
dataset.drop("BirthRate", axis=1)
dataset .drop('myCalc',axis = 1)
dataset.head()
dataset.columns[2]
dataset .InternetUsers
dataset .InternetUsers
dataset. InternetUsers<3 #we are checking given condition if its correct true ¢
Filter = dataset.InternetUsers < 2
dataset.InternetUsers < 2
pd.set_option('display.max_rows', 500)
Filter
datasetIn
In
In
In
In
In
In
In
In
In
In
In
In
In
In
dataset[3:7]
stats[30:40]
stats[Filter] # it will keep only the rows where Filter is True
stats.BirthRate > 40
Filter2 = stats.BirthRate > 40
Filter2
stats[Filter2]
: #Filter and Filter2
Filter & Filter2
stats[Filter & Filter2]
stats[(stats.BirthRate > 40) & (stats.InternetUsers < 2)]
stats.head()
stats
stats[stats.IncomeGroup == 'Low income']
: # How to get the unique categories
stats.IncomeGroup.unique()
: # Introduction to seaborn # seaborn is very powerfull visualizatio(STATISTIC V
import matplotlib.pyplot as plt # visualization
import seaborn as sns # distribution visualization
%matplotlib inline
plt.rcParams['figure.figsize'] = 8,4
import warnings
warnings.filterwarnings('ignore')
In [ ]:
Int]
In (J
In[(]
In [ J:
In]:
In]:
In[}:
In[]
In]:
Inf]:
Inf]
import warnings
warnings.filterwarnings('ignore')
stats.head()
# Distributions:
vis1 = sns.distplot(stats["BirthRate"])
vis1 = sns.distplot(stats["InternetUsers"], bin:
# BOX PLOTS:
vis2 = sns.boxplot(data = stats, x="IncomeGroup", y="BirthRate" )
# refer to seaborn gallery
# visualizing with seaborn
vis3 = sns.lmplot(data = stats,x = 'InternetUsers', y = 'BirthRate', fit_reg =
vis4 = sns.lmplot(data = stats,x = 'InternetUsers', y = 'BirthRate')
vis5 = sns.lmplot(data = stats,x = 'InternetUsers', y = 'BirthRate',
fit_reg = False,hue = 'IncomeGroup') #hue - parameter for cc
vis5 = sns.lmplot(data = stats,x = 'InternetUsers', y = 'BirthRate',
fit_reg = False,hue = 'IncomeGroup', size = 10)
In this section we learned
1> importing data into python 2> Dataframe via pandas 3> exploring datasets:
head() tail() info() describe() 4> Renaming columns 5> subsetting dataframes 6> Basic
operations with dataframe 8> filtering data frames 9> seaborn introduction