Week 13 1-Pandas
Week 13 1-Pandas
Pandas Series
A pandas Series is like a column in a table. It is a one-dimensional array that can hold data of any type.
import pandas as pd
# Wrap a plain Python list in a Series; with no index supplied, the values
# are labelled 0, 1, 2.
x = [1, 7, 2]
y = pd.Series(data=x)
print(y)
0 1
1 7
2 2
dtype: int64
# Create labels: with the index argument you can name your own labels:
import pandas as pd
# The index argument swaps the default positional labels for custom ones.
x = [1, 7, 2]
y = pd.Series(data=x, index=["x", "y", "z"])
print(y)
x 1
y 7
z 2
dtype: int64
1
""" You can also use a key/value object, such as a dictionary,
when creating a Series.
Here we create a simple pandas Series from a dictionary.
"""
import pandas as pd
# A dict maps directly onto a Series: the keys become the index labels
# and the values become the data.
cal = {
    "day1": 420,
    "day2": 380,
    "day3": 390,
}
x = pd.Series(data=cal)
print(x)
day1 420
day2 380
day3 390
dtype: int64
# now we will create a series using only data from day1 and day2
import pandas as pd
# Passing index alongside a dict keeps only the listed keys.
cal = {
    "day1": 420,
    "day2": 380,
    "day3": 390,
}
result = pd.Series(data=cal, index=["day1", "day2"])
print(result)
day1 420
day2 380
dtype: int64
Data Frame
"""DataFrame: Data sets in pandas are usually multidimensional tables,
and they are called DataFrames.
A Series is like a single column; a DataFrame is the whole table.
"""
# we will now create a dataframe from 2 series.
import pandas as pd
# Each dict key becomes a column name; each list supplies that column's rows.
x = {
    "cal": [420, 380, 390],
    "duration": [50, 40, 45],
}
y = pd.DataFrame(data=x)
print(y)
cal duration
0 420 50
1 380 40
2 390 45
# Locate row: pandas uses the loc attribute to return one or more
# specified rows.
import pandas as pd
# loc[0] picks the row labelled 0 and hands it back as a Series.
data = {
    "cal": [420, 380, 390],
    "dur": [50, 40, 45],
}
x = pd.DataFrame(data=data)
print(x.loc[0])
cal 420
dur 50
Name: 0, dtype: int64
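# Passing a list of labels, e.g. print(x.loc[[0, 1]]), returns a DataFrame instead
# (the statement that produced the output below appears to be missing here):
cal dur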
0 420 50
1 380 40
# named Index: with the index arg, you can name your own index.
import pandas as pd
# The index argument gives each row a name instead of a position.
data = {
    "cal": [420, 380, 390],
    "dur": [50, 40, 45],
}
x = pd.DataFrame(data=data, index=["day1", "day2", "day3"])
print(x)
cal dur
day1 420 50
day2 380 40
day3 390 45
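# A named index can be used with loc, e.g. print(x.loc["day2"])
# (the statement that produced the output below appears to be missing here):
cal 380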
dur 40
Name: day2, dtype: int64
# output in a dataframe:
import pandas as pd
# Selecting with a list of row names yields a DataFrame holding just those rows.
data = {
    "cal": [420, 380, 390],
    "dur": [50, 40, 45],
}
x = pd.DataFrame(data=data, index=["day1", "day2", "day3"])
print(x.loc[["day1", "day2"]])
cal dur
day1 420 50
day2 380 40
Pandas CSV
# Load the data from the CSV file (data.csv) into a DataFrame.
import pandas as pd
# Read the whole CSV file into a DataFrame and print it.
# The file name is written in lower case so that it matches every other
# read_csv call in this file; on a case-sensitive filesystem 'Data.csv'
# and 'data.csv' are two different files.
x = pd.read_csv('data.csv')
print(x)
import pandas as pd
# head() without an argument shows the first five rows.
x = pd.read_csv(filepath_or_buffer='data.csv')
print(x.head(n=5))
# Viewing the data: one of the most used methods for a quick overview
# of the DataFrame is the head() method. This method returns the headers
# and a specified number of rows.
# here we will print the 1st 10 rows in the dataframe.
import pandas as pd
# Ask head() for ten rows instead of its default.
x = pd.read_csv(filepath_or_buffer='data.csv')
print(x.head(n=10))
Duration Pulse Maxpulse Calories
0 60 110 130 409.1
1 60 117 145 479.0
2 60 103 135 340.0
3 45 109 175 282.4
4 45 117 148 406.0
5 60 102 127 300.0
6 60 110 136 374.0
7 45 104 134 253.3
8 30 109 133 195.1
9 60 98 124 269.0
import pandas as pd
# tail() is the counterpart of head(): without an argument it shows the
# last five rows.
x = pd.read_csv(filepath_or_buffer='data.csv')
print(x.tail(n=5))
# What if you want information about the data in the DataFrame?
# Use info().
import pandas as pd
# Load the CSV file and show a summary of the DataFrame.
df = pd.read_csv('data.csv')
# info() prints its report itself and returns None, so wrapping the call in
# print() adds a final "None" line after the report. Calling df.info() on
# its own would produce the same summary without that extra line.
print(df.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Duration 169 non-null int64
1 Pulse 169 non-null int64
2 Maxpulse 169 non-null int64
3 Calories 164 non-null float64
dtypes: float64(1), int64(3)
memory usage: 5.4 KB
None