Week1 Numpy, Pandas (178) .Ipynb Colab
Week1 Numpy, Pandas (178) .Ipynb Colab
ipynb - Colab
import numpy as np
import numpy as np

# Create an ndarray from a Python list and display it.
values = [1, 2, 3, 4, 5]
arr = np.array(values)
print(arr)
[1 2 3 4 5]
import numpy as np

# np.array() also accepts a tuple as its input sequence.
values = (1, 2, 3, 4, 5)
arr = np.array(values)
print(arr)
[1 2 3 4 5]
import numpy as np

# A bare scalar passed to np.array() yields a 0-D array.
scalar = 42
arr = np.array(scalar)
print(arr)
42
import numpy as np

# A flat list of scalars yields a 1-D array.
elements = [1, 2, 3, 4, 5]
arr = np.array(elements)
print(arr)
[1 2 3 4 5]
import numpy as np

# A list of equal-length rows yields a 2-D array.
first_row = [1, 2, 3]
second_row = [4, 5, 6]
arr = np.array([first_row, second_row])
print(arr)
[[1 2 3]
[4 5 6]]
import numpy as np

# Indexing: read two elements by position and add them.
arr = np.array([1, 2, 3, 4])
third, fourth = arr[2], arr[3]
print(third + fourth)
import numpy as np
# np.mean(): compute the arithmetic mean along the specified axis.
data = [1, 2, 3, 4, 5]
mean_value = np.mean(data)
print("Mean value:", mean_value)
https://colab.research.google.com/drive/1qwaH1GyJBP9YIkz6cQk4o_FZZunCeHhO#scrollTo=cG2TGH6DPEjK&printMode=true 1/6
7/3/24, 9:28 PM Week1-Numpy,pandas(178).ipynb - Colab
import numpy as np
# np.nanmedian(): compute the median of an array, ignoring any NaN values.
data = [1, 2,np.nan, 4, 5, np.nan, 7]
median = np.nanmedian(data)
print("The median, ignoring NaN values, is:", median)
import numpy as np
# np.std(): compute the standard deviation along the specified axis.
data = [1, 2, 3, 4, 5]
std = np.std(data)
print("Standard Deviation value:", std)
import numpy as np
# np.var(): compute the variance of the data.
data = [1, 2, 3, 4, 5]
var = np.var(data)
print("Variance value:", var)
import numpy as np
# np.unique(): find the unique elements of an array.
data = [1, 2, 2, 3, 4, 4, 4, 5, 5, 6]
unique_elements = np.unique(data)
print("Unique elements in the data are:", unique_elements)
import pandas as pd
import pandas as pd
# pd.read_csv(): read the CSV file into `data`.
data=pd.read_csv('Airline Dataset.csv')
import pandas as pd
data=pd.read_csv('Airline Dataset.csv')
# data.head(5): returns the first 5 rows.
data.head(5)
https://colab.research.google.com/drive/1qwaH1GyJBP9YIkz6cQk4o_FZZunCeHhO#scrollTo=cG2TGH6DPEjK&printMode=true 2/6
7/3/24, 9:28 PM Week1-Numpy,pandas(178).ipynb - Colab
Airport
Passenger First Last Airport Country Airport Departure Arrival
Gender Age Nationality Country Continents
ID Name Name Name Name Continent Date Airport
Code
Kugluktuk North
1 43872 Elwood Catt Male 62 Nicaragua CA Canada NAM 12/26/2022 YCO
Airport America
Grenoble-
2 42633 Darby Felgate Male 67 Russia Isère FR France EU Europe 1/18/2022 GNB
Airport
Ottawa /
North
3 78493 Dominica Pyle Female 71 China Gatineau CA Canada NAM 9/16/2022 YND
America
Airport
import pandas as pd
data=pd.read_csv('Airline Dataset.csv')
# data.describe(): generate descriptive statistics (excluding NaN values).
data.describe()
https://colab.research.google.com/drive/1qwaH1GyJBP9YIkz6cQk4o_FZZunCeHhO#scrollTo=cG2TGH6DPEjK&printMode=true 3/6
7/3/24, 9:28 PM Week1-Numpy,pandas(178).ipynb - Colab
1 to 8 of 8 entries Filter
index Passenger ID Age
count 98619.0 98619.0
mean 55168.68952230301 45.50402052342855
std 25985.44369560229 25.92984855207266
min 10000.0 1.0
25% 32608.0 23.0
50% 55338.0 46.0
75% 77695.0 68.0
max 99999.0 90.0
Show 25 per page
Like what you see? Visit the data table notebook to learn more about interactive tables.
Distributions
2-d distributions
Values
import pandas as pd
data=pd.read_csv('Airline Dataset.csv')
# data.shape: provides the dimensions of the dataset.
data.shape
(98619, 15)
import pandas as pd

# Build a single-column DataFrame of category labels and display it.
categories = ['A', 'B', 'A', 'C', 'B', 'A', 'A', 'C', 'C', 'C', 'B', 'A']
data = {'Category': categories}
df = pd.DataFrame(data)
print("Sample DataFrame:")
print(df)

# Tally how often each label occurs in the 'Category' column.
category_column = df['Category']
value_counts = category_column.value_counts()
https://colab.research.google.com/drive/1qwaH1GyJBP9YIkz6cQk4o_FZZunCeHhO#scrollTo=cG2TGH6DPEjK&printMode=true 4/6
7/3/24, 9:28 PM Week1-Numpy,pandas(178).ipynb - Colab
print("\nValue counts for the 'Category' column:")
# Series.value_counts(): returns an object containing counts of unique values, in descending order.
print(value_counts)
Sample DataFrame:
Category
0 A
1 B
2 A
3 C
4 B
5 A
6 A
7 C
8 C
9 C
10 B
11 A
import pandas as pd

# Small table with gaps in 'Age' and 'City' to demonstrate dropna().
names = ['Alice', 'Bob', 'Charlie', 'Dave', 'Eve']
ages = [25, None, 35, None, 28]
cities = ['New York', 'Los Angeles', None, 'Chicago', 'Boston']
data = {'Name': names, 'Age': ages, 'City': cities}
df = pd.DataFrame(data)

# Show the table before any cleaning.
print("Original DataFrame:")
print(df)

# df.dropna(): remove every row that has a missing value.
df_cleaned = df.dropna()
print("\nDataFrame after dropping rows with missing values:")
print(df_cleaned)

# axis=1: drop every column that has a missing value (NaN) instead.
df_cleaned_columns = df.dropna(axis=1)
print("\nDataFrame after dropping columns with missing values:")
print(df_cleaned_columns)
Original DataFrame:
Name Age City
0 Alice 25.0 New York
1 Bob NaN Los Angeles
2 Charlie 35.0 None
3 Dave NaN Chicago
4 Eve 28.0 Boston
https://colab.research.google.com/drive/1qwaH1GyJBP9YIkz6cQk4o_FZZunCeHhO#scrollTo=cG2TGH6DPEjK&printMode=true 5/6
7/3/24, 9:28 PM Week1-Numpy,pandas(178).ipynb - Colab
2 Charlie
3 Dave
4 Eve
import pandas as pd
data = {
https://colab.research.google.com/drive/1qwaH1GyJBP9YIkz6cQk4o_FZZunCeHhO#scrollTo=cG2TGH6DPEjK&printMode=true 6/6