Pandas Filtering
Pandas Filtering
You can filter rows by applying a condition that returns a boolean Series.
import pandas as pd
# Sample data: one list per column, describing four people.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston'],
)
# Each key of `data` becomes a column of the DataFrame.
df = pd.DataFrame(data)
2 Charlie 35 Chicago
3 David 40 Houston
2. Multiple Conditions
You can combine multiple conditions using & (and), | (or), and ~ (not).
The query() method allows you to filter rows using a query string.
5. Filtering Columns
Name City
0 Alice New York
1 Bob Los Angeles
2 Charlie Chicago
3 David Houston
Name City
2 Charlie Chicago
3 David Houston
Name Age
1 Bob 30
2 Charlie 35
Summary
loc and iloc are used for label-based and integer-based indexing,
respectively.
import pandas as pd
# Sample data: ten people, one list per column.
data = dict(
    Name=[
        'Alice', 'Bob', 'Charlie', 'David', 'Eva',
        'Frank', 'Grace', 'Hank', 'Ivy', 'Jack',
    ],
    Age=[25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
    City=[
        'New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix',
        'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose',
    ],
)
# Each key of `data` becomes a column of the DataFrame.
df = pd.DataFrame(data)
# Example 10: keep only the rows whose Age exceeds the average Age
mean_age = df['Age'].mean()
older_than_mean = df['Age'] > mean_age  # boolean mask, one entry per row
print(df[older_than_mean])
2. Multiple Conditions
# Multiple conditions: every comparison is wrapped in parentheses because
# & and | bind more tightly than ==, !=, >, <= and friends in Python.

# Example 3: Filter rows where Age is not 30 and City is not 'New York'
print(df[(df['Age'] != 30) & ~(df['City'] == 'New York')])
# Example 4: Filter rows where Age is between 30 and 50 (both inclusive)
# and City is not 'Phoenix'
print(df[(df['Age'] >= 30) & (df['Age'] <= 50) & (df['City'] != 'Phoenix')])
# Example 6: Filter rows where Age is even and City starts with 'S'
print(df[(df['Age'] % 2 == 0) & (df['City'].str.startswith('S'))])
# Example 7: Filter rows where Name contains 'a' and Age > 40
print(df[df['Name'].str.contains('a') & (df['Age'] > 40)])
# Example 8: Filter rows where City is 'Chicago' or 'Houston' and Age > 30
print(df[df['City'].isin(['Chicago', 'Houston']) & (df['Age'] > 30)])
# Example 9: Filter rows where Name length is greater than 4 and Age is odd
# (.str.len() computes the length of every name without a Python-level lambda)
print(df[(df['Name'].str.len() > 4) & (df['Age'] % 2 != 0)])
# Example 10: Filter rows where Age is greater than the mean age and
# City is not 'San Jose'
print(df[(df['Age'] > df['Age'].mean()) & (df['City'] != 'San Jose')])
# Example 9: Filter rows where City is not in ['New York', 'Los Angeles']
# (isin builds a boolean mask of matching rows; ~ inverts that mask)
print(df[~df['City'].isin(['New York', 'Los Angeles'])])
# Example 10: Filter rows where Age is greater than the mean age
# (the whole condition is handed to query() as a string expression)
print(df.query('Age > Age.mean()'))
# Filtering with .loc: the first indexer is a boolean row mask, the second
# selects columns by label.

# Example 1: Filter rows where Age > 30 and select Name and City columns
print(df.loc[df['Age'] > 30, ['Name', 'City']])
# Example 2: Filter rows where City is 'Chicago' and select all columns
print(df.loc[df['City'] == 'Chicago', :])
# Example 5: Filter rows where Age > 30 and select columns by name
# (a label slice includes both endpoints, so this is Name through City)
print(df.loc[df['Age'] > 30, 'Name':'City'])
# Example 6: Filter rows where Name starts with 'A' and select the first 3
# columns. .loc slices by label, so an integer slice such as :3 fails on
# string column names; df.columns[:3] turns the positions into labels.
print(df.loc[df['Name'].str.startswith('A'), df.columns[:3]])
# Example 8: Filter rows where Age is even and select specific columns
print(df.loc[df['Age'] % 2 == 0, ['Name', 'Age']])
8. Filter rows where City is in the list ['Chicago', 'Houston', 'San Diego'].
10. Filter rows where Age is greater than the mean age.
13. Filter rows where Age is less than 40 and City is "Los Angeles".
14. Filter rows where Name contains 'a' and Age is greater than
40.
15. Filter rows where City is not in ['New York', 'Los Angeles'].
20. Filter rows where Name is in the list ['Alice', 'Bob', 'Charlie'].
2. Multiple Conditions
3. Filter rows where Age is not 30 and City is not "New York".
4. Filter rows where Age is between 30 and 50 and City is not
"Phoenix".
6. Filter rows where Age is even and City starts with 'S'.
7. Filter rows where Name contains 'a' and Age > 40.
8. Filter rows where City is "Chicago" or "Houston" and Age > 30.
9. Filter rows where Name length is greater than 4 and Age is odd.
10. Filter rows where Age > mean age and City is not "San Jose".
12. Filter rows where Name contains 'a' or 'e' and Age > 30.
13. Filter rows where City is not "Chicago" and Age is even.
14. Filter rows where Name starts with 'C' or 'D' and Age > 35.
15. Filter rows where City is "San Diego" or "San Jose" and Age <
60.
16. Filter rows where Name length is 5 and Age is between 30 and
50.
17. Filter rows where City contains 'San' and Age > 40.
18. Filter rows where Name ends with 'e' and Age is odd.
16. Filter rows where City is not in ['New York', 'Los Angeles'].
19. Filter rows where City contains 'San' and Age > 40.
1. Filter rows where Age > 30 and select Name and City columns.
4. Filter rows by index and select specific columns (columns 0 and 2).
5. Filter rows where Age > 30 and select columns Name to City.
6. Filter rows where Name starts with 'A' and select the first 3 columns.
8. Filter rows where Age is even and select Name and Age columns.
10. Filter rows where City is "San Diego" and select all columns.
11. Filter rows where Age < 40 and select Name and Age columns.
12. Filter rows by index [3, 6, 9] and select columns [1, 2].
15. Filter rows where Age > 50 and select Name and Age columns.
16. Filter rows by index [0, 1, 2] and select columns [0, 2].
Answers
Let me know when you're ready for the answers, and I'll provide them! 😊
Sure! Below are the answers to the 20 questions for each filtering
technique.
python
Copy
import pandas as pd
# Sample DataFrame: the ten-row Name / Age / City data set that the answers
# below filter on. The Name and City columns are required by the answers
# that use df['Name'] and df['City'].
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace',
             'Hank', 'Ivy', 'Jack'],
    'Age': [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix',
             'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose'],
}
df = pd.DataFrame(data)
# Answers (the trailing number on each line appears to be the question it answers)
print(df[df['City'] == 'Chicago']) # 2: City equals 'Chicago'
print(df[df['Name'].str.startswith('A')]) # 3: Name starts with 'A'
print(df[df['Name'].str.contains('a')]) # 6: Name contains a lowercase 'a'
print(df[df['Age'] % 2 == 0]) # 7: Age is even
print(df[df['City'].str.startswith('S')]) # 11: City starts with 'S'
print(df[df['Name'].str.endswith('e')]) # 12: Name ends with 'e'
print(df[df['Age'] % 2 != 0]) # 16: Age is odd
print(df[df['City'].str.contains('San')]) # 18: City contains 'San'
print(df[df['Age'].isin(df['Age'].nlargest(3))]) # 19: Age is one of the three largest Age values
2. Multiple Conditions
python
Copy
# Answers (the number in each comment appears to be the question it answers)
# 4: Age between 30 and 50 (both inclusive) and City is not 'Phoenix'
print(df[(df['Age'] >= 30) & (df['Age'] <= 50) & (df['City'] != 'Phoenix')])
# 5: Name starts with 'A' or with 'B'
print(df[df['Name'].str.startswith('A') | df['Name'].str.startswith('B')]) # 5
python
Copy
# Answers (the trailing number on each line appears to be the question it answers)
print(df[df['City'].isin(['Chicago', 'Houston'])]) # 1: City is 'Chicago' or 'Houston'
print(df[df['Name'].isin(['Alice', 'Bob'])]) # 2: Name is 'Alice' or 'Bob'
print(df[df['City'].str.startswith('S')]) # 4: City starts with 'S'
print(df[df['Name'].str.endswith('e')]) # 5: Name ends with 'e'
print(df[df['City'].str.contains('San')]) # 6: City contains 'San'
print(df[df['Age'].isin(df['Age'].nlargest(3))]) # 7: Age is one of the three largest Age values
print(df[df['Age'].isin(range(30, 51))]) # 10: Age is an integer from 30 to 50 inclusive
python
Copy
# Answers (the trailing number on each line appears to be the question it answers)
# NOTE(review): the queries that call .str methods pass engine='python';
# presumably the default engine cannot evaluate them - confirm against the
# pandas version in use.
print(df.query('City == "Chicago"')) # 2: City equals "Chicago"
print(df.query('Name.str.startswith("A")', engine='python')) # 3: Name starts with "A"
print(df.query('Name.str.contains("a")', engine='python')) # 6: Name contains "a"
print(df.query('Age % 2 == 0')) # 7: Age is even
print(df.query('City.str.startswith("S")', engine='python')) # 11: City starts with "S"
print(df.query('Name.str.endswith("e")', engine='python')) # 12: Name ends with "e"
print(df.query('City.str.contains("San")', engine='python')) # 13: City contains "San"
# The @ prefix refers to the Python variable df rather than a column.
print(df.query('Age in @df.Age.nlargest(3)')) # 14: Age is one of the three largest Age values
print(df.query('Age % 2 != 0')) # 17: Age is odd
python
Copy
# Answers (the trailing number on each line appears to be the question it answers)
# 3: rows at integer positions 2 through 5 (iloc excludes the stop position)
print(df.iloc[2:6]) # 3
# 6: rows whose Name starts with 'A', first three columns. .loc selects by
# label, so the positional ":3" is expressed as the labels df.columns[:3];
# an integer slice on string column names raises TypeError.
print(df.loc[df['Name'].str.startswith('A'), df.columns[:3]]) # 6