Lab Assignment 6
Lab Assignment 6
import pandas as pd
In [2]:
# Load the car listings into the working DataFrame; the path is resolved
# relative to the notebook's current working directory.
cars = pd.read_csv(filepath_or_buffer='cars.csv')
In [3]:
# Preview the top of the frame (head() returns 5 rows by default).
cars.head()
Out[3]: car_ID symboling CarName fueltype aspiration doornumber carbody drivewheel engin
0 1 3 alfa-romero
giulia gas std two convertible rwd
1 2 3 alfa-romero
stelvio gas std two convertible rwd
2 3 alfa-romero
1 Quadrifoglio gas std two hatchback rwd
3 4 2 audi 100 ls gas std four sedan fwd
4 5 2 audi 100ls gas std four sedan 4wd
5 rows × 26 columns
In [4]:
# Column names, non-null counts and dtypes of the freshly loaded frame.
cars.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 car_ID 205 non-null int64
1 symboling 205 non-null int64
2 CarName 205 non-null object
3 fueltype 205 non-null object
4 aspiration 205 non-null object
5 doornumber 205 non-null object
6 carbody 205 non-null object
7 drivewheel 205 non-null object
8 enginelocation 205 non-null object
9 wheelbase 205 non-null float64
10 carlength 205 non-null float64
11 carwidth 205 non-null float64
12 carheight 205 non-null float64
13 curbweight 205 non-null int64
14 enginetype 205 non-null object
15 cylindernumber 205 non-null object
16 enginesize 205 non-null int64
17 fuelsystem 205 non-null object
18 boreratio 205 non-null float64
19 stroke 205 non-null float64
20 compressionratio 205 non-null float64
21 horsepower 205 non-null int64
22 peakrpm 205 non-null int64
23 citympg 205 non-null int64
24 highwaympg 205 non-null int64
25 price 205 non-null float64
dtypes: float64(8), int64(8), object(10)
memory usage: 41.8+ KB
In [5]:
# Summary statistics (count, mean, std, min, quartiles, max); with default
# arguments on a mixed-dtype frame only the numeric columns are summarized.
cars.describe()
In [6]:
# First five rows of the highway-mpg column (head() defaults to n=5).
cars['highwaympg'].head()
Out[6]: 0 27
1 27
2 26
3 30
4 22
Name: highwaympg, dtype: int64
In [7]:
# Five highest highway-mpg values, shown with their original row labels.
cars['highwaympg'].nlargest(n=5)
Out[7]: 30 54
18 53
90 50
159 47
160 47
Name: highwaympg, dtype: int64
In [8]:
# Five most frequent highway-mpg values; value_counts() sorts by count,
# descending, and head() keeps the first 5 by default.
cars['highwaympg'].value_counts().head()
Out[8]: 25 19
38 17
24 17
30 16
32 16
Name: highwaympg, dtype: int64
In [10]:
# Last five rows of the price column.
# NOTE(review): the Out[11] rendered after this cell is a table of price values
# with integer counts (dtype int64), which is not what tail(5) on the float64
# price column returns. The In [11] cell that produced it appears to be missing
# from this export — restore it or re-run the notebook top to bottom.
cars['price'].tail(5)
Out[11]: 8916.5 2
16500.0 2
7609.0 2
7898.0 2
6692.0 2
Name: price, dtype: int64
In [12]:
# Distinct values present in the fueltype column.
cars['fueltype'].unique()
In [13]:
# Distinct values present in the enginelocation column.
cars['enginelocation'].unique()
In [14]:
# Split CarName at its first space: the text before it is the brand, the text
# after it is the model name (empty string when CarName has no space).
# Series.str.partition does this in one vectorized pass, replacing two
# row-wise DataFrame.apply(axis=1) calls that each ran a Python lambda per row.
# Missing CarName values propagate as NaN instead of raising AttributeError.
name_parts = cars['CarName'].str.partition(' ')
cars['brand'] = name_parts[0]
cars['name'] = name_parts[2]
In [15]:
# Distinct brand labels as extracted from CarName; inspect for misspellings
# and inconsistent capitalisation.
cars['brand'].unique()
In [16]:
brand_respellings = {'maxda': 'mazda', 'Nissan': 'nissan', 'porcshce': 'porsche'
cars['brand'] = cars['brand'].replace(brand_respellings)
In [17]:
# Re-list the distinct brand labels to check the result of the respelling
# replace() applied in the previous cell.
cars['brand'].unique()
In [18]:
cars['name'] = cars['name'].str.replace('|'.join(brand_respellings.keys()), lamb
In [19]:
# Distinct model names after the str.replace pass over the misspelled brand keys.
cars['name'].unique()
In [20]:
# Rename the model-name column to 'brandname' and normalise 'car_ID' to 'carid'.
# The result is rebound rather than using inplace=True: same resulting frame,
# but the assignment makes the state change explicit and keeps the call chainable.
# Labels that are no longer present are ignored by rename(), so re-running is safe.
cars = cars.rename(columns={'name': 'brandname', 'car_ID': 'carid'})
In [21]:
# Verify the schema after the rename: expect 'carid' and 'brandname' plus the
# added 'brand' column.
cars.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 28 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 carid 205 non-null int64
1 symboling 205 non-null int64
2 CarName 205 non-null object
3 fueltype 205 non-null object
4 aspiration 205 non-null object
5 doornumber 205 non-null object
6 carbody 205 non-null object
7 drivewheel 205 non-null object
8 enginelocation 205 non-null object
9 wheelbase 205 non-null float64
10 carlength 205 non-null float64
11 carwidth 205 non-null float64
12 carheight 205 non-null float64
13 curbweight 205 non-null int64
14 enginetype 205 non-null object
15 cylindernumber 205 non-null object
16 enginesize 205 non-null int64
17 fuelsystem 205 non-null object
18 boreratio 205 non-null float64
19 stroke 205 non-null float64
20 compressionratio 205 non-null float64
21 horsepower 205 non-null int64
22 peakrpm 205 non-null int64
23 citympg 205 non-null int64
24 highwaympg 205 non-null int64
25 price 205 non-null float64
26 brand 205 non-null object
27 brandname 205 non-null object
dtypes: float64(8), int64(8), object(12)
memory usage: 45.0+ KB
In [23]:
# Columns to remove from the working frame.
dropped_columns = ['carid', 'symboling']
# Rebind instead of mutating in place, and ignore labels that are already
# absent: the original inplace drop raised KeyError when this cell was run a
# second time, because the columns were gone after the first run.
# Trade-off: errors='ignore' also hides typos in dropped_columns, so check the
# following info() output.
cars = cars.drop(columns=dropped_columns, errors='ignore')
In [24]:
# Verify the schema after the drop: 'carid' and 'symboling' should no longer be listed.
cars.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CarName 205 non-null object
1 fueltype 205 non-null object
2 aspiration 205 non-null object
3 doornumber 205 non-null object
4 carbody 205 non-null object
5 drivewheel 205 non-null object
6 enginelocation 205 non-null object
7 wheelbase 205 non-null float64
8 carlength 205 non-null float64
9 carwidth 205 non-null float64
10 carheight 205 non-null float64
11 curbweight 205 non-null int64
12 enginetype 205 non-null object
13 cylindernumber 205 non-null object
14 enginesize 205 non-null int64
15 fuelsystem 205 non-null object
16 boreratio 205 non-null float64
17 stroke 205 non-null float64
18 compressionratio 205 non-null float64
19 horsepower 205 non-null int64
20 peakrpm 205 non-null int64
21 citympg 205 non-null int64
22 highwaympg 205 non-null int64
23 price 205 non-null float64
24 brand 205 non-null object
25 brandname 205 non-null object
dtypes: float64(8), int64(6), object(12)
memory usage: 41.8+ KB
In [ ]: