0% found this document useful (0 votes)
42 views

CSV Python Annotated

This document discusses reading and writing CSV files in Python using the csv module. It covers reading data from a CSV file, accessing specific columns, writing data to a CSV file, and various formatting options like delimiters, quoting styles, and escape characters.

Uploaded by

Suman Das
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
42 views

CSV Python Annotated

This document discusses reading and writing CSV files in Python using the csv module. It covers reading data from a CSV file, accessing specific columns, writing data to a CSV file, and various formatting options like delimiters, quoting styles, and escape characters.

Uploaded by

Suman Das
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 27

100 DAYS OF DATA

WORKING WITH CSV


DATA ANALYSIS OF TITANIC DATA SET INCLUDED

Data analysis from a CSV file in Python


Learn to read and write CSV files in Python


COPY

name,age,height(cm),weight(kg)
Lenin,30,188,90
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46

\t

csv

pandas
csv.reader

COPY

import csv

with open('my_family.csv') as input:


csv_reader = csv.reader(input, delimiter=',')
line_count = 0
for row in csv_reader:
if line_count == 0:
print(f'Header row - {", ".join(row)}')
line_count += 1
else:
print(f'{row[0]} is {row[1]} years old, {row[2]} cm tal
line_count += 1
print(f'Total: {line_count} lines')

COPY

Header row - name, age, height(cm), weight(kg)


Lenin is 30 years old, 188 cm tall and 90 kg heavy
Phil is 42 years old, 178 cm tall and 76 kg heavy
Claire is 40 years old, 165 cm tall and 54 kg heavy
Alex is 18 years old, 140 cm tall and 46 kg heavy
Total: 5 lines
line_count

csv.reader next(reader object,

None)

COPY

import csv

with open('my_family.csv') as input:


csv_reader = csv.reader(input, delimiter=',')
line_count = 0
next(csv_reader, None) #ignore the header
for row in csv_reader:
print(f'{row[0]} is {row[1]} years old, {row[2]} cm tall an
line_count += 1
print(f'Total: {line_count} lines')

COPY

Lenin is 30 years old, 188 cm tall and 90 kg heavy


Phil is 42 years old, 178 cm tall and 76 kg heavy
Claire is 40 years old, 165 cm tall and 54 kg heavy
Alex is 18 years old, 140 cm tall and 46 kg heavy
Total: 4 lines
Total: 4 lines

csv.DictReader

DictReader

COPY

import csv

with open('my_family.csv') as input:


csv_reader = csv.DictReader(input, delimiter=',')
for row in csv_reader:
print(f'{row["name"]} is {row["age"]} years old, {row["heig
print(f'Total: {csv_reader.line_num} lines')

csv_reader.line_num

csv.DictReader

csv.DictReader
csv.writer

COPY

import csv

header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']

data = [ ['Phil', 42, 178, 76],


['Alex', 18, 140, 46],
['Claire', 40, 165, 54] ]

filename = "my_family.csv"

with open(filename, 'w') as output:


csvwriter = csv.writer(output)

# Write a single list


csvwriter.writerow(header)

# Writing a list of lists


csvwriter.writerows(data)
COPY

Name,Age,Height(cm),Weight(kg)
Phil,42,178,76
Alex,18,140,46
Claire,40,165,54

writerow

writerows

csv.writer ,

delimiter

COPY

import csv

header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']

data = [ ['Phil', 42, 178, 76],


['Alex', 18, 140, 46],
['Claire', 40, 165, 54] ]

filename = "my_family.csv"
with open(filename, 'w') as output:
csvwriter = csv.writer(output, delimiter = '|')

# Write a single list


csvwriter.writerow(header)

# Writing a list of lists


csvwriter.writerows(data)

COPY

Name|Age|Height(cm)|Weight(kg)
Phil|42|178|76
Alex|18|140|46
Claire|40|165|54

DictWriter

fieldnames

COPY

import csv

header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']

data = [
data = [
{"Name":"Phil", "Age": 42, "Height(cm)":178, "Weight(kg)":76},
{"Name":"Claire", "Age": 40, "Height(cm)":165, "Weight(kg)":54}
{"Name":"Alex", "Age": 18, "Height(cm)":140, "Weight(kg)":46}
]

filename = "my_family.csv"

with open(filename, 'w') as output:


csvwriter = csv.DictWriter(output, fieldnames=header)
csvwriter.writeheader()
for row in data:
csvwriter.writerow(row)

COPY

Name,Age,Height(cm),Weight(kg)
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46

writerows

COPY

import csv

header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']


data = [
{"Name":"Phil", "Age": 42, "Height(cm)":178, "Weight(kg)":76},
{"Name":"Claire", "Age": 40, "Height(cm)":165, "Weight(kg)":54}
{"Name":"Alex", "Age": 18, "Height(cm)":140, "Weight(kg)":46}
]

filename = "my_family.csv"

with open(filename, 'w') as output:


csvwriter = csv.DictWriter(output, fieldnames=header)
csvwriter.writeheader()
csvwriter.writerows(data)

COPY

Name,Age,Height(cm),Weight(kg)
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46

"
"

COPY

Name,Age,Height(cm),Weight(kg),Address
Phil,42,178,76,'Gryffindor room, Hogwarts'
Claire,40,165,54,'Snapes room, Hogwarts'
Alex,18,140,46,'4 Private Drive, Little Whinging'

quotechar

COPY

import csv

filename = "my_family.csv"

with open(filename, 'r') as output:


csvreader = csv.reader(output, quotechar="'")
for row in csvreader:
print(row)
COPY

['Name', 'Age', 'Height(cm)', 'Weight(kg)', 'Address']


['Phil', '42', '178', '76', 'Gryffindor room, Hogwarts']
['Claire', '40', '165', '54', 'Snapes room, Hogwarts']
['Alex', '18', '140', '46', '4 Private Drive, Little Whinging']

quoting

csv.QUOTE_MINIMAL

csv.QUOTE_ALL

csv.QUOTE_NONNUMERIC

csv.QUOTE_NONE

COPY

import csv

filename = "my_family.csv"

header = ['Name','Age','Height(cm)','Weight(kg)','Address']

data = [
['Phil',42,178,76,'Gryffindor room, Hogwarts'],
['Claire',40,165,54,'Snapes room, Hogwarts'],
[ , , , , p , g ],
['Alex',18,140,46,'4 Private Drive, Little Whinging']
]

with open(filename, 'w') as output:


csvwriter = csv.writer(output, quotechar="'", quoting=csv.QUOTE_A
csvwriter.writerow(header)
csvwriter.writerows(data)

csv.QUOTE_ALL

COPY

'Name','Age','Height(cm)','Weight(kg)','Address'
'Phil','42','178','76','Gryffindor room, Hogwarts'
'Claire','40','165','54','Snapes room, Hogwarts'
'Alex','18','140','46','4 Private Drive, Little Whinging'

csv.QUOTE_NONE

COPY

import csv
filename = "my_family.csv"

header = ['Name','Age','Height(cm)','Weight(kg)','Address']

data = [
['Phil',42,178,76,'Gryffindor room, Hogwarts'],
['Claire',40,165,54,'Snapes room, Hogwarts'],
['Alex',18,140,46,'4 Private Drive, Little Whinging']
]

with open(filename, 'w') as output:


csvwriter = csv.writer(output, quotechar="'", quoting=csv.QUOTE_N
csvwriter.writerow(header)
csvwriter.writerows(data)

COPY

Traceback (most recent call last):


File "main.py", line 16, in <module>
csvwriter.writerows(data)
_csv.Error: need to escape, but no escapechar set

csv.QUOTE_NONE csv

escapechar
\

COPY

import csv

filename = "my_family.csv"

header = ['Name','Age','Height(cm)','Weight(kg)','Address']

data = [
['Phil',42,178,76,'Gryffindor room, Hogwarts'],
['Claire',40,165,54,'Snapes room, Hogwarts'],
['Alex',18,140,46,'4 Private Drive, Little Whinging']
]

with open(filename, 'w') as output:


csvwriter = csv.writer(output, quotechar="'", quoting=csv.QUOTE_N
csvwriter.writerow(header)
csvwriter.writerows(data)

COPY

Name,Age,Height(cm),Weight(kg),Address
Phil,42,178,76,Gryffindor room\, Hogwarts
Claire,40,165,54,Snapes room\, Hogwarts
Alex,18,140,46,4 Private Drive\, Little Whinging
\

COPY

Name, Age, Height(cm), Weight(kg), Address


Phil, 42, 178, 76, 'Gryffindor room, Hogwarts'
Claire, 40, 165, 54, 'Snapes room, Hogwarts'
Alex, 18, 140, 46, '4 Private Drive, Little Whinging'

skipinitialspace

COPY

import csv

with open('my_family.csv', 'r') as f:


csv_reader = csv.reader(f, quotechar="'")

for line in csv_reader:


print(line)
p ( )

COPY

['Name', ' Age', ' Height(cm)', ' Weight(kg)', ' Address']


['Phil', ' 42', ' 178', ' 76', " 'Gryffindor room", " Hogwarts'"]
['Claire', ' 40', ' 165', ' 54', " 'Snapes room", " Hogwarts'"]
['Alex', ' 18', ' 140', ' 46', " '4 Private Drive", " Little Whingi

skipinitialspace

True

COPY

import csv

with open('my_family.csv', 'r') as f:


csv_reader = csv.reader(f, quotechar="'", skipinitialspace=True

for line in csv_reader:


print(line)

COPY

['Name', 'Age', 'Height(cm)', 'Weight(kg)', 'Address']


['Phil', '42', '178', '76', 'Gryffindor room, Hogwarts']
['Claire', '40', '165', '54', 'Snapes room, Hogwarts']
[ , , , , p , g ]
['Alex', '18', '140', '46', '4 Private Drive, Little Whinging']

COPY

import pandas as pd

df = pd.read_csv('my_family.csv')
print(df)

COPY

Name Age Height(cm) Weight(kg)


0 Phil 42 178 76
1 Claire 40 165 54
2 Alex 18 140 46
COPY

import pandas as pd

df = pd.read_csv('my_family.csv')

print(type(df['Age'][0]))
print(type(df['Height(cm)'][0]))
print(type(df['Weight(kg)'][0]))

COPY

<class 'numpy.int64'>
<class 'numpy.int64'>
<class 'numpy.int64'>

names
pd.read_csv()

COPY

Phil,42,178,76
Claire,40,165,54
Alex,18,140,46

COPY

import pandas as pd

df = pd.read_csv('my_family.csv',
index_col='Name',
names=['Name', 'Age', 'Height(cm)', 'Weight(kg)']
)
print(df)

COPY

Age Height(cm) Weight(kg)


Name
Phil 42 178 76
Claire 40 165 54
Alex 18 140 46
df.to_csv

COPY

import pandas as pd

df = pd.read_csv('my_family.csv',
index_col='Name',
names=['Name', 'Age', 'Height(cm)', 'Weight(kg)']
)
df.to_csv('my_new_family.csv')

COPY

Age Height(cm) Weight(kg)


Name
Phil 42 178 76
Claire 40 165 54
Alex 18 140 46
COPY

import pandas as pd

#load the csv file


df = pd.read_csv('train.csv')

# Column Names
print(df.columns)

# Count unique values in Sex column


print(df['Sex'].value_counts())

# Percentage of male and female passengers


print(df['Sex'].value_counts(normalize=True))
COPY

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age',


'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
male 577
female 314
Name: Sex, dtype: int64
male 0.647587
female 0.352413
Name: Sex, dtype: float64

COPY

import pandas as pd

#load the csv file


df = pd.read_csv('train.csv')

# Column Names
print(df.columns)

# Count unique values in Sex column


print(df[df["Survived"] == 1]['Sex'].value_counts())
# Percentage of surviving male and female passengers
print(df[df["Survived"] == 1]['Sex'].value_counts(normalize=True))

COPY

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age',


'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
female 233
male 109
Name: Sex, dtype: int64
female 0.681287
male 0.318713
Name: Sex, dtype: float64

value_counts()

COPY

import pandas as pd

#load the csv file


df = pd.read_csv('train.csv')

# median age of each sex


median_age_men=df[df['Sex']=='male']['Age'].median()
median_age_women=df[df['Sex']=='female']['Age'].median()

print(f"The median age of men is {median_age_men}")


print(f"The median age of women is {median_age_women}")

COPY

The median age of men is 29.0


The median age of women is 27.0

You might also like