0% found this document useful (0 votes)
248 views1 page

Import Import As Import As: CSV Numpy NP Pandas PD

The code analyzes World Cup and Titanic passenger data using Python libraries like Pandas and Matplotlib. It creates visualizations like scatter plots of player stats colored by position, pie charts of team red cards, and bar charts of passenger fares by class. It also finds specific players and outputs values like the number of players on high-ranked teams with over 350 minutes or the team with the highest goals ratio. Finally, it includes an interactive loop to count married women over a given age who embarked at Cherbourg.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
248 views1 page

Import Import As Import As: CSV Numpy NP Pandas PD

The code analyzes World Cup and Titanic passenger data using Python libraries like Pandas and Matplotlib. It creates visualizations like scatter plots of player stats colored by position, pie charts of team red cards, and bar charts of passenger fares by class. It also finds specific players and outputs values like the number of players on high-ranked teams with over 350 minutes or the team with the highest goals ratio. Finally, it includes an interactive loop to count married women over a given age who embarked at Cherbourg.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 1

In [1]: # World Cup Data Analysis

import csv
import numpy as np
import pandas as pd
# Read the player statistics into a DataFrame, then preview its first ten rows
# (the trailing expression is what the notebook renders as the cell output).
datafram = pd.read_csv(filepath_or_buffer="players.csv")
datafram.head(n=10)

Out[1]:
surname team position minutes shots passes tackles saves

0 Abdoun Algeria midfielder 16 0 6 0 0

1 Belhadj Algeria defender 270 1 146 8 0

2 Boudebouz Algeria midfielder 74 3 28 1 0

3 Bougherra Algeria defender 270 1 89 11 0

4 Chaouchi Algeria goalkeeper 90 0 17 0 2

5 Djebbour Algeria forward 123 3 19 1 0

6 Ghezzal Algeria forward 40 3 8 0 0

7 Guedioura Algeria midfielder 38 0 18 1 0

8 Halliche Algeria defender 270 2 94 4 0

9 Kadir Algeria midfielder 262 0 104 3 0

In [2]: # World Cup Data Visualization


# Create a scatterplot of players showing passes made (y-axis) versus minutes played (x-axis).
# Color each player based on their position (goalkeeper, defender, midfielder, forward).

import csv
import matplotlib.pyplot as plt

# Parallel lists: entry k of each list describes the k-th player row.
passesItems = []
minutesItems = []
colorsItems = []

# The context manager closes the file even if a row fails to parse;
# newline='' is what the csv module expects for files it reads.
with open('PlayersExt.csv', newline='') as File:
    rows = csv.DictReader(File)
    for data in rows:
        passesItems.append(float(data['passes']))
        minutesItems.append(float(data['minutes']))
        # Substring tests, checked in this order; the first match wins.
        if 'goalkeeper' in data['position']:
            colorsItems.append('black')
        elif 'defender' in data['position']:
            colorsItems.append('red')
        elif 'midfielder' in data['position']:
            colorsItems.append('blue')
        elif 'forward' in data['position']:
            colorsItems.append('grey')
        else:
            # Any position that matches none of the four names above.
            colorsItems.append('brown')

plt.xlabel('minutes')
plt.ylabel('passes')
plt.scatter(minutesItems, passesItems, c=colorsItems, marker='o')
# Render the figure explicitly, as the other plotting cells do, so the chart
# also appears when this code runs outside a notebook.
plt.show()

In [3]: # Create a pie chart showing the relative percentage of teams with 0, 1, and 2 red cards.

import csv
import matplotlib.pyplot as plt

# Number of teams that received exactly 0, 1, or 2 red cards.
# Teams with any other red-card count are not included in the chart.
Red_cards_0 = 0
Red_cards_1 = 0
Red_cards_2 = 0

# The context manager closes the file even if a row fails to parse.
with open('Teams.csv', newline='') as File:
    rows = csv.DictReader(File)
    for data in rows:
        # Parse the column once per row instead of once per comparison.
        red_cards = int(data['redCards'])
        if red_cards == 0:
            Red_cards_0 = Red_cards_0 + 1
        elif red_cards == 1:
            Red_cards_1 = Red_cards_1 + 1
        elif red_cards == 2:
            Red_cards_2 = Red_cards_2 + 1

colors = ['yellow', 'red', 'brown']
plt.pie(
    [Red_cards_0, Red_cards_1, Red_cards_2],
    labels=['redCards 0', 'redCards 1', 'redCards 2'],
    radius=1.8,
    colors=colors,
    autopct='%1.3f%%',  # print each wedge's share with three decimals
)
plt.show()

In [4]: # Titanic Data Visualization


# Create a bar chart showing the average fare paid by passengers in each class.
# The three bars should be labeled 'first', 'second', 'third'.

import csv
import matplotlib.pyplot as plt

# Passenger count per class.
First_class = 0
Second_class = 0
Third_class = 0

# Running fare total per class.
First_class_sum = float(0)
Second_class_sum = float(0)
Third_class_sum = float(0)

# The context manager closes the file even if a row fails to parse.
with open('Titanic.csv', newline='') as File:
    row = csv.DictReader(File)
    for data in row:
        # Compare the class as an integer for all three branches; a substring
        # test such as `'1' in ...` would also match values like '10' or '21'.
        passenger_class = int(data['class'])
        if passenger_class == 1:
            First_class = First_class + 1
            First_class_sum = First_class_sum + float(data['fare'])
        elif passenger_class == 2:
            Second_class = Second_class + 1
            Second_class_sum = Second_class_sum + float(data['fare'])
        elif passenger_class == 3:
            Third_class = Third_class + 1
            Third_class_sum = Third_class_sum + float(data['fare'])

# Average fare per class. The averages are kept as floats (not truncated to
# whole numbers), and a class with no passengers gets a zero-height bar
# instead of raising ZeroDivisionError.
heights_1 = First_class_sum / First_class if First_class else 0.0
heights_2 = Second_class_sum / Second_class if Second_class else 0.0
heights_3 = Third_class_sum / Third_class if Third_class else 0.0
heights = [heights_1, heights_2, heights_3]

# Labels spelled exactly as the task statement for this cell requires.
bars = ['first', 'second', 'third']

plt.xlabel("Class")
plt.ylabel("Avg Fare")
plt.title("Avg Fare")
plt.bar(bars, heights, label="Avg Fare", color='lightgreen')
plt.legend()
plt.show()

In [5]: # World Cup with Pandas


# What player on a team with "ia" in the team name played less than 200 minutes and made more than 100 passes?
# Print the player surname

surname = pd.read_csv("players.csv")

# Row filter built from boolean conditions only. The surname column is
# selected afterwards: AND-ing a string column into the mask is not a
# selection and fails on bool & str.
matches = (
    surname.team.str.contains('ia', na=False)  # a missing team name counts as no match
    & (surname.minutes < 200)
    & (surname.passes > 100)
)
answer = surname.loc[matches, 'surname']
print(answer)

431 Kuzmanovic
Name: surname, dtype: object

In [6]: # What player on a team with "ia" in the team name played less than 200 minutes and made more than 100 passes?

ds = pd.read_csv("players.csv")

# Row filter built from boolean conditions only. The surname column is
# selected afterwards: AND-ing a string column into the mask is not a
# selection and fails on bool & str.
selected = (
    ds.team.str.contains('ia', na=False)  # a missing team name counts as no match
    & (ds.minutes < 200)
    & (ds.passes > 100)
)
an = ds.loc[selected, 'surname']
print(an)

431 Kuzmanovic
Name: surname, dtype: object

In [7]: # How many players on a team with ranking <10 played more than 350 minutes?

import csv
import numpy as np

# Count the rows whose team ranking is below 10 and whose minutes exceed 350.
playersNumber = 0

# The context manager closes the file even if a row fails to parse.
with open('PlayersExt.csv', newline='') as info:
    rows = csv.DictReader(info)
    for rf in rows:
        if int(rf['ranking']) < 10 and int(rf['minutes']) > 350:
            playersNumber = playersNumber + 1

# Report the total once, after every row has been examined.
print(playersNumber)

54

In [8]: # Which team has the highest ratio of goalsFor to goalsAgainst? Print the team only.

# Track the best goalsFor / goalsAgainst ratio seen so far, instead of
# testing against a hard-coded value that only fits one particular data set.
best_team = None
best_ratio = None

# The context manager closes the file even if a row fails to parse.
with open('Teams.csv', newline='') as OpenFile:
    row = csv.DictReader(OpenFile)
    for j in row:
        goals_for = int(j['goalsFor'])
        goals_against = int(j['goalsAgainst'])
        if goals_against:
            ratio = goals_for / goals_against
        elif goals_for:
            # Scored without conceding: treat the ratio as unbounded rather
            # than raising ZeroDivisionError.
            ratio = float('inf')
        else:
            # 0 / 0 is undefined, so this team cannot be ranked.
            continue
        # Strict comparison: on a tie the team that appears first is kept.
        if best_ratio is None or ratio > best_ratio:
            best_ratio = ratio
            best_team = j['team']

# Print the team only; nothing is printed when no team could be ranked.
if best_team is not None:
    print(best_team)

Portugal

In [ ]: # Titanic Data Analysis


# Write a loop that asks the user to enter an age, then returns the number of married women over that age who
# embarked in Cherbourg. Terminate the loop when the user enters a number that is less than 0.

# Read the passenger rows once, up front; the file does not change between
# prompts, and the context manager guarantees it is closed.
with open('Titanic.csv', newline='') as OpenFile:
    row = list(csv.DictReader(OpenFile))

Married_Women = 0
while True:
    Input_age = input('Enter Age: ')
    # Compare ages as numbers. Comparing the raw strings is lexicographic,
    # so for example '9' > '25' would be true.
    try:
        age_limit = float(Input_age)
    except ValueError:
        print('Please enter a number.')
        continue
    if age_limit < 0:
        break

    Married_Women = 0
    for i in row:
        if 'Mrs.' not in i['first'] or 'Cherbourg' not in i['embarked']:
            continue
        try:
            passenger_age = float(i['age'])
        except ValueError:
            # Age is blank or not numeric for this passenger; cannot compare.
            continue
        if passenger_age > age_limit:
            Married_Women += 1

    # NOTE(review): the original cell is cut off after its final `if`; the
    # counting step above and this output line are reconstructed from the
    # recorded session output ("14 married women 25") - confirm against the
    # original notebook.
    print(Married_Women, 'married women', Input_age)

Enter Age: 25
14 married women 25
Enter Age: 39
10 married women 39
Enter Age: 55
2 married women 55
Enter Age: 65
0 married women 65
Enter Age: 42
9 married women 42

You might also like