0% found this document useful (0 votes)
21 views15 pages

INFO II Practice 7

The code imports libraries and loads data about World Cup matches, winners, and players. It then cleans the match data and adds a 'Result' column indicating the winner or draw. Functions are defined to get team and match statistics.

Uploaded by

Deepesh Suresh
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
21 views15 pages

INFO II Practice 7

The code imports libraries and loads data about World Cup matches, winners, and players. It then cleans the match data and adds a 'Result' column indicating the winner or draw. Functions are defined to get team and match statistics.

Uploaded by

Deepesh Suresh
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 15

1 import numpy as np

2 import pandas as pd
3 import matplotlib.pyplot as plt
4 import seaborn as sns
5
6 plt.style.use('ggplot')
7 plt.rcParams['font.family'] = 'sans-serif'
8 plt.rcParams['font.serif'] = 'Ubuntu'
9 plt.rcParams['font.monospace'] = 'Ubuntu Mono'
10 plt.rcParams['font.size'] = 14
11 plt.rcParams['axes.labelsize'] = 12
12 plt.rcParams['axes.labelweight'] = 'bold'
13 plt.rcParams['axes.titlesize'] = 12
14 plt.rcParams['xtick.labelsize'] = 12
15 plt.rcParams['ytick.labelsize'] = 12
16 plt.rcParams['legend.fontsize'] = 12
17 plt.rcParams['figure.titlesize'] = 12
18 plt.rcParams['image.cmap'] = 'jet'
19 plt.rcParams['image.interpolation'] = 'none'
20 plt.rcParams['figure.figsize'] = (12, 10)
21 plt.rcParams['axes.grid']=True
22 plt.rcParams['lines.linewidth'] = 2
23 plt.rcParams['lines.markersize'] = 8
24 colors = ['xkcd:pale orange', 'xkcd:sea blue', 'xkcd:pale red', 'xkcd:sage green', 'xkcd:terra cotta', 'xkcd:dull p
25 'xkcd:scarlet']
1 from google.colab import drive
2 drive.mount('/content/gdrive')
3
4 data_matches = pd.read_csv('/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/WorldCupMatches.csv
5 data_winner = pd.read_csv('/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/WorldCups.csv'
6 data_players = pd.read_csv('/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/WorldCupPlayers.csv
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", for

1 data_matches.head()

Half-
Home Home Away Away
Win
Year Datetime Stage Stadium City Team Team Team Team Attendance
conditions
Name Goals Goals Name
Goals

13 Jul
Group
0 1930.0 1930 - Pocitos Montevideo France 4.0 1.0 Mexico 4444.0
1
15:00

13 Jul
Group Parque
1 1930.0 1930 - Montevideo USA 3.0 0.0 Belgium 18346.0
4 Central
15:00

14 Jul
Group Parque
2 1930.0 1930 - Montevideo Yugoslavia 2.0 1.0 Brazil 24059.0
2 Central
12:45

14 Jul
Group
3 1930.0 1930 - Pocitos Montevideo Romania 3.0 1.0 Peru 2549.0
3
14:50

15 Jul
Group Parque
4 1930.0 1930 - Montevideo Argentina 1.0 0.0 France 23409.0
1 Central
16:00

1 data_winner.head()

Year Country Winner Runners-Up Third Fourth GoalsScored QualifiedTeams MatchesPlayed

0 1930 Uruguay Uruguay Argentina USA Yugoslavia 70 13 18

1 1934 Italy Italy Czechoslovakia Germany Austria 70 16 17

2 1938 France Italy Hungary Brazil Sweden 84 15 18

3 1950 Brazil Uruguay Brazil Sweden Spain 88 13 22

Germany
4 1954 Switzerland Hungary Austria Uruguay 140 16 26
FR

1 data_players.head()

Team Line- Shirt


RoundID MatchID Coach Name Player Name Position Event
Initials up Number

CAUDRON Raoul
0 201 1096 FRA S 0 Alex THEPOT GK NaN
(FRA)

Oscar
1 201 1096 MEX LUQUE Juan (MEX) S 0 GK NaN
BONFIGLIO

CAUDRON Raoul Marcel


2 201 1096 FRA S 0 NaN G40'
(FRA) LANGILLER
1 sns.countplot(data_winner.Winner)
2 plt.grid(True)
3 plt.ylabel('Number of World Cup won',fontsize=14)
4 plt.xlabel('Country',fontsize=14)
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a
warnings.warn(
Text(0.5, 0, 'Country')

1 sns.countplot(data_matches['Stage'])
2 plt.xticks(rotation=20)
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a
warnings.warn(
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22]), <a list of 23 Text major ticklabel objects>)
1 important_columns = ['Home Team Name','Home Team Goals','Away Team Goals','Away Team Name','Stage']
2 data_matches = data_matches[important_columns].dropna().reset_index().drop('index',axis=1)

# Label every match with its outcome: the name of the winning team, or
# 'Draw' when both sides scored the same number of goals.
win_draw_lose = []
match_rows = zip(
    data_matches['Home Team Name'],
    data_matches['Home Team Goals'],
    data_matches['Away Team Goals'],
    data_matches['Away Team Name'],
)
for home_name, home_goals, away_goals, away_name in match_rows:
    # Goals are cast to int before comparing, as in the original skeleton.
    home_team_goal = int(home_goals)
    away_team_goal = int(away_goals)
    if home_team_goal == away_team_goal:
        win_draw_lose.append('Draw')
    elif home_team_goal > away_team_goal:
        win_draw_lose.append(home_name)
    else:
        win_draw_lose.append(away_name)
data_matches['Result'] = win_draw_lose

# The 32 teams of the Qatar tournament. The names are later compared against
# the 'Home Team Name' values of the historical match data, so the spelling
# must match that data set exactly.
qatar_team_list = [
    "Argentina",
    "Australia",
    "Belgium",
    "Brazil",
    "Cameroon",
    "Canada",
    "Costa Rica",
    "Croatia",
    "Denmark",
    "Ecuador",
    "England",
    "France",
    "Germany",
    "Ghana",
    "IR Iran",
    "Japan",
    "Korea Republic",
    "Mexico",
    "Morocco",
    "Netherlands",
    "Poland",
    "Portugal",
    "Qatar",
    "Saudi Arabia",
    "Senegal",
    "Serbia",
    "Spain",
    "Switzerland",
    "Tunisia",
    "Uruguay",
    "USA",
    "Wales",
]

1 team_list = list(set(data_matches['Home Team Name']))


2 for t in qatar_team_list:
3 if t not in team_list:
4 print('We have a problem with team %s'%(t))
5 qatar_probabilities = {'Win':0.20,'Draw':0.20,'Lose':0.60}

select_team_statistics

1 def select_team_statistics(team):
2 data_team = # COMPLETE
3 winning_count = # COMPLETE
4 draw_count = # COMPLETE
5 lose_count = # COMPLETE
6 return data_team,{'Winning Count':winning_count,'Draw Count':draw_count,'Lose Count':lose_count}

1 print(select_team_statistics('Qatar'))
2 print(select_team_statistics('Portugal'))

select_match_statistics
team_A team_B

1 def select_match_statistics(team_A, team_B):


2 data_team = data_matches[(data_matches['Home Team Name']==team_A)& (data_matches['Away Team Name']==team_B)]
3 data_team = data_team.append(data_matches[(data_matches['Home Team Name']==team_B)& (data_matches['Away Team Na
4 len_data = len(data_team)
4 len_data = len(data_team)
5 if #COMPLETE :
6 print('These teams never played against each other')
7 else:
8 print('These teams played against each other %i times'%(len_data))
9 team_A_win = # COMPLETE
10 team_B_win = # COMPLETE
11 draw = # COMPLETE
12 return data_team,{team_A:team_A_win,team_B:team_B_win,'Draw':draw}

select_match_statistics France England France Qatar

1 data_match, stats = # COMPLETE


2 data_match, stats = # COMPLETE

1 # SOLUTION
2 data_match,stats = select_match_statistics('France','England')
3 data_match,stats = select_match_statistics('France','Qatar')

find score

1 def find_score(team):
2 team_data, stats = #COMPLETE
2 team_data, stats = #COMPLETE
3 team_stage = team_data['Stage'].reset_index().drop('index',axis=1)
4 sum_groups = 0
5 sum_finals = 0
6 for s in range(len(team_stage)):
7 stage_val = team_stage.loc[s].values[0].split(' ')
8 if stage_val[0]=='Group':
9 ## COMPLETE
10 if stage_val[0]!='Group':
11 ## COMPLETE
12
13 score_1 = ## COMPLETE
14 try:
15 score_2 = ## COMPLETE
16 except:
17 score_2=0
18 return score_1+score_2

find_score_two_teams

1 def find_score_two_teams(team_A,team_B):
2 if team_A == 'Qatar':
3 score_1 = 0.09
4 score_2 = ##COMPLETE
5 if team_B == 'Qatar':
6 score_2 = ##COMPLETE
7 score_1 = find_score(team_A)
8 if team_A!='Qatar' and team_B!='Qatar':
9 score_1 = ##COMPLETE
10 score_2 = ##COMPLETE
11 team_A_score = ##COMPLETE
12 team_B_score = ##COMPLETE
13 if team_A_score>team_B_score:
14 team_A_score = min(0.90,team_A_score)
15 team_B_score = max(0.10,team_B_score)
16 else:
17 team_B_score = min(0.90,team_B_score)
18 team_A_score = max(0.10,team_A_score)
19 team_A_score = ##COMPLETE
20 team_B_score = ##COMPLETE
21 res = {team_A: team_A_score, team_B:team_B_score, 'Draw':0.10}
22 return res

select_match_statistics

def select_match_statistics(team_A, team_B):
    """Estimate the outcome probabilities of a match between two teams.

    The probabilities are the fractions of past meetings (in either
    home/away order) recorded in the module-level ``data_matches`` frame that
    ended with each result.

    Parameters
    ----------
    team_A, team_B : str
        Team names as they appear in the 'Home Team Name' / 'Away Team Name'
        columns of ``data_matches``.

    Returns
    -------
    data_team : pandas.DataFrame
        The rows of ``data_matches`` in which the two teams met; matches with
        ``team_A`` at home come first.
    res : dict
        ``{team_A: p_A, team_B: p_B, 'Draw': p_draw}``. When the teams never
        met, this is whatever ``find_score_two_teams(team_A, team_B)`` returns.
    """
    a_at_home = (data_matches['Home Team Name'] == team_A) & (data_matches['Away Team Name'] == team_B)
    b_at_home = (data_matches['Home Team Name'] == team_B) & (data_matches['Away Team Name'] == team_A)
    # DataFrame.append was removed in pandas 2.0; concat yields the same rows
    # in the same order.
    data_team = pd.concat([data_matches[a_at_home], data_matches[b_at_home]])
    len_data = len(data_team)

    if len_data == 0:
        # No head-to-head history: fall back to the score-based estimate.
        res = find_score_two_teams(team_A, team_B)
        return data_team, res

    outcomes = data_team['Result']
    res_list = [
        int((outcomes == team_A).sum()) / len_data,
        int((outcomes == team_B).sum()) / len_data,
        int((outcomes == 'Draw').sum()) / len_data,
    ]
    if 1.0 in res_list:
        # Every recorded meeting ended the same way. Cap that outcome at 0.80
        # and give 0.10 to each of the others -- presumably so the simulation
        # never treats a result as certain.
        certain = res_list.index(1.0)
        res_list = [0.80 if i == certain else 0.10 for i in range(len(res_list))]

    res = {team_A: res_list[0], team_B: res_list[1], 'Draw': res_list[2]}
    return data_team, res

1 _,stats_1=select_match_statistics('Saudi Arabia','Portugal')
2 _,stats_2=select_match_statistics('France','Portugal')
3 print(stats_1)
4 print(stats_2)

groups

1 group_A = ['Qatar','Ecuador','Senegal','Netherlands']
2 group_B = ['England','IR Iran','USA','Wales']
3 # COMPLETE
4 groups = # COMPLETE

1 group_name = group_B
2 #COMPLETE

def run_group(group_name):
    """Simulate the matches of one group and return its points table.

    Every ordered pair of distinct teams is simulated, so each pairing is
    played twice (once in each order). For every match an outcome is drawn at
    random using the probabilities from ``select_match_statistics``: a win is
    worth 3 points to the winner, a draw 1 point to each side.

    Parameters
    ----------
    group_name : list of str
        The names of the teams in the group.

    Returns
    -------
    pandas.DataFrame
        One row per team (indexed by team name) with a single 'Points'
        column, sorted by points in descending order.
    """
    data_res = pd.DataFrame({'Points': np.zeros(len(group_name))}, index=group_name)
    for team_1 in group_name:
        for team_2 in group_name:
            if team_1 == team_2:
                continue
            _, stats = select_match_statistics(team_1, team_2)
            result = np.random.choice(list(stats.keys()), p=list(stats.values()))
            # Write through a single .loc call: chained assignment such as
            # data_res['Points'].loc[...] = ... does not update the frame
            # when pandas Copy-on-Write is active.
            if result == 'Draw':
                data_res.loc[[team_1, team_2], 'Points'] += 1
            else:
                data_res.loc[result, 'Points'] += 3
    return data_res.sort_values(by='Points', ascending=False)

# Display names used to label each group's points column; entry g labels
# groups[g].
group_names = ['group A', 'group B', 'group C', 'group D',
               'group E', 'group F', 'group G', 'group H']


def run_groups():
    """Simulate every group of the tournament.

    Calls ``run_group`` on each entry of the module-level ``groups`` list and
    renames the resulting 'Points' column to 'Points <group name>', taking the
    name from ``group_names`` at the same position.

    Returns
    -------
    list of pandas.DataFrame
        One points table per group, in the order of ``groups``.
    """
    group_list = []
    for g, teams in enumerate(groups):
        g_group = run_group(teams)
        g_group = g_group.rename(columns={'Points': 'Points ' + group_names[g]})
        group_list.append(g_group)
    return group_list
10
11 group_list = run_groups()
12 print(group_list)

find_qualification_stage
['Netherlands', 'England'], ['Senegal', 'USA'], ...

1 def find_qualification_stage(res_groups):
2 qual_matches = []
3 group_stage = np.arange(0,len(groups),2)
4 k=0
5 for g in range(len(group_stage)):
6 qual_matches.append(#COMPLETE)
7 qual_matches.append(#COMPLETE)
8 k=#COMPLETE
9 return qual_matches

elimination_stage

1 def elimination_stage(selections,k=1):
2 quarter_finals_team = []
3 for i in range(len(selections)):
4 team_1 = # COMPLETE
5 team_2 = # COMPLETE
6 _,stats = # COMPLETE
7 result = np.random.choice(list(stats.keys()),p=list(stats.values()))
8 if result==#COMPLETE:
9 result = np.random.choice([team_1,team_2],p=[0.5,0.5])
10 quarter_finals_team.append(result)
11 if k ==1:
12 quarter_finals_team=np.array(quarter_finals_team).reshape(2,-1)
13 return quarter_finals_team

1 group_stage = run_groups()
2 eight_finals = find_qualification_stage(group_stage)
3 print(eight_finals)
4 quarter_finals = elimination_stage(eight_finals)
5 print(quarter_finals)
6 semi_finals = elimination_stage(quarter_finals,k=0)
7 print(semi_finals)
8 _,stats = select_match_statistics(semi_finals[0],semi_finals[1])
8 _,stats = select_match_statistics(semi_finals[0],semi_finals[1])
9 winner = np.random.choice(list(stats.keys()),p=list(stats.values()))
10 print(winner)

whole_tournament

def whole_tournament():
    """Simulate one complete tournament and return the name of its winner.

    Runs the same pipeline as the step-by-step demonstration cell: group
    stage, qualification pairing, two elimination rounds, and a final between
    the two remaining teams. If the final is drawn, the winner is picked
    between the two finalists with equal probability.

    Returns
    -------
    str
        The name of the winning team (a NumPy string scalar).
    """
    group_stage = run_groups()
    eight_finals = find_qualification_stage(group_stage)
    quarter_finals = elimination_stage(eight_finals)
    semi_finals = elimination_stage(quarter_finals, k=0)
    # After the k=0 round only the two finalists are used.
    finalist_1, finalist_2 = semi_finals[0], semi_finals[1]
    _, stats = select_match_statistics(finalist_1, finalist_2)
    winner = np.random.choice(list(stats.keys()), p=list(stats.values()))
    if winner == 'Draw':
        # A final cannot end in a draw: decide it with a coin toss.
        winner = np.random.choice([finalist_1, finalist_2], p=[0.5, 0.5])
    return winner

1 stats = []
2 for i in range(1000):
3 if (i%100)==0 and i>0:
4 print('Running Simulation number %i' %(i))
5 so_far = pd.DataFrame(stats).value_counts().index[0][0]
6 print('Most predicted winner so far is %s'%(so_far))
7 winner = whole_tournament()
8 stats.append(winner)

1 pd.DataFrame(stats)[0].value_counts().plot(kind="bar")

whole_tournament

1 def whole_tournament():
2 # COMPLETE
1 from google.colab import drive
2 drive.mount('/content/gdrive')
3
4 import pandas as pa
5 import pylab as pl
6 import seaborn as sn
7 pl.style.use('bmh')
8
9 T = pa.read_csv("/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/Fifa2019.csv")
10 T.head()

1 # COMPLETE

1 T100 = T.head(100)
2 m = T100['Wage'].min()
3 print(m)

1 # COMPLETE

T6 L

1 T6 = T.query('Club in @L')
2 T6.head()

1 pl.figure(figsize = (8,5))
2 sn.boxplot(x='Club',y='Wage', data=T6,
3 whis=[0,100], showmeans = True, color ='white',
4 meanprops={'marker':'s','markerfacecolor':'red', 'markeredgecolor':'black'})
5 pl.axhline(y=m,color='red')
6 pl.show()

L
m m
def fcc_pop(name):
    """Return the cumulative population percentages for a club.

    For a club with N rows in the module-level table ``T``, the result is the
    list ``[0, 100/N, 200/N, ..., 100]`` (N + 1 values): the share of the
    squad covered after counting 0, 1, ..., N players.

    Parameters
    ----------
    name : str
        Club name, matched exactly against the 'Club' column of ``T``.

    Returns
    -------
    list of float
        N + 1 percentages, from 0.0 to 100.0.

    Raises
    ------
    ValueError
        If ``T`` contains no row for ``name``.
    """
    S = T.query('Club == @name')
    N = len(S)
    if N == 0:
        # Without this guard the division below fails with an opaque
        # ZeroDivisionError for a misspelled or unknown club.
        raise ValueError('No players found for club %r' % (name,))
    return [k / N * 100 for k in range(N + 1)]
5
6 fcc_pop('Juventus')

fcc_pop('Juventus')

1 def salaries(name):
2 S = T.query('Club == @name')
3 column = S.sort_values('Wage')[#COMPLETE]
4 return list (column)

1 #COMPLETE

1 # COMPLETE

fcc_mr
1 def fcc_mr(name):
2 F = [0]
3 mass_sal = 0
4 sal_club = salaries(name)
5 for sal in sal_club:
6 mass_sal = # COMPLETE
7 F.append( # COMPLETE )
8 return F

1 # COMPLETE

def lorenz(name, mark, c):
    """Plot a club's cumulative wage curve on the current figure.

    The x values come from ``fcc_pop(name)`` and the y values from
    ``fcc_mr(name)``; the line is labelled with the club name so that a later
    ``pl.legend()`` call can identify it.

    Parameters
    ----------
    name : str
        Club name, passed to ``fcc_pop`` and ``fcc_mr`` and used as the label.
    mark : str
        Format string for the plot call (e.g. ``'o-'``).
    c : str
        Line colour.
    """
    pl.plot(fcc_pop(name), fcc_mr(name), mark, color=c, label=name)
3
4 pl.figure(figsize=(6,6))
5 pl.axis([0,100,0,100])
6 pl.xticks(range(0,110,10))
7 pl.yticks(range(0,110,10))
8 pl.xlabel('FCC of players %')
9 pl.ylabel('FCC of mass salaries %')
10 lorenz('Juventus','o-','green')
11 pl.legend()

1 # COMPLETE
trapeze

1 def trapeze(#COMPLETE):
2 #COMPLETE

1 X = fcc_pop('Juventus')
2 Y = fcc_mr('Juventus')

1 def area(X,Y):
2 A=0
3 for k in range(len(X)-1):
4 b = # COMPLETE # /100
5 B = # COMPLETE #
6 H = # COMPLETE # - X[k]/100
7 A = # COMPLETE # + trapeze(# COMPLETE #)
8 return A

gini name

1 # COMPLETE

You might also like