INFO II Practice 7
INFO II Practice 7
2 import pandas as pd
3 import matplotlib.pyplot as plt
4 import seaborn as sns
5
# Global matplotlib look: start from the ggplot style sheet, then apply the
# overrides below in a single update.
# NOTE(review): 'font.serif' is set to Ubuntu while 'font.family' selects
# 'sans-serif' — confirm whether 'font.sans-serif' was the intended key.
plt.style.use('ggplot')
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 14,
    'axes.labelsize': 12,
    'axes.labelweight': 'bold',
    'axes.titlesize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.titlesize': 12,
    'image.cmap': 'jet',
    'image.interpolation': 'none',
    'figure.figsize': (12, 10),
    'axes.grid': True,
    'lines.linewidth': 2,
    'lines.markersize': 8,
})
24 colors = ['xkcd:pale orange', 'xkcd:sea blue', 'xkcd:pale red', 'xkcd:sage green', 'xkcd:terra cotta', 'xkcd:dull p
25 'xkcd:scarlet']
from google.colab import drive

# Mount Google Drive; the three CSV files below are read from under the
# mount point.
drive.mount('/content/gdrive')

# The read_csv calls were cut off at the page margin in this export; the
# closing quote / parenthesis are restored here.
# NOTE(review): any keyword arguments that followed the path were lost in the
# export as well — confirm against the original notebook.
data_matches = pd.read_csv('/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/WorldCupMatches.csv')
data_winner = pd.read_csv('/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/WorldCups.csv')
data_players = pd.read_csv('/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/WorldCupPlayers.csv')
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", for
1 data_matches.head()
Half-
Home Home Away Away
Win
Year Datetime Stage Stadium City Team Team Team Team Attendance
conditions
Name Goals Goals Name
Goals
13 Jul
Group
0 1930.0 1930 - Pocitos Montevideo France 4.0 1.0 Mexico 4444.0
1
15:00
13 Jul
Group Parque
1 1930.0 1930 - Montevideo USA 3.0 0.0 Belgium 18346.0
4 Central
15:00
14 Jul
Group Parque
2 1930.0 1930 - Montevideo Yugoslavia 2.0 1.0 Brazil 24059.0
2 Central
12:45
14 Jul
Group
3 1930.0 1930 - Pocitos Montevideo Romania 3.0 1.0 Peru 2549.0
3
14:50
15 Jul
Group Parque
4 1930.0 1930 - Montevideo Argentina 1.0 0.0 France 23409.0
1 Central
16:00
1 data_winner.head()
Germany
4 1954 Switzerland Hungary Austria Uruguay 140 16 26
FR
1 data_players.head()
CAUDRON Raoul
0 201 1096 FRA S 0 Alex THEPOT GK NaN
(FRA)
Oscar
1 201 1096 MEX LUQUE Juan (MEX) S 0 GK NaN
BONFIGLIO
# Bar chart of how many matches were played in each tournament stage.
# The column is passed as the keyword argument `x`: passing it positionally
# triggers seaborn's FutureWarning (see the recorded output) and newer seaborn
# releases treat the first positional argument as `data` instead.
sns.countplot(x=data_matches['Stage'])
# Tilt the stage labels so the many categories stay readable.
plt.xticks(rotation=20)
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a
warnings.warn(
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22]), <a list of 23 Text major ticklabel objects>)
# Keep only the columns needed to decide each match's outcome, discard rows
# with a missing value in any of them, and renumber the remaining rows from 0.
important_columns = [
    'Home Team Name',
    'Home Team Goals',
    'Away Team Goals',
    'Away Team Name',
    'Stage',
]
data_matches = data_matches[important_columns].dropna().reset_index(drop=True)
1 win_draw_lose = []
2 for i in range(len(data_matches)):
3 home_team_goal = int(data_matches['Home Team Goals'].loc[i])
4 away_team_goal = int(data_matches['Away Team Goals'].loc[i])
5 if ##COMPLETE:
6 win_draw_lose.append('Draw')
7 if ##COMPLETE:
8 win_draw_lose.append(data_matches['Home Team Name'].loc[i])
9 if ##COMPLETE:
10 win_draw_lose.append(data_matches['Away Team Name'].loc[i])
11 data_matches['Result']= ##COMPLETE
# The 32 teams listed for the Qatar tournament, spelled as in the source list.
# The export duplicated the "Cameroon" line without a trailing comma, which
# Python would silently concatenate into "CameroonCameroon"; one entry is kept.
qatar_team_list = [
    "Argentina",
    "Australia",
    "Belgium",
    "Brazil",
    "Cameroon",
    "Canada",
    "Costa Rica",
    "Croatia",
    "Denmark",
    "Ecuador",
    "England",
    "France",
    "Germany",
    "Ghana",
    "IR Iran",
    "Japan",
    "Korea Republic",
    "Mexico",
    "Morocco",
    "Netherlands",
    "Poland",
    "Portugal",
    "Qatar",
    "Saudi Arabia",
    "Senegal",
    "Serbia",
    "Spain",
    "Switzerland",
    "Tunisia",
    "Uruguay",
    "USA",
    "Wales",
]
select_team_statistics
1 def select_team_statistics(team):
2 data_team = # COMPLETE
3 winning_count = # COMPLETE
4 draw_count = # COMPLETE
5 lose_count = # COMPLETE
6 return data_team,{'Winning Count':winning_count,'Draw Count':draw_count,'Lose Count':lose_count}
1 print(select_team_statistics('Qatar'))
2 print(select_team_statistics('Portugal'))
select_match_statistics
team_A team_B
1 # SOLUTION
2 data_match,stats = select_match_statistics('France','England')
3 data_match,stats = select_match_statistics('France','Qatar')
find score
1 def find_score(team):
2 team_data, stats = #COMPLETE
2 team_data, stats = #COMPLETE
3 team_stage = team_data['Stage'].reset_index().drop('index',axis=1)
4 sum_groups = 0
5 sum_finals = 0
6 for s in range(len(team_stage)):
7 stage_val = team_stage.loc[s].values[0].split(' ')
8 if stage_val[0]=='Group':
9 ## COMPLETE
10 if stage_val[0]!='Group':
11 ## COMPLETE
12
13 score_1 = ## COMPLETE
14 try:
15 score_2 = ## COMPLETE
16 except:
17 score_2=0
18 return score_1+score_2
find_score_two_teams
1 def find_score_two_teams(team_A,team_B):
2 if team_A == 'Qatar':
3 score_1 = 0.09
4 score_2 = ##COMPLETE
5 if team_B == 'Qatar':
6 score_2 = ##COMPLETE
7 score_1 = find_score(team_A)
8 if team_A!='Qatar' and team_B!='Qatar':
9 score_1 = ##COMPLETE
10 score_2 = ##COMPLETE
11 team_A_score = ##COMPLETE
12 team_B_score = ##COMPLETE
13 if team_A_score>team_B_score:
14 team_A_score = min(0.90,team_A_score)
15 team_B_score = max(0.10,team_B_score)
16 else:
17 team_B_score = min(0.90,team_B_score)
18 team_A_score = max(0.10,team_A_score)
19 team_A_score = ##COMPLETE
20 team_B_score = ##COMPLETE
21 res = {team_A: team_A_score, team_B:team_B_score, 'Draw':0.10}
22 return res
select_match_statistics
1 _,stats_1=select_match_statistics('Saudi Arabia','Portugal')
2 _,stats_2=select_match_statistics('France','Portugal')
3 print(stats_1)
4 print(stats_2)
groups
1 group_A = ['Qatar','Ecuador','Senegal','Netherlands']
2 group_B = ['England','IR Iran','USA','Wales']
3 # COMPLETE
4 groups = # COMPLETE
1 group_name = group_B
2 #COMPLETE
def run_group(group_name):
    """Simulate every match of one group and return the points table.

    Each ordered pair (team_1, team_2) of distinct teams is simulated once, so
    every pairing is played twice. The outcome of a match is drawn at random
    from the probabilities returned by ``select_match_statistics``: a team
    name gives that team 3 points, any other outcome (the 'Draw' key) gives
    both teams 1 point.
    NOTE(review): confirm that playing each pairing twice is intended.

    Parameters
    ----------
    group_name : list of str
        Names of the teams in the group.

    Returns
    -------
    pandas.DataFrame
        One row per team with a single 'Points' column, sorted from most to
        fewest points.
    """
    data_res = pd.DataFrame(
        np.zeros(len(group_name)), index=group_name, columns=['Points']
    )
    for team_1 in group_name:
        for team_2 in group_name:
            if team_1 == team_2:
                continue
            _, stats = select_match_statistics(team_1, team_2)
            result = np.random.choice(list(stats.keys()), p=list(stats.values()))
            # Write through a single .loc call: the chained form
            # data_res['Points'].loc[...] = ... may update a temporary copy.
            if result in data_res.index:
                data_res.loc[result, 'Points'] += 3
            else:
                # Outcome is not a team of this group, i.e. a draw. This
                # replaces the former bare `except:` that hid any error.
                data_res.loc[[team_1, team_2], 'Points'] += 1
    return data_res.sort_values(by='Points', ascending=False)
# Labels of the eight groups; run_groups looks them up by position
# (group_names[g]) to name each group's points column.
# The first copy of this statement in the export had no commas, which would
# concatenate all labels into a single string; only the correct form is kept.
group_names = [
    'group A',
    'group B',
    'group C',
    'group D',
    'group E',
    'group F',
    'group G',
    'group H',
]
2
3 def run_groups():
4 group_list =[]
5 for g in # COMPLETE:
6 g_group = # COMPLETE
7 g_group = g_group.rename(columns={'Points':'Points '+group_names[g]})
8 group_list.# COMPLETE
9 return group_list
10
11 group_list = run_groups()
12 print(group_list)
find_qualification_stage
['Netherlands', 'England'], ['Senegal', 'USA'], ...
1 def find_qualification_stage(res_groups):
2 qual_matches = []
3 group_stage = np.arange(0,len(groups),2)
4 k=0
5 for g in range(len(group_stage)):
6 qual_matches.append(#COMPLETE)
7 qual_matches.append(#COMPLETE)
8 k=#COMPLETE
9 return qual_matches
elimination_stage
1 def elimination_stage(selections,k=1):
2 quarter_finals_team = []
3 for i in range(len(selections)):
4 team_1 = # COMPLETE
5 team_2 = # COMPLETE
6 _,stats = # COMPLETE
7 result = np.random.choice(list(stats.keys()),p=list(stats.values()))
8 if result==#COMPLETE:
9 result = np.random.choice([team_1,team_2],p=[0.5,0.5])
10 quarter_finals_team.append(result)
11 if k ==1:
12 quarter_finals_team=np.array(quarter_finals_team).reshape(2,-1)
13 return quarter_finals_team
# Play one complete tournament, printing the outcome of every round.
group_stage = run_groups()
eight_finals = find_qualification_stage(group_stage)
print(eight_finals)
quarter_finals = elimination_stage(eight_finals)
print(quarter_finals)
semi_finals = elimination_stage(quarter_finals, k=0)
print(semi_finals)

# Final between the two remaining teams (the duplicated call from the export
# is removed).
_, stats = select_match_statistics(semi_finals[0], semi_finals[1])
winner = np.random.choice(list(stats.keys()), p=list(stats.values()))
# A 'Draw' outcome is settled by a fair coin flip, the same tie-break
# whole_tournament applies, so a team is always reported as the winner.
if winner == 'Draw':
    winner = np.random.choice([semi_finals[0], semi_finals[1]], p=[0.5, 0.5])
print(winner)
whole_tournament
1 def whole_tournament():
2 group_stage = # COMPLETE
3 eight_finals = # COMPLETE
4 quarter_finals = # COMPLETE
5 semi_finals = # COMPLETE
6 _,stats = # COMPLETE
7 winner = np.random.choice(list(stats.keys()),p=list(stats.values()))
8 if winner=='Draw':
9 winner = np.random.choice([semi_finals[0],semi_finals[1]],p=[0.5,0.5])
10 return winner
# Run 1000 simulated tournaments and collect the winner of each in `stats`.
stats = []
for i in range(1000):
    # Progress report every 100 runs; run 0 is skipped because no winner has
    # been recorded yet.
    if i > 0 and i % 100 == 0:
        print('Running Simulation number %i' % (i))
        # value_counts sorts by frequency, so the first index entry is the
        # most frequent winner; [0] unwraps it from the one-column row key.
        winner_counts = pd.DataFrame(stats).value_counts()
        so_far = winner_counts.index[0][0]
        print('Most predicted winner so far is %s' % (so_far))
    winner = whole_tournament()
    stats.append(winner)
1 pd.DataFrame(stats)[0].value_counts().plot(kind="bar")
whole_tournament
1 def whole_tournament():
2 # COMPLETE
1 from google.colab import drive
2 drive.mount('/content/gdrive')
3
4 import pandas as pa
5 import pylab as pl
6 import seaborn as sn
7 pl.style.use('bmh')
8
9 T = pa.read_csv("/content/gdrive/MyDrive/INFO_II_Introduction_scientific_programming/Fifa2019.csv")
10 T.head()
1 # COMPLETE
# Smallest 'Wage' among the first 100 rows of T; `m` is drawn later as the
# horizontal reference line on the box plot.
T100 = T.iloc[:100]
m = T100['Wage'].min()
print(m)
1 # COMPLETE
T6 L
1 T6 = T.query('Club in @L')
2 T6.head()
# Box plot of 'Wage' per 'Club' for the rows selected in T6, with each club's
# mean shown as a red square and the value `m` as a red horizontal line.
pl.figure(figsize=(8, 5))
mean_marker_style = {
    'marker': 's',
    'markerfacecolor': 'red',
    'markeredgecolor': 'black',
}
sn.boxplot(
    x='Club',
    y='Wage',
    data=T6,
    whis=[0, 100],
    showmeans=True,
    color='white',
    meanprops=mean_marker_style,
)
pl.axhline(y=m, color='red')
pl.show()
L
m m
def fcc_pop(name):
    """Return the cumulative player shares, in percent, for one club.

    The rows of the module-level table ``T`` whose 'Club' equals *name* are
    counted; with N such rows the result is the list of the N + 1 values
    k / N * 100 for k = 0 .. N (so it starts at 0 and ends at 100).

    Raises ZeroDivisionError when no row matches *name*.
    """
    n_players = len(T.query('Club == @name'))
    return [rank / n_players * 100 for rank in range(n_players + 1)]
5
6 fcc_pop('Juventus')
fcc_pop('Juventus')
1 def salaries(name):
2 S = T.query('Club == @name')
3 column = S.sort_values('Wage')[#COMPLETE]
4 return list (column)
1 #COMPLETE
1 # COMPLETE
fcc_mr
1 def fcc_mr(name):
2 F = [0]
3 mass_sal = 0
4 sal_club = salaries(name)
5 for sal in sal_club:
6 mass_sal = # COMPLETE
7 F.append( # COMPLETE )
8 return F
1 # COMPLETE
def lorenz(name, mark, c):
    """Plot fcc_mr(name) against fcc_pop(name) on the current pylab figure.

    *mark* is the plot format string, *c* the line colour; the club *name* is
    used as the legend label.
    """
    population_share = fcc_pop(name)
    salary_share = fcc_mr(name)
    pl.plot(population_share, salary_share, mark, color=c, label=name)
3
# Square figure with both axes running from 0 to 100 and a tick every 10,
# showing the curve for Juventus.
pl.figure(figsize=(6, 6))
pl.axis([0, 100, 0, 100])
percent_ticks = range(0, 110, 10)
pl.xticks(percent_ticks)
pl.yticks(percent_ticks)
pl.xlabel('FCC of players %')
pl.ylabel('FCC of mass salaries %')
lorenz('Juventus', 'o-', 'green')
pl.legend()
1 # COMPLETE
trapeze
1 def trapeze(#COMPLETE):
2 #COMPLETE
1 X = fcc_pop('Juventus')
2 Y = fcc_mr('Juventus')
1 def area(X,Y):
2 A=0
3 for k in range(len(X)-1):
4 b = # COMPLETE # /100
5 B = # COMPLETE #
6 H = # COMPLETE # - X[k]/100
7 A = # COMPLETE # + trapeze(# COMPLETE #)
8 return A
gini name
1 # COMPLETE