Info4602 Final Code
Info4602 Final Code
#https://drive.google.com/file/d/1SGPqCZwycigITLBdOb8nCLXaxeYywHGy/view?usp=sharing
#https://drive.google.com/file/d/10MBYdOLqnAG-hIDdYKAMNVHTkSva1EJJ/view?usp=sharing
# four factors https://drive.google.com/file/d/1P8tiYPwSoE-MIPmZ4RegCLR1upr7_MCV/view?usp=sharing
# attendance https://drive.google.com/file/d/1Zxav1okw7wIAFk4wre5B5orilkfCKV-3/view?usp=sharing
import pandas as pd
# Load the team game log CSV from Google Drive (direct-download URL form).
link = 'https://drive.google.com/uc?id=1SGPqCZwycigITLBdOb8nCLXaxeYywHGy'
team_games = pd.read_csv(link)

# MATCHUP reads like "CHI vs. SAC" or "MIN @ HOU"; a matchup containing
# "vs." is labelled 'Home', anything else 'Away'. The match is a literal
# substring test (regex=False), so the "." is not treated as a wildcard.
# Rows with a missing MATCHUP get a missing home_away instead of raising.
is_home = team_games['MATCHUP'].str.contains('vs.', regex=False)
team_games['home_away'] = is_home.map({True: 'Home', False: 'Away'})
team_games
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 1/12
6/2/24, 4:51 PM group_collab - Colab
Unnamed:
season_id team_id game_id GAME_DATE MATCHUP WL MIN FGM F
0
CHI vs.
0 0 22000 1610612741 20000006 2000-10-31 L 240 26
SAC
HOU vs.
1 1 22000 1610612745 20000008 2000-10-31 L 240 34
MIN
MIN @
2 2 22000 1610612750 20000008 2000-10-31 W 240 43
HOU
SEA @
3 3 22000 1610612760 20000013 2000-10-31 L 240 32
VAN
PHI @
4 4 22000 1610612755 20000001 2000-10-31 W 240 38
NYK
... ... ... ... ... ... ... ... ... ...
ATL vs.
56267 56267 22022 1610612737 22200947 2023-03-03 W 240 52
POR
HOU @
56268 56268 22022 1610612745 22200959 2023-03-04 W 240 49
SAS
CHI @
56269 56269 22022 1610612741 22200989 2023-03-08 W 240 46
DEN
GSW vs.
56270 56270 22022 1610612744 22201121 2023-03-26 L 240 34
MIN
POR @
56271 56271 22022 1610612757 22201166 2023-04-02 W 240 43
MIN
# Four-factors table, loaded from Google Drive (direct-download URL form).
link = 'https://drive.google.com/uc?id=1P8tiYPwSoE-MIPmZ4RegCLR1upr7_MCV'
fourfactors = pd.read_csv(link)
fourfactors.head(5)

# Attendance table, loaded the same way. `link` is rebound on purpose so it
# always names the most recently loaded file.
link = 'https://drive.google.com/uc?id=1Zxav1okw7wIAFk4wre5B5orilkfCKV-3'
attendance = pd.read_csv(link)
attendance
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 2/12
6/2/24, 4:51 PM group_collab - Colab
Home
Home Total Home Home Away Total Away Away T
Rank Team Average
Games Attendance PCT Games Attendance PCT G
Attendance
NY
4 5 41 808885 19728 NaN 41 18620 NaN
Knicks
... ... ... ... ... ... ... ... ... ...
# Section: Alex
# Convert 'WL' to a numeric win indicator: 1 for 'W', 0 for anything else.
team_games['win'] = (team_games['WL'] == 'W').astype(int)

# The year is the last four characters of season_id (e.g. 22015 -> 2015).
team_games['year'] = team_games['season_id'].astype(str).str[-4:].astype(int)

# Two-way lookup between team ids and nicknames. For each team_id the
# nickname comes from the first row on which that id appears; selecting the
# column by name avoids positional indexing into a label-indexed row and
# avoids re-filtering the whole frame once per team.
team_ids = team_games['team_id'].unique()
first_rows = team_games[['team_id', 'team_nickname']].drop_duplicates('team_id')
team_key = dict(zip(first_rows['team_nickname'], first_rows['team_id']))
id_key = dict(zip(first_rows['team_id'], first_rows['team_nickname']))

# Column subsets used by the charts.
preset1 = team_games[
    ['team_abbreviation', 'team_nickname', 'game_id', 'WL', 'DREB', 'AST']
]
preset2 = team_games[
    ['team_abbreviation', 'team_nickname', 'game_id', 'WL', 'PTS', 'BLK']
]
preset3 = team_games[
    ['team_id', 'team_abbreviation', 'team_nickname', 'game_id', 'WL',
     'DREB', 'PLUS_MINUS', 'PIE', 'home_away']
]
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 3/12
6/2/24, 4:51 PM group_collab - Colab
import altair as alt
# Switch Altair to the VegaFusion data transformer for the charts below.
alt.data_transformers.enable("vegafusion")

# NOTE(review): `source`, `PLUS_MINUS`, `PIE`, `DREB` and `HOME_AWAY` are not
# defined in this part of the notebook; presumably a DataFrame and
# column-name strings set in an earlier cell -- confirm before running.

# Scatter of PLUS_MINUS against PIE. The colour field is DREB, while the
# colour-scale domain is taken from the third-from-last column of `source`.
scale_column = source[source.columns[-3]]
test = (
    alt.Chart(source)
    .mark_circle(size=60, opacity=.5)
    .encode(
        x=f'{PLUS_MINUS}:Q',
        y=f'{PIE}:Q',
        color=alt.Color(
            f'{DREB}:Q',
            scale=alt.Scale(
                range=["#ADD8E6", "#FF0000"],
                domain=[scale_column.min(), scale_column.max()],
            ),
        ),
    )
)

# Interval selection shared by the scatter and both strip plots.
brush = alt.selection_interval()
#input_dropdown = alt.binding_select(options=list(id_key.values()),name='Team')
#selection = alt.selection_point(fields=['Team'],bind=input_dropdown)

# Home/away colour scale; the domain is the min and max of that column.
venue_values = source[HOME_AWAY]
color = alt.Color(
    f'{HOME_AWAY}:N',
    scale=alt.Scale(
        range=["#1f77b4", "#FF7F0E"],
        domain=[venue_values.min(), venue_values.max()],
    ),
    title='Home vs. Away',
)


def _brush_color():
    """Return the home/away colour inside the brush and grey outside it."""
    return alt.condition(brush, color, if_false=alt.value('grey'))


# Axis settings that hide labels, title, grid and ticks on the scatter.
hidden_axis = dict(labels=False, title=None, grid=False, ticks=False)

# Brushable scatter. The original left `.transform_filter(team_select)`
# commented out on the strip charts (and, it appears, on this chart too).
point2 = (
    alt.Chart(source)
    .mark_circle(strokeWidth=0, filled=True, opacity=.2)
    .encode(
        x=alt.X(f'{PLUS_MINUS}:Q', axis=alt.Axis(**hidden_axis)),
        y=alt.Y(f'{PIE}:Q', axis=alt.Axis(**hidden_axis)),
        color=_brush_color(),
    )
    .add_params(brush)
    .properties(title='Game Win or Loss (+/-) by Impact 2000-2024')
)

# Tick strip along the y axis (PIE).
stripY = (
    alt.Chart(source)
    .mark_tick()
    .encode(
        y=alt.Y(f'{PIE}:Q', title=f'{PIE}'),
        color=_brush_color(),
    )
)  # .transform_filter(team_select)

# Tick strip along the x axis (PLUS_MINUS).
stripX = (
    alt.Chart(source)
    .mark_tick()
    .encode(
        x=alt.X(f'{PLUS_MINUS}:Q', title='Win or Lose By'),
        color=_brush_color(),
    )
)  # .transform_filter(team_select)
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 4/12
6/2/24, 4:51 PM group_collab - Colab
# Mean TS_PCT and PLUS_MINUS for every (season_id, home_away) pair.
season_venue_means = (
    team_games[['season_id', 'GAME_DATE', 'TS_PCT', 'PLUS_MINUS', 'team_id', 'home_away']]
    .groupby(['season_id', 'home_away'])
    .agg({'TS_PCT': 'mean', 'PLUS_MINUS': 'mean'})
    .reset_index()
)

# Season label: season_id as text with its first character removed.
season_venue_means['season'] = season_venue_means['season_id'].map(
    lambda season_id: str(season_id)[1:]
)

# Keep only season ids below 23000.
source2 = season_venue_means[season_venue_means['season_id'] < 23000]

# NOTE(review): `grouped_attendance2` is not defined in this part of the
# notebook, and the x field name ' year' starts with a space -- confirm both
# against the attendance preparation cell.
attendance_chart = (
    alt.Chart(grouped_attendance2)
    .mark_line()
    .encode(
        x=alt.X(' year:O', title='season'),
        y=alt.Y('Total Average Attendance', title='Fan Attendance'),
    )
    .properties(title='Average Fan Attendance by Game')
    .interactive()
)

# One line per home/away value: mean point margin by season.
advantage = (
    alt.Chart(source2)
    .mark_line()
    .encode(
        x=alt.X('season:O', axis=alt.Axis(labelAngle=-90, labels=True)),
        y=alt.Y('PLUS_MINUS:Q', title='Avg Pts Won / Lossed By'),
        color=alt.Color(
            'home_away:N',
            title='Home vs Away',
            legend=alt.Legend(orient='left'),
        ),
    )
    .properties(title='Home Court Advantage By Season')
)

# Show the two charts side by side.
advantage | attendance_chart
# Section: Jason
# Every Spurs row, then the row(s) where the Spurs' PLUS_MINUS is highest.
is_spurs = team_games['team_id'] == team_key['Spurs']
spurs = team_games[is_spurs]
top_margin = spurs['PLUS_MINUS'].max()
spurs_best_game = spurs[spurs['PLUS_MINUS'] == top_margin]
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 5/12
6/2/24, 4:51 PM group_collab - Colab
spurs_best_game
Unnamed:
season_id team_id game_id GAME_DATE MATCHUP WL MIN FGM F
0
SAS @
32822 32822 22015 1610612759 21500305 2015-12-07 W 240 47
PHI
# All rows recorded for game 21500305 (one row per participating team).
best_game_id = 21500305
game_finder = team_games[team_games['game_id'] == best_game_id]
game_finder
Unnamed:
season_id team_id game_id GAME_DATE MATCHUP WL MIN FGM FGA FG_PCT FG3M FG3A FG3_PCT FTM FTA FT_
0
SAS @
32822 32822 22015 1610612759 21500305 2015-12-07 W 240 47 76 0.618 4 11 0.364 21 25 0
PHI
PHI vs.
35006 35006 22015 1610612755 21500305 2015-12-07 L 240 26 75 0.347 4 24 0.167 12 21 0
SAS
import pandas as pd
import altair as alt
# Same Drive file id as the team game log loaded into `team_games`, read
# again under the name `data` (direct-download URL form). `link` stays bound
# to this URL for later cells that call pd.read_csv(link).
link = 'https://drive.google.com/uc?id=1SGPqCZwycigITLBdOb8nCLXaxeYywHGy'
data = pd.read_csv(link)
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 6/12
6/2/24, 4:51 PM group_collab - Colab
# Aggregate data by team, year and home/away
team_year_means = {
    'win': 'mean',     # Average win rate
    'FG_PCT': 'mean',  # Average field goal percentage
}
filtered_data = (
    team_games
    .groupby(['team_abbreviation', 'year', 'home_away'])
    .agg(team_year_means)
    .reset_index()
)

# NOTE(review): `filtered_data3`, `team_select` and `interactive_chart1` are
# not defined in this part of the notebook -- confirm they come from cells
# that were not captured here.

# Base chart for displaying the data with the transform_filter applied for
# team selection
base_chart = (
    alt.Chart(filtered_data3)
    .encode(x=alt.X('year:O', axis=alt.Axis(title='Year')))
    .transform_filter(team_select)  # Apply team selection here
)

# Dashed black horizontal rule at y = 0.5.
half_line = pd.DataFrame({'y': [0.5]})
mark_50 = (
    alt.Chart(half_line)
    .mark_rule(color='black', strokeDash=[4, 2])
    .encode(y='y')
)

# Layer the rule on top of the interactive chart.
interactive_chart1 + mark_50
print(team_games.columns)
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 7/12
6/2/24, 4:51 PM group_collab - Colab
'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV',
'PF', 'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE', 'SEASON_TYPE', 'SEASON',
'game_id.1', 'team_id.1', 'TEAM_CITY', 'MIN.1', 'E_OFF_RATING',
'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING',
'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT',
'REB_PCT', 'E_TM_TOV_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT',
'E_USG_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE', 'team_id.2',
'full_name', 'team_abbreviation', 'team_nickname', 'team_city',
'team_state', 'year_founded', 'home_away', 'win', 'year'],
dtype='object')
# Shooting efficiency: mean percentages per team, year and home/away.
shooting_means = {
    'FG_PCT': 'mean',   # Average field goal percentage
    'FG3_PCT': 'mean',  # Average three-point percentage
    'FT_PCT': 'mean',   # Average free throw percentage
}
filtered_data = (
    team_games
    .groupby(['team_abbreviation', 'year', 'home_away'])
    .agg(shooting_means)
    .reset_index()
)

#team_select = alt.selection_single(
# fields=['team_abbreviation'],
# bind=alt.binding_select(options=filtered_data['team_abbreviation'].unique()),
# name='Select Team'
#)

# NOTE(review): `team_select` is only present above as commented-out code and
# `interactive_chart2` is not defined in this part of the notebook -- confirm
# both come from cells that were not captured here.
base_chart2 = (
    alt.Chart(filtered_data)
    .encode(x=alt.X('year:O', axis=alt.Axis(title='Year')))
    .transform_filter(team_select)
)


def _pct_line(field, axis_title, axis_color, tooltip_title, **mark_kwargs):
    """Build one percentage line chart on top of base_chart2.

    field is the quantitative column to plot, axis_title and axis_color
    style the y axis, tooltip_title labels the value in the tooltip, and
    mark_kwargs are forwarded to mark_line (e.g. strokeDash).
    """
    return base_chart2.mark_line(**mark_kwargs).encode(
        y=alt.Y(f'{field}:Q', axis=alt.Axis(title=axis_title, titleColor=axis_color)),
        color='home_away:N',
        tooltip=[
            alt.Tooltip('year:O', title='Year'),
            alt.Tooltip(f'{field}:Q', title=tooltip_title),
        ],
    )


fg_pct_chart2 = _pct_line('FG_PCT', 'Field Goal Percentage', 'blue', 'Field Goal %')
tp_pct_chart2 = _pct_line(
    'FG3_PCT', 'Three-Point Percentage', 'green', 'Three-Point %',
    strokeDash=[6, 2],
)
ft_pct_chart2 = _pct_line(
    'FT_PCT', 'Free Throw Percentage', 'red', 'Free Throw %',
    strokeDash=[2, 1],
)
interactive_chart2
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 8/12
6/2/24, 4:51 PM group_collab - Colab
CAL
import pandas as pd
# Columns kept for the correlation step: the game outcome ('WL') followed by
# the per-game statistics.
corr_columns = [
    'WL', 'PIE',
    'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
    'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING',
    'AST_PCT', 'AST_TOV', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT',
    'E_TM_TOV_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT',
    'E_PACE', 'PACE', 'PACE_PER40', 'POSS',
]

# Read whatever file `link` was last bound to, then take the subset above.
df1 = pd.read_csv(link)
df2 = df1[corr_columns]
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 9/12
6/2/24, 4:51 PM group_collab - Colab
# First row of the pairwise correlation matrix of df2.
# numeric_only=True restricts the matrix to numeric columns, so a text column
# in df2 (such as 'WL' holding 'W'/'L') is left out instead of making corr()
# raise; if every column is already numeric the result is unchanged.
correlation_with_first_row = df2.corr(numeric_only=True).iloc[0]
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 10/12
6/2/24, 4:51 PM group_collab - Colab
KYLE
return organized_data.reset_index()
# Define the feature columns for which the average will be calculated.
# The entry after 'DEF_RATING' was cut off mid-string in the source; it is
# restored as 'E_NET_RATING', which is the column that follows 'DEF_RATING'
# in the team_games column listing.
feature_cols = [
    'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS',
    'E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING',
    'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO',
    'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'E_TM_TOV_PCT',
    'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT',
    'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE',
]

# Keys that define one group: team, home/away and season.
group_by_cols = ['team_abbreviation', 'home_away', 'team_id', 'season_id']
# Call the function to calculate average feature values for each
# combination of group_by_cols (team, home/away, season).
# NOTE(review): averaging_nba is defined outside the part of the notebook
# visible here; its exact behaviour should be confirmed against that cell.
average_data = averaging_nba(team_games, group_by_cols, feature_cols)
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 11/12
6/2/24, 4:51 PM group_collab - Colab
team_abbreviation home_away team_id season_id MIN FGM FGA FG_PCT FG3M FG3A FG3_PCT
0 ATL Away 1610612737 22000 240.000000 33.682927 81.097561 0.416000 3.439024 10.414634 0.334244 16
1 ATL Away 1610612737 22001 241.219512 32.829268 78.951220 0.416780 4.512195 13.902439 0.316610 18
2 ATL Away 1610612737 22002 243.048780 34.170732 79.146341 0.433317 4.780488 13.804878 0.352780 18
3 ATL Away 1610612737 22003 242.439024 33.926829 79.609756 0.426854 5.048780 15.439024 0.305268 16
4 ATL Away 1610612737 22004 241.829268 35.512195 81.463415 0.435902 3.439024 11.658537 0.292293 17
... ... ... ... ... ... ... ... ... ... ... ...
# Copy of the averages rounded to two decimal places.
average_df = average_data.round(decimals=2)

# Preview of the unrounded averages with a fresh index; the result is only
# displayed, not stored.
average_data.reset_index()
2011 WAS Home 1610612764 42013 240.000000 32.600000 80.600000 0.402800 5.400000 17.400000 0.307200 15
2012 WAS Home 1610612764 42014 240.000000 37.200000 81.200000 0.463000 10.600000 25.400000 0.406400 20
index team_abbreviation home_away team_id season_id MIN FGM FGA FG_PCT FG3M FG3A FG3_
2013 WAS Home 1610612764 42016 240.000000 40.833333 88.333333 0.462500 7.333333 24.000000 0.308833 20
0 0 ATL Away 1610612737 22000 240.000000 33.682927 81.097561 0.416000 3.439024 10.414634 0.33
2014 WAS Home 1610612764 42017 240.000000 39.333333 83.000000 0.472333 8.333333 20.666667 0.403667 19
1 1 ATL Away 1610612737 22001 241.219512 32.829268 78.951220 0.416780 4.512195 13.902439 0.31
2015 WAS Home 1610612764 42020 240.000000 39.000000 92.000000 0.425500 8.500000 29.500000 0.302000 26
2 2 ATL Away 1610612737 22002 243.048780 34.170732 79.146341 0.433317 4.780488 13.804878 0.35
2016 rows × 47 columns
3 3 ATL Away 1610612737 22003 242.439024 33.926829 79.609756 0.426854 5.048780 15.439024 0.30
4 4 ATL Away 1610612737 22004 241.829268 35.512195 81.463415 0.435902 3.439024 11.658537 0.29
https://colab.research.google.com/drive/1wR7spT_Vig7IAlXgja5V0NWriKhFLlet#printMode=true 12/12