NFL - SURVIVAL - Ipynb - Colab
NFL - SURVIVAL - Ipynb - Colab
ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from lifelines import KaplanMeierFitter
Collecting lifelines
Downloading lifelines-0.28.0-py3-none-any.whl (349 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 349.2/349.2 kB 6.8 MB/s eta 0:00:00
Requirement already satisfied: numpy<2.0,>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from lifelines) (1.25.2)
Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from lifelines) (1.11.4)
Requirement already satisfied: pandas>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from lifelines) (2.0.3)
Requirement already satisfied: matplotlib>=3.0 in /usr/local/lib/python3.10/dist-packages (from lifelines) (3.7.1)
Requirement already satisfied: autograd>=1.5 in /usr/local/lib/python3.10/dist-packages (from lifelines) (1.6.2)
Collecting autograd-gamma>=0.3 (from lifelines)
Downloading autograd-gamma-0.5.0.tar.gz (4.0 kB)
Preparing metadata (setup.py) ... done
Collecting formulaic>=0.2.2 (from lifelines)
Downloading formulaic-1.0.1-py3-none-any.whl (94 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 94.2/94.2 kB 11.8 MB/s eta 0:00:00
Requirement already satisfied: future>=0.15.2 in /usr/local/lib/python3.10/dist-packages (from autograd>=1.5->lifelines) (0.18.3)
Collecting interface-meta>=1.2.0 (from formulaic>=0.2.2->lifelines)
Downloading interface_meta-1.3.0-py3-none-any.whl (14 kB)
Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from formulaic>=0.2.2->lifelines) (4.12.2)
Requirement already satisfied: wrapt>=1.0 in /usr/local/lib/python3.10/dist-packages (from formulaic>=0.2.2->lifelines) (1.14.1)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (1.2.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (4.53.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (1.4.5)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (24.1)
Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (9.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0->lifelines) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2.0->lifelines) (2023.4)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2.0->lifelines) (2024.1)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.0->lifelines) (1.16.0)
Building wheels for collected packages: autograd-gamma
Building wheel for autograd-gamma (setup.py) ... done
Created wheel for autograd-gamma: filename=autograd_gamma-0.5.0-py3-none-any.whl size=4030 sha256=e9fc31ea2cae6cd2af9774cb3630195ff02eee0e16d5d5d6e5550516a41cc2d3
Stored in directory: /root/.cache/pip/wheels/25/cc/e0/ef2969164144c899fedb22b338f6703e2b9cf46eeebf254991
Successfully built autograd-gamma
Installing collected packages: interface-meta, autograd-gamma, formulaic, lifelines
Successfully installed autograd-gamma-0.5.0 formulaic-1.0.1 interface-meta-1.3.0 lifelines-0.28.0
# Load the drafted-player dataset; the path is relative to the notebook's
# working directory.
import pandas as pd
import io  # NOTE(review): not used in any visible cell; confirm before removing
data = pd.read_csv('nfl_survival_analysis_data.csv')
# Preview the first rows to sanity-check the columns that were read.
data.head ()
https://colab.research.google.com/drive/1Lqz5bzKiQeGIPmODt7CJXqFhJAaG1NqR#printMode=true 1/5
6/23/24, 11:38 PM NFL_SURVIVAL.ipynb - Colab
Draft_Yr Rnd Pick Tm Player Pos Age To AP1 PB ... Rec Rec_Yds Rec_TD Tkl Def_Int Sk College Player_ID Retired Duration
0 1967 1 1 BAL Bubba Smith DE 22 1976 1 2 ... 0 0 0 0.0 0 0.0 Michigan St. SmitBu00 1 10
1 1967 1 2 MIN Clint Jones RB 22 1973 0 0 ... 38 431 0 0.0 0 0.0 Michigan St. JoneCl00 1 7
2 1967 1 3 SFO Steve Spurrier QB 22 1976 0 0 ... 0 0 0 0.0 0 0.0 Florida SpurSt00 1 10
3 1967 1 4 MIA Bob Griese QB 22 1980 2 8 ... 0 0 0 0.0 0 0.0 Purdue GrieBo00 1 14
4 1967 1 5 HOU George Webster LB 21 1976 3 3 ... 0 0 0 0.0 5 0.0 Michigan St. WebsGe00 1 10
5 rows × 32 columns
# Fit a Kaplan-Meier estimator to the career data.
#   durations      -> array or pd.Series of individual survival times
#   event_observed -> array or pd.Series flagging whether the event of
#                     interest (the "death") occurred for each individual
kmf = KaplanMeierFitter()
kmf.fit(durations=data["Duration"], event_observed=data["Retired"])
# Display the per-time counts that the estimate is built from.
kmf.event_table
https://colab.research.google.com/drive/1Lqz5bzKiQeGIPmODt7CJXqFhJAaG1NqR#printMode=true 2/5
6/23/24, 11:38 PM NFL_SURVIVAL.ipynb - Colab
removed observed censored entrance at_risk
event_at
15.0 88 85 3 0 205
16.0 52 49 3 0 117
17.0 33 33 0 0 65
18.0 11 11 0 0 32
19.0 5 5 0 0 21
20.0 8 8 0 0 16
21.0 5 5 0 0 8
22.0 1 1 0 0 3
23.0 1 1 0 0 2
26.0 1 1 0 0 1
0.7111339148281169
https://colab.research.google.com/drive/1Lqz5bzKiQeGIPmODt7CJXqFhJAaG1NqR#printMode=true 3/5
6/23/24, 11:38 PM NFL_SURVIVAL.ipynb - Colab
# Calculate the survival probability for t = 1: of the individuals still at
# risk at t = 1, the fraction for whom the event was not observed at that time.
# The row is selected by its event-time label rather than by position, so the
# lookup stays correct even if the table's first rows are not exactly t = 0, 1
# (a missing t = 1 row raises KeyError instead of silently using another row).
event_at_1 = kmf.event_table.loc[1.0]
surv_for_1 = (event_at_1.at_risk - event_at_1.observed) / event_at_1.at_risk
surv_for_1
0.902137335152342
0.8785751161590087
# The probability that an NFL player has a career longer than 2 years,
# obtained as the product of the per-period survival fractions for t = 0, 1, 2.
# NOTE(review): surv_for_0 and surv_for_2 are not defined in the visible text;
# presumably they are computed like surv_for_1 from kmf.event_table -- confirm.
surv_after_2 = surv_for_0 * surv_for_1 * surv_for_2
surv_after_2
0.5636414796488797
# Cross-check: ask the fitted estimator for the survival probability at t = 2.
kmf.predict(2)
0.5636414796488793
# Estimated survival probabilities at several time points in one call.
kmf.predict([1,3,5,10])
1 0.641540
3 0.498758
5 0.372828
10 0.112089
Name: KM_estimate, dtype: float64
# Show the full estimated survival curve (KM_estimate indexed by timeline).
kmf.survival_function_
https://colab.research.google.com/drive/1Lqz5bzKiQeGIPmODt7CJXqFhJAaG1NqR#printMode=true 4/5
6/23/24, 11:38 PM NFL_SURVIVAL.ipynb - Colab
KM_estimate
timeline
0.0 0.711134
1.0 0.641540
2.0 0.563641
3.0 0.498758
4.0 0.432463
5.0 0.372828
6.0 0.314448
7.0 0.259329
8.0 0.206205
9.0 0.156233
10.0 0.112089
11.0 0.079018
12.0 0.052435
13.0 0.034957
14.0 0.020540
15.0 0.012023
16.0 0.006988
17.0 0.003440
18.0 0.002258
19.0 0.001720
20.0 0.000860
21.0 0.000323
22.0 0.000215
23.0 0.000108
kmf.median_survival_time_
3.0
# plot the KM estimate
kmf.plot()
# Add title and y-axis label
plt.title("The Kaplan-Meier Estimate for Drafted NFL Players\n(1967-2015)")
plt.ylabel("Probability a Player is Still Active")
plt.show()
https://colab.research.google.com/drive/1Lqz5bzKiQeGIPmODt7CJXqFhJAaG1NqR#printMode=true 5/5