I hope you find it interesting! Any feedback is welcome.
The Code
# Load and preprocess data
df = pd.read_csv("one-hit-wonders.csv")

# Clean and prepare data: keep only rows that have every field the dashboard
# depends on, then coerce the numeric columns (unparseable values become NaN).
df_clean = df.dropna(subset=['rank', 'year', 'peak_year_index', 'sport_name', 'league']).copy()
for numeric_col in ('year', 'rank', 'played_val', 'stat_val'):
    df_clean[numeric_col] = pd.to_numeric(df_clean[numeric_col], errors='coerce')

# Create performance metrics
df_clean['performance_score'] = 1 / (df_clean['rank'] + 1) * 1000  # Higher = better

# Calculate career length for each athlete (span of year_index values, inclusive)
year_index_by_athlete = df_clean.groupby('name')['year_index']
last_year_index = year_index_by_athlete.transform('max')
df_clean['career_length'] = last_year_index - year_index_by_athlete.transform('min') + 1
df_clean['peak_timing'] = (
    df_clean.groupby('name')['peak_year_index'].transform('first') / last_year_index
)

# Calculate peak performance for each athlete
athlete_peak_performance = (
    df_clean.groupby('name')['performance_score'].max().reset_index(name='peak_performance')
)

# Merge peak performance back into the original dataframe
df_clean = pd.merge(df_clean, athlete_peak_performance, on='name', how='left')

# Calculate consistent years based on a new logic: years in the top 10 rank
# This fixes the issue of penalizing consistent players with long careers
df_clean['is_consistent'] = df_clean['rank'] <= 10
consistent_years_count = (
    df_clean.groupby('name')['is_consistent'].sum().reset_index(name='consistent_years_count')
)

# Merge consistent years count and calculate final consistency score.
# career_years is joined on 'name' rather than assigned by position, so the
# result does not depend on both frames happening to share the same row order.
athlete_stats = pd.merge(athlete_peak_performance, consistent_years_count, on='name', how='left')
career_years = df_clean.groupby('name')['career_length'].first().reset_index(name='career_years')
athlete_stats = pd.merge(athlete_stats, career_years, on='name', how='left')
athlete_stats['consistency'] = athlete_stats['consistent_years_count'] / athlete_stats['career_years']

# Add other aggregated metrics from the original dataframe (one group-by pass)
per_athlete_extras = df_clean.groupby('name').agg(
    peak_timing=('peak_timing', 'mean'),
    best_rank=('rank', 'min'),
    sport_name=('sport_name', 'first'),
    league=('league', 'first'),
).reset_index()
athlete_stats = pd.merge(athlete_stats, per_athlete_extras, on='name', how='left')

# Create constellation coordinates using performance metrics
# (a small random jitter is added to both coordinates)
athlete_stats['x_coord'] = athlete_stats['consistency'] + np.random.normal(0, 0.05, len(athlete_stats))
athlete_stats['y_coord'] = athlete_stats['peak_performance'] / 100 + np.random.normal(0, 0.05, len(athlete_stats))
athlete_stats['star_size'] = np.clip(athlete_stats['peak_performance'] / 10, 5, 50)
athlete_stats['brightness'] = athlete_stats['peak_performance']
# League color mappings: lower-case league code -> hex colour string
league_colors = {
    'nba': '#FF4500',
    'wnba': '#FFD700',
    'atp': '#00BFFF',
    'wta': '#1E90FF',
    'pga': '#228B22',
    'lpga': '#32CD32',
    'nhl': '#00008B',
    'mlb': '#B22222',
}
# Initialize app with custom external stylesheets for better icons and fonts.
# Dash expects the importing module's name as its first argument.
app = dash.Dash(__name__, external_stylesheets=[
    dbc.themes.UNITED,
    'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css',
    'https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'
])
app.title = "Elite Athlete Career Trajectory"
# Layout
# Component tree: header, control panel (league dropdown, three threshold
# sliders, chart selector), graph area plus athlete-details sidebar, footer.
# Style dictionaries come from `custom_styles`, which is defined outside this section.
app.layout = html.Div([
    # Main background container
    html.Div([
        # Store component to hold the list of selected athletes.
        # It starts as an empty list because the callbacks append to / remove from it.
        dcc.Store(id='selected-athletes-store', data=[]),
        # Content wrapper with glassmorphism effect
        html.Div([
            # Header Section
            dbc.Row([
                dbc.Col([
                    html.Div([
                        html.I(className="fas fa-chart-line fa-3x mb-3",
                               style={'color': '#667eea'}),
                        html.H1("Elite Athlete Career Trajectory",
                                className="display-3 mb-3",
                                style=custom_styles['header_title']),
                        html.P([
                            html.I(className="fas fa-info-circle me-2"),
                            "This interactive dashboard visualizes data from The Pudding's article 'One-Hit Wonders in Sports'. "
                            "It analyzes the top 20 players from 8 sports leagues over the last 30 years, exploring the relationship "
                            "between peak performance and career consistency."
                        ], className="lead text-center mx-auto mb-4",
                           style={**custom_styles['subtitle'], 'max-width': '900px'}),
                        html.Hr(style={'border': '2px solid #667eea', 'width': '100px', 'margin': '0 auto'})
                    ], className="text-center")
                ])
            ], className="mb-5"),
            # Control Panel
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader([
                            html.I(className="fas fa-sliders-h me-2"),
                            "Control Panel"
                        ], style=custom_styles['control_header']),
                        dbc.CardBody([
                            dbc.Row([
                                dbc.Col([
                                    html.Label([
                                        html.I(className="fas fa-trophy me-2"),
                                        "Select League:"
                                    ], className="fw-bold mb-2", style={'color': '#495057'}),
                                    dcc.Dropdown(
                                        id='league-dropdown',
                                        options=[{'label': i.upper(), 'value': i} for i in athlete_stats['league'].unique()],
                                        value='nba',
                                        multi=False,
                                        placeholder="Select a league...",
                                        className="mb-3",
                                        style={
                                            'border-radius': '10px',
                                            'border': '2px solid #e9ecef'
                                        }
                                    )
                                ], md=6),
                                dbc.Col([
                                    html.Label([
                                        html.I(className="fas fa-star me-2"),
                                        "Peak Performance (Min):"
                                    ], id="performance-label", className="fw-bold mb-2",
                                       style={'color': '#495057'}),
                                    html.Div([
                                        dcc.Slider(
                                            id='performance-threshold-slider',
                                            min=0,
                                            max=1000,
                                            step=10,
                                            value=0,
                                            marks={int(val): {'label': f'{int(val)}', 'style': {'color': '#667eea', 'font-weight': 'bold'}}
                                                   for val in np.linspace(0, 1000, 5)},
                                            className="mb-3"
                                        )
                                    ], style=custom_styles['slider_container']),
                                    dbc.Tooltip(
                                        "Peak performance is calculated as 1 / (rank + 1) * 1000. A higher score indicates a better performance in a given year.",
                                        target="performance-label",
                                        placement="bottom"
                                    )
                                ], md=6)
                            ]),
                            dbc.Row([
                                dbc.Col([
                                    html.Label([
                                        html.I(className="fas fa-chart-line me-2"),
                                        "Consistency (Min):"
                                    ], id="consistency-label", className="fw-bold mb-2",
                                       style={'color': '#495057'}),
                                    html.Div([
                                        dcc.Slider(
                                            id='consistency-threshold-slider',
                                            min=0,
                                            max=1,
                                            step=0.05,
                                            value=0,
                                            marks={round(val, 2): {'label': f'{round(val, 2)}', 'style': {'color': '#667eea', 'font-weight': 'bold'}}
                                                   for val in np.linspace(0, 1, 5)},
                                            className="mb-3"
                                        )
                                    ], style=custom_styles['slider_container']),
                                    dbc.Tooltip(
                                        "Consistency is the ratio of an athlete's career years that they finished in the top 10.",
                                        target="consistency-label",
                                        placement="bottom"
                                    )
                                ], md=6),
                                dbc.Col([
                                    html.Label([
                                        html.I(className="fas fa-calendar-alt me-2"),
                                        "Longevity (Min):"
                                    ], id="longevity-label", className="fw-bold mb-2",
                                       style={'color': '#495057'}),
                                    html.Div([
                                        dcc.Slider(
                                            id='longevity-threshold-slider',
                                            min=0,
                                            max=20,
                                            step=1,
                                            value=0,
                                            marks={int(val): {'label': f'{int(val)}', 'style': {'color': '#667eea', 'font-weight': 'bold'}}
                                                   for val in np.linspace(0, 20, 5)},
                                            className="mb-3"
                                        )
                                    ], style=custom_styles['slider_container']),
                                    dbc.Tooltip(
                                        "Longevity is the total number of career years for the athlete documented in the data.",
                                        target="longevity-label",
                                        placement="bottom"
                                    )
                                ], md=6)
                            ]),
                            # Radio items with better styling
                            dbc.Row([
                                dbc.Col([
                                    html.Div([
                                        html.Label([
                                            html.I(className="fas fa-chart-bar me-2"),
                                            "Select Chart:"
                                        ], className="fw-bold mb-3", style={'color': '#495057'}),
                                        dcc.RadioItems(
                                            id='graph-selector-radio',
                                            options=[
                                                {'label': [
                                                    html.I(className="fas fa-star me-2"),
                                                    'Career Trajectory: Peak vs. Consistency'
                                                ], 'value': 'constellation'},
                                                {'label': [
                                                    html.I(className="fas fa-line-chart me-2"),
                                                    'Evolution of Annual Performance Score'
                                                ], 'value': 'performance_year'},
                                                {'label': [
                                                    html.I(className="fas fa-trophy me-2"),
                                                    'Evolution of Top 10 Ranking'
                                                ], 'value': 'consistency_year'}
                                            ],
                                            value='constellation',
                                            className="d-flex flex-wrap justify-content-center",
                                            inputClassName="form-check-input me-2",
                                            labelClassName="form-check-label me-3 mb-2 p-2 rounded",
                                            style={'font-weight': '500'}
                                        )
                                    ], style=custom_styles['radio_container'])
                                ])
                            ]),
                        ])
                    ], style=custom_styles['control_card'])
                ])
            ], className="mb-4"),
            # Main content with graphs and stats
            dbc.Row([
                # Column for all graphs (9/12 of the row)
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader([
                            html.I(className="fas fa-chart-area me-2"),
                            "Performance Analysis"
                        ], style=custom_styles['control_header']),
                        dbc.CardBody([
                            # Main constellation graph container
                            html.Div(id='main-graph-container', children=[
                                dcc.Graph(id='main-graph', style={'height': '600px'},
                                          config={'displayModeBar': True, 'displaylogo': False}),
                            ]),
                            # Performance by year graph container (initially hidden)
                            html.Div(id='performance-graph-container', style={'display': 'none'}, children=[
                                html.H5([
                                    html.I(className="fas fa-chart-line me-2"),
                                    "Evolution of Annual Performance Score"
                                ], className="text-center mt-3 mb-4", style={'color': '#495057'}),
                                dcc.Graph(id='performance-by-year-graph', style={'height': '600px'},
                                          config={'displayModeBar': True, 'displaylogo': False}),
                            ]),
                            # Consistency by year graph container (initially hidden)
                            html.Div(id='consistency-graph-container', style={'display': 'none'}, children=[
                                html.H5([
                                    html.I(className="fas fa-trophy me-2"),
                                    "Evolution of Top 10 Ranking"
                                ], className="text-center mt-3 mb-4", style={'color': '#495057'}),
                                dcc.Graph(id='consistency-by-year-graph', style={'height': '600px'},
                                          config={'displayModeBar': True, 'displaylogo': False}),
                            ])
                        ])
                    ], style=custom_styles['graph_card'])
                ], md=9),
                # Column for athlete details and stats cards (3/12 of the row)
                dbc.Col([
                    dbc.Button([
                        html.I(className="fas fa-redo me-2"),
                        "Reset Selection"
                    ], id="reset-button", className="w-100 mb-3",
                       style=custom_styles['reset_button']),
                    dbc.Card([
                        dbc.CardHeader([
                            html.I(className="fas fa-user-astronaut me-2"),
                            "Athlete Details"
                        ], style=custom_styles['control_header']),
                        dbc.CardBody([
                            html.Div(id='star-details', className="mb-3")
                        ], style={'min-height': '200px'})
                    ], style=custom_styles['details_card']),
                ], md=3)
            ], className="mb-4"),
            # Footer
            dbc.Row([
                dbc.Col([
                    html.Hr(className="my-4", style={'border': '1px solid #dee2e6'}),
                    html.P([
                        html.I(className="fas fa-code me-2"),
                        "Dashboard developed with Python | Plotly | Dash. Data courtesy of ",
                        html.A("The Pudding", href="#", className="text-decoration-none",
                               style={'color': '#667eea', 'font-weight': '500'}),
                        "."
                    ], className="text-center mb-0", style=custom_styles['footer'])
                ])
            ])
        ], style=custom_styles['content_wrapper'])
    ], style=custom_styles['main_container'])
])
# Callback to control which graph is visible
@app.callback(
    [Output('main-graph-container', 'style'),
     Output('performance-graph-container', 'style'),
     Output('consistency-graph-container', 'style')],
    [Input('graph-selector-radio', 'value')]
)
def update_graph_visibility(selected_graph):
    """Show exactly one of the three graph containers.

    Args:
        selected_graph: Value of the chart selector radio group
            ('constellation', 'performance_year' or 'consistency_year').

    Returns:
        A 3-tuple of style dicts for the main, performance and consistency
        containers, in that order. Any unrecognised value falls back to
        showing the main (constellation) graph.
    """
    shown = {'display': 'block'}
    hidden = {'display': 'none'}
    if selected_graph == 'performance_year':
        return hidden, shown, hidden
    if selected_graph == 'consistency_year':
        return hidden, hidden, shown
    # 'constellation' and the fallback case are identical.
    return shown, hidden, hidden
# Callback to manage the selection of up to 3 athletes
@app.callback(
    Output('selected-athletes-store', 'data'),
    Input('main-graph', 'clickData'),
    State('selected-athletes-store', 'data')
)
def update_selected_athletes_list(click_data, current_selection):
    """Toggle the clicked athlete in the stored selection (maximum of three).

    Args:
        click_data: The main graph's ``clickData`` payload, or None.
        current_selection: List of athlete names currently held in the store
            (treated as empty when the store holds None).

    Returns:
        The updated list of names, or ``dash.no_update`` when the click
        carries no athlete name.
    """
    if not click_data:
        return dash.no_update

    points = click_data.get('points') or []
    # Traces built without customdata produce points that lack the key;
    # ignore such clicks instead of raising KeyError.
    custom = points[0].get('customdata') if points else None
    if not custom:
        return dash.no_update
    clicked_name = custom[0]

    selection = list(current_selection or [])
    if clicked_name in selection:
        selection.remove(clicked_name)
    elif len(selection) < 3:
        selection.append(clicked_name)
    return selection
# Callback for the reset button
@app.callback(
    Output('selected-athletes-store', 'data', allow_duplicate=True),
    Input('reset-button', 'n_clicks'),
    prevent_initial_call=True
)
def reset_selection(n_clicks):
    """Clear the athlete selection when the reset button is clicked.

    Args:
        n_clicks: Click count of the reset button (None/0 before any click).

    Returns:
        An empty list once the button has been clicked, otherwise
        ``dash.no_update``.
    """
    if n_clicks:
        # Must be an empty list, not None: the selection callbacks use
        # membership tests and len() on the stored value.
        return []
    return dash.no_update
# Callback to update the main constellation graph and the statistics
@app.callback(
    [Output('main-graph', 'figure')],
    [Input('league-dropdown', 'value'),
     Input('performance-threshold-slider', 'value'),
     Input('consistency-threshold-slider', 'value'),
     Input('longevity-threshold-slider', 'value'),
     Input('selected-athletes-store', 'data')]
)
def update_constellation_and_stats(selected_league, perf_threshold, consist_threshold, longevity_threshold, selected_athletes):
    """Build the peak-performance vs. consistency scatter for one league.

    Args:
        selected_league: League code chosen in the dropdown.
        perf_threshold: Minimum ``peak_performance`` to include.
        consist_threshold: Minimum ``consistency`` to include.
        longevity_threshold: Minimum ``career_years`` to include.
        selected_athletes: Names currently held in the selection store
            (None is treated as no selection).

    Returns:
        A 1-tuple containing the figure for the main graph.
    """
    selected_athletes = selected_athletes or []

    # Filter data based on selected leagues and the new threshold sliders
    filtered_df = athlete_stats[
        (athlete_stats['league'] == selected_league) &
        (athlete_stats['peak_performance'] >= perf_threshold) &
        (athlete_stats['consistency'] >= consist_threshold) &
        (athlete_stats['career_years'] >= longevity_threshold)
    ].copy()

    fig = go.Figure()
    if filtered_df.empty:
        fig.add_annotation(text="No athletes meet these criteria.",
                           xref="paper", yref="paper", x=0.5, y=0.5,
                           showarrow=False, font=dict(size=16, color="#6c757d"))
    else:
        fig = px.scatter(
            filtered_df,
            x='consistency',
            y='peak_performance',
            size='career_years',
            hover_data=['name', 'sport_name', 'league', 'best_rank', 'career_years', 'consistency', 'peak_performance'],
            custom_data=['name'],
            title=f"Career Trajectory: Peak Performance vs. Consistency ({selected_league.upper()})",
            color_discrete_sequence=['#667eea'],
            labels={
                'consistency': 'Consistency →',
                'peak_performance': 'Peak Performance ↑'
            }
        )
        fig.update_traces(
            marker=dict(line=dict(width=2, color='rgba(255,255,255,0.8)')),
            opacity=0.8
        )

        # Overlay a gold star on every selected athlete that passed the filters.
        selected_stars_df = filtered_df[filtered_df['name'].isin(selected_athletes)]
        for _, row in selected_stars_df.iterrows():
            fig.add_trace(go.Scatter(
                x=[row['consistency']],
                y=[row['peak_performance']],
                # Same customdata shape as the px.scatter points, so a click on
                # the highlight star still identifies the athlete.
                customdata=[[row['name']]],
                mode='markers',
                marker=dict(
                    size=row['career_years'] * 1.5,
                    color='rgba(255, 215, 0, 0.9)',
                    line=dict(width=3, color='#ff6b6b'),
                    symbol='star'
                ),
                name=f"Selected: {row['name']}",
                showlegend=False
            ))

    fig.update_layout(
        template='plotly_white',
        title_font_size=18,
        title_font_color='#495057',
        height=600,
        showlegend=False,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(family="Inter, sans-serif")
    )
    return (fig,)
# Callback to update the two new line graphs
@app.callback(
    [Output('performance-by-year-graph', 'figure'),
     Output('consistency-by-year-graph', 'figure'),
     Output('star-details', 'children')],
    [Input('selected-athletes-store', 'data')]
)
def update_line_graphs_and_details(selected_athletes):
    """Render the per-year graphs and the detail cards for the selection.

    Args:
        selected_athletes: Names currently held in the selection store.

    Returns:
        A 3-tuple ``(fig_perf, fig_rank, star_details)``: the annual
        performance-score figure, the top-10 ranking figure, and the
        children for the athlete-details panel (a placeholder alert when
        nothing is selected, otherwise one alert card per athlete).
    """
    # Initialize empty figures with annotations
    fig_perf = go.Figure()
    fig_rank = go.Figure()
    star_details = []

    if not selected_athletes:
        fig_perf.add_annotation(text="Click on athletes in the scatter plot to view their performance trajectory",
                                xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
                                font=dict(size=16, color="#6c757d"))
        fig_rank.add_annotation(text="Click on athletes in the scatter plot to view their ranking trajectory",
                                xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
                                font=dict(size=16, color="#6c757d"))
        star_details = dbc.Alert([
            html.I(className="fas fa-mouse-pointer fa-2x mb-3 d-block"),
            html.H5("Select Athletes", className="mb-2"),
            html.P("Click on up to 3 athletes in the chart to explore their detailed profiles and performance history.")
        ], color="light", className="text-center border-0",
           style={'background': 'linear-gradient(135deg, rgba(102, 126, 234, 0.1), rgba(118, 75, 162, 0.1))'})
    else:
        # Order by year so each line is drawn chronologically regardless of
        # the row order in the source data.
        history_df = df_clean[df_clean['name'].isin(selected_athletes)].sort_values('year')

        # Performance by year graph
        fig_perf = px.line(
            history_df,
            x='year',
            y='performance_score',
            color='name',
            labels={
                'year': 'Year',
                'performance_score': 'Performance Score'
            }
        )
        fig_perf.update_layout(
            legend_title_text='Athletes',
            font=dict(family="Inter, sans-serif")
        )

        # Evolution of ranking with scatter plot
        top10_history_df = history_df[history_df['rank'] <= 10]
        if top10_history_df.empty:
            fig_rank.add_annotation(text="No years in the top 10 for these athletes.",
                                    xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
                                    font=dict(size=16, color="#6c757d"))
        else:
            line_colors = ['#667eea', '#764ba2', '#ff6b6b']  # Color palette for athletes
            for i, name in enumerate(top10_history_df['name'].unique()):
                athlete_data = top10_history_df[top10_history_df['name'] == name]
                fig_rank.add_trace(go.Scatter(
                    x=athlete_data['year'],
                    y=athlete_data['rank'],
                    mode='lines+markers+text',
                    name=name,
                    text=athlete_data['rank'],
                    textposition="bottom center",
                    marker=dict(size=12, symbol='circle'),
                    line=dict(width=3, color=line_colors[i % len(line_colors)])
                ))
            fig_rank.update_layout(
                title="Evolution of Top 10 Ranking",
                xaxis_title="Year",
                yaxis_title="Ranking",
                # Rank 1 is best, so it is plotted at the top.
                yaxis=dict(autorange='reversed', dtick=1),
                template='plotly_white',
                height=600,
                legend_title_text='Athletes',
                font=dict(family="Inter, sans-serif")
            )

        # Update star details with enhanced styling
        selected_athlete_df = athlete_stats[athlete_stats['name'].isin(selected_athletes)].copy()
        alert_colors = ['primary', 'success', 'warning']
        for i, (_, row) in enumerate(selected_athlete_df.iterrows()):
            star_details.append(
                dbc.Alert([
                    html.Div([
                        html.H5([
                            html.I(className="fas fa-user-circle me-2"),
                            f"{row['name']}"
                        ], className="mb-3", style={'color': '#495057'}),
                        html.Div([
                            html.P([
                                html.I(className="fas fa-trophy me-2"),
                                html.Strong("League: "), f"{row['league'].upper()}"
                            ], className="mb-2"),
                            html.P([
                                html.I(className="fas fa-running me-2"),
                                html.Strong("Sport: "), f"{row['sport_name'].title()}"
                            ], className="mb-2"),
                            html.P([
                                html.I(className="fas fa-medal me-2"),
                                html.Strong("Best Rank: "), f"#{int(row['best_rank'])}"
                            ], className="mb-2"),
                            html.P([
                                html.I(className="fas fa-calendar me-2"),
                                html.Strong("Career Years: "), f"{int(row['career_years'])}"
                            ], className="mb-2"),
                            html.P([
                                html.I(className="fas fa-chart-line me-2"),
                                html.Strong("Peak Score: "), f"{row['peak_performance']:.1f}"
                            ], className="mb-2"),
                            html.P([
                                html.I(className="fas fa-star me-2"),
                                html.Strong("Consistency: "), f"{row['consistency']:.2f}"
                            ], className="mb-0")
                        ])
                    ])
                ], color=alert_colors[i % len(alert_colors)], className="mb-3",
                   style={'border': 'none', 'border-radius': '10px', 'box-shadow': '0 4px 15px rgba(0,0,0,0.1)'})
            )

    # Shared styling, applied whether or not anything is selected.
    fig_perf.update_layout(
        template='plotly_white',
        height=600,
        title_font_size=16,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(family="Inter, sans-serif")
    )
    fig_rank.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)'
    )
    return (fig_perf, fig_rank, star_details)
# Callback to rescale and reset the three threshold sliders when the league changes
@app.callback(
    [Output('performance-threshold-slider', 'max'),
     Output('consistency-threshold-slider', 'max'),
     Output('longevity-threshold-slider', 'max'),
     Output('performance-threshold-slider', 'value'),
     Output('consistency-threshold-slider', 'value'),
     Output('longevity-threshold-slider', 'value')],
    [Input('league-dropdown', 'value')]
)
def update_slider_properties_on_league_change(selected_league):
    """Set each slider's maximum to the league's observed maximum and reset values to 0.

    Args:
        selected_league: League code chosen in the dropdown (None when cleared).

    Returns:
        A 6-tuple: the three slider maxima (peak performance, consistency,
        career years) followed by three zeros for the slider values. When no
        athlete matches the league, every output is left unchanged.
    """
    league_data = athlete_stats[athlete_stats['league'] == selected_league]
    if league_data.empty:
        # .max() of an empty selection is NaN, which is not a usable slider bound.
        return (dash.no_update,) * 6

    perf_max = league_data['peak_performance'].max()
    consist_max = league_data['consistency'].max()
    longevity_max = league_data['career_years'].max()
    return perf_max, consist_max, longevity_max, 0, 0, 0
# Expose the app's underlying server object at module level
# (presumably so a WSGI host can import it — confirm against the deployment setup).
server = app.server