It has been a while since I posted! Here is a Dash app that lets you explore how smoking, gambling, and cheating relate to people’s choice between a high-risk and a low-risk lottery, alongside respondent demographics:
KPIs at the top surface overall sample size, % who smoke, % choosing the riskier bet, and average expected payout.
A faceted bar chart shows how smokers, gamblers, and cheaters each split between Lottery A and B.
The treemap breaks down respondents’ incomes by education level; the heatmap maps incomes across U.S. regions.
A correlation matrix and Sankey diagram reveal that these “risk” behaviors overlap only slightly (smoking→gambling) and otherwise act independently.
A quick summary translates the chi-square test and correlations into plain English, so you immediately know which links are significant and which aren’t.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, callback_context
import dash_bootstrap_components as dbc
from scipy.stats import chi2_contingency
# Load and preprocess data
df = pd.read_csv("https://raw.githubusercontent.com/plotly/Figure-Friday/main/2025/week-23/steak-risk-survey.csv")

# Rename columns for clarity. The lottery answer is addressed by position
# (second column) rather than by its header text.
_column_names = {
    df.columns[1]: "Lottery Choice",
    "Do you ever smoke cigarettes?": "Smokes",
    "Do you ever gamble?": "Gamble",
    "Have you ever cheated on your significant other?": "Cheated",
    "Household Income": "Income",
    "Education": "Education",
    "Location (Census Region)": "Region",
}
df = df.rename(columns=_column_names)

# Clean income categories: anything that is missing or not one of the known
# brackets is collapsed into a single "Unknown" bucket.
_known_incomes = [
    "$0 - $24,999",
    "$25,000 - $49,999",
    "$50,000 - $99,999",
    "$100,000 - $149,999",
    "$150,000+",
]
df["Income"] = df["Income"].where(df["Income"].isin(_known_incomes), "Unknown")

# Filter out missing data: keep only respondents who answered every question
# the dashboard relies on.
_required_columns = ["Lottery Choice", "Smokes", "Income", "Education", "Region", "Cheated", "Gamble"]
df = df.dropna(subset=_required_columns)
# Prepare correlation and chi-square helpers
# Encode the Yes/No answers as 1/0 so Pearson correlations can be computed.
# An explicit Series.map is used instead of DataFrame.replace because replace
# relies on a silent object -> int downcast that pandas has deprecated; map
# also turns any unexpected answer into NaN (which .corr() skips) instead of
# leaving a string behind.
corr_df = df[["Smokes", "Gamble", "Cheated"]].apply(
    lambda answers: answers.map({"Yes": 1, "No": 0})
)
def make_contingency(data):
    """Cross-tabulate Cheated vs. Gamble and test the pair for independence.

    Args:
        data: DataFrame containing "Cheated" and "Gamble" columns.

    Returns:
        A tuple ``(ct, chi2, p)``: the contingency table, the chi-square
        statistic and its p-value. When ``data`` yields an empty table the
        statistic and p-value are both NaN.
    """
    ct = pd.crosstab(data["Cheated"], data["Gamble"])
    if ct.size == 0:
        # chi2_contingency raises ValueError on an empty table; report
        # "no result" instead so callers can still render something.
        return ct, float("nan"), float("nan")
    chi2, p, _, _ = chi2_contingency(ct)
    return ct, chi2, p
# Unified colors
DISCRETE_COLORS = {"Yes": "#e74c3c", "No": "#2c3e50"}
CONTINUOUS_SCALE = "Tealgrn"
CORR_SCALE = "RdBu"
SANKY_LINK_COLOR = "#bbbbbb"

# Sankey data
_answers = ["Yes", "No"]
labels = ["Smokes: Yes", "Smokes: No", "Gamble: Yes", "Gamble: No", "Cheated: Yes", "Cheated: No"]

# Respondent counts for each Smokes -> Gamble and Gamble -> Cheated answer
# pair; combinations that never occur are filled with 0.
flow1 = pd.crosstab(df["Smokes"], df["Gamble"]).reindex(index=_answers, columns=_answers).fillna(0)
flow2 = pd.crosstab(df["Gamble"], df["Cheated"]).reindex(index=_answers, columns=_answers).fillna(0)

# Node indices follow `labels`: 0/1 = Smokes, 2/3 = Gamble, 4/5 = Cheated.
source = [0, 0, 1, 1, 2, 2, 3, 3]
target = [2, 3, 2, 3, 4, 5, 4, 5]
# Link order matches source/target: Yes->Yes, Yes->No, No->Yes, No->No for
# the first stage, then the same four pairs for the second stage.
value = [
    flow.loc[from_answer, to_answer]
    for flow in (flow1, flow2)
    for from_answer in _answers
    for to_answer in _answers
]

_sankey_trace = go.Sankey(
    node=dict(
        label=labels,
        pad=15,
        thickness=20,
        # Alternate the Yes/No colors to line up with `labels`.
        color=[DISCRETE_COLORS[answer] for _ in range(3) for answer in _answers],
    ),
    link=dict(source=source, target=target, value=value, color=SANKY_LINK_COLOR),
)
fig_sankey = go.Figure(_sankey_trace)
fig_sankey.update_layout(title_text="Behavior Flow: Smoking → Gambling → Cheating", font_size=12)
# Dash App
theme = dbc.themes.LUX
app = Dash(
    __name__,
    external_stylesheets=[theme],
    suppress_callback_exceptions=True,
)
app.title = "Survey Insights"

# Inline style shared by the KPI, filter and summary cards.
CARD_STYLE = {
    "backgroundColor": "#ffffff",
    "padding": "15px",
    "marginBottom": "20px",
    "boxShadow": "0 2px 6px rgba(0,0,0,0.1)",
}
# Layout
def serve_layout():
    """Build the page: title, KPI cards, filters, graph area and summary card.

    Returns:
        A fluid ``dbc.Container`` holding every component the dashboard
        callback reads from or writes to.
    """
    region_options = [{"label": r, "value": r} for r in sorted(df.Region.unique())]

    # A plain string sort would put "$100,000 - $149,999" ahead of
    # "$25,000 - $49,999", so list the brackets in ascending order explicitly.
    income_order = [
        "$0 - $24,999",
        "$25,000 - $49,999",
        "$50,000 - $99,999",
        "$100,000 - $149,999",
        "$150,000+",
        "Unknown",
    ]
    incomes_present = set(df.Income.unique())
    ordered_incomes = [label for label in income_order if label in incomes_present]
    # Keep any label not covered by the list above rather than dropping it.
    ordered_incomes += sorted(incomes_present.difference(income_order))
    income_options = [{"label": i, "value": i} for i in ordered_incomes]

    # (card heading, id of the element that receives the KPI value)
    kpi_specs = [
        ("Total Responses", 'kpi-total'),
        ("% Smokers", 'kpi-smoke'),
        ("% Choose Lottery A", 'kpi-bet'),
        ("Avg Expected Value", 'kpi-ev'),
    ]
    kpi_row = dbc.Row(
        [
            dbc.Col(dbc.Card([html.H5(heading), html.H2(id=value_id)], style=CARD_STYLE), width=3)
            for heading, value_id in kpi_specs
        ],
        className='mb-4',
    )

    filter_card = dbc.Card(
        [
            html.H5("Filters"),
            dcc.Dropdown(
                id="region-filter",
                options=region_options,
                placeholder="Select region...",
                clearable=True,
            ),
            dcc.Dropdown(
                id="income-filter",
                options=income_options,
                placeholder="Select income...",
                clearable=True,
                style={"marginTop": "10px"},
            ),
            html.Button(
                "Reset Filters",
                id='reset-filters',
                n_clicks=0,
                className="mt-2 btn btn-secondary w-100",
            ),
        ],
        style=CARD_STYLE,
    )

    # Graphs & Summary placeholder: the two graphs whose clicks feed the
    # callback exist from the start; the callback replaces this container's
    # children with the full set of charts.
    graphs_container = html.Div(
        children=[
            dcc.Graph(id='lottery-bar', figure={}),
            dcc.Graph(id='income-education-treemap', figure={}),
        ],
        id='graphs-container',
    )

    summary_card = dbc.Card(
        [
            html.H5("Quick Statistical Summary"),
            html.Div(id='summary-stats', style={"padding": "10px"}),
        ],
        style=CARD_STYLE,
    )

    return dbc.Container(
        fluid=True,
        children=[
            html.H1("Survey Insights", className="text-center mt-3 mb-4"),
            kpi_row,
            dbc.Row([dbc.Col(filter_card, width=3)], className='mb-4'),
            graphs_container,
            summary_card,
        ],
    )


# The function itself (not its result) is assigned as the layout.
app.layout = serve_layout
# Callback
def _empty_figure(title):
    """Return a data-free figure titled *title* carrying a "no data" note."""
    placeholder = go.Figure()
    placeholder.update_layout(
        title=title,
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        annotations=[dict(
            text="No responses match the current selection",
            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
        )],
    )
    return placeholder


@app.callback(
    [
        Output('graphs-container', 'children'),
        Output('kpi-total', 'children'),
        Output('kpi-smoke', 'children'),
        Output('kpi-bet', 'children'),
        Output('kpi-ev', 'children'),
        Output('summary-stats', 'children'),
    ],
    [
        Input('region-filter', 'value'),
        Input('income-filter', 'value'),
        Input('reset-filters', 'n_clicks'),
        Input('income-education-treemap', 'clickData'),
        Input('lottery-bar', 'clickData'),
    ],
)
def update_dashboard(region, income, reset, treemap_click, bar_click):
    """Rebuild the charts, KPI values and summary for the current selection.

    Args:
        region: Selected region, or None when the dropdown is empty.
        income: Selected income bracket, or None when the dropdown is empty.
        reset: Click count of the reset button (only used as a trigger).
        treemap_click: clickData of the treemap, or None.
        bar_click: clickData of the lottery bar chart, or None.

    Returns:
        ``(rows, total, pct_smokers, pct_lottery_a, avg_expected_value,
        summary)`` in the order of the callback's outputs.
    """
    triggered = callback_context.triggered
    # Reset filters
    # NOTE(review): this only affects the current render. The callback has no
    # Output for the dropdown values, so the dropdowns keep showing the old
    # selection and it applies again on the next trigger.
    if triggered and triggered[0]['prop_id'] == 'reset-filters.n_clicks':
        region = income = None

    # Filtered DF
    df_f = df.copy()
    if region:
        df_f = df_f[df_f.Region == region]
    if income:
        df_f = df_f[df_f.Income == income]

    # Drill-down. The clicked label is classified against the full dataset,
    # but the filter is applied to df_f so the dropdown filters stay in effect
    # (filtering `df` here would silently discard them).
    if treemap_click:
        lbl = treemap_click['points'][0]['label']
        drill_column = 'Education' if lbl in df.Education.values else 'Income'
        df_f = df_f[df_f[drill_column] == lbl]
    if bar_click:
        choice = bar_click['points'][0]['x']
        df_f = df_f[df_f['Lottery Choice'] == choice]

    # KPIs
    tot = len(df_f)
    if tot:
        p_sm = f"{df_f.Smokes.eq('Yes').mean()*100:.1f}%"
        p_bet = f"{df_f['Lottery Choice'].eq('Lottery A').mean()*100:.1f}%"
        a_ev = f"${df_f['Lottery Choice'].map({'Lottery A':50,'Lottery B':18}).mean():.2f}"
    else:
        # Means over zero rows are NaN; show a readable marker instead.
        p_sm = p_bet = a_ev = "n/a"

    # Charts
    if df_f.empty:
        # Keep the same graph ids in the layout (they are inputs of this
        # callback) but show placeholders instead of plotting an empty frame.
        fig1 = _empty_figure('Lottery Choice by Risk Behaviors')
        fig2 = _empty_figure('Income Distribution by Education')
        fig3 = _empty_figure('Heatmap of Income by Region')
    else:
        behaviors = ['Smokes', 'Gamble', 'Cheated']
        df_long = df_f.melt(id_vars=['Lottery Choice'], value_vars=behaviors,
                            var_name='Behavior', value_name='Response')
        grouped = df_long.groupby(['Behavior', 'Response', 'Lottery Choice']).size().reset_index(name='Count')
        fig1 = px.bar(grouped, x='Lottery Choice', y='Count', color='Response', barmode='group',
                      facet_col='Behavior',
                      category_orders={'Behavior': behaviors, 'Response': ['Yes', 'No']},
                      color_discrete_map=DISCRETE_COLORS,
                      title='Lottery Choice by Risk Behaviors')
        fig1.update_traces(texttemplate='%{y}', textposition='outside')
        fig1.update_layout(margin=dict(t=40, l=20, r=20, b=20), yaxis_title='Number of Responses')

        education_income = df_f.groupby(['Education', 'Income']).size().reset_index(name='Count')
        fig2 = px.treemap(education_income, path=['Education', 'Income'], values='Count',
                          color='Count', color_continuous_scale=CONTINUOUS_SCALE,
                          title='Income Distribution by Education')
        fig2.update_layout(height=500, margin=dict(t=40, l=20, r=20, b=20))

        fig3 = px.density_heatmap(df_f, x='Region', y='Income', color_continuous_scale=CONTINUOUS_SCALE,
                                  title='Heatmap of Income by Region')

    # corr_df and fig_sankey are built once from the full dataset, so the
    # correlation matrix, the Sankey diagram and the correlation bullets below
    # do not react to the filters.
    corr = corr_df.corr()
    fig4 = px.imshow(corr, text_auto=True, title='Correlation Matrix', color_continuous_scale=CORR_SCALE)
    fig4.update_traces(zmid=0)
    fig5 = fig_sankey

    rows = [
        dbc.Row(dbc.Col(dcc.Graph(id='lottery-bar', figure=fig1), width=12), className='mb-4'),
        dbc.Row([dbc.Col(dcc.Graph(id='income-education-treemap', figure=fig2), width=6),
                 dbc.Col(dcc.Graph(figure=fig3), width=6)], className='mb-4'),
        dbc.Row([dbc.Col(dcc.Graph(figure=fig4), width=6),
                 dbc.Col(dcc.Graph(figure=fig5), width=6)], className='mb-4'),
    ]

    # Statistical summary
    try:
        _, _, p = make_contingency(df_f)
    except ValueError:
        # chi2_contingency rejects tables it cannot evaluate (e.g. no rows).
        p = float('nan')
    if pd.isna(p):
        chi_text = ("Chi-square test for association between Cheating and Gambling "
                    "could not be computed for the current selection.")
    else:
        chi_text = f"Chi-square test for association between Cheating and Gambling: p = {p:.3f}. {'Significant' if p < 0.05 else 'Not significant'}"

    corr_vals = corr.to_dict()
    summary = html.Ul([
        html.Li(chi_text),
        html.Li(f"Small positive correlation between Smoking and Gambling (r = {corr_vals['Smokes']['Gamble']:.2f}), indicating smokers are slightly more likely to gamble."),
        html.Li(f"Near-zero correlation between Smoking and Cheating (r = {corr_vals['Smokes']['Cheated']:.2f}), suggesting no linear relationship."),
        html.Li(f"Near-zero correlation between Gambling and Cheating (r = {corr_vals['Gamble']['Cheated']:.2f}), suggesting these behaviors are independent."),
    ], style={"marginLeft": "20px"})

    return rows, tot, p_sm, p_bet, a_ev, summary
# Launch the app (debug mode on) only when this file is run as a script.
if __name__ == "__main__":
    app.run(debug=True)