Statistical annotations in graphs

How do I add statistical annotations, e.g. p-values, to graphs?

For example:

Code for bar graph without statistical annotations:

import plotly.graph_objects as go
import pandas as pd

# Load the example "tips" dataset from the seaborn-data repository
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')

# Category labels used on the x axis
days = ['day1 and day 2', 'day 3 and day 4']

# Group by day and calculate the mean and sem of the numeric columns only.
# Averaging non-numeric columns raises a TypeError in recent pandas versions,
# so the numeric columns are selected explicitly before aggregating.
numeric_columns = list(df.select_dtypes(include='number').columns)
mean = df.groupby('day')[numeric_columns].mean()
sem = df.groupby('day')[numeric_columns].sem()

# Total bill split per day, so each statistic below is a single lookup
total_bill = df['total_bill']
bill_by_day = {
    day: total_bill[df['day'] == day]
    for day in ('Thur', 'Fri', 'Sat', 'Sun')
}

# Extract mean from days for input
mean_thur = bill_by_day['Thur'].mean()
mean_fri = bill_by_day['Fri'].mean()
mean_sat = bill_by_day['Sat'].mean()
mean_sun = bill_by_day['Sun'].mean()

# Extract sem from days for input
sem_thur = bill_by_day['Thur'].sem()
sem_fri = bill_by_day['Fri'].sem()
sem_sat = bill_by_day['Sat'].sem()
sem_sun = bill_by_day['Sun'].sem()

# Two bar traces sharing the same categories; only the second
# (Friday/Sunday) trace carries built-in error bars.
top_error_bars = dict(
    type='data',  # value of error bar given in data coordinates
    array=[sem_fri, sem_sun],
    color='rgba(0,0,0,1)',
    thickness=2,
    width=10,
    visible=True,
)

bottom_bars = go.Bar(
    name='Thursday and Saturday',
    x=days,
    y=[mean_thur, mean_sat],
    marker_color='#E45746',
    opacity=0.8,
)

top_bars = go.Bar(
    name='Friday and Sunday',
    x=days,
    y=[mean_fri, mean_sun],
    marker_color='#72B7B2',
    opacity=0.8,
    error_y=top_error_bars,
)

fig = go.Figure(data=[bottom_bars, top_bars])

# Error bars for bottom stack.
# Each bottom bar gets one invisible marker placed at its mean; the marker
# carries a constant error bar equal to that day's own sem. Every trace has
# exactly one x and one y value, and the Saturday bar uses the Saturday sem.
bottom_error_bars = [
    # (x category, bar mean, bar sem, trace name)
    ('day1 and day 2', mean_thur, sem_thur, 'error_bars_thursday'),
    ('day 3 and day 4', mean_sat, sem_sat, 'error_bars_saturday'),
]

for category, bar_mean, bar_sem, trace_name in bottom_error_bars:
    fig.add_trace(go.Scatter(
        x=[category], y=[bar_mean],
        mode='markers',
        name=trace_name,
        error_y=dict(
            type='constant',
            value=bar_sem,
            color='rgba(0,0,0,1)',
            thickness=1.8,
            width=10,
        ),
        # Fully transparent marker: only the error bar should be visible
        marker=dict(color='rgba(0,0,0,1)', size=10, opacity=0),
        showlegend=False,
    ))


# Sample-size labels, drawn as a text-only scatter trace above the bars
n_number_labels = go.Scatter(
    x=['day1 and day 2', 'day 3 and day 4'],
    y=[40, 47],
    mode="text",
    name="n_numbers",
    text=['n=20', 'n=50'],
    textposition="top center",
    showlegend=False,
)
fig.add_trace(n_number_labels)

# Customization of layout and traces
fig.update_layout(template='simple_white', title='', yaxis_title='Title Y', barmode='stack',
                  hoverlabel_namelength=-1,
                  newshape_line_color='magenta', newshape_opacity=0.2)
fig.update_traces(marker_line_color='rgba(0,0,0,0.8)', marker_line_width=1, opacity=0.8)

# Mode-bar configuration: scroll-wheel zoom plus extra draw/erase buttons.
# NOTE: 'displaylogo': True keeps the Plotly logo visible; use False to hide it.
config = {
    'scrollZoom': True,
    'displaylogo': True,
    'modeBarButtonsToAdd': ['drawopenpath', 'eraseshape'],
}

# The configuration only takes effect when it is passed to show()
fig.show(config=config)

# Print the grouped table followed by the per-day values, first for the
# means and then for the standard errors (one value per print call).
for summary_value in (
    mean, mean_thur, mean_fri, mean_sat, mean_sun,
    sem, sem_thur, sem_fri, sem_sat, sem_sun,
):
    print(summary_value)

I now made this graph below; how can I place the p-value p=0.00156 specified as

# p-value label as a text-only scatter trace anchored to the first category
p_value_label = go.Scatter(
    x=['day1 and day 2'],
    y=[26],
    mode="text",
    name="p-value",
    text=['p=0.00156'],
    textposition="top center",
    showlegend=False,
)
fig.add_trace(p_value_label)

in the middle, i.e. halfway between the categories "day1 and day 2" and "day 3 and day 4"?

import plotly.graph_objects as go
import pandas as pd

# Load the example "tips" dataset from the seaborn-data repository
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')

# Category labels used on the x axis
days = ['day1 and day 2', 'day 3 and day 4']

# Group by day and calculate the mean and sem of the numeric columns only.
# Averaging non-numeric columns raises a TypeError in recent pandas versions,
# so the numeric columns are selected explicitly before aggregating.
numeric_columns = list(df.select_dtypes(include='number').columns)
mean = df.groupby('day')[numeric_columns].mean()
sem = df.groupby('day')[numeric_columns].sem()

# Total bill split per day, so each statistic below is a single lookup
total_bill = df['total_bill']
bill_by_day = {
    day: total_bill[df['day'] == day]
    for day in ('Thur', 'Fri', 'Sat', 'Sun')
}

# Extract mean from days for input
mean_thur = bill_by_day['Thur'].mean()
mean_fri = bill_by_day['Fri'].mean()
mean_sat = bill_by_day['Sat'].mean()
mean_sun = bill_by_day['Sun'].mean()
# Same Thursday mean, read from the grouped table instead
mean_thur1 = mean.loc['Thur', 'total_bill']

# Extract sem from days for input
sem_thur = bill_by_day['Thur'].sem()
sem_fri = bill_by_day['Fri'].sem()
sem_sat = bill_by_day['Sat'].sem()
sem_sun = bill_by_day['Sun'].sem()

# Two bar traces sharing the same categories; only the second
# (Friday/Sunday) trace carries built-in error bars.
top_error_bars = dict(
    type='data',  # value of error bar given in data coordinates
    array=[sem_fri, sem_sun],
    color='rgba(0,0,0,1)',
    thickness=2,
    width=10,
    visible=True,
)

bottom_bars = go.Bar(
    name='Thursday and Saturday',
    x=days,
    y=[mean_thur, mean_sat],
    marker_color='#E45746',
    opacity=0.8,
)

top_bars = go.Bar(
    name='Friday and Sunday',
    x=days,
    y=[mean_fri, mean_sun],
    marker_color='#72B7B2',
    opacity=0.8,
    error_y=top_error_bars,
)

fig = go.Figure(data=[bottom_bars, top_bars])

# Error bars for bottom stack.
# Each bottom bar gets one invisible marker placed at its mean; the marker
# carries a constant error bar equal to that day's own sem. Every trace has
# exactly one x and one y value, and the Saturday bar uses the Saturday sem.
bottom_error_bars = [
    # (x category, bar mean, bar sem, trace name)
    ('day1 and day 2', mean_thur, sem_thur, 'error_bars_thursday'),
    ('day 3 and day 4', mean_sat, sem_sat, 'error_bars_saturday'),
]

for category, bar_mean, bar_sem, trace_name in bottom_error_bars:
    fig.add_trace(go.Scatter(
        x=[category], y=[bar_mean],
        mode='markers',
        name=trace_name,
        error_y=dict(
            type='constant',
            value=bar_sem,
            color='rgba(0,0,0,1)',
            thickness=1.8,
            width=10,
        ),
        # Fully transparent marker: only the error bar should be visible
        marker=dict(color='rgba(0,0,0,1)', size=10, opacity=0),
        showlegend=False,
    ))


# Sample-size labels, drawn as a text-only scatter trace
n_number_labels = go.Scatter(
    x=['day1 and day 2', 'day 3 and day 4'],
    y=[30, 36],
    mode="text",
    name="n_numbers",
    text=['n=20', 'n=50'],
    textposition="top center",
    showlegend=False,
)
fig.add_trace(n_number_labels)

# Brackets drawn as line shapes: for each pair of bars, a vertical leg at
# each category plus a horizontal line joining the two legs at the top.
left_category = 'day1 and day 2'
right_category = 'day 3 and day 4'

bracket_segments = [
    # Bottom bars: left leg, right leg, horizontal line
    (left_category, 20, left_category, 25),
    (right_category, 22, right_category, 25),
    (left_category, 25, right_category, 25),
    # Top bars: left leg, right leg, horizontal line
    (left_category, 38, left_category, 48),
    (right_category, 44, right_category, 48),
    (left_category, 48, right_category, 48),
]

for seg_x0, seg_y0, seg_x1, seg_y1 in bracket_segments:
    fig.add_shape(
        type="line",
        x0=seg_x0, y0=seg_y0, x1=seg_x1, y1=seg_y1,
        line=dict(color='rgba(0,0,0,1)', width=2),
    )


# p-value label as a text-only scatter trace; it is anchored to the first
# category, so the text appears above that bar.
p_value_label = go.Scatter(
    x=['day1 and day 2'],
    y=[26],
    mode="text",
    name="p-value",
    text=['p=0.00156'],
    textposition="top center",
    showlegend=False,
)
fig.add_trace(p_value_label)



# Customization of layout and traces
fig.update_layout(template='simple_white', title='', yaxis_title='Title Y', barmode='stack',
                  newshape_line_color='magenta', newshape_opacity=0.2,
                  hoverlabel_namelength=-1)
fig.update_traces(marker_line_color='rgba(0,0,0,0.8)', marker_line_width=1, opacity=0.8)

# Draw every shape fully opaque
fig.update_shapes(opacity=1)

# Mode-bar configuration: scroll-wheel zoom plus extra draw/erase buttons.
# NOTE: 'displaylogo': True keeps the Plotly logo visible; use False to hide it.
config = {
    'scrollZoom': True,
    'displaylogo': True,
    'modeBarButtonsToAdd': ['drawopenpath', 'eraseshape'],
}

# The configuration only takes effect when it is passed to show()
fig.show(config=config)

# Print the grouped table followed by the per-day values, first for the
# means and then for the standard errors (one value per print call).
for summary_value in (
    mean, mean_thur, mean_fri, mean_sat, mean_sun,
    sem, sem_thur, sem_fri, sem_sat, sem_sun,
):
    print(summary_value)

@Alexboiboi and @nicolaskruchten do you perhaps have a suggestion for this? Or should I simply use another program to depict brackets and p-values?

Hi @windrose,

You can work with shapes: instead of adding a trace, just draw a line, with something like fig.add_shape(type="line", ...).

See here for more information:

Absolute text/annotation positioning can be achieved in categorical plots; see the thread "Free text positioning in categorical graph?". In this case the annotation/text will stay put when the plot is zoomed or panned, of course.

Here’s code that accomplishes fairly well what I want; maybe there is a better solution?

import plotly.graph_objects as go
import pandas as pd

# Load the example "tips" dataset from the seaborn-data repository
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')

# Category labels used on the x axis
days=['day1 and day 2', 'day 3 and day 4']

# Group by day and calculate the mean and sem of the numeric columns only.
# Averaging non-numeric columns raises a TypeError in recent pandas versions,
# so the numeric columns are selected explicitly before aggregating.
numeric_columns = list(df.select_dtypes(include='number').columns)
mean = df.groupby('day')[numeric_columns].mean()
sem = df.groupby('day')[numeric_columns].sem()

# Per-day total-bill statistics used by the traces further down. They must be
# defined here; otherwise this script fails with a NameError when run on its own.
day_order = ['Thur', 'Fri', 'Sat', 'Sun']
mean_thur, mean_fri, mean_sat, mean_sun = mean.loc[day_order, 'total_bill']
sem_thur, sem_fri, sem_sat, sem_sun = sem.loc[day_order, 'total_bill']

# Two bar traces sharing the same categories; only the second
# (Friday/Sunday) trace carries built-in error bars.
top_error_bars = dict(
    type='data',  # value of error bar given in data coordinates
    array=[sem_fri, sem_sun],
    color='rgba(0,0,0,1)',
    thickness=2,
    width=10,
    visible=True,
)

bottom_bars = go.Bar(
    name='Thursday and Saturday',
    x=days,
    y=[mean_thur, mean_sat],
    marker_color='#E45746',
    opacity=0.8,
)

top_bars = go.Bar(
    name='Friday and Sunday',
    x=days,
    y=[mean_fri, mean_sun],
    marker_color='#72B7B2',
    opacity=0.8,
    error_y=top_error_bars,
)

fig = go.Figure(data=[bottom_bars, top_bars])

# Error bars for bottom stack: one invisible marker per bottom bar, placed
# at the bar's mean and carrying a constant error bar of that day's sem.
bottom_error_bars = [
    # (x category, bar mean, bar sem, trace name)
    ('day1 and day 2', mean_thur, sem_thur, 'error_bars_thursday'),
    ('day 3 and day 4', mean_sat, sem_sat, 'error_bars_saturday'),
]

for category, bar_mean, bar_sem, trace_name in bottom_error_bars:
    error_bar_style = dict(
        type='constant',
        value=bar_sem,
        color='rgba(0,0,0,1)',
        thickness=1.8,
        width=10,
    )
    # Fully transparent marker: only the error bar is drawn
    hidden_marker = dict(color='rgba(0,0,0,1)', size=10, opacity=0)
    fig.add_trace(go.Scatter(
        x=[category],
        y=[bar_mean],
        mode='markers',
        name=trace_name,
        error_y=error_bar_style,
        marker=hidden_marker,
        showlegend=False,
    ))


# Sample-size labels, drawn as a text-only scatter trace
n_number_labels = go.Scatter(
    x=['day1 and day 2', 'day 3 and day 4'],
    y=[30, 36],
    mode="text",
    name="n_numbers",
    text=['n=20', 'n=50'],
    textposition="top center",
    showlegend=False,
)
fig.add_trace(n_number_labels)

# Brackets for p-values: each bracket is one line trace that goes up at the
# first category, across to the second category, and back down.
bracket_x = ['day1 and day 2', 'day1 and day 2', 'day 3 and day 4', 'day 3 and day 4']
bracket_heights = [
    [20, 25, 25, 22],  # bottom bars
    [40, 47, 47, 45],  # top bars
]

for bracket_y in bracket_heights:
    fig.add_trace(go.Scatter(
        x=bracket_x,
        y=bracket_y,
        fill=None,
        mode="lines",
        line=dict(color='rgba(0,0,0,1)', width=2),
        showlegend=False,
    ))


# p-value labels as annotations in paper coordinates: x=0.5 centres the text
# horizontally, independent of the categorical x axis.
p_value_annotations = [
    # (text, y position in paper coordinates)
    ("p=0.00156", 0.57),
    ("***", 1.1),
]

for label_text, paper_y in p_value_annotations:
    fig.add_annotation(
        text=label_text,
        name="p-value",
        xref="paper",
        yref="paper",
        x=0.5,
        y=paper_y,
        showarrow=False,
        font=dict(size=12, color="black"),
    )

                   
# Layout: stacked bars on the simple_white template, rectangle drawing as
# the default drag mode, and the style applied to newly drawn shapes.
layout_options = dict(
    template='simple_white',
    title='',
    yaxis_title='Title Y',
    barmode='stack',
    dragmode='drawrect',
    font_size=12,
    newshape=dict(line_color='magenta', fillcolor=None, opacity=0.5),
    hoverlabel_namelength=-1,
)
fig.update_layout(**layout_options)

# Shared trace styling
trace_options = dict(
    marker_line_color='rgba(0,0,0,0.8)',
    marker_line_width=1,
    textfont_size=12,
    opacity=0.8,
)
fig.update_traces(**trace_options)

# Interactive options: scroll-wheel zoom, Plotly logo displayed, and extra
# draw/erase buttons in the mode bar.
config = {
    'scrollZoom': True,
    'displaylogo': True,
    'modeBarButtonsToAdd': ['drawopenpath', 'eraseshape'],
}

fig.show(config=config)

# Print the grouped mean table, then the grouped sem table
print(mean, sem, sep='\n')

Please see also Scatter plot with conditional error bars

I hope that adding p-values and their brackets will be made easier in the future.