Hi everyone,
I am new to plotly/dash, but I noticed there is no built-in for an "upset plot" for showing set intersections (see here: https://upset.app/). I just made a function for making these plots from the base graph objects. It is still rough (could use axis ticks and background for the bar portion of the chart). It takes in a dataframe of boolean columns where 1 means a row belongs to a group, 0 otherwise. Would greatly appreciate it if someone could assist in adding the ticks and backgrounds to the top part of the chart. Thank you!
Maybe this could be added to the plotly express or figure factory? Here is the code:
import itertools
from collections import Counter
from itertools import compress

import pandas as pd
import plotly.graph_objects as go
def _exclusive_intersection_sizes(df):
    """Count the rows that belong to exactly each non-empty combination of columns.

    Returns a list of ``(member_positions, size)`` pairs, where
    ``member_positions`` is a tuple of column positions and ``size`` is the
    number of rows that are truthy in those columns and falsy in all others.
    The list is ordered by ``size`` descending; ties keep the order in which
    ``itertools.combinations`` produced them (smaller combinations first).
    """
    n_sets = len(df.columns)
    # One pass over the rows: tally each distinct True/False membership pattern
    # instead of rescanning every column for each of the 2^d - 1 combinations.
    pattern_counts = Counter(map(tuple, df.astype(bool).to_numpy().tolist()))

    counted = []
    for k in range(1, n_sets + 1):
        for members in itertools.combinations(range(n_sets), k):
            pattern = tuple(j in members for j in range(n_sets))
            counted.append((members, pattern_counts[pattern]))

    # list.sort is stable (also with reverse=True), so bars of equal height
    # come out in a reproducible order.
    counted.sort(key=lambda item: item[1], reverse=True)
    return counted


def plotly_upset_plot(df):
    """Build an UpSet-style figure of exclusive set intersections.

    Args:
        df: DataFrame with one column per set. A truthy cell (e.g. ``1`` or
            ``True``) means the row belongs to that set; a falsy cell means it
            does not.

    Returns:
        A ``go.Figure`` with, in trace order: a grey dot matrix (one column of
        dots per intersection, one row per set), one black ``markers+lines``
        trace per intersection marking its member sets, a bar trace with the
        intersection sizes sorted from largest to smallest, and a text trace
        with the set names to the left of the matrix. Only the bars carry hover
        text; the matrix and label traces use ``hoverinfo='skip'``.

    Raises:
        ValueError: if ``df`` has no columns.
    """
    # str() lets non-string column labels (ints, tuples, ...) be joined/measured.
    labels = [str(col) for col in df.columns]
    d = len(labels)
    if d == 0:
        raise ValueError("plotly_upset_plot needs a dataframe with at least one column")

    ranked = _exclusive_intersection_sizes(df)
    sizes = [size for _, size in ranked]
    n_bars = len(ranked)

    # Vertical scale: 10% headroom above the tallest bar. Fall back to 1 when
    # every count is 0 so the matrix rows below do not collapse onto one line.
    tallest = max(sizes) or 1
    max_y = tallest + 0.1 * tallest

    # y position of each set's matrix row, laid out below the bars (negative y).
    row_y = [-j * max_y / d - 0.1 * max_y for j in range(d)]

    fig = go.Figure()

    # Grey background dots: every (intersection, set) cell of the matrix.
    fig.add_trace(go.Scatter(
        x=[i for i in range(n_bars) for _ in range(d)],
        y=row_y * n_bars,
        mode='markers',
        showlegend=False,
        marker=dict(size=16, color='#C9C9C9'),
        hoverinfo='skip',
    ))

    # Black dots joined by a line for the sets that take part in each intersection.
    for i, (members, _) in enumerate(ranked):
        fig.add_trace(go.Scatter(
            x=[i] * len(members),
            y=[row_y[j] for j in members],
            mode='markers+lines',
            showlegend=False,
            marker=dict(size=16, color='#000000', showscale=False),
            hoverinfo='skip',
        ))

    # Bars with the intersection sizes; the hover text names the intersection.
    names = ['+'.join(labels[j] for j in members) for members, _ in ranked]
    hover = [f'<extra><br><b>{lab}</b><br><b>N-Count</b>: {n}</extra>' for lab, n in zip(names, sizes)]
    fig.add_trace(go.Bar(
        x=list(range(n_bars)),
        y=sizes,
        marker=dict(color='#000000'),
        text=sizes,
        hovertemplate=hover,
        textposition='outside',
        hoverinfo='none',
    ))

    # Set names, right-aligned just left of the first matrix column. The x
    # offset and the x-axis range below both scale with the longest label.
    max_string_len = max(len(label) for label in labels)
    fig.add_trace(go.Scatter(
        x=[-0.01 * max_string_len] * d,
        y=row_y,
        text=labels,
        mode='text',
        textposition='middle left',
        showlegend=False,
        hoverinfo='skip',
    ))

    fig.update_layout(
        title=dict(text='<b>Intersections</b>', x=0.5),
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            range=[-0.13 * max_string_len, n_bars],
        ),
        yaxis=dict(
            showgrid=True,
            zeroline=False,
            showticklabels=False,
            range=[-max_y - 0.1 * max_y - 1, max_y + 0.1 * max_y],
        ),
        plot_bgcolor="#FFFFFF",
        margin=dict(t=40, l=150),
        showlegend=False,
    )
    return fig