Here you go [apologies for the long function, subplots are dynamic based on df size]:
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import *
from math import ceil
from itertools import cycle
def bar_cat_overview__slice_by_categorical(df, categoricals, sliceable, max_cols=4, height=700, bargap=1):
print(f"Categorical variables are {categoricals}")
max_rows=ceil(float(len(df.columns))/max_cols)
fig = make_subplots(
rows=max_rows,
cols=max_cols,
subplot_titles=df.drop([sliceable], axis=1).columns).update_layout(
{"title":f"bars of features, slice by {sliceable}"})
col_cyc = cycle(iter(range(1,max_cols+1)))
color_list = cycle(iter(px.colors.qualitative.T10[:len(df[sliceable].unique())]))
row=1
ctr=0
for j in df.drop([sliceable], axis=1).columns:
col = next(col_cyc)
for attr1 in df[j].unique():
if ctr < 1:
for attr2 in df[sliceable].unique():
fig.add_bar(
x = [attr1],
y = [df[j][(df[j]==attr1) & (df[sliceable]==attr2)].count()],
row=row,
col=col,
marker={"color": next(color_list)},
name=str(attr2),
showlegend=True
)
else:
for attr2 in df[sliceable].unique():
fig.add_bar(
x = [attr1],
y = [df[j][(df[j]==attr1) & (df[sliceable]==attr2)].count()],
row=row,
col=col,
marker={"color": next(color_list)},
name=str(attr2),
showlegend=False
)
ctr+=1
if col % max_cols == 0:
row+=1
col+=1
return fig.update_layout({"height": height,
"barcornerradius": 1,
"bargap": bargap,
"barmode":"group",
"legend": {"title": sliceable}
})
fig = bar_cat_overview__slice_by_categorical(df, categoricals=categoricals, sliceable="A", max_cols=3, height=400,
bargap=0.8
)
fig
copy paste this dataframe and hit go:
import pandas as pd
import numpy as np
df = pd.DataFrame({
"A": ["awareness", "retention", "retention", "retention", "awareness", "converison"],
"B": ["social", "email", "referral", "notification", "PPC", "SEO"],
"C": ["male", "female", "male", "female", "male", "female"],
"D": ["confidential", "confidential", "confidential", "confidential", "confidential", "confidential"],
"E" : [0, 0, 0, 0, 0, 1],
})
categoricals = ["A", "B", "C", "D"]
df
see how the widths of the bar are far too narrow? I would like the bar widths to be consistent.
TIA!