Hi,
I am new to the community. This is the very first time I have developed a Dash app, and I am struggling with performance. I think it has to do with the logic of how I implemented my app. My app is pretty complex. It takes a dataset uploaded by a user; then, from a dropdown, the user can select attributes. Once an attribute is selected, an analysis is run on that attribute and graphs are created.
The bottleneck of my app is the data handling, since it is highly interactive (the user can add multiple attributes and filter by clicking on graphs). Each time a graph needs to be created, it loads the data again, even though I am using a Redis cache. I don’t know how to avoid converting the JSON data from my cache back into a dataframe each time a graph needs to be created.
The callback that causes the issue is the following one:
@dash.callback(
    Output({"type": "dynamic-graph", "attribute": MATCH, "graph_type": MATCH}, "figure"),
    [
        Input({"type": "dynamic-graph", "attribute": MATCH, "graph_type": MATCH}, "id"),
        Input("dropdown-selection-status", "data"),
        Input("layout-status", "data"),
        Input("filtered-data", "data"),
    ],
    prevent_initial_call=True,
)
def update_graphs(id, dropdown_status, layout_status, filtered_data_json):
    """Refresh the figure of the one dynamic graph matched by this callback.

    Args:
        id: Pattern-matching component id; its "attribute" and "graph_type"
            keys select which figure to build.
        dropdown_status: Data of the "dropdown-selection-status" component;
            its "selection_updated" entry must be truthy.
        layout_status: Data of the "layout-status" component; its
            "is_complete" entry must be truthy.
        filtered_data_json: Data of the "filtered-data" component. Its value
            is not read here; it only acts as a trigger, and the data is
            fetched again through UtilsDataManager below.

    Returns:
        The figure for the matched graph, or ``dash.no_update`` when no
        figure was produced.

    Raises:
        PreventUpdate: If either status is missing/empty or its flag is not
            set.
    """
    # Both status payloads must exist and carry their flag before any work.
    ready = (
        layout_status
        and dropdown_status
        and layout_status.get("is_complete")
        and dropdown_status.get("selection_updated")
    )
    if not ready:
        raise PreventUpdate

    # Load data from redis cache, then convert to pandas dataframe.
    cached_records = UtilsDataManager.get_data("filtered_data")
    frame = pd.DataFrame(cached_records)

    # Create figure or only update data from figure.
    result = UtilsDataManager.get_or_create_figure(
        id["attribute"], id["graph_type"], frame
    )
    if not result:
        return dash.no_update
    return result
def get_or_create_figure(attribute_name, graph_type, filtered_data):
    """Return the figure for one attribute/graph type, creating or updating it.

    Args:
        attribute_name: Attribute the graph describes; it must be present in
            the stored "dropdown_selection" value.
        graph_type: Graph type name passed to
            ``UtilsDashHelpers.get_graph_functions``.
        filtered_data: Data handed to the create/update function.

    Returns:
        The new or updated figure, or ``None`` when ``attribute_name`` is not
        in the stored dropdown selection.
    """
    selected_attributes = get_data("dropdown_selection") or []
    if attribute_name not in selected_attributes:
        return None

    cache_key = f"graph_{attribute_name}_{graph_type}"
    cached_figure = get_data(cache_key)

    if cached_figure is not None:
        # A figure is already stored: refresh it with the current data.
        refresh = UtilsDashHelpers.get_graph_functions(graph_type, "update")
        figure = refresh(cached_figure, filtered_data, attribute_name)
    else:
        # Nothing stored yet: build the figure from scratch.
        build = UtilsDashHelpers.get_graph_functions(graph_type, "create")
        figure = build(filtered_data, attribute_name)

    # Store the result under the same key in both cases.
    set_data(cache_key, figure)
    return figure
# Registry used by get_graph_functions: graph type name -> the UtilsGraphics
# function that creates a figure ("create") and the one that updates an
# existing figure ("update").
GRAPH_TYPE_FUNCTIONS = {
    "completeness": {
        "create": UtilsGraphics.create_pie_chart_completude,
        "update": UtilsGraphics.update_pie_chart_completude,
    },
    "uniqueness": {
        "create": UtilsGraphics.create_pie_chart_uniqueness,
        "update": UtilsGraphics.update_pie_chart_uniqueness,
    },
    "validity": {
        "create": UtilsGraphics.create_pie_chart_valid,
        "update": UtilsGraphics.update_pie_chart_valid,
    },
    "pattern": {
        "create": UtilsGraphics.create_bar_chart_pattern,
        "update": UtilsGraphics.update_bar_chart_pattern,
    },
    "length": {
        "create": UtilsGraphics.create_bar_chart_length,
        "update": UtilsGraphics.update_bar_chart_length,
    },
    "frequency": {
        "create": UtilsGraphics.create_bar_chart_frequency,
        "update": UtilsGraphics.update_bar_chart_frequency,
    },
}
def get_graph_functions(graph_type, type):
    """Look up one function registered for a graph type.

    Args:
        graph_type: Key into ``GRAPH_TYPE_FUNCTIONS``.
        type: Key into that graph type's entry; callers in this file pass
            "create" or "update".

    Returns:
        The object stored under ``type`` for ``graph_type``.

    Raises:
        ValueError: If ``graph_type`` has no (non-empty) entry.
        KeyError: If the entry exists but has no ``type`` key.
    """
    registered = GRAPH_TYPE_FUNCTIONS.get(graph_type)
    if registered:
        return registered[type]
    raise ValueError(f"Invalid graph_type: {graph_type}")
This is the logic I am currently using. Each time the callback is called, the data is reloaded and converted to a dataframe. Have you faced similar performance issues? If so, did you succeed in making it more efficient?
Thank you for your thoughts or input on this.