I am building a multi-page Dash app for my topic-modelling project. Because the backend code takes a while to return its output, I suspect it exceeds the callback timeout, so the result is never displayed. I therefore switched to a background callback to see whether that would work. Unfortunately, it only crashes VS Code, and I have no idea why. Does anyone have any idea?
# Disk-backed cache wrapped in the manager that is handed to the background
# callback registered further down in this module.
cache = diskcache.Cache("./cache")
background_callback_manager = DiskcacheManager(cache)

# Register this module as a page of the multi-page app.
dash.register_page(__name__, name="Topic Modelling", order=4)

# Page layout: a container holding the heading, the input-method chooser
# (`input_method`, defined elsewhere) and an initially empty row that the
# callbacks fill in, followed by the div that receives the topic output.
layout = [
    dbc.Container(
        [
            html.H4("Topic Modelling"),
            dbc.Row(input_method),
            dbc.Row(id="topic_modelling"),
        ],
    ),
    html.Div(id="topic_output"),
]
def dynamic_topic_input_object(topic_input):
    """Build the input widget that matches the chosen input method.

    Args:
        topic_input: ``"select_file"`` or ``"user_path"``.

    Returns:
        For ``"select_file"``: a ``dmc.Select`` whose options are the last
        five rows of the session-state CSV, each shown as the row's
        ``session_number``, ``keyword`` and ``num_videos`` joined by ``", "``.
        For ``"user_path"``: a ``dbc.Row`` wrapping a ``dmc.TextInput``.
        For any other value: ``None``.
    """
    if topic_input == "user_path":
        path_box = dmc.TextInput(
            label="Enter the full path of the csv file, and select source",
            id={"type": "user_path_text_TM", "index": "myindex"},
        )
        return dbc.Row(path_box)

    if topic_input != "select_file":
        return None

    sessions = pd.read_csv(
        'Dash_Application_Final/session_state/session_state.csv')
    shown_columns = ['session_number', 'keyword', 'num_videos']

    # One human-readable label per recent session; the same text doubles as
    # the option's value.
    recent_labels = []
    for _, record in sessions.tail(5).iterrows():
        recent_labels.append(", ".join(record[shown_columns].astype(str)))

    choices = [{"label": text, "value": text} for text in recent_labels]
    return dmc.Select(
        label="Choose a file",
        id={"type": "search_history_TM", "index": "myindex"},
        data=choices,
    )
@callback(
    Output("topic_modelling", "children"),
    Input("topic_input", "value"),
)
def topic_modelling(topic_input):
    """Render the topic-modelling controls once an input method is chosen.

    Args:
        topic_input: Value of the ``topic_input`` component.

    Returns:
        ``None`` while no input method is selected; otherwise a list holding
        the method-specific input widget, the source radio group, the
        hyperparameter chooser (``hyperparameters_choice``, defined
        elsewhere), an empty container for custom hyperparameters and the
        "Get Topics!" button.
    """
    if topic_input is None:
        return None

    method_widget = dynamic_topic_input_object(topic_input)

    source_picker = dmc.RadioGroup(
        children=[
            dmc.Radio("Twitter", value='twitter'),
            dmc.Radio("LinkedIn", value="linkedin"),
            dmc.Radio("Youtube", value="youtube"),
        ],
        label="Select Source",
        size="sm",
        mt=10,
        id="topic_source_1",
    )

    # Placeholder that the hyperparameter callback populates.
    custom_slot = dbc.Container(id="custom_hyperparameters")

    run_button = dbc.Button(
        "Get Topics!",
        outline=True,
        color="primary",
        id="get_topics_1",
        n_clicks=0,
        className="align-items-center mt-2",
    )
    button_row = dbc.Row(dbc.Col(run_button, width=3),)

    return [
        method_widget,
        source_picker,
        hyperparameters_choice,
        custom_slot,
        button_row,
    ]
@callback(
    Output("custom_hyperparameters", "children"),
    Input("select_hyperparameters", "value"),
)
def custom_hyperparameters(select_hyperparameters):
    """Show the UMAP and HDBSCAN parameter cards when "custom" is selected.

    Args:
        select_hyperparameters: Value of the ``select_hyperparameters``
            component.

    Returns:
        A one-element list containing a row with two half-width cards (UMAP
        on the left, HDBSCAN on the right) when the value is ``"custom"``;
        an empty list otherwise.
    """
    if select_hyperparameters != "custom":
        return []

    umap_controls = [
        dmc.NumberInput(
            id={"type": "n_neighbors", "index": "myindex"},
            min=5,
            max=300,
            step=5,
            value=100,
            label='n_neighbors',
            style={"width": "100%"},
        ),
        dmc.NumberInput(
            id={"type": "n_components", "index": "myindex"},
            min=1,
            max=50,
            step=1,
            value=5,
            label='n_components',
        ),
        dmc.Select(
            id={"type": "umap_metric", "index": "myindex"},
            data=[
                {'label': 'Euclidean', 'value': 'euclidean'},
                {'label': 'Cosine', 'value': 'cosine'},
            ],
            label='Select the metric',
            value='cosine',
        ),
    ]
    umap_card = dbc.Card([
        dbc.CardHeader("Choose UMAP Parameters"),
        dbc.CardBody(umap_controls),
    ])

    hdbscan_controls = [
        dmc.NumberInput(
            id={"type": "min_cluster_size", "index": "myindex"},
            min=1,
            max=100,
            step=1,
            value=25,
            label='Minimum cluster size',
        ),
        dmc.Select(
            id={"type": "hdb_metric", "index": "myindex"},
            data=[
                {'label': 'Euclidean', 'value': 'euclidean'},
                {'label': 'Cosine', 'value': 'cosine'},
                {'label': 'Manhattan', 'value': 'manhattan'},
            ],
            label='Select the metric',
            value='euclidean',
        ),
        dmc.Select(
            id={"type": "cluster_selection_method", "index": "myindex"},
            data=[
                {'label': 'EOM', 'value': 'eom'},
                {'label': 'Leaf', 'value': 'leaf'},
            ],
            label='Select cluster selection method',
            value='eom',
        ),
    ]
    hdbscan_card = dbc.Card([
        dbc.CardHeader("Choose HDBSCAN Parameters"),
        dbc.CardBody(hdbscan_controls),
    ])

    cards_row = dbc.Row(
        [
            dbc.Col([umap_card], width=6),
            dbc.Col([hdbscan_card], width=6),
        ],
        class_name="mt-4",
    )
    return [cards_row]
@callback(
    Output("topic_output", "children", allow_duplicate=True),
    # The button is the ONLY Input. Every other field is read as State so that
    # selecting a file, typing a path or changing a hyperparameter does not
    # start a new background job on each edit.
    [
        Input("get_topics_1", "n_clicks"),
    ],
    [
        dash.State({"type": "search_history_TM", "index": ALL}, "value"),
        dash.State({"type": "user_path_text_TM", "index": ALL}, "value"),
        dash.State("topic_source_1", "value"),
        dash.State({"type": "n_neighbors", "index": ALL}, "value"),
        dash.State({"type": "n_components", "index": ALL}, "value"),
        dash.State({"type": "umap_metric", "index": ALL}, "value"),
        dash.State({"type": "min_cluster_size", "index": ALL}, "value"),
        dash.State({"type": "hdb_metric", "index": ALL}, "value"),
        dash.State({"type": "cluster_selection_method", "index": ALL}, "value"),
    ],
    background=True,
    manager=background_callback_manager,
    prevent_initial_call=True,
)
def process_topic_modelling(
    n_clicks, selected_file, user_path, topic_source,
    n_neighbors, n_components, umap_metric, min_cluster_size,
    hdb_metric, cluster_selection_method
):
    """Run topic modelling for the chosen file and return the result table.

    Runs as a background callback when the "Get Topics!" button is clicked.

    Args:
        n_clicks: Click count of the ``get_topics_1`` button.
        selected_file: Values of the session-history select; a list because
            the id is matched with ``ALL`` (empty when it is not rendered).
        user_path: Values of the free-text path input; a list for the same
            reason.
        topic_source: Selected source (``"twitter"``, ``"linkedin"`` or
            ``"youtube"``), or ``None`` if nothing is selected.
        n_neighbors, n_components, umap_metric, min_cluster_size, hdb_metric,
        cluster_selection_method: Values of the custom hyperparameter
            controls; each is a list that is empty when the custom cards are
            not rendered.

    Returns:
        Whatever ``create_table_container`` builds from the topic dataframe.

    Raises:
        PreventUpdate: If the button has not been clicked, if neither input
            method supplies a usable value, or if a session is selected
            without a source.
    """
    # n_clicks is 0 when the button is first rendered; treat that (and None)
    # as "nothing to do" instead of returning None, which would blank the
    # output area.
    if not n_clicks:
        raise PreventUpdate

    # ALL-matched values arrive as lists; a rendered-but-untouched widget
    # contributes None, so look at the first element rather than the list.
    chosen_session = selected_file[0] if selected_file else None
    typed_path = user_path[0] if user_path else None

    if chosen_session:
        # The column name is derived from the source, so it must be set.
        if not topic_source:
            raise PreventUpdate
        # Option text starts with the session number (possibly rendered as a
        # float such as "3.0"), followed by a comma.
        selected_session_number = int(float(chosen_session.split(",")[0]))
        selected_row = session_state_df[
            session_state_df['session_number'] == selected_session_number
        ]
        # Series.item() requires exactly one matching row and raises
        # ValueError otherwise.
        filepath = str(selected_row[topic_source + "_df"].item())
    elif typed_path:
        filepath = typed_path
    else:
        # Neither input method provides a valid path.
        raise PreventUpdate

    # Custom hyperparameters exist only when the custom cards were rendered,
    # in which case each ALL-matched list has one element.
    hyperparameters = None
    if n_neighbors:
        hyperparameters = {
            "n_neighbors": n_neighbors[0],
            "n_components": n_components[0],
            "umap_metric": umap_metric[0],
            "min_cluster_size": min_cluster_size[0],
            "hdb_metric": hdb_metric[0],
            "cluster_selection_method": cluster_selection_method[0],
        }

    # Perform topic modelling with the selected parameters.
    topic_df = topic_model.top2vec_topics(
        filepath,
        process_radio_selection(topic_source),
        topic_source,
        hyperparameters,
    )
    return create_table_container(topic_df, topic_source)