I am trying to build a dashboard that will generate several plots based on a single SQL data query. I want the query to be modifiable via the dashboard (e.g. to query a different order amount or similar), and then change all plots at once. The query maybe expensive so I don’t want it to run N times for N different plots.
I have tried to do this using the flask cache decorator @cache.memoize()
, similar to the example given in the docs: https://dash.plotly.com/performance
Here is a stripped back version of what I’m doing. I can tell that the query_data
function is not doing what I intend because: 1. the resulting graphs show different data points on the x-axis. If it was using the same cached dataset the data points in x should be the same 2. The print
statements in the query_data
function come out twice everytime I change an input cell.
Can anyone explain why this isn’t working or how I can achieve what I want.
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output
from setup_redshift import setup_connection
from flask_caching import Cache
from datetime import datetime
import pandas as pd
conn = setup_connection()
app = dash.Dash(__name__)
cache = Cache(app.server, config={
# 'CACHE_TYPE': 'filesystem',
'CACHE_TYPE': 'memcached',
'CACHE_DIR': 'cache-directory'
})
sql_query = '''select i.order_amount_in_usd, r.calibrated_score, r.score
from datalake.investigations i
inner join datalagoon.prod_model_decision r
ON i.investigation_id = r.investigation_id
where i.team_id = {}
AND i.order_amount_in_usd < {}
AND r.calibrated_score >= 0
order by RANDOM()
limit 1000'''
@cache.memoize()
def query_data(team_id, max_usd):
print("Calling data query now with team_id={} and max_usd={} at time {}".format(team_id, max_usd, datetime.now()))
_sql = sql_query.format(team_id, max_usd)
print(_sql)
data = pd.read_sql(sql_query.format(team_id, max_usd), conn)
print("data is {} rows ".format(len(data)))
print("data max usd is {}".format(data['order_amount_in_usd'].max()))
return data
@app.callback(Output(component_id='output-graph', component_property='figure'),
[Input(component_id='data-select-team-id', component_property='value'),
Input(component_id='data-select-max-usd', component_property='value')])
def plot_data(team_id, max_usd):
print("calling query_data at from graph at {}".format(datetime.now()))
in_data = query_data(team_id, max_usd)
print("going to make graph1 now at {}".format(datetime.now()))
fig = px.scatter(in_data,
x='order_amount_in_usd',
y='calibrated_score')
return fig
@app.callback(Output(component_id='output-graph2', component_property='figure'),
[Input(component_id='data-select-team-id', component_property='value'),
Input(component_id='data-select-max-usd', component_property='value')])
def plot_second_data(team_id, max_usd):
print("calling query_data at from graph2 at {}".format(datetime.now()))
in_data = query_data(team_id, max_usd)
print("going to make graph2 now at {}".format(datetime.now()))
fig = px.scatter(in_data,
x='order_amount_in_usd',
y='score')
return fig
app.layout = html.Div( # style={'backgroundColor': colors['background']},
children=[dcc.Input(id='data-select-team-id',
value=7625,
placeholder='Input Team ID',
type='number',
min=0,
max=1_000_000_000,
debounce=True
),
dcc.Input(id='data-select-max-usd',
value=5000,
type='number',
debounce=True),
dcc.Graph(id='output-graph'),
dcc.Graph(id='output-graph2')]
)
if __name__ == '__main__':
app.run_server(debug=True)