Hi @Emil,
I was trying to repeat the experiment where you plotted millions of rows in a matter of seconds. It takes the code below about 5 seconds to plot ~500,000 rows with px.scatter(). Do you know why it might be taking so long?
I used this data here.
And below, you can find the reproducible code.
import datetime
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
from dash.dependencies import Output, Input, State
from flask_caching.backends import FileSystemCache
from dash_extensions.callback import CallbackCache
# Read the trip records once at import time; the layout and callbacks below use this frame.
df_org = pd.read_csv("green_tripdata_2019-01.csv")

app = dash.Dash(prevent_initial_callbacks=True)
# Keep a module-level handle on the app's underlying server object.
server = app.server

app.layout = html.Div(
    children=[
        html.Button("Run benchmark (with cache)", id="btn_wc"),
        # One dropdown entry per distinct passenger count found in the data.
        dcc.Dropdown(
            id="dd_wc",
            options=[
                {"label": count, "value": count}
                for count in df_org["passenger_count"].unique()
            ],
            value=1,
        ),
        dcc.Store(id="time_wc"),
        # The data store is wrapped in a loading indicator.
        dcc.Loading(dcc.Store(id="store_wc"), fullscreen=True, type="dot"),
        html.Div(id="log_wc"),
        dcc.Graph(id='mygrpah'),
    ]
)

# Callback cache backed by files under the local "cache" directory.
cc = CallbackCache(cache=FileSystemCache(cache_dir="cache"))
@cc.cached_callback(
    [Output("store_wc", "data"), Output("time_wc", "data")],
    [Input("btn_wc", "n_clicks")],
)
def query_wc(n_clicks):
    """Select the benchmark columns and record when the selection happened.

    Triggered by the ``n_clicks`` property of the "btn_wc" button; the click
    count itself is not used.

    Returns:
        A pair ``(frame, started_at)`` for the "store_wc" and "time_wc"
        stores: the three selected columns of ``df_org`` and the current
        local time.
    """
    wanted_columns = ["passenger_count", "trip_distance", "total_amount"]
    subset = df_org[wanted_columns]
    started_at = datetime.datetime.now()
    return subset, started_at
@cc.callback(
    [Output("log_wc", "children"), Output("mygrpah", "figure")],
    [Input("store_wc", "data")],
    [State("dd_wc", 'value'), State("time_wc", "data")],
)
def calc_wc(df, value, time):
    """Filter the stored frame by passenger count and scatter-plot the result.

    Args:
        df: Data from the "store_wc" store; it is indexed and filtered like a
            pandas DataFrame here.
        value: Current value of the "dd_wc" dropdown, compared against the
            ``passenger_count`` column.
        time: Data from the "time_wc" store; it is subtracted from a
            ``datetime`` to compute the elapsed time.

    Returns:
        A pair ``(message, figure)`` for the "log_wc" children and the
        "mygrpah" figure.
    """
    # Take the end timestamp first so the filtering, printing and plotting
    # below are not included in the reported elapsed time.
    toc = datetime.datetime.now()
    df_filtered = df[df["passenger_count"] == value]
    print(df_filtered[:3])
    # DataFrame.info() writes its summary itself and returns None, so wrapping
    # it in print() emitted an extra "None" line.
    df_filtered.info()
    fig = px.scatter(df_filtered, x='trip_distance', y='total_amount')
    elapsed = (toc - time).total_seconds()
    return "ELAPSED = {} seconds".format(elapsed), fig
# Register `cc` with the app (presumably this wires up the callbacks declared
# through `cc` above — confirm against the dash_extensions documentation).
cc.register(app)

if __name__ == '__main__':
    # Start the server in debug mode only when this file is run as a script.
    app.run_server(debug=True)