Hi All,
I am new to this community and fairly new to Python and Dash. I saw some similar posts but could not solve my specific problem, so I thought I would open a dedicated thread - I hope this is fine.
I want to create an app that visualizes data from a CSV file selected by the user.
The CSV file can be relatively big (30-40 columns and 200K lines).
Here the file I use for my tests: 0_Test.csv
The app I have created works fine locally, but once I deployed it on a virtual machine it became incredibly slow.
I did some tests and I do not think it is due to the computational time needed to perform the calculations on the VM; rather, I have the feeling that I am sending relatively large amounts of data from the client to the VM and vice versa. Therefore, the limitation should be the internet connection (that's only my feeling).
Looking at different posts and forums, I saw people suggesting the following settings to compute directly in the client browser:
app.config['suppress_callback_exceptions'] = True
app.css.config.serve_locally = True
app.scripts.config.serve_locally = True
I have tried them, but I did not see any difference in performance.
(And since it is not clear to me what they do, I commented them out in my code.)
Can anyone please help me make the code below more efficient?
import base64
import datetime
import functools
import io
import time

import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objects as go
from dash.dependencies import Input, Output, State
def update_figure(df, value):
    """Build the Plotly figure dict for the columns selected in the dropdown.

    Args:
        df: DataFrame holding the data; its ``'DateTime'`` column is used as
            the x axis of every trace.
        value: List of column names to plot, or ``None`` when nothing is
            selected.

    Returns:
        ``{'data': [...]}`` with one trace per selected column that exists in
        ``df``, or ``{}`` when ``value`` is ``None`` or ``df`` has no
        ``'DateTime'`` column (e.g. the empty placeholder frame).
    """
    if value is None or 'DateTime' not in df.columns:
        return {}

    x_axes = df['DateTime']
    return {
        'data': [
            # Plotly trace type names are lower-case; 'scattergl' selects the
            # WebGL renderer intended for large point counts.
            {'type': 'scattergl', 'x': x_axes, 'y': df[name], 'name': name}
            for name in value
            # Skip stale selections that are not columns of this frame.
            if name in df.columns
        ]
    }
def filter_Spikes(df, value_spike, window=3, n_std=5):
    """Blank out rows where a selected column spikes away from its rolling mean.

    For each column in ``value_spike`` a centred rolling mean is computed; a
    row is an outlier when the column deviates from that mean by more than
    ``n_std`` standard deviations of the column. Outlier rows are set to NaN
    in *every* column of ``df``. Columns are processed in order, so later
    columns see the NaNs produced by earlier ones.

    Args:
        df: DataFrame to clean. It is modified in place.
        value_spike: Iterable of column names to scan, or ``None`` to do
            nothing.
        window: Size of the centred rolling window (default 3).
        n_std: Threshold, in standard deviations of the column (default 5).

    Returns:
        The same ``df`` object, after in-place modification.
    """
    if value_spike is None:
        return df

    for column in value_spike:
        print(column + '...processing')
        try:
            # Missing or non-numeric columns cannot be scanned; skip them
            # instead of relying on an exception to do so.
            if column not in df.columns:
                continue
            series = df[column]
            if not pd.api.types.is_numeric_dtype(series):
                continue
            smoothed = (
                series.rolling(window=window, center=True).mean().bfill().ffill()
            )
            difference = np.abs(series - smoothed)
            # A plain boolean array avoids index alignment, which would fail
            # on a non-unique (e.g. timestamp) index.
            outliers = (difference > series.std() * n_std).to_numpy()
        except Exception as exc:
            # Best effort: one bad column must not abort the whole filter,
            # but the reason is reported rather than silently dropped.
            print(column + '...skipped: ' + str(exc))
            continue

        print(int(outliers.sum()))
        if outliers.any():
            # Whole-column reassignment instead of chained indexing
            # (df[col][mask] = ...), which is not guaranteed to write through.
            for col in df.columns:
                df[col] = df[col].mask(outliers)
    return df
# Stylesheet fetched from an external URL and handed to the Dash app below.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# The Dash application object; layout and callbacks are attached to it further down.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# The three settings below are intentionally left disabled.
# NOTE(review): per the Dash documentation these options concern callback
# validation and where the CSS/JS assets are served from; they presumably do
# not move callback computation into the browser - confirm for the installed
# Dash version before enabling.
#app.config['suppress_callback_exceptions'] = True
#app.css.config.serve_locally = True
#app.scripts.config.serve_locally = True
# Appearance of the dashed drag-and-drop upload area.
_UPLOAD_BOX_STYLE = {
    'width': '95%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

# Both column selectors use the same width.
_DROPDOWN_STYLE = {'width': '70%'}

# Page structure, top to bottom: upload area, column selector for plotting,
# column selector for spike filtering, status text, and the graph itself.
app.layout = html.Div([
    dcc.Upload(
        id='upload-data',
        children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
        style=_UPLOAD_BOX_STYLE,
        # Only a single file can be uploaded at a time.
        multiple=False,
    ),
    dcc.Dropdown(
        id='data_drop_down',
        multi=True,
        style=dict(_DROPDOWN_STYLE),
    ),
    dcc.Dropdown(
        id='data_drop_down_spikes',
        multi=True,
        style=dict(_DROPDOWN_STYLE),
    ),
    html.Div(id='output-data-upload'),
    dcc.Graph(
        id='graph',
        style={"height": "750px", "width": "100%"},
    ),
])
def _find_column(columns, keyword):
    """Return the last column whose name contains ``keyword`` (case-insensitive), or None."""
    needle = keyword.casefold()
    match = None
    for name in columns:
        if needle in str(name).casefold():
            match = name
    return match


@functools.lru_cache(maxsize=2)
def _load_dataframe(contents, sep=';', encoding='ISO-8859-1', header=19):
    """Decode an uploaded ``data:...;base64,...`` string into a DataFrame.

    The result is cached on the payload string so that dropdown changes do
    not decode and parse the same file again. The returned frame is shared
    between calls and must not be modified in place by callers.

    Raises:
        ValueError: if the payload cannot be decoded or parsed, or if no
            column containing "Date" and one containing "Time" is found.
    """
    _, _, content_string = contents.partition(',')
    # Restore any stripped base64 padding (length must be a multiple of 4).
    padding = '=' * (-len(content_string) % 4)
    decoded = base64.b64decode(content_string + padding)
    # Let pandas decode the bytes itself instead of building an intermediate str.
    df = pd.read_csv(io.BytesIO(decoded), sep=sep, encoding=encoding, header=header)

    date_col = _find_column(df.columns, 'Date')
    time_col = _find_column(df.columns, 'Time')
    if date_col is None or time_col is None:
        raise ValueError('no column containing "Date" and/or "Time" was found')

    df["DateTime"] = df[date_col] + " " + df[time_col]
    df.index = pd.to_datetime(df["DateTime"])
    return df


@app.callback([Output('output-data-upload', 'children'),
               Output('data_drop_down', 'options'),
               Output('data_drop_down', 'disabled'),
               Output('graph', 'figure'),
               Output('data_drop_down_spikes', 'options')],
              [Input('upload-data', 'contents'),
               Input('data_drop_down', 'value'),
               Input('data_drop_down_spikes', 'value')]
              )
def parse_contents(contents, value, value_spike):
    """Refresh status text, dropdown options and graph for the current inputs.

    Args:
        contents: Upload payload (``data:...;base64,...``) or ``None`` when
            no file has been uploaded yet.
        value: Columns selected for plotting, or ``None``.
        value_spike: Columns selected for spike filtering, or ``None``.

    Returns:
        Tuple ``(children, options, disabled, figure, options_spikes)``
        matching the five declared outputs.
    """
    # NOTE(review): 'upload-data.contents' is an Input, so under the Dash
    # callback model the browser presumably re-sends the full file with every
    # dropdown change. Splitting this into an upload callback plus a plotting
    # callback backed by a server-side cache would remove that transfer.
    placeholder = [{'label': 'None', 'value': 'None'}]
    if contents is None:
        return (html.Div(['No data has been uploaded']),
                placeholder, True, {}, placeholder)

    try:
        df = _load_dataframe(contents)
    except (ValueError, TypeError) as exc:
        # Show the problem in the page instead of failing the callback.
        return (html.Div(['Could not read the uploaded file: ' + str(exc)]),
                placeholder, True, {}, placeholder)

    options = [{'label': i, 'value': i} for i in df.columns]

    if value_spike:
        # filter_Spikes works in place; copy so the cached frame stays intact.
        df = filter_Spikes(df.copy(), value_spike)

    # Drop selections left over from a previously uploaded file.
    selected = None if value is None else [c for c in value if c in df.columns]

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start_figure = time.perf_counter()
    figure = update_figure(df, selected)
    end_figure = time.perf_counter()

    children = html.Div(['time update_figure:', str(end_figure - start_figure)])
    return children, options, False, figure, options
# Module-level handle on the app's underlying server object
# (presumably so a WSGI server can import it - confirm against the deployment).
server = app.server

if __name__ == "__main__":
    # Direct execution starts the built-in server with debug mode off.
    app.run_server(debug=False)
Thank you very much!
This is really a great community!