I have this 1M rows table. I wanna be able to filter the rows and limit the visible ones to something reasonable like 25 (page_size) but I want ALL the data to appear in a datashader or hexbin geo map, as well as to the downloaded Excel or CSV file.
So far I got everything to work except that the graph and excel files will only show the number of rows set by the page_size argument. Can’t get my head around how to pass all the rows.
#!/usr/bin/env python
# coding: utf-8
import plotly.figure_factory as ff
import plotly.io as pio
import plotly.express as px
import dash
from dash.dependencies import Input, Output, State
import dash_table
import pandas as pd
import pickle
import dash_core_components as dcc
import dash_html_components as html
pio.renderers.default = "browser"
df = pd.read_pickle('data.pkl', compression='gzip')
df['index'] = range(1, len(df) + 1 )
app = dash.Dash(__name__)
page_size = 25
app.layout = html.Div(
children=[
html.Div(
dash_table.DataTable(
id='table-paging-with-graph',
columns=[
{"name": i, "id": i,'deletable': True, 'hideable': True} for i in sorted(df.columns)
],
export_format='xlsx',
row_deletable=True,
page_current=0,
page_size=page_size,
page_action='custom',
filter_action='custom',
filter_query='',
sort_action='custom',
sort_mode='multi',
sort_by=[]
),
style={'height': 750, 'overflowY': 'scroll'}
),
html.Div(
id='table-paging-with-graph-container'
)
]
)
operators = [['ge ', '>='],
['le ', '<='],
['lt ', '<'],
['gt ', '>'],
['ne ', '!='],
['eq ', '='],
['contains '],
['datestartswith ']]
def split_filter_part(filter_part):
for operator_type in operators:
for operator in operator_type:
if operator in filter_part:
name_part, value_part = filter_part.split(operator, 1)
name = name_part[name_part.find('{') + 1: name_part.rfind('}')]
value_part = value_part.strip()
v0 = value_part[0]
if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
value = value_part[1: -1].replace('\\' + v0, v0)
else:
try:
value = float(value_part)
except ValueError:
value = value_part
# word operators need spaces after them in the filter string,
# but we don't want these later
return name, operator_type[0].strip(), value
return [None] * 3
@app.callback(
Output('table-paging-with-graph', "data"),
Input('table-paging-with-graph', "page_current"),
Input('table-paging-with-graph', "page_size"),
Input('table-paging-with-graph', "sort_by"),
Input('table-paging-with-graph', "filter_query"))
def update_table(page_current, page_size, sort_by, filter):
filtering_expressions = filter.split(' && ')
dff = df
for filter_part in filtering_expressions:
col_name, operator, filter_value = split_filter_part(filter_part)
if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
# these operators match pandas series operator method names
dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
elif operator == 'contains':
dff = dff.loc[dff[col_name].str.contains(filter_value)]
elif operator == 'datestartswith':
# this is a simplification of the front-end filtering logic,
# only works with complete fields in standard format
dff = dff.loc[dff[col_name].str.startswith(filter_value)]
if len(sort_by):
dff = dff.sort_values(
[col['column_id'] for col in sort_by],
ascending=[
col['direction'] == 'asc'
for col in sort_by
],
inplace=False
)
return dff.iloc[
page_current*page_size: (page_current + 1)*page_size
].to_dict('records')
@app.callback(
Output('table-paging-with-graph-container', "children"),
Input('table-paging-with-graph', "data")
)
def update_graph(rows):
dff = pd.DataFrame(rows)
fig = ff.create_hexbin_mapbox(
data_frame=dff, lat="ADDR_LAT", lon="ADDR_LONG",
nx_hexagon=50, opacity=0.7, labels={"color": "Point Count"},
min_count=1
)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin=dict(b=0, t=0, l=0, r=0))
return html.Div(
[
dcc.Graph(figure=fig
)
])
if __name__ == '__main__':
app.run_server(port="8055", debug=False)