Return a groupby datatable (mean) from previous datatable

JXS · August 7, 2020, 12:11pm

Hi guys, im fairly new to Dash and im trying to grasp as much as i can in this short period of time. There aren’t much references out there that addresses my problem.
Below is my code and im having trouble with updating the new datatable with the means of a groupby from the previous datatable.

please ignore the additional dropdown for now. I would like to fix the aforementioned issue…

import pandas as pd
import numpy as np
import dash
import dash_table as dt
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go

# set display options
pd.set_option('display.width', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# import data
path = '/Users/jun/Downloads/Data Analyst Prework Dataset.csv'
df = pd.read_csv(path)
# pre-processing data
# define function to clean brand names
def rm_space(item):
    if ' ' in item:
        # Only prints the string without trailing white spaces
        return item[:item.find(' ')]
    else:
        return item

def convert(x):
    if x != 0:
        return 100
    else:
        return 0

df['Transaction Date'] = pd.to_datetime(df['Transaction Date']).dt.strftime("%Y-%m")
df['Customer Id'] = df['Customer Id'].astype('str')
df = df.rename(columns = {'Customer Id': 'Customer ID'})
df['Brand'] = df['Brand'].apply(rm_space).str.upper()

# measuring the performances of the brands in terms of date, customer and sales agent
pivot1 = pd.crosstab([df['Transaction Date'], df['Customer ID'], df['Sales Agent ID']], df['Brand'])
pivot_reset = pivot1.applymap(convert).reset_index()
pivot2 = pivot_reset.groupby('Transaction Date').mean().T
pivot_reset2 = pivot2.reset_index()

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1('Supplier LTD Dataset', style={'text-align': 'center'}
            ),

    dcc.Dropdown(
        id='date_dropdown',
        options=[{'label': i, 'value': i} for i in pivot_reset['Transaction Date'].unique()],
    ),
    dcc.Dropdown(
        id='customer_dropdown',
        options=[{'label': i, 'value': i} for i in pivot_reset['Customer ID'].unique()],
    ),

    html.Br(),

    html.Label('Main Dataframe', style={'font-weight': 'bold'}),
    dt.DataTable(
        columns=[{'name': i, 'id': i} for i in pivot_reset.columns],
        id='main_table',
        data=pivot_reset.to_dict('records'),  # the contents of the table
        editable=False,              # allow editing of data inside all cells
        filter_action='none',     # allow filtering of data by user ('native') or not ('none')
        sort_action="native",       # enables data to be sorted per-column by user or not ('none')
        sort_mode="multi",         # sort across 'multi' or 'single' columns
        row_deletable=False,         # choose if user can delete a row (True) or not (False)
        selected_columns=[],        # ids of columns that user selects
        selected_rows=[],           # indices of rows that user selects
        page_action="native",       # all data is passed to the table up-front or not ('none')
        page_size=10,                # number of rows visible per page
        fixed_columns={'headers': True, 'data': 1},
        style_table={'minWidth': '100%'},

        style_cell={                # ensure adequate header width when text is shorter than cell's text
            'minWidth': 100, 'maxWidth': 100, 'width': 100
        },
        style_data={                # overflow cells' content into multiple lines
            'whiteSpace': 'normal',
            'height': 'auto'
        }

    ),
    html.Hr(),
    html.Label('Main Dataframe #2', style={'font-weight': 'bold'}),
    dt.DataTable(
        columns=[{'name': i, 'id': i} for i in pivot_reset2.columns],
        id='brand_active_monthly',
        data=pivot_reset2.to_dict('records'),  # the contents of the table
        editable=False,              # allow editing of data inside all cells
        filter_action='none',     # allow filtering of data by user ('native') or not ('none')
        sort_action="native",       # enables data to be sorted per-column by user or not ('none')
        sort_mode="multi",         # sort across 'multi' or 'single' columns
        row_deletable=False,         # choose if user can delete a row (True) or not (False)
        selected_columns=[],        # ids of columns that user selects
        selected_rows=[],           # indices of rows that user selects
        page_action="native",       # all data is passed to the table up-front or not ('none')
        page_current=0,             # page number that user is on
        page_size=50,                # number of rows visible per page
        fixed_columns={'headers': True, 'data': 1},
        style_table={'minWidth': '100%'},
        style_cell={                # ensure adequate header width when text is shorter than cell's text
            'minWidth': 95, 'maxWidth': 95, 'width': 95
        },
        style_data={                # overflow cells' content into multiple lines
            'whiteSpace': 'normal',
            'height': 'auto'
        }
    )


])

@app.callback(
    Output('main_table', 'data'),
    [Input('customer_dropdown', 'value')]
)
def update_table(value):
    dff = pivot_reset[pivot_reset['Customer ID'] == value]
    return dff.to_dict('records')

# Callback to re-calculate means after filtering is
@app.callback(
    Output('brand_active_monthly', 'data'),
    [Input('main_table', 'data')]
)
def update_means(data):
    # Calculate means from data currently stored in top datatable
    dff_2 = pd.DataFrame.from_dict(data)
    df_floats_means = dff_2.groupby(['Transaction Date']).mean(numeric_only=True)
    df_t = df_floats_means.T

    # Return means to means datatable
    #THIS NOW RETURNS DICT INSTEAD OF LIST
    T_means = df_t('records')
    return T_means


if __name__ == '__main__':
    app.run_server(debug=True)

Hopefully there are some kind souls out there who can shed some light.

Regards,
Jun

AnnMarieW · August 7, 2020, 1:55pm

Hi @JXS and welcome to the Dash community!

It looks like it just might be a typo…
Try T_means = df_t.to_dict(‘records’) instead of T_means = df_t(‘records’)

If that doesn’t fix it, could you be more specific about what type of error you are seeing?

Note also that you don’t need to have these in two separate callbacks. It can be done in one like this:

@app.callback(
    [Output('main_table', 'data'), Output('brand_active_monthly', 'data')]
    [Input('customer_dropdown', 'value')]
)
def update_table(value):
    dff = pivot_reset[pivot_reset['Customer ID'] == value]

   df_floats_means = dff.groupby(['Transaction Date']).mean(numeric_only=True)
   df_t = df_floats_means.T

    return dff.to_dict('records'), df_t.to_dict('records')

Another thing to check out when you have time is the new dash pivot table. It might be pretty cool in your application. 📣 Dash Pivottable Released

JXS · August 7, 2020, 2:24pm

Hi there!

Omg thank you so much! I tried your suggestion on combining both callbacks into one callback and it works for multiple outputs now. I was working with another solution as well and now i stumble across troubles having to select multiple dropdown values. (Because only with multiple values i can see the average of all selected customers) I am certainly aware that the callback will not be valid anymore as the boolean only refers to one value. Do you have any idea on that??

Regards,
Jun

AnnMarieW · August 7, 2020, 2:48pm

I’m glad it worked!

If the customer_dropdown allows for multiple selections, you could do this:
dff = pivot_reset[pivot_reset[‘Customer ID’].isin(value)]

And another little pandas tip: There is a built-in function to remove whitespace, so you don’t have define your own function. Try:

df[‘Brand’] = df[‘Brand’].str.rstrip().str.upper()

(I just found this last week and wanted to share :-))

JXS · August 7, 2020, 3:05pm

found the same solution here as well!
https://www.interviewqs.com/ddi_code_snippets/rows_cols_python

Thanks for the pandas tip tho! Im still new to python theres much more to explore! Now it make sense.

You’re of great help Ann! im so glad theres people in the community who are always so generous in sharing knowledge!

I was wondering on how do i select all the customers and put it as a single variable (‘All’) in my dropdown?
This is what i tried and i think it’ll be a hassle to remove it one by one…

AnnMarieW · August 7, 2020, 3:11pm

options=[{"label": "All", "value": "all"}] + [{'label': i, 'value': i} for i in pivot_reset['Customer ID'].unique()],

Then in the callback you can handle the selection for “all”

JXS · August 8, 2020, 4:27pm

Alright now i got both to work fine with a for loop and if statement!

Since now the callback runs on the isin function, if nothing is selected it will throw an error right?
Is there a way to solve remove that error at all? i tried implementing either statements:
if value is None or if not value but it still doesnt work…

Regards,
Jun

AnnMarieW · August 8, 2020, 4:43pm

Super!

You are correct to check if the value is None. It should work. If you would like to share more of your code, I’ll take a look.

JXS · August 8, 2020, 5:48pm

Here is the callback part. You may take a look.

@app.callback(
    [Output('main_table', 'data'), Output('brand_active_monthly', 'data')],
    [Input('customer_dropdown', 'value')],
)
def update_table(values):
    for value in values:
        if value is None:
            dff = pivot_reset
            df_floats_means = (dff.groupby(['Transaction Date']).mean()).round(2)
            df_t = df_floats_means.T.reset_index()
            return dff.to_dict('records'), df_t.to_dict('records')
        elif value == 'all':
            dff = pivot_reset
            df_floats_means = (dff.groupby(['Transaction Date']).mean()).round(2)
            df_t = df_floats_means.T.reset_index()
            return dff.to_dict('records'), df_t.to_dict('records')
        else:
            dff = pivot_reset[pivot_reset['Customer ID'].isin(values)]
            df_floats_means = (dff.groupby(['Transaction Date']).mean()).round(2)
            df_t = df_floats_means.T.reset_index()
            return dff.to_dict('records'), df_t.to_dict('records')

AnnMarieW · August 8, 2020, 8:02pm

OK, I think I see the problem. There is no need to iterate over values – and None is not iterable, so that would raise an error.

Try deleting: for value in values:
and change the elif to: elif "all" in values:

JXS · August 8, 2020, 8:11pm

No wonder… its because of the for loop how can i overlook that… thanks for pointing that out!

AnnMarieW · August 8, 2020, 8:46pm

I’m happy to help!

Now if only I can find my bug… I’ve been stuck all day

JXS · August 8, 2020, 9:01pm

Hahahaha i can totally understand how that feels… The more complex the problem, the more the obstacles are… i have been tinkering with my dashboard and trying add more complexity to it. Getting stuck again and again. Hahaha…

Topic		Replies	Views
Return a dataframe as a data_table from a callback Dash Python	4	9581	March 26, 2019
Display dataframe with groupby in DashTable Dash Python	3	3179	October 21, 2021
Dash data table return from callback and dropdown list Dash Python question	2	966	April 16, 2023
Callback to return new columns to DataTable Dash Python question	2	1002	October 27, 2022
Get ValueError when using pandas.DataFrame.groupby in the callback Dash Python	2	4843	April 27, 2021

Return a groupby datatable (mean) from previous datatable

Related topics