Black Lives Matter. Please consider donating to Black Girls Code today.

Get ValueError when using pandas.DataFrame.groupby in the callback

Hi, I want to create an app that users can upload a CSV file and choose two columns of the CSV file using dropdowns. The app will plot the plot a bar chart in which the x-axis is the first dropdown users choose and the y-axis will using groupby function to compute the mean of the second column users choose. But I get an error after I run the code. Can anyone teach me how to fix it?

Below is the error:

Callback error updating datatable-upload-graph.figure
ValueError: Grouper for '<class 'pandas.core.frame.DataFrame'>' not 1-dimensional

Below is the code:

import base64
import io
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import pandas as pd
from dash.exceptions import PreventUpdate
import plotly.graph_objs as go

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    dcc.Upload(
        id='datatable-upload',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select Files')
        ]),
        style={
            'width': '100%', 'height': '60px', 'lineHeight': '60px',
            'borderWidth': '1px', 'borderStyle': 'dashed',
            'borderRadius': '5px', 'textAlign': 'center', 'margin': '10px'
        },
    ),
    dash_table.DataTable(id='datatable-upload-container'),
    dcc.Dropdown(
        id='data_selector1',
        options=[
            {'label': '', 'value': ''}
        ],
        value=[],
        # multi=True
    ),
    dcc.Dropdown(
        id='data_selector2',
        options=[
            {'label': '', 'value': ''}
        ],
        value=[]
    ),
    html.Div(id='print'),
    dcc.Graph(id='datatable-upload-graph'),
])


def parse_contents(contents, filename):
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    if 'csv' in filename:
        # Assume that the user uploaded a CSV file
        return pd.read_csv(
            io.StringIO(decoded.decode('utf-8')))
    elif 'xls' in filename:
        # Assume that the user uploaded an excel file
        return pd.read_excel(io.BytesIO(decoded))


@app.callback(Output('data_selector1', 'options'),
              [Input('datatable-upload-container', 'data')])
def update_dropdown(rows):
    if rows is None:
        raise PreventUpdate
    df = pd.DataFrame(rows)
    print('updating menus')
    columns = df.columns
    col_labels = [{'label': k, 'value': k} for k in columns]
    return col_labels


@app.callback(Output('data_selector2', 'options'),
              [Input('datatable-upload-container', 'data')])
def update_dropdown1(rows):
    if rows is None:
        raise PreventUpdate
    df = pd.DataFrame(rows)
    print('updating menus')
    columns = df.columns
    col_labels = [{'label': k, 'value': k} for k in columns]
    return col_labels


@app.callback(Output('datatable-upload-container', 'data'),
              [Input('datatable-upload', 'contents')],
              [State('datatable-upload', 'filename')])
def update_output(contents, filename):
    if contents is None:
        return [{}]
    df = parse_contents(contents, filename)
    data = df.to_dict('records')
    return data


@app.callback(Output('datatable-upload-graph', 'figure'),
              [Input('data_selector1', 'value'),
               Input('data_selector2', 'value')],
              [State('datatable-upload-container', 'data')])
def display_graph(value1, value2, rows):
    df = pd.DataFrame(rows)
    df1 = df.groupby(df[value1])
    df2 = df1.mean()
    df3 = df2[df[value2]]
    trace = go.Bar(x=df3.index, y=df3)
    return {
        'data': [trace]
    }



if __name__ == '__main__':
    app.run_server(debug=True)

I am not a pandas expert, but whenever I have used group by, I have always passed the column name, not the values. That is, instead of

df1 = df.groupby(df[value1])

one would write

df1 = df.groupby(value1)

1 Like