Show & Tell: Parse data from dcc.Upload without pandas (multiple headers)

Hi.

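This is an example of how to parse a CSV file uploaded through a dcc.Upload component when the file starts with several header (metadata) lines, so it cannot simply be handed to the pandas reader as-is. The header lines are read manually, line by line, and pandas is used only for the tabular part that follows them.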

Example csv:

id:2020-code
filename:test.csv
Time-UTC:09:36:27
Date:05-14-20
Time,     Col 1,    Col 2,     Col 3,     Col 4
0.0000,  6.04649,  1.69522,  0.0321367,  14.7099
0.0005,  6.09178,  1.69522,  0.0321367,  14.7895
0.0010,  6.13707,  1.71105,  0.0321367,  14.7895
...
import base64
import datetime
import io
import codecs
import json

import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table

import pandas as pd

class CSV_FILE:
    """Hold the pieces of one uploaded file.

    Attributes are the four header lines (``id``, ``filename``, ``time``,
    ``date``), each initialised to an empty string, and ``df``, an initially
    empty pandas DataFrame for the tabular data.
    """

    def __init__(self):
        # Header lines start out blank until a file has been parsed.
        self.id = self.filename = self.time = self.date = ''
        # Tabular part of the file.
        self.df = pd.DataFrame()

    def __str__(self):
        """Return the class followed by the instance's attribute dict."""
        return "{}: {}".format(self.__class__, self.__dict__)

# External CSS loaded by the app.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout: an upload area, then the container that the upload
# callback fills with the parsed file(s).
app.layout = html.Div(children=[
    dcc.Upload(
        id='upload-data',
        children=html.Div(children=[
            'Drag and Drop or ',
            html.A('Select Files'),
        ]),
        style=dict(
            width='100%',
            height='60px',
            lineHeight='60px',
            borderWidth='1px',
            borderStyle='dashed',
            borderRadius='5px',
            textAlign='center',
            margin='10px',
        ),
        # Allow multiple files to be uploaded
        multiple=True,
    ),
    html.Div(id='output-data-upload'),
])

def parse_contents(contents, filename, date):
    """Render one uploaded CSV file as a header section plus a data table.

    The file is expected to start with four free-form header lines, which
    are stored verbatim (minus trailing whitespace) as ``id``, ``filename``,
    ``time`` and ``date`` on a ``CSV_FILE`` instance. The fifth line holds
    the column names and the remaining lines are the data rows; that part
    is parsed with pandas.

    Args:
        contents: Upload payload of the form
            ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it must contain ``csv``
            (case-insensitive) for the file to be processed.
        date: Last-modified timestamp, passed to
            ``datetime.datetime.fromtimestamp``.

    Returns:
        An ``html.Div`` with the file name, the timestamp, the four header
        lines and a ``DataTable`` of the parsed rows. If the file is not a
        CSV or cannot be decoded/parsed, an ``html.Div`` with an error
        message is returned instead.
    """
    input_file = CSV_FILE()

    try:
        if 'csv' not in filename.lower():
            # Report unsupported uploads instead of silently rendering an
            # empty header and an empty table.
            raise ValueError('unsupported file type: %r' % (filename,))

        # Only the first comma separates the content-type prefix from the
        # base64 payload.
        _, content_string = contents.split(',', 1)

        # Decode once and reuse the text for both the header lines and
        # pandas. 'utf-8-sig' reads plain UTF-8 as well and drops a leading
        # byte-order mark so it does not end up in the first header line.
        text = base64.b64decode(content_string).decode('utf-8-sig')

        # zip() stops after the four header fields, so only the first four
        # lines are consumed; shorter files leave the remaining fields ''.
        header_fields = ('id', 'filename', 'time', 'date')
        for field, line in zip(header_fields, io.StringIO(text, newline=None)):
            setattr(input_file, field, line.rstrip())

        # header=4: the fifth line holds the column names.
        # skipinitialspace=True drops the padding after each comma so the
        # column names are 'Col 1' rather than '     Col 1'.
        input_file.df = pd.read_csv(
            io.StringIO(text), header=4, skipinitialspace=True)
        print(input_file)

    except Exception as e:
        # Callback boundary: show a message in the page rather than letting
        # a bad upload break the callback.
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])

    return html.Div([
        html.H5(filename),
        html.H6(datetime.datetime.fromtimestamp(date)),

        html.H6(
            "Header"
        ),

        html.Div(
            input_file.id
        ),
        html.Div(
            input_file.filename
        ),
        html.Div(
            input_file.time
        ),
        html.Div(
            input_file.date
        ),

        dash_table.DataTable(
            data=input_file.df.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in input_file.df.columns]
        ),

    ])


@app.callback(Output('output-data-upload', 'children'),
              [Input('upload-data', 'contents')],
              [State('upload-data', 'filename'),
               State('upload-data', 'last_modified')])
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Build the children of the output container from the uploaded files.

    Returns a list with one ``parse_contents`` result per uploaded file,
    or ``None`` when ``list_of_contents`` is ``None``.
    """
    if list_of_contents is None:
        return None

    rendered = []
    uploads = zip(list_of_contents, list_of_names, list_of_dates)
    for payload, name, modified in uploads:
        rendered.append(parse_contents(payload, name, modified))
    return rendered

# Start the Dash server with debug mode enabled, but only when this file is
# run as a script (not when it is imported).
if __name__ == '__main__':
    app.run_server(debug=True)

Example:

1 Like

Very nice. Thanks for sharing this example!