Hi everyone! I’ve been stuck on this issue for some time now and would VERY MMUCH appreciate any help.
I am creating a Dash app where
- User can upload two datasets (csv, or excel)
- Two dropdowns will populate themselves with the columns from each dataset
- User will use the dropdowns to choose what the matching key will be from each dataset
- Merge the two datasets based on merging key
- Get the final merged dataframe
I’m having trouble with step 4 and 5. I’m not sure how to go about with creating a function and a callback by referring to the uploaded datasets. This has stumped me for soo long!
My current code:
import json
import io
import base64
!pip install numpy==1.21.6
import numpy as np
import pandas as pd
import dash
import dash_bootstrap_components as dbc
from dash import dash_table, dcc, html
from dash.dependencies import Input, Output, State
!pip install plotly==5.9.0
!pip install xarray==0.20.2
#define Dash app, reference Dash Bootstrap as external stylsheet
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
layout = html.Div([
html.H5(“Upload Files”),
dcc.Upload(
id=‘upload-data-1’,
children=html.Div([
'File 1: Drag and Drop or ',
html.A(‘Select Files’)
]),
style={
‘width’: ‘80%’,
‘height’: ‘60px’,
‘lineHeight’: ‘60px’,
‘borderWidth’: ‘1px’,
‘borderStyle’: ‘dashed’,
‘borderRadius’: ‘5px’,
‘textAlign’: ‘left’,
‘margin’: ‘10px’
},
multiple=False),
dcc.Upload(
id='upload-data-2',
children=html.Div([
'File 2: Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '80%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'left',
'margin': '10px'
},
multiple=False),
html.Div([
html.H5("First Dataset"),
dcc.Dropdown(
id = 'first_dropdown',
options = [],
)
]),
html.Div([
html.H5("Second Dataset"),
dcc.Dropdown(
id = 'second_dropdown',
options = [],
)
]),
html.Br(),
html.Button('Merge datasets',
id = 'merge-button',
n_clicks = 0),
html.Br(),
html.Div(id='container', children = 'Choose linking key from each dataset and merge datasets')
])
app.layout = html.Div(
[layout])
#Read dataset 1
def parse_contents_1(contents, filename):
content_type, content_string = contents.split(‘,’)
decoded = base64.b64decode(content_string)
try:
if 'csv' in filename:
# Assume that the user uploaded a CSV file
df1 = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
elif 'xls' in filename:
# Assume that the user uploaded an excel file
df1 = pd.read_excel(io.BytesIO(decoded))
except Exception as e:
print(e)
return None
return df1
#Read dataset 2
def parse_contents_2(contents, filename):
content_type, content_string = contents.split(‘,’)
decoded = base64.b64decode(content_string)
try:
if 'csv' in filename:
# Assume that the user uploaded a CSV file
df2 = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
elif 'xls' in filename:
# Assume that the user uploaded an excel file
df2 = pd.read_excel(io.BytesIO(decoded))
except Exception as e:
print(e)
return None
return df2
#Dropdowns for Dataset1 and 2
@app.callback(Output(‘first_dropdown’, ‘options’),
Output(‘second_dropdown’, ‘options’),
[Input(‘upload-data-1’, ‘contents’),
Input(‘upload-data-1’, ‘filename’),
Input(‘upload-data-2’, ‘contents’),
Input(‘upload-data-2’, ‘filename’)])
def update_dropdown(contents1, filename1, contents2, filename2):
if contents1 is not None and contents2 is not None:
df1 = parse_contents_1(contents1, filename1)
columns1 = df1.columns.values.tolist()
columns1 = [ {‘label’: x, ‘value’: x} for x in columns1 ]
df2 = parse_contents_2(contents2, filename2)
columns2 = df2.columns.values.tolist()
columns2 = [ {'label': x, 'value': x} for x in columns2 ]
if df1 is not None and df2 is not None:
return columns1, columns2
else:
return []
else:
return []
if name==“main”:
app.run(host=“0.0.0.0”,port=300,debug=False)