Good afternoon everyone, Good afternoon everyone,
I am attempting to set up a layout where users can upload a dataset, select columns, and have a table display the uploaded dataset along with the user-selected columns. Currently, there are no issues; it works fine. Additionally, I want to enable users to select a column and change its datatype. However, I am encountering difficulties in implementing this feature.
The user can modify the datatype of a column by selecting a column and a datatype, then clicking the submit button. It works fine, and the output is stored in a specific dcc.Store
. If I perform a second transformation, it works, but the output shows the result of the second transformation and not the first one.
The logic of the callback is as follows: I have two dcc.Store
components, one where the original dataset is uploaded, and a second one where it is transformed and stored. The callback first checks if the transformed dcc.Store
is empty. If it is, it takes the original data from dcc.Store
as input. If not, it means a transformation has already been performed and stored in the second dcc.Store
, which the function takes as input. Then, it proceeds to perform a transformation (e.g., from int
to str
) and stores the result in the second dcc.Store
. However, as mentioned above, it is not working. It appears that each transformation is overwriting the previous one, even though I ensure that the transformed dataset is saved in the second dcc.Store
and can be used as input for further transformations.
Here is a snippet of my code for reproducibility. I appreciate any assistance in advance!
import dash
from dash import dcc, Input, Output, State, callback, html
import dash_bootstrap_components as dbc
import io
import base64
import pandas as pd
from dash.dash_table import DataTable
from dash.exceptions import PreventUpdate
app = dash.Dash(__name__)
app.layout = html.Div([
dcc.Store(id="transformation"),
dcc.Store(id="storage"),
dbc.Row([
dbc.Col(lg=2),
dbc.Col([
dcc.Upload(
id="upload",
children=html.Div(["Upload"]),
multiple=False
)
])
]),
dbc.Row([
dbc.Col(lg=1),
dbc.Col([
dcc.Dropdown(
id="columns_selection",
options=[], #set by callback
multi=True,
placeholder="Select one or more columns"
)
], lg=8),
dbc.Col(dbc.Button("Submit", id="submit_button"), lg=2)
]),
html.Br(),
dbc.Row([
dbc.Col(lg=2),
dbc.Col(html.Div(id="table_output"), lg=8)
]),
html.Br(),
dbc.Row([
dbc.Col(lg=1),
dbc.Col([
dcc.Dropdown(
id="dropdown_col_trans",
options=[],
multi=False,
placeholder="Select a col to transform"
)
], lg=4),
dbc.Col([
dcc.Dropdown(
id="dropdown_type",
options=[
{'label': 'Texte', 'value': 'str'},
{'label': 'Nombre Entier', 'value': 'int'},
{'label': 'Nombre Virgule', 'value': 'float'},
{'label': 'Date', 'value': 'datetime'}
],
multi=False,
placeholder="Select a type"
)
], lg=4),
dbc.Col(dbc.Button("Submit", id="button_transformation"))
])
])
@callback(
Output('columns_selection', 'options'),
Input('upload', 'contents'),
State('upload', 'filename')
)
def update_column_options(contents, filename):
if contents is None:
raise PreventUpdate
content_type, content_string = contents.split(',')
decoded = base64.b64decode(content_string)
try:
if 'csv' in filename:
df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
elif 'xls' in filename or 'xlsx' in filename:
df = pd.read_excel(io.BytesIO(decoded))
else:
raise PreventUpdate
return [{'label': col, 'value': col} for col in df.columns]
except Exception as e:
return []
@callback(
[Output('table_output', 'children'),
Output('storage', 'data')],
Input('submit_button', 'n_clicks'),
[State('columns_selection', 'value'),
State('upload', 'contents'),
State('upload', 'filename')]
)
def update_table(n_clicks, selected_columns, contents, filename):
if n_clicks is None or contents is None:
raise PreventUpdate
content_type, content_string = contents.split(',')
decoded = base64.b64decode(content_string)
try:
if 'csv' in filename:
df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
elif 'xls' in filename or 'xlsx' in filename:
df = pd.read_excel(io.BytesIO(decoded))
if selected_columns:
df = df[selected_columns]
table = DataTable(
data=df.to_dict('records'),
columns=[{"name": i, "id": i} for i in df.columns],
fixed_rows={"headers": True},
virtualization=True,
filter_action="native",
export_format="csv",
style_cell={'maxWidth': '150px',
"minWidth": "90px",
"color": "#495057",
"textAlign": "left",
'textOverflow': 'ellipsis'},
style_header={"fontWeight": "bold",
"color":"#495057",
"textAlign": "center"})
return table, df.to_json(date_format='iso', orient='split')
except Exception as e:
return None, None
@callback(
Output('dropdown_col_trans', 'options'),
Input('storage', 'data')
)
def set_transform_column_options(json_data):
if json_data is None:
raise PreventUpdate
df = pd.read_json(io.StringIO(json_data), orient='split')
return [{'label': col, 'value': col} for col in df.columns]
@callback(Output("transformation", "data"),
[Input("button_transformation", "n_clicks"),
Input("storage", "data"),
Input("transformation", "data")],
[State("dropdown_col_trans", "value"),
State("dropdown_type", "value")])
def transform_data(n_clicks, original_data, transformed_data, cols, data_type):
if n_clicks is None:
raise PreventUpdate
json_to_use = transformed_data if transformed_data else original_data
df = pd.read_json(io.StringIO(json_to_use), orient='split')
try:
# Apply the transformation
if data_type == 'int':
df[cols] = df[cols].astype(int)
elif data_type == 'float':
df[cols] = df[cols].astype(float)
elif data_type == 'str':
df[cols] = df[cols].astype(str)
elif data_type == 'datetime':
df[cols] = pd.to_datetime(df[cols])
new_transformed_json_data = df.to_json(date_format='iso', orient='split')
print(new_transformed_json_data)
return new_transformed_json_data
except Exception as e:
print("PROBLEM PROBLEM PROBLEM\n")
return None
if __name__ == "__main__":
app.run_server(mode="inline", debug=True)