Hi everyone,
I am building a Sankey diagram in Plotly for a material flow analysis and I am struggling with reverse flows.
My issue is that some flows go from a node on the right side of the diagram (Waste) back to nodes further left (Other use and Recarburising use). I would like these reverse flows to run clearly underneath the main Sankey, so they look like recycling loops. However, Plotly seems to route them in ways I cannot control well.
What I have already tried:
- setting `arrangement="fixed"` instead of `"snap"`
- manually defining all node `x` and `y` positions
- moving the `Waste` node much further down
- increasing figure height
But even with fixed positioning, the reverse flows do not reliably go “below” the diagram. If I move Waste too far down, the node gets clipped at the bottom of the figure because it is large.
So my questions are:
- Is there any way in Plotly Sankey to control the routing of reverse flows more explicitly?
- Can one force links like `Waste -> Other use` and `Waste -> Recarburising use` to run below the other flows?
A simplified version of my setup looks like this:
# -----------------------------
# X/Y positions
# -----------------------------
# Horizontal position per column key. The layout loop further down looks up
# x_positions[col_name] for every column in `columns`.
# Values are presumably Plotly's normalized 0..1 coordinates — TODO confirm.
x_positions = {
"materials": 0.02,
"use": 0.28,
"stock": 0.50,
"end_of_life": 0.62,
"sink": 0.92,
# "extra": 0.98,   (disabled column slot)
}
# Per-node vertical positions that replace the evenly spaced defaults
# computed by the layout loop. Only nodes that actually exist in node_y
# are overridden.
y_overrides = {
"Natural graphite": 0.20,
"Synthetic graphite": 0.65,
"EV use": 0.06,
"ESS batteries use": 0.18,
"Other battery use": 0.30,
"Electrodes use": 0.42,
"Refractories use": 0.54,
"Foundries use": 0.63,
"Recarburising use": 0.75,
"Other use": 0.85,
"In use stock": 0.10, # high enough that the stock flows arrive from above
# NOTE(review): the comment on "Waste" says it should sit below
# "Recarburising use", but both use y = 0.75 (as does "Landfill"), and
# "Other use" at 0.85 is larger still. Confirm the intended ordering.
"Waste": 0.75, # meant to sit below Recarburising use so the flow loops around underneath
"Dissipation": 0.35,
"Landfill": 0.75,
"Environment": 0.42,
}
# Layout pass: assign every node an (x, y) position, column by column.
node_x = {}
node_y = {}
top_margin = 0.06
bottom_margin = 0.06

for col_name, node_list in columns.items():
    count = len(node_list)
    if count == 1:
        # A lone node is centred vertically.
        ys = [0.5]
    else:
        # Spread the nodes evenly between the top and bottom margins.
        span = 1 - top_margin - bottom_margin
        step = span / (count - 1)
        ys = [top_margin + i * step for i in range(count)]
    for name, y_pos in zip(node_list, ys):
        node_x[name] = x_positions[col_name]
        node_y[name] = y_pos

# Apply the hand-tuned vertical positions, but only for nodes that exist.
node_y.update(
    {name: y_pos for name, y_pos in y_overrides.items() if name in node_y}
)

# Additional hard x safeguard: pin the listed nodes to their column's x,
# regardless of which column they were placed in above.
forced_columns = {
    "use": [
        "EV use", "ESS batteries use", "Other battery use",
        "Electrodes use", "Refractories use", "Foundries use",
        "Recarburising use", "Other use",
    ],
    "end_of_life": ["Waste", "Dissipation"],
    "sink": ["Landfill", "Environment"],
    "stock": ["In use stock"],
    "materials": ["Natural graphite", "Synthetic graphite"],
}
for column_key, members in forced_columns.items():
    for name in members:
        if name in node_x:
            node_x[name] = x_positions[column_key]
# Node order: flatten the columns in iteration order; a node's position in
# this list is the integer id used for the link endpoints.
nodes = []
for group in columns.values():
    nodes.extend(group)
node_index = {name: position for position, name in enumerate(nodes)}

# Translate the source/target names of every flow into node ids.
for id_column, name_column in (("source_id", "source"), ("target_id", "target")):
    df_plot[id_column] = df_plot[name_column].map(node_index)

# Flows whose source or target is not a known node end up with a missing id.
# NOTE(review): such rows are only reported here; they stay in df_plot.
missing_endpoint = df_plot[["source_id", "target_id"]].isna().any(axis=1)
unmapped = df_plot[missing_endpoint]
if unmapped.empty:
    print("Alle Flows erfolgreich gemappt.")
else:
    print("Nicht gemappte Flows:")
    display(unmapped)

# Quick sanity output for the x positions of three nodes.
for name in ("Recarburising use", "Waste", "Dissipation"):
    print(f"X-Position {name}:", node_x.get(name))
# -----------------------------
# Sankey
# -----------------------------
# Derived column: "value" divided by 1000 (tonnes -> kt per the original note).
df_plot["value_kt"] = df_plot["value"] / 1000

# Per-node totals of everything flowing in and everything flowing out.
node_totals_in = dict.fromkeys(nodes, 0)
node_totals_out = dict.fromkeys(nodes, 0)
for src, dst, amount in zip(df_plot["source"], df_plot["target"], df_plot["value"]):
    node_totals_out[src] += amount
    node_totals_in[dst] += amount

# Each label shows the larger of the two totals, so source-like nodes display
# their outgoing sum and sink-like nodes their incoming sum.
larger_side = [max(node_totals_in[name], node_totals_out[name]) for name in nodes]
node_labels = [f"{n}<br>{total/1000:.1f} kt" for n, total in zip(nodes, larger_side)]
# Node styling and positions for the Sankey trace, in the same order as `nodes`.
# "align" is set unconditionally; there is no check here for whether the
# installed Plotly version supports the property.
node_dict = {
    "label": node_labels,
    "x": [node_x[name] for name in nodes],
    "y": [node_y[name] for name in nodes],
    "pad": 12,
    "thickness": 12,
    "color": node_colors,
    "line": {"color": "black", "width": 0.5},
    "hovertemplate": "%{label}<extra></extra>",
    "align": "left",
}
# Hover text for links: source, target, flow type and the raw "value" column.
link_hover = (
    "Von: %{source.label}<br>"
    "Nach: %{target.label}<br>"
    "Typ: %{customdata[0]}<br>"
    "Wert: %{value:,.2f}<extra></extra>"
)

# NOTE(review): the surrounding post describes trying arrangement="fixed",
# but this script passes "snap" — confirm which one is intended, since the
# node x/y values above are all set by hand.
sankey_trace = go.Sankey(
    arrangement="snap",
    node=node_dict,
    link={
        "source": df_plot["source_id"],
        "target": df_plot["target_id"],
        "value": df_plot["value"],
        "color": link_colors,
        "customdata": df_plot[["type"]].values,
        "hovertemplate": link_hover,
    },
)
fig = go.Figure(data=[sankey_trace])
# Figure-level layout: bold title, fixed canvas size, base font and margins.
# The title's bold tag is now properly closed (the original ended with a
# second opening <b>), and the placeholder-free f-string prefix is dropped.
fig.update_layout(
    title="<b>Global graphite flow in 2024</b>",
    width=1200,
    height=1100,
    font=dict(size=13),
    margin=dict(l=20, r=20, t=60, b=20),
)
fig.show()
In the full figure, the reverse flows visually cut across the diagram instead of forming a clear lower recycling loop.
I would be very grateful for any advice on whether this is a limitation of Plotly Sankey, or whether there is a better way to structure the nodes and links.
Thank you very much.
