Duplicate labels on parallel categories

plotingdoof · February 3, 2025, 6:03pm

Hello,

I am making a parallel categories plot with:

# Build a parallel-categories figure from `pivoted`: one axis per entry of
# `sorted_treatments`, with ribbons coloured by the "meth_numeric" values.
# NOTE(review): this is an excerpt; `pivoted`, `sorted_treatments`, `genome`,
# `motif` and `output_dir` are defined outside the lines shown.
fig = px.parallel_categories(
        pivoted,
        dimensions=sorted_treatments,
        color="meth_numeric",
        # Three colour stops; with range_color=[0, 2] below, the colour values
        # 0, 1 and 2 land on the 0.0, 0.5 and 1.0 stops respectively.
        color_continuous_scale=[
            (0.0, "blue"),
            (0.5, "orange"),
            (1.0, "red"),
        ],
        range_color=[0, 2],
    )
    
    # Set the title and hide the colour bar.
    fig.update_layout(
        title=f"Methylation state transitions in {genome.readable_name} across conditions",
        coloraxis_showscale=False
    )
    
    # Write the figure to an HTML file named after the genome and motif.
    out_file = output_dir / f"{genome.readable_name}_{motif.motif}_parallel_categories.html"
    fig.write_html(str(out_file))

I get the following plot:

How can I remove the duplicate labels of the categories (on what would be the Y axis of the image)?

plotingdoof · February 4, 2025, 8:38pm

tabwue · February 4, 2025, 9:28pm

Hi plotingdoof, is it possible that the duplicate labels are remnants from previous plotting?

plotingdoof · February 4, 2025, 10:05pm

Hello,

This is when I open the HTML freshly. There is no loop around the code, which is called once.

plotingdoof · February 14, 2025, 2:28am

Hello,

Just wanted to bump this as I’m still seeking help.

PipInstallPython · February 14, 2025, 4:41am

Not sure if this will help, but here is some code that might be useful to help you figure out a solution:

import plotly.graph_objects as go
import pandas as pd
import numpy as np
from collections import defaultdict

# Synthetic ranking data: every category starts on a random rank and then
# drifts by a small random step each year (seeded, so the output is repeatable).
np.random.seed(42)
dates = pd.date_range(start='2000-01-01', end='2015-01-01', freq='YS')
categories = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

# Starting ranks are a random permutation of 1..len(categories)
initial_ranks = np.random.permutation(len(categories)) + 1
rank_dict = defaultdict(list)
for name, start_rank in zip(categories, initial_ranks):
    rank_dict[name].append(start_rank)

# One pass per remaining year; each pass extends every category's history by one rank
for _ in range(len(dates) - 1):
    assigned = []
    for name in categories:
        previous = rank_dict[name][-1]
        # Proposed move: -1 (up), 0 (stay), 1 (down), clamped to the valid rank range
        step = np.random.choice([-1, 0, 1])
        candidate = max(1, min(len(categories), previous + step))
        # Keep nudging by one place until the rank is not already taken this year
        while candidate in assigned:
            nudge = np.random.choice([-1, 1])
            candidate = max(1, min(len(categories), candidate + nudge))
        assigned.append(candidate)

    # Commit this year's ranks only after all of them have been chosen
    for name, rank_now in zip(categories, assigned):
        rank_dict[name].append(rank_now)

# Long-format table: one row per (category, date) pair
data = [
    {'Date': when, 'Category': name, 'Rank': rank}
    for name in categories
    for when, rank in zip(dates, rank_dict[name])
]

df = pd.DataFrame(data)

# Create the bump chart: one line per category, tracking its rank over time
fig = go.Figure()

colors = ['#FFFFFF', '#D2B48C', '#DDA0DD', '#FFFF00', '#87CEEB',
          '#FF6347', '#C0C0C0', '#98FB98', '#FFA07A', '#00CED1']

# Cycle through the palette by index so that categories beyond len(colors)
# are still drawn; zipping categories with colors would silently drop them.
for index, category in enumerate(categories):
    color = colors[index % len(colors)]
    df_cat = df[df['Category'] == category]
    fig.add_trace(go.Scatter(
        x=df_cat['Date'],
        y=df_cat['Rank'],
        mode='lines+markers+text',
        name=category,
        line=dict(color=color, width=4),
        marker=dict(color=color, size=20),
        # Print the rank next to each marker
        text=df_cat['Rank'],
        textposition='middle right',
        textfont=dict(color=color, size=14),
    ))

# Customize the layout
fig.update_layout(
    title={
        'text': 'World GDP<br>2000 - 2015',
        'font': {'size': 24, 'color': 'white'},
        'x': 0.5,
        'y': 0.95,
    },
    plot_bgcolor='#333333',
    paper_bgcolor='#333333',
    height=800,
    width=1200,
    showlegend=False,
    xaxis=dict(
        showgrid=True,
        gridcolor='#555555',
        # One tick per date, labelled with the year only
        tickmode='array',
        tickvals=dates,
        ticktext=[d.year for d in dates],
        tickfont=dict(color='white', size=12),
        tickangle=0,
    ),
    yaxis=dict(
        showgrid=True,
        gridcolor='#555555',
        # Half a unit of padding around ranks 1..len(categories), derived from
        # the data rather than hard-coded. Rank 1 is drawn at the bottom; swap
        # the two endpoints to put rank 1 at the top instead.
        range=[0.5, len(categories) + 0.5],
        dtick=1,
        tickfont=dict(color='white', size=12),
    ),
    font=dict(color='white'),
)

# Show the plot
fig.show()

OR this approach:

import plotly.graph_objects as go
import pandas as pd
import numpy as np
from collections import defaultdict

# Synthetic ranking data: every category starts on a random rank and then
# drifts by a small random step each year (seeded, so the output is repeatable).
np.random.seed(42)
years = list(range(2000, 2016))
categories = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

# Starting ranks are a random permutation of 1..len(categories)
initial_ranks = np.random.permutation(len(categories)) + 1
rank_dict = defaultdict(list)
for name, start_rank in zip(categories, initial_ranks):
    rank_dict[name].append(start_rank)

# One pass per remaining year; each pass extends every category's history by one rank
for _ in range(len(years) - 1):
    assigned = []
    for name in categories:
        previous = rank_dict[name][-1]
        step = np.random.choice([-1, 0, 1])
        candidate = max(1, min(len(categories), previous + step))
        # Keep nudging by one place until the rank is not already taken this year
        while candidate in assigned:
            nudge = np.random.choice([-1, 1])
            candidate = max(1, min(len(categories), candidate + nudge))
        assigned.append(candidate)

    for name, rank_now in zip(categories, assigned):
        rank_dict[name].append(rank_now)

# Prepare data for Sankey diagram.
# Node index layout: year position * len(categories) + (rank - 1), so each
# year owns a contiguous run of len(categories) nodes, one per rank.
source = []
target = []
color_index = []

for year_idx in range(len(years) - 1):
    next_idx = year_idx + 1
    for cat_idx, name in enumerate(categories):
        history = rank_dict[name]
        # Link the category's node in this year to its node in the next year
        source.append(year_idx * len(categories) + history[year_idx] - 1)
        target.append(next_idx * len(categories) + history[next_idx] - 1)
        color_index.append(cat_idx)

# Every link carries the same weight
value = [1] * len(source)

# Node labels in the same order as the node indices: "<year> - <rank>"
label = [
    f"{year} - {rank}"
    for year in years
    for rank in range(1, len(categories) + 1)
]

# Palette with one entry per category position (looked up through color_index)
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98FB98',
          '#DDA0DD', '#F0E68C', '#87CEFA', '#CD853F', '#B0C4DE']

# Node styling: every node is light gray with a thin black outline
node_style = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=label,
    color="lightgray",
)

# Link styling: each link takes the colour of the category it belongs to
link_colors = [colors[idx] for idx in color_index]
link_style = dict(
    source=source,
    target=target,
    value=value,
    color=link_colors,
)

# Create the Sankey diagram
sankey_trace = go.Sankey(node=node_style, link=link_style)
fig = go.Figure(data=[sankey_trace])

# Dark theme with white text on a fixed-size canvas
layout_options = dict(
    title_text="World GDP Rankings 2000-2015",
    font_size=10,
    plot_bgcolor='#333333',
    paper_bgcolor='#333333',
    font_color='white',
    height=800,
    width=1200,
)
fig.update_layout(**layout_options)

fig.show()

Also, there are Sankey diagrams:
Sankey diagram in Python…

I also built out a custom component:

Topic		Replies	Views
How to Rotate Dimension Labels in Parallel Categories 📊 Plotly Python	2	3490	December 13, 2019
Customization of the color bars in Plotly's parallel_categories diagram 📊 Plotly Python question	0	354	June 21, 2023
How to create plot with duplicate xaxis labels and 2 xaxis in the same plot? 📊 Plotly Python	7	5748	April 29, 2020
Parallel Categories Tick Text Placement and Bar Size 📊 Plotly Python question	0	326	April 12, 2022
How to make a single label per category in legend for subplots? 📊 Plotly Python	2	870	October 19, 2020

Duplicate labels on parallel categories

Related topics