Figure Friday 2024 - week 29

@li.nguyen Awesome!!!

1 Like
'''
This script looks for ewf_appearances.csv in the same folder as this script.
'''
# 
import polars as pl
import polars.selectors as cs
import plotly.express as px
import plotly.graph_objects as go

# tweak function returns a polars DataFrame of tier 1 appearances with derived columns
def tweak_df_appearances():
    '''
    Build the tier 1 appearances table from ewf_appearances.csv.

    The csv is scanned in polars lazy mode and only collected at the end.
    Steps, in order:
      - keep rows where tier == 1
      - attendance: replace 'NA' with '0', remove a thousands comma, cast to Int32
      - SEASON: first 4 characters of season, cast to Int32
      - DATE: the original date column (renamed date_str) parsed as %m/%d/%Y
      - sort by SEASON, team_name, DATE
      - MATCH_NUM: cumulative count of attendance per SEASON/team_name
      - SEASON_GOAL_DIFF: cumulative sum of goal_difference per SEASON/team_name

    Returns a polars DataFrame holding only the columns named in keep_cols.
    '''
    per_team_season = ('SEASON', 'team_name')
    keep_cols = (
        'SEASON', 'MATCH_NUM', 'DATE', 'team_name',
        'goal_difference', 'SEASON_GOAL_DIFF', 'result', 'win', 'loss', 'draw', 'points',
    )

    # column expressions, named so the pipeline below reads as a list of steps
    attendance_as_int = (
        pl.col('attendance')
        .str.replace('NA', '0')  # when attendance is NA, replace with string 0
        .str.replace(',', '')    # get rid of thousands commas
        .cast(pl.Int32)
    )
    season_start_year = pl.col('season').str.slice(0, 4).cast(pl.Int32)
    parsed_date = pl.col('date_str').str.to_date(format='%m/%d/%Y', strict=True)

    lazy_frame = pl.scan_csv('ewf_appearances.csv').filter(pl.col('tier') == 1)
    lazy_frame = lazy_frame.with_columns(attendance_as_int, SEASON=season_start_year)
    lazy_frame = lazy_frame.rename({'date': 'date_str'})
    lazy_frame = lazy_frame.with_columns(DATE=parsed_date)
    # the cumulative columns below rely on this ordering within each team's season
    lazy_frame = lazy_frame.sort('SEASON', 'team_name', 'DATE')
    lazy_frame = lazy_frame.with_columns(
        MATCH_NUM=pl.cum_count('attendance').over(*per_team_season),
        SEASON_GOAL_DIFF=pl.cum_sum('goal_difference').over(*per_team_season),
    )
    # convert polars lazy frame to polars data frame
    return lazy_frame.select(pl.col(*keep_cols)).collect()

# Read and clean the appearances once; the league table, the pivot and the plots below all use this frame.
df_appearances = tweak_df_appearances()


def custom_annotation(fig, text, showarrow, x, y,  xanchor, yanchor, xshift, yshift, align, ax=0, ay=0, my_color='gray'):
    '''
    adds text to a plotly graph as an annotation, returns the same figure.

    fig: figure object; only its add_annotation method is used
    text: annotation text
    showarrow: forwarded to add_annotation. It used to be accepted but dropped,
        so plotly's own default applied no matter what the caller passed.
    x, y: annotation position
    xanchor, yanchor: which part of the text box sits at (x, y)
    xshift, yshift: extra offsets forwarded to add_annotation
    align: horizontal alignment of the text
    ax, ay: arrow tail offsets forwarded to add_annotation, default 0
    my_color: font color of the text, default 'gray'
    '''
    fig.add_annotation(
        text=text,
        showarrow=showarrow,
        x=x,
        y=y,
        xanchor=xanchor,
        yanchor=yanchor,
        xshift=xshift,
        yshift=yshift,
        # a single font spec; the color was previously given twice (a fixed
        # "grey" in font plus font_color=my_color), with my_color the one intended
        font=dict(size=10, color=my_color),
        align=align,
        ax=ax,
        ay=ay,
    )
    return fig
    
#  df_table: one row per season and team with win/draw/loss counts, points,
#  goal difference and rank. Equivalent to the data in ewf_standings.csv,
#  which could have been read instead.
df_table = (
    df_appearances
    .group_by('SEASON', 'team_name')
    .agg(
        W=pl.col('win').sum(),
        D=pl.col('draw').sum(),
        L=pl.col('loss').sum(),
        P=pl.col('points').sum(),
        GD=pl.col('goal_difference').sum(),
    )
    # matches played = wins + draws + losses
    .with_columns(MATCHES=pl.col('W') + pl.col('D') + pl.col('L'))
    # kept as its own step because it reads the MATCHES column created just above
    .with_columns(PTS_PER_MATCH=pl.col('P') / pl.col('MATCHES'))
    # within a season: highest points per match first, then highest goal difference
    .sort(['SEASON', 'PTS_PER_MATCH', 'GD'], descending=[False, True, True])
    # rows are now ordered best-first inside each season, so a running count is the rank
    .with_columns(RANK=pl.col('SEASON').cum_count().over('SEASON'))
    .select('SEASON', 'RANK', 'team_name', 'W', 'D', 'L', 'P', 'GD', 'MATCHES', 'PTS_PER_MATCH')
)

# do a pivot of df_appearances, to put team names in the columns.
# this is useful for plotting multiple traces with px.line
# SEASON_GOAL_DIFF already comes out of tweak_df_appearances(), computed over the
# same SEASON/team_name groups in the same row order, so it is pivoted as is
# instead of being recomputed here.
df_pivot = (
    df_appearances
    .pivot(
        on='team_name',
        index=['SEASON', 'MATCH_NUM'],
        values='SEASON_GOAL_DIFF',
    )
)

# iterate through seasons in chronological order, make a separate plot for each one.
# To plot a subset, slice the list in the for statement, e.g. seasons[-1:] plots
# only the most recent season.
# unique() gives no ordering guarantee, hence the explicit sort.
seasons = sorted(df_appearances['SEASON'].unique().to_list())

# x-axis title with the data source underneath; identical for every season
x_label = (
    "MATCH NUMBER<br><sup>"
    "SOURCE: The English Women's Football (EWF) Database, May 2024<br>"
    '<a href="https://github.com/probjects/ewf-database">https://github.com/probjects/ewf-database</a>'
    '</sup>'
)

for season in seasons:
    print(season)
    df = df_pivot.filter(pl.col('SEASON') == season)

    # the pivot has a column for every team of every season; teams that did not
    # play in this season are entirely null here, so drop their columns
    null_cols = [c for c in df.columns if df[c].null_count() == df.height]
    df = df.drop(null_cols)

    # Append one placeholder row (the column sums, overwritten below) and number the rows.
    # The placeholder is the last row, so max_row_num equals the number of match rows.
    df = (
        pl.concat([df, df.select(pl.all().sum())])
        .with_row_index('ROW_NUM')
    )
    max_row_num = df['ROW_NUM'].max()

    # Turn the placeholder into a "match 0" row so every goal diff trace starts at 0,
    # not at the game 1 goal diff. Columns from index 2 on are MATCH_NUM and the teams.
    zeroed_cols = df.columns[2:]
    df = (
        df
        .with_columns(
            [
                pl.when(pl.col('ROW_NUM') == max_row_num)
                .then(pl.lit(0))
                .otherwise(pl.col(c))
                .alias(c)
                for c in zeroed_cols
            ]
        )
        .sort('MATCH_NUM', descending=False)
    )

    print(df)
    #  columns starting at index 3 are the team names, used for y-axis data
    y_cols = df.columns[3:]
    fig = px.line(
        df,
        x='MATCH_NUM',
        y=y_cols,
        template='plotly_white',
        line_shape='hvh',
    )

    # put annotation next to each trace with team name, rank, and goal diff.
    # teams with the same or a close goal differential have overlapping annotations;
    # this gets messy and should be fixed with a spacing algorithm.
    for i, col in enumerate(y_cols):
        df_rank_gd = df_table.filter(
            pl.col('SEASON') == season,
            pl.col('team_name') == col,
        )
        print(df_rank_gd)
        season_rank = df_rank_gd['RANK'][0]
        goal_differential = df_rank_gd['GD'][0]
        # color map has 10 values, then wraps around (reason for i mod 10)
        my_color = px.colors.qualitative.Plotly[i % 10]
        # shorten team names, Women & Ladies understood as women's league
        annotate_text = col.replace('Women', '').replace('Ladies', '')
        annotate_text += f'({season_rank}, Goal Diff. = {goal_differential})'
        # Use the team's last non-null value (df is sorted by MATCH_NUM). A team that
        # played fewer matches than the season maximum is null at the final match
        # number, which would leave the annotation without a y position.
        y_final = df[col].drop_nulls()[-1]
        fig = custom_annotation(
            fig,
            annotate_text,
            False,
            max_row_num + 0.2,
            y_final,
            'left',
            'middle',
            0,
            0,
            'left',
            my_color=my_color,
        )

    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        showlegend=False,
        title=go.layout.Title(
            text=f"English Women's Football, Tier 1  {season} Goal Differential",
            xref="paper",
            x=0
        ),
        xaxis=go.layout.XAxis(
            title=go.layout.xaxis.Title(
                text=x_label
            ),
            # extra room on the right for the team annotations
            range=[0, max_row_num + 6],
            showgrid=False,
        ),
        yaxis=go.layout.YAxis(
            title=go.layout.yaxis.Title(
                text='Season Goal Differential'
            ),
            showgrid=False,
            zeroline=False
        )
    )

    fig.show()
1 Like

This is the code for plotting the goal differential of all teams over the course of each season.

Thank you @adamschroeder and @PipInstallPython for setting me straight on how to post formatted code.

1 Like

PlotlyFigureFriday_Y24W29_EWF

Here’s my submission. I focused more on the UI as I’ve been wanting to learn the dash mantine components library for some time.

8 Likes

This looks absolutely stunning! :star: I really like the small details—the football team icons in the table, the filter interactions linking the table to other charts with buttons, the consistent colour scheme, the filters all located at the top, and the tooltip showing the game outcomes. Great job! :rocket:

You should absolutely submit this to the Dash show-and-tell this month as well!

Do you have the code somewhere? I’d love to check it out! :slight_smile:

1 Like

Wow! :star2: I’ve never seen that done before - amazing job figuring this out @RenaudLN @mo.elauzei. I wasn’t familiar with the add_layout_image function, but this actually makes me curious about another chart variation. Looping in @PipInstallPython as well, as he seems to be a master in creating complex charts :smile:

The isotype pictograms - have any of you ever created something like this? I’ve read through the marker_style functionality, which also looks like a good candidate, but the add_layout_image function sounds more promising. Do you think we could achieve something like the isotype pictogram with the add_layout_image function, or is there actually a better approach?


Hey @Alfredo49,

Your second color palette for the categories is a big improvement in terms of distinguishability! :star2:

Your dashboard reminded me of some of my past ones, so I wanted to share some advice that really helped me. This article about gradient backgrounds explains how they can affect color perception and might distract from the main focus—the charts.

In the end, we want the users to be able to easily focus on the charts, and a simpler background can help with that. While I personally don’t use gradient backgrounds often, I don’t think they’re always bad (unlike what that article suggests). They can work beautifully if they’re subtle, fit the overall color scheme, and don’t draw attention away from the charts.

Here are some Tableau dashboard examples where I think it works nicely - the gradient background is very subtle and fits the overall color scheme, while the charts are still placed on simple white containers. This makes it easier to focus on the charts :slight_smile:


Sources:

1 Like

It’s definitely possible with Plotly and layout images with a little bit of maths and fiddling!

Here’s my copy of the above chart with plotly

import numpy as np
import pandas as pd
import plotly.express as px


# Data to plot: one pictogram column per energy source (labelled in TWh on the chart).
values = pd.Series(
    {
        "Other renewables": 1614,
        "Biofuels": 1102,
        "Solar": 1793,
        "Wind": 3540,
        "Hydro": 10455,
    }
)

# Figure geometry and styling.
height = 450
width = 1000
font_size = 16
bar_gap = 0.2      # share of each category slot left empty between columns
max_icons = 110    # the tallest column never gets more icons than this
margin_x = 20
bg_color = "#142229"

# NOTE(review): 150 and 180 look like pixel allowances for the header, labels
# and margins - confirm before changing the figure size.
bar_max_height = height - 150 - font_size
plot_aspect_ratio = (width - (2 * margin_x)) / bar_max_height
bar_width = (width - 180) / len(values) * (1 - bar_gap)

# Amount one icon stands for: the smallest multiple of a power of ten that keeps
# the largest value within max_icons icons.
# np.log10 is used rather than np.log(x) / np.log(10): the quotient can land just
# below an integer at exact powers of ten, and floor() would then be one too low.
magnitude = 10 ** np.floor(np.log10(values.max() / max_icons))
icon_magnitude = np.ceil((values.max() / max_icons) / magnitude) * magnitude

max_n_icons = np.ceil(values.max() / icon_magnitude)

# Icons per row: the candidate whose rows/columns ratio for the tallest column
# is closest to the height/width ratio available to one bar.
possible_n_cols = np.arange(4, 15)
n_cols = possible_n_cols[
    np.argmin(
        np.abs(
            np.ceil(max_n_icons / possible_n_cols) / possible_n_cols
            - bar_max_height / bar_width
        )
    )
]

# One row per source. text_height rounds each value up to a whole number of
# icon rows; pos is the column's position on the x axis.
df = (
    values.to_frame("value")
    .rename_axis("key")
    .reset_index()
    .assign(
        text_height=(np.ceil(values / icon_magnitude / n_cols) * icon_magnitude * n_cols).to_numpy(),
        pos=list(range(len(values))),
    )
)

# Base figure: one bar per source, as tall as its rounded-up icon grid.
fig = px.bar(
    df,
    x="pos",
    y="text_height",
    template="plotly_dark",
    width=width,
    height=height,
    custom_data=["value"],
)

# The bars themselves are fully transparent; they only carry the value label
# above each column and the hover text.
fig.update_traces(
    marker=dict(color="rgba(0,0,0,0)", line_width=0),
    texttemplate="%{customdata[0]:,.0f} TWh",
    textposition="outside",
    hovertemplate="%{customdata[0]:,.0f} TWh",
)

# No grid, ticks or zero line on y; x ticks show the source names at positions 0..n-1.
fig.update_yaxes(
    showgrid=False,
    title=None,
    showticklabels=False,
    constraintoward="bottom",
    zeroline=False,
)
fig.update_xaxes(
    showgrid=False,
    title=None,
    tickvals=list(range(len(values))),
    ticktext=values.index.to_list(),
)

# Tie the y scale to the x scale so the icon cells keep their proportions.
fig.update_layout(
    yaxis_scaleanchor="x",
    yaxis_scaleratio=len(values) / plot_aspect_ratio / values.max(),
    showlegend=False,
    bargap=bar_gap,
    margin_pad=10,
    margin_l=margin_x,
    margin_r=margin_x,
    paper_bgcolor=bg_color,
    plot_bgcolor=bg_color,
)

# Header lines, all anchored at the top-left corner of the paper and stacked
# through their vertical shift: (yshift, text, font size).
for _yshift, _text, _font_size in (
    (40, "Renewable energy across the world", 20),
    (12, "Primary energy consumption of renewable energy sources, 2019", 16),
    (-12, "Source: Our world in data", 12),
):
    fig.add_annotation(
        xref="paper",
        yref="paper",
        xanchor="left",
        yanchor="top",
        xshift=margin_x,
        yshift=_yshift,
        x=0,
        y=1,
        text=_text,
        ax=0,
        ay=0,
        font={"size": _font_size, "color": "#fff"},
    )

# Size of one icon cell in axis units.
# sizex: the occupied part of a category slot (1 - bar_gap) split into n_cols columns.
# sizey: values.max() divided by the number of icon rows of the tallest column.
sizex = (1 - bar_gap) / n_cols
sizey = values.max() / np.ceil(max_n_icons / n_cols)

# One inline SVG data URI per energy source; the keys are the index labels of `values`.
icons = {
    "Solar": "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='1em' height='1em' viewBox='0 0 256 256'%3E%3Cpath fill='%23FF6D00' d='M120 40V16a8 8 0 0 1 16 0v24a8 8 0 0 1-16 0m8 24a64 64 0 1 0 64 64a64.07 64.07 0 0 0-64-64m-69.66 5.66a8 8 0 0 0 11.32-11.32l-16-16a8 8 0 0 0-11.32 11.32Zm0 116.68l-16 16a8 8 0 0 0 11.32 11.32l16-16a8 8 0 0 0-11.32-11.32M192 72a8 8 0 0 0 5.66-2.34l16-16a8 8 0 0 0-11.32-11.32l-16 16A8 8 0 0 0 192 72m5.66 114.34a8 8 0 0 0-11.32 11.32l16 16a8 8 0 0 0 11.32-11.32ZM48 128a8 8 0 0 0-8-8H16a8 8 0 0 0 0 16h24a8 8 0 0 0 8-8m80 80a8 8 0 0 0-8 8v24a8 8 0 0 0 16 0v-24a8 8 0 0 0-8-8m112-88h-24a8 8 0 0 0 0 16h24a8 8 0 0 0 0-16'/%3E%3C/svg%3E",
    "Wind": "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='1em' height='1em' viewBox='0 0 24 24'%3E%3Cpath fill='%232DB958' d='M12 2c3.292 0 6 2.435 6 5.5c0 1.337-.515 2.554-1.369 3.5H21a1 1 0 0 1 1 1c0 3.292-2.435 6-5.5 6c-1.336 0-2.553-.515-3.5-1.368V21a1 1 0 0 1-1 1c-3.292 0-6-2.435-6-5.5c0-1.336.515-2.553 1.368-3.5H3a1 1 0 0 1-1-1c0-3.292 2.435-6 5.5-6c1.337 0 2.554.515 3.5 1.369V3a1 1 0 0 1 1-1'/%3E%3C/svg%3E",
    "Biofuels": "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='1em' height='1em' viewBox='0 0 24 24'%3E%3Cpath fill='%23FF4136' d='m22 3.41l-.12-1.26l-1.2.4a13.84 13.84 0 0 1-6.41.64a11.87 11.87 0 0 0-6.68.9A7.23 7.23 0 0 0 3.3 9.5a9 9 0 0 0 .39 4.58a16.6 16.6 0 0 1 1.18-2.2a9.85 9.85 0 0 1 4.07-3.43a11.16 11.16 0 0 1 5.06-1A12.08 12.08 0 0 0 9.34 9.2a9.48 9.48 0 0 0-1.86 1.53a11.38 11.38 0 0 0-1.39 1.91a16.39 16.39 0 0 0-1.57 4.54A26.42 26.42 0 0 0 4 22h2a30.69 30.69 0 0 1 .59-4.32a9.25 9.25 0 0 0 4.52 1.11a11 11 0 0 0 4.28-.87C23 14.67 22 3.86 22 3.41'/%3E%3C/svg%3E",
    "Hydro": "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='1em' height='1em' viewBox='0 0 512 512'%3E%3Cpath fill='%232ECEC4' d='M265.12 60.12a12 12 0 0 0-18.23 0C215.23 97.15 112 225.17 112 320c0 88.37 55.64 144 144 144s144-55.63 144-144c0-94.83-103.23-222.85-134.88-259.88M272 412a12 12 0 0 1-11.34-16a11.89 11.89 0 0 1 11.41-8A60.06 60.06 0 0 0 332 328.07a11.89 11.89 0 0 1 8-11.41A12 12 0 0 1 356 328a84.09 84.09 0 0 1-84 84'/%3E%3C/svg%3E",
    "Other renewables": "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='1em' height='1em' viewBox='0 0 24 24'%3E%3Cpath fill='%23C658C0' fill-rule='evenodd' d='M9.586 2.586A2 2 0 0 1 11 2h2a2 2 0 0 1 2 2v.089l.473.196l.063-.063a2 2 0 0 1 2.828 0l1.414 1.414a2 2 0 0 1 0 2.827l-.063.064l.196.473H20a2 2 0 0 1 2 2v2a2 2 0 0 1-2 2h-.089l-.196.473l.063.063a2 2 0 0 1 0 2.828l-1.414 1.414a2 2 0 0 1-2.828 0l-.063-.063l-.473.196V20a2 2 0 0 1-2 2h-2a2 2 0 0 1-2-2v-.089l-.473-.196l-.063.063a2 2 0 0 1-2.828 0l-1.414-1.414a2 2 0 0 1 0-2.827l.063-.064L4.089 15H4a2 2 0 0 1-2-2v-2a2 2 0 0 1 2-2h.09l.195-.473l-.063-.063a2 2 0 0 1 0-2.828l1.414-1.414a2 2 0 0 1 2.827 0l.064.063L9 4.089V4a2 2 0 0 1 .586-1.414M8 12a4 4 0 1 1 8 0a4 4 0 0 1-8 0' clip-rule='evenodd'/%3E%3C/svg%3E",
}

# Legend under the header: one sample icon followed by the amount each icon stands for.
# Positions and sizes are in paper coordinates, hence the division of pixel
# amounts by the figure width/height.
fig.add_layout_image(
    source=icons.get("Hydro"),
    xref="paper",
    yref="paper",
    xanchor="left",
    yanchor="top",
    x=20 / width,
    y=1 - 60 / height,
    sizex=20 / width,
    sizey=20 / height,
    sizing="contain",
    opacity=1,
    layer="above"
)
fig.add_annotation(
    xref="paper",
    yref="paper",
    xanchor="left",
    yanchor="top",
    xshift=margin_x + 20,
    yshift=-40,
    x=0,
    y=1,
    # icon_magnitude is a float; ":,g" drops the trailing ".0" so the legend
    # reads "= 100 TWh" like the whole-number bar labels, not "= 100.0 TWh"
    text=f"= {icon_magnitude:,g} TWh",
    ax=0,
    ay=0,
    font={"size": 12, "color": "#fff"}
)

# Draw the pictogram: for each source, lay icons out in rows of n_cols,
# filling each row left to right and stacking rows upwards.
for i, (k, v) in enumerate(values.items()):
    icons_exact = v / icon_magnitude   # icons this value is worth, possibly fractional
    n_icons = np.ceil(icons_exact)     # icons actually drawn
    left_edge = i - (1 - bar_gap) / 2  # left border of this source's column
    for j in np.arange(0, n_icons):
        x, y = j % n_cols, j // n_cols
        cell_top = (y + 1) * sizey
        fig.add_layout_image(
            source=icons.get(k),
            xref="x",
            yref="y",
            xanchor="center",
            yanchor="top",
            x=left_edge + (x + 1 / 2) * sizex,
            y=cell_top,
            sizex=sizex * 0.9,
            sizey=sizey * 0.9,
            sizing="contain",
            opacity=1,
            layer="above"
        )
        if j != n_icons - 1:
            continue
        # Last icon of the column: cover the share of it that goes beyond the
        # value with a background-colored rectangle, from the top of the cell down.
        fig.add_shape(
            type="rect",
            x0=left_edge + x * sizex,
            x1=left_edge + (x + 1) * sizex,
            y0=cell_top,
            y1=(y + 1 - (n_icons - icons_exact)) * sizey,
            fillcolor=bg_color,
            line_width=0,
        )

fig.show()
6 Likes

@RenaudLN Wow, you all are absolutely nailing these chart challenges! :rocket: I’ve never seen a pictogram created in Plotly before, and I couldn’t find any examples on the forum either. I think your version looks even better than the original - love the water drops! :droplet:

As @adamschroeder suggested for the bump chart, I believe this would also make a great standalone post to improve its visibility for search engines - “How to create a pictogram with Plotly”.

2 Likes

Really professional-looking dashboard you built, @spd .
:wave: welcome to the community. Where have you been all this time :slight_smile: ?

I agree with @li.nguyen . I recommend submitting this app to the Plotly Dash Examples Page. Do you mind sharing your code as well?

1 Like

Thanks @li.nguyen ! No code available yet. I need to clean it up since I rushed to get this done in time but, in the meantime, I’m happy to share or walk through any part of my app if there’s something specific you want to know more about.

I appreciate the feedback :slight_smile:

1 Like

Thank you! Happy to join :slight_smile:

I’ll try to submit the app/share a GitHub repo once I tidy up the code

1 Like

Hi @li.nguyen,

Thank you for the valuable advice. Since frontend design isn’t my strong suit, I often find choosing color palettes challenging, especially with contrasting elements. I’ll definitely keep your suggestions in mind for future dashboards!

1 Like

@Alfredo49 - it’s always a learning journey, and it definitely doesn’t mean you can’t make it your strong suit :slight_smile: Believe me, my first dashboards were just as colorful because, let’s be honest, colors make everything more fun.

I’m still learning as well, but one key takeaway I’ve had is that less is often more, particularly with data visualization and dashboards. Looking forward to seeing your next submissions! :art:

2 Likes