Okay, I put the Sankey diagram into a Dash app and added some controls for whether migration is shown as raw counts, per 100k people in the continent of origin, or per 100k people in the continent of destination. There is also a check box to show or hide the intra-continental migration links. Finally, a short explanation below the plot describes what each of these choices means.
#!/usr/bin/env python
# coding: utf-8
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, callback, Output, Input
import dash_bootstrap_components as dbc
def hex_to_rgba(hex_color, alpha):
    """Convert a hex colour string to a CSS ``rgba(...)`` string.

    Args:
        hex_color: Colour written as ``'#RRGGBB'`` or ``'RRGGBB'``. The
            three-digit shorthand (``'#RGB'``) is expanded to six digits.
            Characters beyond the sixth digit are ignored.
        alpha: Opacity, inserted verbatim as the fourth component.

    Returns:
        A string of the form ``'rgba(r, g, b, alpha)'`` with decimal
        channel values.

    Raises:
        ValueError: If fewer than six hex digits remain after shorthand
            expansion, or if a channel is not valid hexadecimal.
    """
    digits = hex_color.lstrip('#')
    if len(digits) == 3:
        # Shorthand notation: each digit stands for a doubled pair.
        digits = ''.join(ch * 2 for ch in digits)
    if len(digits) < 6:
        # Without this check a 5-digit string would silently yield a
        # one-character (wrong) blue channel.
        raise ValueError(
            f'expected a 3- or 6-digit hex colour, got {hex_color!r}'
        )
    r, g, b = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return f'rgba({r}, {g}, {b}, {alpha})'
# Load the migration table. The rest of the script reads its 'Origin',
# 'Destination' and '2024' columns.
df = pd.read_csv('un-migration-2024.csv')
# Some names in the data carry asterisk markers; strip them from both ends
# so the names compare cleanly against the region lists defined below.
# The vectorised string accessor avoids building throwaway Python lists and
# leaves missing values untouched instead of failing on them.
df['Origin'] = df['Origin'].str.strip('*')
df['Destination'] = df['Destination'].str.strip('*')
# Download the country reference table (world-countries-centroids project).
countriesdf = pd.read_csv(
    'https://raw.githubusercontent.com/gavinr/world-countries-centroids/refs/heads/master/dist/countries.csv'
)
# Hand-entered record for Hong Kong, appended as one extra row.
new_row = pd.DataFrame([
    {
        "longitude": 114.16993426713121,
        "latitude": 22.318764728315433,
        "COUNTRY": 'Hong Kong',
        'ISO': 'HK',
        'COUNTRYAFF': 'Hong Kong',
        'AFF_ISO': 'HK',
    },
])
countriesdf = pd.concat([countriesdf, new_row], ignore_index=True)
# Continent-level aggregates, spelled as they appear in the migration table.
# The order matters: it fixes the Sankey node positions built below.
continents = [
    'AFRICA',
    'ASIA',
    'EUROPE',
    'LATIN AMERICA AND THE CARIBBEAN',
    'NORTHERN AMERICA',
    'OCEANIA',
]

# Names in the table that are treated as aggregates rather than countries.
# Each name is listed once (the original list repeated four entries).
# NOTE(review): 'Northern Mariana Islands', 'Western Sahara',
# 'Solomon Islands', 'Somalia' and 'South Africa' look like individual
# countries/territories rather than aggregates - confirm they belong here.
# NOTE(review): 'Europe and Northern' looks truncated (possibly
# 'Europe and Northern America') - verify against the data.
noncountries = [
    'Low-and-Lower-middle-income countries',
    'Low-and-middle-income countries',
    'Low-income countries',
    'Lower-middle-income countries',
    'Eastern Africa',
    'Eastern Asia',
    'Eastern Europe',
    'Eastern and South-Eastern Asia',
    'Europe and Northern',
    'Northern Africa',
    'Northern Africa and Western Asia',
    'Northern Europe',
    'Northern Mariana Islands',
    'Oceania (excluding Australia and New Zealand)',
    'Upper-middle-income countries',
    'Western Africa',
    'Western Asia',
    'Western Europe',
    'Western Sahara',
    'Land-locked Developing Countries (LLDC)',
    'Latin America and the Caribbean',
    'Least developed countries',
    'Less developed regions',
    'Less developed regions, excluding China',
    'Less developed regions, excluding least developed countries',
    'Small Island Developing States (SIDS)',
    'Solomon Islands',
    'Somalia',
    'South Africa',
    'South America',
    'South-Eastern Asia',
    'Southern Africa',
    'Southern Asia',
    'Southern Europe',
]

# One labelled position per continent on each side of the diagram:
# departure-side positions are 0..n-1, arrival-side positions are n..2n-1.
contiseries_from = pd.Series(continents)
contiseries_to = pd.Series(continents)
# Offset by the number of continents instead of a hard-coded 6 so the two
# index ranges stay adjacent if the continent list ever changes.
contiseries_to.index = (len(continents) + contiseries_from.index).to_list()
# The table contains repeated (Destination, Origin) pairs; keep the first
# occurrence of each. The two columns are compared as a pair rather than as
# one concatenated string, so distinct pairs whose names happen to
# concatenate to the same text are never mistaken for duplicates.
df = df.drop_duplicates(subset=['Destination', 'Origin'], keep='first')
# Pull the continent-to-continent flows into their own table: keep rows
# whose origin AND destination are both continents, order them by
# destination then origin, and renumber the rows from zero.
both_continents = df['Origin'].isin(continents) & df['Destination'].isin(continents)
continentdf = (
    df.loc[both_continents, :]
    .sort_values(['Destination', 'Origin'])
    .reset_index(drop=True)
)
# Attach the Sankey node position of each flow's two ends. Name -> position
# lookups are built from the two label series and applied with a vectorised
# map, which replaces the row-wise loop and yields integer columns instead
# of object columns pre-filled with None.
destination_position = {name: pos for pos, name in contiseries_to.items()}
origin_position = {name: pos for pos, name in contiseries_from.items()}
continentdf['Destidx'] = continentdf['Destination'].map(destination_position)
continentdf['Origidx'] = continentdf['Origin'].map(origin_position)
# Per-capita versions of the 2024 flows, expressed per 100k inhabitants.
# One column is scaled by the origin continent's population and one by the
# destination continent's, so the Dash controls can switch between them.
# Population from Wikipedia and Worldometer
popdict = {
    'ASIA': 4_835_320_061,
    'AFRICA': 1_549_867_585,
    'EUROPE': 742_556_239,
    'LATIN AMERICA AND THE CARIBBEAN': 667_888_552,
    'NORTHERN AMERICA': 387_528_403,
    'OCEANIA': 46_609_602,
}
continentdf['origin_pop_normalized'] = (
    continentdf['2024'] / continentdf['Origin'].map(popdict)
) * 100000
continentdf['destination_pop_normalized'] = (
    continentdf['2024'] / continentdf['Destination'].map(popdict)
) * 100000
# Shared colour palette for the app, keyed by role.
COLORS = dict(
    primary='#3E92CC',    # Blue
    secondary='#2A628F',  # Darker Blue
    success='#13A76C',    # Green
    warning='#FF934F',    # Orange
    danger='#DB5461',     # Red
    info='#5BC0BE',       # Teal
    light='#F2F4F8',      # Light Gray
    dark='#292F36',       # Dark Gray
    bg='#F2F4F8',         # Light background
    text='#292F36',       # Text color
)
# Create the Dash application with the Bootstrap FLATLY stylesheet and a
# viewport meta tag.
app = Dash(
    __name__,
    meta_tags=[dict(name='viewport', content='width=device-width, initial-scale=1')],
    external_stylesheets=[dbc.themes.FLATLY],
)
# Radio group choosing how the flows are scaled; starts on 'orig'.
radios = dbc.RadioItems(
    id='data-type',
    options=[
        dict(label=text, value=key)
        for text, key in (
            ('Raw', 'raw'),
            ('Per 100k Destination', 'dest'),
            ('Per 100k Origin', 'orig'),
        )
    ],
    value='orig',
    inline=True,
    className="mb-4",
    inputClassName="me-2",
    labelClassName="ms-1",
)
# Single-option checklist for the intra-continental links; its initial
# value is an empty list (nothing ticked).
checklist = dbc.Checklist(
    id='intracont-filter',
    options=[dict(label='Yes', value='yes')],
    value=[],
    className="mb-4",
    inputClassName="me-2",
    labelClassName="ms-1 me-3",
    inline=True,
)
# App layout with Bootstrap components: a control-panel card next to a
# column ('plot-card') that starts empty and receives the plot card from
# the callback.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader(
                    "Control Panel",
                    # Dash style dictionaries take camelCased CSS property
                    # names (backgroundColor, not background-color).
                    style={
                        'backgroundColor': COLORS['primary'],
                        'color': 'white',
                        'fontWeight': 'bold',
                    },
                ),
                dbc.CardBody([
                    html.H5("Analyze population by:", className="card-title"),
                    radios,
                    html.H5("Show Intracontinental?", className="card-title"),
                    checklist,
                ]),
            ], className="shadow-sm mb-4"),
        ], width=12, lg=4, className="mb-4"),
        dbc.Col(id='plot-card', width=12, lg=8, className="mb-4"),
    ]),
])
@app.callback(
    Output('plot-card', 'children'),
    Input('data-type', 'value'),
    Input('intracont-filter', 'value'),
)
def make_continent_sankey(data_type, intracont):
    """Build the card holding the continent-to-continent migration Sankey.

    Args:
        data_type: Value of the ``data-type`` radio group. ``'raw'`` sizes
            links by absolute 2024 counts and ``'dest'`` by counts per 100k
            people in the destination continent; any other value (normally
            ``'orig'``) uses counts per 100k people in the origin continent.
        intracont: Value of the ``intracont-filter`` checklist. A non-empty
            list keeps flows whose origin and destination are the same
            continent; an empty list removes them.

    Returns:
        A ``dbc.Card`` with a title header, the Sankey graph followed by an
        explanatory note, and a footer linking to the data source.
    """
    # Work on a private copy so the shared module-level table is not
    # modified each time the callback runs.
    flows = continentdf.copy()

    # Colour slot per row, laid out as a symmetric 6x6 table so that the
    # A->B and B->A links share a colour. The 36 entries must line up with
    # the rows of continentdf (sorted by Destination, then Origin).
    colorindexes = [
        1, 2, 3, 4, 5, 6,
        2, 7, 8, 9, 10, 11,
        3, 8, 12, 13, 14, 15,
        4, 9, 13, 16, 17, 18,
        5, 10, 14, 17, 19, 20,
        6, 11, 15, 18, 20, 21,
    ]
    palette = px.colors.qualitative.Light24[0:21]
    flows['colors'] = [hex_to_rgba(palette[idx - 1], 0.6) for idx in colorindexes]

    if not intracont:
        # Drop the flows that stay within one continent.
        flows = flows.loc[flows['Origin'] != flows['Destination'], :]

    # Node and link definitions shared by all three display modes.
    nodeuse = dict(
        pad=15,
        thickness=10,
        line=dict(color="black", width=0.6),
        label=pd.concat([contiseries_from, contiseries_to]),
        # One hover word per node: every departure-side node says
        # 'departures' and every arrival-side node says 'arrivals'.
        customdata=(['departures'] * len(contiseries_from)
                    + ['arrivals'] * len(contiseries_to)),
        hovertemplate='',
        color="blue",
    )
    linkuse = dict(
        source=flows['Origidx'],
        target=flows['Destidx'],
        value=flows['2024'],
        # Hover text always reports the raw count, in millions.
        customdata=flows['2024'] / 1000000,
        hovertemplate='',
        color=flows['colors'],
    )

    # The note texts are spelled out line by line so their content does not
    # depend on how this source file is indented.
    # NOTE(review): the 'orig' mode below uses the same card title as the
    # raw mode, unlike 'dest' - confirm that is intended.
    if data_type == 'raw':
        nodeuse['hovertemplate'] = '%{label} has %{value} %{customdata}<extra></extra>'
        linkuse['value'] = flows['2024']
        linkuse['hovertemplate'] = '%{customdata:.2f} Million people<br>migrated from %{source.label}<br>to %{target.label}<extra></extra>'
        cardtitle = "Population migration among the continents"
        markdown_note = dcc.Markdown(
            "\n"
            "This plot shows the population flows between continents using raw population numbers.\n"
            "This scales the nodes by the total number of departing people and arriving people on the\n"
            "respective sides of the chart. Asia has the largest number of emigrants and immigrants.\n"
            "Continents of departure are on the left. Continent of arrival is on the right.\n"
        )
    elif data_type == 'dest':
        nodeuse['hovertemplate'] = '%{label} has %{value} %{customdata} per 100k people<br>in the destination continent.<extra></extra>'
        linkuse['value'] = flows['destination_pop_normalized']
        linkuse['hovertemplate'] = '%{customdata:.2f} Million people<br>migrated from %{source.label}<br>to %{target.label}<br>This is %{value} per 100k people<br>in the destination continent<extra></extra>'
        cardtitle = "Population migration among the continents relative to continent of arrival"
        markdown_note = dcc.Markdown(
            "\n"
            "This plot shows the population flows between continents scaled as number\n"
            "per 100k people in the continent of arrival. This shows the impact that a given continent\n"
            "has on the continents that its people migrate to relative to their population.\n"
            "Continents of departure are on the left. Continent of arrival is on the right.\n"
        )
    else:
        nodeuse['hovertemplate'] = '%{label} has %{value} %{customdata} per 100k people<br>in the continent of origin.<extra></extra>'
        linkuse['value'] = flows['origin_pop_normalized']
        linkuse['hovertemplate'] = '%{customdata:.2f} Million people<br>migrated from %{source.label}<br>to %{target.label}<br>This is %{value} per 100k people<br>in the continent of origin<extra></extra>'
        cardtitle = "Population migration among the continents"
        markdown_note = dcc.Markdown(
            "\n"
            "This plot shows the population flows between continents scaled as number\n"
            "per 100k people in the continent of origin. This shows the impact that emigrees\n"
            "have on the continent they are departing from.\n"
            "Continents of departure are on the left. Continent of arrival is on the right.\n"
        )

    fig = go.Figure(data=[go.Sankey(node=nodeuse, link=linkuse)])
    # NOTE(review): the figure font is white on a white paper background -
    # confirm the node labels remain legible.
    fig.update_layout(
        hovermode='x',
        font=dict(size=14, color='white'),
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin={"r": 0, "t": 0, "l": 0, "b": 5},
    )

    return dbc.Card([
        dbc.CardHeader(
            cardtitle,
            # Dash style dictionaries take camelCased CSS property names.
            style={
                'backgroundColor': COLORS['primary'],
                'color': 'white',
                'fontWeight': 'bold',
            },
        ),
        dbc.CardBody([
            dcc.Graph(figure=fig),
            markdown_note,
        ]),
        dbc.CardFooter([
            'Source: ',
            dbc.CardLink(
                "UN Population Division",
                href='https://www.un.org/development/desa/pd/content/international-migrant-stock',
            ),
        ], className="text-center m-0"),
    ], className="shadow-sm mb-4")
if __name__ == '__main__':
    # Start the app's server in debug mode on port 8055 when run as a script.
    app.run(port=8055, debug=True)