Bar Chart Missing Label

Hello everyone,

I am working on creating a dashboard analyzing the SuperCluster Astronaut Database. There are some data points I want to display in a bar chart that describe the accomplishments of the astronauts. I want to display the total awards per category for any selected country.

Here is my code to produce this dashboard:

#Import packages
from enum import unique
import pandas as pd
import numpy as np
import os
import plotly.express as px
import dash
from dash import dcc
from dash import html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import requests
import collections


#Download the astronaut database from SuperCluster
astronaut_db_url = 'https://supercluster-iadb.s3.us-east-2.amazonaws.com/adb.json'
astronauts_db = requests.get(astronaut_db_url).json()


#Make dataframes
df1 = pd.json_normalize(astronauts_db['astronauts'])
df2 = pd.json_normalize(astronauts_db['missions'])

#Grab columns
df_astro = df1[['_id','astroNumber','awards','name','gender','inSpace','overallNumber','spacewalkCount','species','speciesGroup',
                'totalMinutesInSpace','totalSecondsSpacewalking','lastLaunchDate.utc']]

df_miss = df2[['_id','astronauts','keywords','name',
               'seriesName','shortDescription','vagueLaunchDate',
               'landDate.utc','launchDate.utc']]


#Change column names
df_astro = df_astro.rename(columns={'_id': 'astronaut_id'})

#Get row per award
df_awards = df_astro[['astronaut_id', 'awards']].copy()
df_awards['awards'] = df_awards['awards'].apply(lambda awards: [award['title'] for award in awards])

#Join awards column back on astronaut df
df_astro = pd.merge(df_astro,df_awards,how='left',on=['astronaut_id'])

#Clean up astronaut df
del df_astro['awards_x']
df_astro = df_astro.rename(columns={'awards_y': 'awards'})


#Change column names
df_miss = df_miss.rename(columns={'_id': 'mission_id'})

#Expand df to have multiple rows (many astronauts per mission)
df_test = df_miss.explode(['astronauts']).reset_index(drop=True)


#Pull out list of astronauts from JSON format
astronauts = pd.json_normalize(df_test['astronauts'])


#Add list of astronauts back into mission df
df_miss = pd.concat([df_test, astronauts], axis=1)

#Change column names
df_miss = df_miss.rename(columns={'_id': 'astronaut_id'})
del df_miss['astronauts']

#Cleaning time/day variables
df_miss['launch_time'] = pd.to_datetime(df_miss['launchDate.utc']).dt.time
df_miss['land_time'] = pd.to_datetime(df_miss['landDate.utc']).dt.time
df_miss['launch_date'] = df_miss['vagueLaunchDate']
df_miss['land_date'] = pd.to_datetime(df_miss['landDate.utc']).dt.date

del df_miss['vagueLaunchDate'],df_miss['landDate.utc'], df_miss['launchDate.utc']

#Join astronaut database with mission database
df_full = pd.merge(df_miss,df_astro,how='left',on=['astronaut_id'])

# Number of Awards per Astronaut
df_full['num_awards'] = df_full['awards'].str.len()
del df_full['lastLaunchDate.utc']


df_full = df_full.rename(columns={'name_x': 'mission_name'})
df_full = df_full.rename(columns={'name_y': 'astronaut_name'})


#Get the countries
from bs4 import BeautifulSoup
#!pip install selenium
from selenium import webdriver
#!pip install webdriver_manager
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium.webdriver.chrome.options import Options


data = []

url = 'https://www.supercluster.com/astronauts?ascending=false&limit=5000&list=true&sort=launch%20order'

options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(),options=options)
driver.maximize_window()
driver.get(url)
time.sleep(10)

soup = BeautifulSoup(driver.page_source, 'lxml')
driver.close()
tags = soup.select('.astronaut_cell.x')

for item in tags:
    name = item.select_one('.bau.astronaut_cell__title.bold.mr05').get_text()
    country = item.select_one('.mouseover__contents.rel.py05.px075.bau.caps.small.ac')
    if country:
        country=country.get_text()
    
    data.append([name, country])



cols=['name','country']
df = pd.DataFrame(data,columns=cols)

df['names'] = df['name'].str.split(", ")

df['last_names'] = df['names'].str[0]
df['first_names'] = df['names'].str[1]
df['full_names'] = df['first_names'] + ' ' + df['last_names']
del df['names'], df['first_names'], df['name'], df['last_names']

df = df.rename(columns={'full_names': 'astronaut_name'})

#Join country onto full astro df
astro_db = pd.merge(df_full,df,how='left',on=['astronaut_name'])

astro_db['launch_year'] = astro_db['launch_date'].str[0:4].astype(int)

#choice - test out dropdown
astro_db['ones'] = 1
country_condensed = astro_db[['country','ones']]
country_condensed = country_condensed.groupby(['country']).sum().reset_index()
country_condensed = country_condensed[country_condensed['ones']>1]

country_choices = country_condensed['country'].astype('str').unique()
country_choices = sorted(country_choices)


app = dash.Dash(__name__,assets_folder=os.path.join(os.curdir,"assets"))
server = app.server
app.layout = html.Div(
    children=[
            dbc.Row([
                dbc.Col([
                    dcc.Dropdown(
                        id='dropdown0',
                        style={'color':'black'},
                        options=[{'label': i, 'value': i} for i in country_choices],
                        value=country_choices[-1]
                    )
                ])
            ]),
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id='award_bar_chart')
                ],width=12)
            ])
        ]
)


@app.callback(

    Output('award_bar_chart','figure'),
    Input('dropdown0','value')
)
def bar_chart(dd0):
    filtered = astro_db[astro_db['country']==dd0]
    filtered = filtered.drop_duplicates(subset='astronaut_name', keep="first")
    
    #Pull out unique awards per country
    unique_awards = filtered[['astronaut_name','country','awards']]
    unique_awards['awards_string'] = [','.join(map(str, l)) for l in unique_awards['awards']]
    
    
    unique_awards['ISS_Visitor'] = np.where(unique_awards['awards_string'].str.contains('ISS Visitor'),1,0)
    unique_awards['Crossed_Karman'] = np.where(unique_awards['awards_string'].str.contains('Crossed Kármán Line'),1,0)
    unique_awards['Elite_Spacewalker'] = np.where(unique_awards['awards_string'].str.contains('Elite Spacewalker'),1,0)
    unique_awards['Space_Resident'] = np.where(unique_awards['awards_string'].str.contains('Space Resident'),1,0)
    unique_awards['Frequent_Walker'] = np.where(unique_awards['awards_string'].str.contains('Frequent Walker'),1,0)
    unique_awards['Frequent_Flyer'] = np.where(unique_awards['awards_string'].str.contains('Frequent Flyer'),1,0)
    unique_awards['Elite_Spaceflyer'] = np.where(unique_awards['awards_string'].str.contains('Elite Spaceflyer'),1,0)
    unique_awards['Moonwalker'] = np.where(unique_awards['awards_string'].str.contains('Moonwalker'),1,0)
    unique_awards['Memorial'] = np.where(unique_awards['awards_string'].str.contains('Memorial'),1,0)
    unique_awards['Crossed_80KM'] = np.where(unique_awards['awards_string'].str.contains('Crossed 80km Line'),1,0)

    del unique_awards['awards'], unique_awards['awards_string'], unique_awards['country']


    num_a = len(unique_awards[unique_awards['Crossed_80KM']==1]['astronaut_name'].unique())
    num_b = len(unique_awards[unique_awards['Crossed_Karman']==1]['astronaut_name'].unique())
    num_c = len(unique_awards[unique_awards['ISS_Visitor']==1]['astronaut_name'].unique())
    num_d = len(unique_awards[unique_awards['Elite_Spacewalker']==1]['astronaut_name'].unique())
    num_e = len(unique_awards[unique_awards['Space_Resident']==1]['astronaut_name'].unique())
    num_f = len(unique_awards[unique_awards['Frequent_Walker']==1]['astronaut_name'].unique())
    num_g = len(unique_awards[unique_awards['Frequent_Flyer']==1]['astronaut_name'].unique())
    num_h = len(unique_awards[unique_awards['Elite_Spaceflyer']==1]['astronaut_name'].unique())
    num_i = len(unique_awards[unique_awards['Moonwalker']==1]['astronaut_name'].unique())
    num_j = len(unique_awards[unique_awards['Memorial']==1]['astronaut_name'].unique())


    bar_dict = {
        num_a: 'Crossed 80KM Line',
        num_b: 'Crossed Kármán Line',
        num_c: 'ISS Visitor',
        num_d: 'Elite Spacewalker',
        num_e: 'Space Resident',
        num_f: 'Frequent Walker',
        num_g: 'Frequent Flyer',
        num_h: 'Elite Spaceflyer',
        num_i: 'Moonwalker',
        num_j: 'Memorial'
    }
    


    od = collections.OrderedDict(sorted(bar_dict.items(),reverse=True))
    new_df = pd.DataFrame(od.items(), columns=['# Astronauts', 'Awards'])
    
    new_df = new_df[new_df['# Astronauts']>0]
    
    bar_fig = px.bar(new_df,
        x = 'Awards',
        y = '# Astronauts',
        template='plotly_dark'
    )
    bar_fig

    return bar_fig

if __name__=='__main__':
	app.run_server()

I have no idea what is going on under the hood, but I’m getting different results in the bar chart than what is reflected in the data. Can someone help me figure out what is going on?

Issue #1: When I’ve run the code under the callback for just the United States of America - I get the correct numbers, but for some reason when packaging everything into the bar chart, the award, “Crossed 80KM Line” is not included in the chart.

Issue #2: When I run the entire app, only several awards show up with incorrect numbers in the bar chart.

I am absolutely stumped. Any help would be appreciated!

Thank you!

Any help would be appreciated!