I’m working from the data set linked here and trying to produce plotly
graphs that resemble these I’ve made with matplotlib
(see image below).
The full reproducible code is below.
import datetime as dt
import plotly.graph_objects as go
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Read-in admissions data
filename_admitdata = '/Users/Shared/lsdata_cleaned.csv'
df11 = pd.read_csv(filename_admitdata, low_memory=False)
# Convert sent_at and decision_at to datetime
df11.loc[:, 'sent_at'] = pd.to_datetime(df11['sent_at'])
df11.loc[:, 'decision_at'] = pd.to_datetime(df11['decision_at'])
cycles = [18, 19, 20, 21]
# Normalize years
for i, cycle in enumerate(cycles[1:]):
df11.loc[df11['cycle'] == cycle, 'sent_at'] = \
df11[df11['cycle'] == cycle]['sent_at'].map(lambda x: dt.datetime(x.year - (i + 1), x.month, x.day))
df11.loc[df11['cycle'] == cycle, 'decision_at'] = \
df11[df11['cycle'] == cycle]['decision_at'].map(lambda x: dt.datetime(x.year - (i + 1), x.month, x.day))
top_eleven_list = ['Yale University', 'Harvard University', 'Stanford University', 'University of Chicago',
'Columbia University', 'New York University', 'University of Pennsylvania', 'University of Virginia',
'University of Michigan', 'University of California—Berkeley', 'Northwestern University']
fig = go.Figure()
# Add traces
for c in cycles:
fig.add_trace(go.Scatter(
x=df11[(df11['school_name'] == 'Yale University') & (df11['cycle'] == c)]['sent_at'],
y=df11[(df11['school_name'] == 'Yale University') & (df11['cycle'] == c)]['decision_at'],
mode='markers',
marker=dict(
size=6,
color=df11[(df11['school_name'] == 'Yale University') & (df11['cycle'] == c)]['color'],
symbol=df11[(df11['school_name'] == 'Yale University') & (df11['cycle'] == c)]['marker'],
line=dict(
width=1,
color=df11[(df11['school_name'] == 'Yale University') & (df11['cycle'] == c)]['splits'])),
name=str(c-1) + '/' + str(c),
type='scatter',
text=df11['cycle'],
hovertemplate='Cycle: 20%{text}<br>' + 'Sent: %{x}<br>' + 'Decision: %{y}<extra></extra>'
)
)
# Dropdown menu
updatemenu = []
buttons = []
# Button with one option for each school
for school in top_eleven_list:
buttons.append(
dict(
method='update',
label=school,
visible=True,
args=[{
'x': [df11[(df11['school_name'] == school) & (df11['cycle'] == c)]['sent_at'] for c in cycles],
'y': [df11[(df11['school_name'] == school) & (df11['cycle'] == c)]['decision_at'] for c in cycles],
'marker':
dict(
size=5,
color=list(itertools.chain(*[df11[(df11['school_name'] == school) & (df11['cycle'] == c)]['color'] for c in cycles])),
symbol=list(itertools.chain(*[df11[(df11['school_name'] == school) & (df11['cycle'] == c)]['marker'] for c in cycles])),
line=dict(
width=1,
color=list(itertools.chain(*[df11[(df11['school_name'] == school) & (df11['cycle'] == c)]['splits'] for c in cycles])))),
'name': [str(c-1) + '/' + str(c) for c in cycles],
},
],
)
)
# Adjust updatemenus
updatemenu = []
menu = dict()
updatemenu.append(menu)
updatemenu[0]['buttons'] = buttons
updatemenu[0]['direction'] = 'down'
updatemenu[0]['showactive'] = True
updatemenu[0]['pad'] = {"r": 10, "t": 10}
updatemenu[0]['x'] = 0.0
updatemenu[0]['xanchor'] = 'left'
updatemenu[0]['y'] = 1.1
updatemenu[0]['yanchor'] = 'top'
fig.update_layout(
updatemenus=updatemenu,
xaxis_title="Sent Date",
yaxis_title="Decision Date",
legend_title="App. Cycle"
)
fig.update_xaxes(
dtick="M1",
tickformat="%B",
ticklabelmode="period",
range=[min(df11['sent_at']) - dt.timedelta(days=4), dt.datetime(2018, 3, 15)]
)
fig.update_yaxes(
dtick="M1",
tickformat="%B",
ticklabelmode="period",
range=[min(df11['decision_at']) - dt.timedelta(days=7), dt.datetime(2018, 5, 7)]
)
fig.show()
I have the following questions/troubles:
- When I select a (new) school from the dropdown, the marker symbols seem to scrambled and in the legend they all change to be the same one. (See images below.)
- Can the legend marker color be modified, to be gray/neutral rather than (randomly) as from plot marker colors?
- Can the sent_at and decision_at hover labels be more specific than just months? “03/12” rather than “March”?
Original:
After selecting Stanford from the dropdown:
From matplotlib
: