Hello Everyone,
Here is my contribution/approach for week 8. Let me start by saying it's quite similar to Ester's dashboard; my focus was just on dog breeds.
In Summary
The code provides an interactive tool to analyze the fate of dog breeds at the Dallas Animal Shelter, exploring the relationship between breed, exit probability, distribution, and length of stay.
These are the images
The code
import pandas as pd
import plotly.express as px
from sksurv.nonparametric import kaplan_meier_estimator
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
# CSV file read by load_and_preprocess_data().
DATA_FILE = 'Dallas_Animal_Shelter_Data_Fiscal_Year_Jan_2024.csv'

# Raw Outcome_Type values, grouped by the category they are mapped to.
_EXIT_OUTCOMES = (
    'ADOPTION', 'RETURNED TO OWNER', 'TRANSFER', 'FOSTER', 'DISPOSAL',
    'TNR', 'WILDLIFE', 'SNR',
)
_STAY_OUTCOMES = (
    'EUTHANIZED', 'DIED', 'LOST EXP', 'FOUND EXP', 'TREATMENT', 'MISSING',
)

# Outcome_Type -> 'Exit' / 'Stay' lookup used to build Outcome_Category.
OUTCOME_MAPPING = {
    **dict.fromkeys(_EXIT_OUTCOMES, 'Exit'),
    **dict.fromkeys(_STAY_OUTCOMES, 'Stay'),
}

# How many of the most frequent breeds the dashboard keeps.
TOP_BREEDS_COUNT = 20
def load_and_preprocess_data(data_file=None, top_n=None, outcome_mapping=None):
    """Load the shelter CSV and keep dog records of the most frequent breeds.

    The CSV must provide the columns ``Animal_Type``, ``Animal_Breed``,
    ``Outcome_Type``, ``Intake_Date``, ``Intake_Time``, ``Outcome_Date`` and
    ``Outcome_Time``.

    Args:
        data_file: Path of the CSV to read. Defaults to ``DATA_FILE``.
        top_n: Number of most frequent breeds to keep. Defaults to
            ``TOP_BREEDS_COUNT``.
        outcome_mapping: Dict translating raw ``Outcome_Type`` values into
            categories; rows whose category is ``'Exit'`` get
            ``Event == True``. Defaults to ``OUTCOME_MAPPING``.

    Returns:
        A ``(df_top_breeds, top_breeds)`` tuple. ``df_top_breeds`` holds the
        dog rows of the selected breeds with the added columns
        ``Outcome_Category``, ``Intake_DateTime``, ``Outcome_DateTime``,
        ``Duration`` (length of stay in days) and ``Event``. ``top_breeds``
        is the index of selected breed names, most frequent first.
    """
    # Defaults are resolved at call time so the module-level constants remain
    # the single source of truth when the function is called without arguments.
    if data_file is None:
        data_file = DATA_FILE
    if top_n is None:
        top_n = TOP_BREEDS_COUNT
    if outcome_mapping is None:
        outcome_mapping = OUTCOME_MAPPING

    df = pd.read_csv(data_file)
    # .copy() so the column assignments below do not trigger
    # SettingWithCopyWarning on a slice of ``df``.
    dogs = df[df['Animal_Type'] == 'DOG'].copy()

    dogs['Outcome_Category'] = dogs['Outcome_Type'].replace(outcome_mapping)
    dogs['Intake_DateTime'] = pd.to_datetime(
        dogs['Intake_Date'] + ' ' + dogs['Intake_Time'])
    dogs['Outcome_DateTime'] = pd.to_datetime(
        dogs['Outcome_Date'] + ' ' + dogs['Outcome_Time'])

    seconds_per_day = 60 * 60 * 24
    stay = dogs['Outcome_DateTime'] - dogs['Intake_DateTime']
    dogs['Duration'] = stay.dt.total_seconds() / seconds_per_day
    dogs['Event'] = dogs['Outcome_Category'] == 'Exit'

    # NaN >= 0 evaluates to False, so this single filter removes both rows
    # with a missing duration and rows whose outcome precedes the intake.
    dogs = dogs[dogs['Duration'] >= 0]

    top_breeds = dogs['Animal_Breed'].value_counts().nlargest(top_n).index
    # Return an independent frame rather than a view-like slice of ``dogs``.
    df_top_breeds = dogs[dogs['Animal_Breed'].isin(top_breeds)].copy()
    return df_top_breeds, top_breeds
# Load the data once at import time; the plotting helpers and the callback
# read these module-level objects.
df_top_breeds, top_breeds = load_and_preprocess_data()

# Give every breed a fixed colour so it is drawn the same way in each chart.
# Palette positions wrap around when there are more breeds than colours.
_palette = px.colors.qualitative.D3
breed_colors = {
    name: _palette[rank % len(_palette)]
    for rank, name in enumerate(top_breeds)
}
def calculate_survival_curves(breeds):
    """Compute one Kaplan-Meier curve per requested breed.

    Args:
        breeds: Iterable of ``Animal_Breed`` values to look up in the
            module-level ``df_top_breeds`` frame. ``None`` is treated as an
            empty selection.

    Returns:
        A long-format DataFrame with the columns ``Time``,
        ``Survival Probability`` and ``Breed``. The frame is empty (but still
        has those columns) when no breed was requested or none of the
        requested breeds has any record.
    """
    columns = ['Time', 'Survival Probability', 'Breed']
    if breeds is None:
        breeds = []

    curves = []
    for breed in breeds:
        breed_data = df_top_breeds[df_top_breeds['Animal_Breed'] == breed]
        if breed_data.empty:
            # Unknown breed: there is nothing to estimate a curve from, so
            # skip it instead of handing empty arrays to the estimator.
            continue
        time, survival_prob = kaplan_meier_estimator(
            breed_data['Event'].astype(bool), breed_data['Duration'])
        curves.append(pd.DataFrame(
            {'Time': time, 'Survival Probability': survival_prob, 'Breed': breed}))

    if not curves:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame with the expected schema instead.
        return pd.DataFrame(columns=columns)
    return pd.concat(curves)
def create_survival_plot(survival_df):
    """Draw the cumulative exit probability over time for each breed.

    Args:
        survival_df: DataFrame with the columns ``Time``,
            ``Survival Probability`` and ``Breed``, as produced by
            ``calculate_survival_curves``.

    Returns:
        A Plotly figure with one line per breed, or an empty figure titled
        "No data to show" when ``survival_df`` has no rows.
    """
    if survival_df.empty:
        return px.line(title="No data to show")

    # The Kaplan-Meier estimate is the probability of NOT having exited yet,
    # expressed as a fraction. The axis is labelled "% Exit Probability", so
    # plot the complement scaled to 0-100. ``assign`` leaves the caller's
    # frame untouched.
    plot_df = survival_df.assign(
        **{'Exit Probability': (1 - survival_df['Survival Probability']) * 100})

    fig = px.line(
        plot_df, x='Time', y='Exit Probability', color='Breed',
        color_discrete_map=breed_colors, markers=True,
        labels={'Time': 'Days', 'Exit Probability': '% Exit Probability'},
        # The estimate is a step function: it only changes at observed exit
        # times, so draw steps instead of a smoothed spline, which can
        # overshoot between points.
        line_shape='hv', template='plotly_white',
    )
    fig.update_layout(
        title_font=dict(size=20, family='Arial', color='black'),
        xaxis_title_font=dict(size=14, family='Arial', color='black'),
        yaxis_title_font=dict(size=14, family='Arial', color='black'),
        legend_title_font=dict(size=16, family='Arial', color='black'),
    )
    return fig
def create_breed_count_plot(filtered_df):
    """Build a bar chart with the number of records per breed.

    Args:
        filtered_df: DataFrame with an ``Animal_Breed`` column, already
            restricted to the breeds to display.

    Returns:
        A Plotly bar figure with one bar per breed, coloured with
        ``breed_colors``, with the y axis and the legend hidden.
    """
    breed_counts = filtered_df['Animal_Breed'].value_counts().reset_index()
    breed_counts.columns = ['Breed', 'Count']
    fig_bar_breedcount = px.bar(
        breed_counts, x='Breed', y='Count',
        # Counts are whole numbers: format them as integers rather than with
        # two decimals ("123" instead of "123.00").
        text_auto='d',
        template='plotly_white', labels={'Breed': ''},
        color='Breed', color_discrete_map=breed_colors,
    )
    # The value is printed on each bar, so the axis itself adds nothing.
    fig_bar_breedcount.update_yaxes(visible=False)
    fig_bar_breedcount.update_layout(showlegend=False)
    return fig_bar_breedcount
def create_breed_duration_plot(filtered_df):
    """Build a bar chart of the mean stay per breed, annotated with the median.

    Args:
        filtered_df: DataFrame with ``Animal_Breed`` and ``Duration`` columns.

    Returns:
        A Plotly bar figure sorted by descending mean duration, with a
        "Median: ..." annotation above each bar and the y axis and legend
        hidden.
    """
    stats = filtered_df.groupby('Animal_Breed')['Duration'].agg(['mean', 'median'])
    stats = stats.reset_index()
    stats = stats.sort_values('mean', ascending=False)

    fig = px.bar(
        stats,
        x='Animal_Breed',
        y='mean',
        text_auto='.2f',
        template='plotly_white',
        labels={'Animal_Breed': ''},
        color='Animal_Breed',
        color_discrete_map=breed_colors,
    )
    fig.update_yaxes(visible=False)
    fig.update_layout(showlegend=False)

    # Place each median label just above the top of its bar.
    for breed, mean_days, median_days in zip(
            stats['Animal_Breed'], stats['mean'], stats['median']):
        fig.add_annotation(
            x=breed,
            y=mean_days,
            text=f"Median: {median_days:.2f}",
            showarrow=False,
            yshift=10,
        )
    return fig
# Shared styles
# Gradient bar used as a separator between dashboard sections.
style_space = {
    'border': 'none',
    'height': '5px',
    'background': 'linear-gradient(to right, #007bff, #ff7b00)',
    'margin': '10px 0',
}
# Same gradient, drawn thinner, for the separator below the dropdown.
_thin_rule_style = {**style_space, 'height': '2px'}
_centered = {'text-align': 'center'}
_intro_style = {
    'text-align': 'center',
    'margin-top': '20px',
    'font-style': 'italic',
    'font-size': '24px',
    'color': 'black',
}


def _rule():
    """Return a new gradient separator (one component instance per use)."""
    return html.Hr(style=style_space)


# Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.LUX])
app.title = ' Dallas Animal Shelter'

_intro = html.Div([
    html.P(
        "Analyzing only dogs, which make up 80% of the shelter animals, we selected the top 20 breeds, representing 90% of this group",
        style=_intro_style,
    ),
    _rule(),
])

# Multi-select of breeds; the first three breeds are preselected.
_breed_dropdown = dcc.Dropdown(
    id='breed-dropdown',
    options=[{'label': breed, 'value': breed} for breed in top_breeds],
    value=top_breeds[:3],
    multi=True,
)
_selector_row = dbc.Row([
    dbc.Col(dbc.Card(_breed_dropdown), width=12,
            class_name="btn-group dash-dropdown"),
])

_survival_row = dbc.Row([
    html.H5("Exit Probability by Days: A Closer Look at Each Breed",
            style=_centered),
    dbc.Col(dcc.Graph(id='survival-plot'), width=12),
])

_count_column = dbc.Col(
    [
        html.H5("Top Dog Breeds at Dallas Shelter: A Count Analysis",
                style=_centered),
        dcc.Graph(id='breed-count-plot'),
    ],
    width=6,
)
_duration_column = dbc.Col(
    [
        html.H5("Shelter Stays: Average and Median Duration Dog Breeds",
                style=_centered),
        dcc.Graph(id='breed-duration-plot'),
    ],
    width=6,
)

app.layout = dbc.Container(
    [
        _rule(),
        html.H2("Dog Diaries: Analyzing Breeds' Fate in Dallas Animal Shelter",
                style=_centered),
        _rule(),
        _intro,
        _selector_row,
        html.Hr(style=_thin_rule_style),
        _survival_row,
        dbc.Row([_count_column, _duration_column]),
    ],
    fluid=True,
)
@app.callback(
    [
        Output('survival-plot', 'figure'),
        Output('breed-count-plot', 'figure'),
        Output('breed-duration-plot', 'figure'),
    ],
    Input('breed-dropdown', 'value'),
)
def update_plot(selected_breeds):
    """Refresh the three charts whenever the breed selection changes.

    Args:
        selected_breeds: Current value of the 'breed-dropdown' component.

    Returns:
        A 3-tuple of figures for the survival, breed-count and breed-duration
        graphs, in that order. With nothing selected, all three are empty
        placeholder figures.
    """
    if not selected_breeds:
        return create_survival_plot(pd.DataFrame()), px.bar(), px.bar()

    selection = df_top_breeds[df_top_breeds['Animal_Breed'].isin(selected_breeds)]
    survival_fig = create_survival_plot(calculate_survival_curves(selected_breeds))
    count_fig = create_breed_count_plot(selection)
    duration_fig = create_breed_duration_plot(selection)
    return survival_fig, count_fig, duration_fig
if __name__ == '__main__':
    # Start the development server only when the file is run as a script.
    # ``app.run`` replaces ``app.run_server``, which is deprecated and no
    # longer available in Dash 3.
    app.run(debug=True)
I'm trying to upload it to PyCafe; unfortunately, that is not possible at the moment because I'm using the sksurv library (scikit-survival),
which PyCafe does not recognize or cannot work with. It may be a PyCafe limitation, I don't know. Anyway, I'll keep trying, and if I succeed I'll share the app up and running.
Any clues/comments/suggestions are more than welcome