I took a simple approach and really wanted to get a Dash app running: simple bar, line and pie charts slicing the data, based on an LLM's recommendations for what it thought would be interesting illustrations.
I struggled with the vacuum errors because the problems are free-text descriptions, and I had to import and try a couple of libraries to do the keyword search on the problem field. It didn't help that some descriptions were even in different languages, lol.
Common Faults in Specific Products (e.g., Vacuum Cleaners)
import dash
from dash import dcc, html
import plotly.express as px
import plotly.graph_objects as go
from dash.dependencies import Input, Output
import pandas as pd
from collections import Counter
import re
# Load the Repair Cafe International OpenRepair export (July 2024) straight
# from the Plotly Figure-Friday repository. low_memory=False makes pandas parse
# the file in one pass instead of in chunks, which avoids mixed-dtype columns.
# NOTE(review): this runs at import time and needs network access.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2024/week-43/OpenRepair_Data_RepairCafeInt_202407.csv", low_memory=False)
# Basic set of common English stopwords, filtered out of the free-text problem
# descriptions before counting keywords. Kept as a set so membership tests in
# the keyword loop are O(1).
stop_words = {
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves",
    "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their",
    "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was",
    "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and",
    "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between",
    "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off",
    "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any",
    "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than",
    "too", "very", "s", "t", "can", "will", "just", "don", "should", "now",
}
# Initialize the Dash app
app = dash.Dash(__name__)

# Layout: one heading per chart, each followed by the dcc.Graph placeholder
# whose id is the Output/Input target of the matching callback. Without these
# components the callbacks have nothing to render into.
app.layout = html.Div([
    html.H1("Repair Event Data Analysis"),

    html.H2("Top 5 Product Categories Seen at Events"),
    dcc.Graph(id='top-5-product-categories'),

    html.H2("Barriers to Repair"),
    dcc.Graph(id='barriers-to-repair'),

    html.H2("Average Age of Products at Repair Events"),
    dcc.Graph(id='average-age-products'),

    html.H2("Repair Attempts Over Time"),
    dcc.Graph(id='repair-attempts-over-time'),

    html.H2("Repair Success Rate Distribution"),
    dcc.Graph(id='repair-success-rate'),

    html.H2("Common Faults in Specific Products (e.g., Vacuum Cleaners)"),
    dcc.Graph(id='common-faults-products'),
])
@app.callback(
    Output('top-5-product-categories', 'figure'),
    Input('top-5-product-categories', 'id'),
)
def update_top5_product_categories_chart(_):
    """Build the pie chart of the five most frequent product categories.

    The callback listens on the graph's own static ``id`` property, so it
    fires once when the page loads; the input value itself is ignored.

    Returns:
        A Plotly figure with one slice per category, sized by row count.
    """
    top_categories = df['product_category'].value_counts().nlargest(5)
    fig = px.pie(
        values=top_categories.values,
        names=top_categories.index,
        title="Top 5 Product Categories Seen at Events",
    )
    return fig
@app.callback(
    Output('barriers-to-repair', 'figure'),
    Input('barriers-to-repair', 'id'),
)
def update_barriers_to_repair_chart(_):
    """Build the bar chart of reported barriers to repair.

    Rows with no recorded barrier are counted under "No Barrier". The
    callback listens on the graph's own static ``id``, so it fires once on
    page load; the input value is ignored.

    Returns:
        A Plotly bar figure, or an empty figure carrying the error text if
        the chart could not be generated.
    """
    try:
        # Convert to DataFrame and ensure proper columns
        barriers = (
            df['repair_barrier_if_end_of_life']
            .fillna("No Barrier")
            .value_counts()
            .reset_index()
        )
        barriers.columns = ["Barrier", "Count"]
        # Create bar chart
        fig = px.bar(
            barriers, x="Barrier", y="Count",
            title="Barriers to Repair",
            labels={"Barrier": "Barrier", "Count": "Count"},
        )
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and showing the error
        # in the chart area is more useful than a blank graph.
        fig = go.Figure()
        fig.add_annotation(
            text=f"Error generating chart: {str(e)}",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
        )
    return fig
@app.callback(
    Output('average-age-products', 'figure'),
    Input('average-age-products', 'id'),
)
def update_average_age_products_chart(_):
    """Build the bar chart of the ten product categories with the oldest items.

    The callback listens on the graph's own static ``id``, so it fires once
    on page load; the input value is ignored.

    Returns:
        A Plotly bar figure of mean ``product_age`` per ``product_category``
        for the ten categories with the highest mean.
    """
    # Coerce into a local Series instead of writing back to the shared
    # module-level DataFrame: callbacks should not mutate global state.
    ages = pd.to_numeric(df['product_age'], errors='coerce')

    # Mean age per category; categories with no usable age are dropped before
    # taking the top 10.
    avg_age = (
        ages.groupby(df['product_category'])
        .mean()
        .dropna()
        .nlargest(10)
        .reset_index()
    )

    # Explicitly cast for Plotly so the x axis is treated as categorical text
    # and the y axis as numeric.
    avg_age['product_category'] = avg_age['product_category'].astype(str)
    avg_age['product_age'] = avg_age['product_age'].astype(float)

    # Create the bar chart
    fig = px.bar(
        avg_age, x='product_category', y='product_age',
        title="Average Age of Products at Events",
        labels={"product_category": "Product Category", "product_age": "Average Age"},
    )
    return fig
@app.callback(
    Output('repair-attempts-over-time', 'figure'),
    Input('repair-attempts-over-time', 'id'),
)
def update_repair_attempts_over_time_chart(_):
    """Build the line chart of repair attempts per calendar year.

    The callback listens on the graph's own static ``id``, so it fires once
    on page load; the input value is ignored.

    Returns:
        A Plotly line figure with one point per event year.
    """
    # Parse into a local Series rather than adding a column to the shared
    # DataFrame; unparseable dates become NaT and are dropped.
    event_years = pd.to_datetime(df['event_date'], errors='coerce').dropna().dt.year

    # Build a frame with explicitly named columns so the axis titles do not
    # depend on how Plotly names anonymous array arguments.
    repair_attempts = (
        event_years.value_counts()
        .sort_index()
        .rename_axis("Year")
        .reset_index(name="Number of Repair Attempts")
    )
    fig = px.line(
        repair_attempts, x="Year", y="Number of Repair Attempts",
        title="Repair Attempts Over Time",
    )
    return fig
@app.callback(
    Output('repair-success-rate', 'figure'),
    Input('repair-success-rate', 'id'),
)
def update_repair_success_rate_chart(_):
    """Build the donut chart of repair outcomes.

    The callback listens on the graph's own static ``id``, so it fires once
    on page load; the input value is ignored.

    Returns:
        A Plotly pie figure (with a 0.3 hole) showing the share of each
        ``repair_status`` value.
    """
    success_rate = df['repair_status'].value_counts()
    fig = px.pie(
        success_rate,
        values=success_rate.values,
        names=success_rate.index,
        hole=0.3,
        title="Repair Success Rate Distribution",
    )
    return fig
@app.callback(
    Output('common-faults-products', 'figure'),
    Input('common-faults-products', 'id'),
)
def update_common_faults_products_chart(_):
    """Build the bar chart of the ten most common keywords in vacuum problems.

    Problem descriptions for the 'Vacuum' category are lower-cased, split
    into word tokens, and filtered against ``stop_words`` and single-character
    tokens before counting. The callback listens on the graph's own static
    ``id``, so it fires once on page load; the input value is ignored.

    Returns:
        A Plotly bar figure of keyword counts, or an empty figure with a
        "no data" annotation when there are no descriptions or no keywords
        survive filtering.
    """
    vacuum_problems = df[df['product_category'] == 'Vacuum']['problem'].dropna()

    word_pattern = re.compile(r'\b\w+\b')
    all_words = []
    for description in vacuum_problems:
        words = word_pattern.findall(str(description).lower())
        # Accumulate the surviving words; without this the counter stays empty.
        all_words.extend(
            word for word in words if word not in stop_words and len(word) > 1
        )

    word_counts = Counter(all_words).most_common(10)

    # Covers both "no vacuum rows" and "every token was filtered out";
    # zip(*[]) below would otherwise fail to unpack.
    if not word_counts:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for common faults in Vacuum Cleaners",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
        )
        return fig

    keywords, counts = zip(*word_counts)
    fig = px.bar(
        x=list(keywords), y=list(counts),
        title="Common Keywords in Vacuum Cleaner Problems",
        labels={"x": "Keyword", "y": "Count"},
    )
    return fig
if __name__ == '__main__':