spaCy is a popular natural language processing library that provides advanced features for tokenization, part-of-speech tagging, and dependency parsing. The spaCy docs on Visualizers show how to use spaCy with Jupyter notebooks and other frameworks, but don’t show how easy it is to use with Dash.
Here are the missing docs!
If you create something cool - please post it in show-and-tell, or add your example here!
Using spaCy natural language processing with Dash
1. Visualizing the dependency parse
The dependency visualizer, dep, shows part-of-speech tags and syntactic dependencies.
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import base64
# Text placed in the textarea when the page first loads.
initial_text = "I went to Seattle"

# Load the en_core_web_sm pipeline once at import time; the callback reuses it.
nlp = spacy.load("en_core_web_sm")

# Dash application using the SPACELAB theme from dash-bootstrap-components.
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.SPACELAB],
)
def get_svg(svg: str, style: str = "") -> str:
    """Wrap SVG markup in an ``<img>`` tag that uses a base64 data URI.

    Args:
        svg: SVG markup as text.
        style: Optional inline CSS placed in the tag's ``style`` attribute.

    Returns:
        An HTML ``<img>`` element, as a string, whose ``src`` is a
        ``data:image/svg+xml;base64,...`` URI holding the encoded SVG.
    """
    # Imported locally under its own name: at module level ``html`` refers to
    # Dash's component namespace, so the stdlib module is not bound there.
    from html import escape

    b64 = base64.b64encode(svg.encode("utf-8")).decode("ascii")
    # Escape the CSS so a quote inside it cannot terminate the attribute.
    safe_style = escape(style, quote=True)
    return (
        f'<img src="data:image/svg+xml;base64,{b64}" '
        f'style="{safe_style}"/>'
    )
# Page layout: title banner, input textarea, and a card whose Markdown
# component (id "html") receives the rendered dependency parse.
app.layout = dbc.Container(
    children=[
        html.H3(
            children="Natural Language Processing with spaCy",
            className="text-center bg-primary text-white p-2 mb-4",
        ),
        html.Div(children="Enter text to analyze"),
        dcc.Textarea(
            id="user-input",
            value=initial_text,
            className="w-100 mb-4",
        ),
        html.H4(children="Dependency Parse and Part of Speech Tags"),
        dbc.Card(
            children=dcc.Markdown(id="html", dangerously_allow_html=True),
            className="mb-5",
            body=True,
        ),
    ],
)
@app.callback(Output("html", "children"), Input("user-input", "value"))
def display(text):
    """Render the dependency parse of every sentence in ``text``.

    Args:
        text: Current value of the ``user-input`` textarea, or ``None``.

    Returns:
        A string of ``<img>`` tags (one per sentence, separated by blank
        lines) for the ``html`` Markdown component, or an empty string when
        there is no text to parse.
    """
    if not text:
        # This callback has a single Output, so return exactly one value.
        return ""
    # new lines mess up the displacy renderer
    doc = nlp(text.replace("\n", " "))
    # Render each sentence on its own and keep all of them; assigning inside
    # a loop would leave only the last sentence visible.
    images = [
        get_svg(displacy.render(sent, style="dep"))
        for sent in doc.sents
    ]
    return "\n\n".join(images)
if __name__ == "__main__":
    # Launch the Dash development server with debug tooling enabled.
    # ``app.run`` is used because ``run_server`` is the deprecated name.
    app.run(debug=True)
2. Visualizing the entity recognizer
The entity visualizer, ent, highlights named entities and their labels in a text.
from dash import Dash, dcc, html
import spacy
from spacy import displacy
app = Dash(__name__)

text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."

# Parse the sample text and render its named entities with displaCy.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
html_doc = displacy.render(doc, style="ent")

# dangerously_allow_html is set so dcc.Markdown renders the displaCy markup
# instead of showing it as text.
app.layout = html.Div(dcc.Markdown([html_doc], dangerously_allow_html=True))

if __name__ == "__main__":
    # Launch the Dash development server with debug tooling enabled.
    # ``app.run`` is used because ``run_server`` is the deprecated name.
    app.run(debug=True)
3. Visualizing spans
The span visualizer, span, highlights overlapping spans in a text.
from dash import Dash, dcc, html
import spacy
from spacy.tokens import Span
from spacy import displacy
app = Dash(__name__)

text = "Welcome to the Bank of China."

# A blank English pipeline is used here; the spans are added by hand below.
nlp = spacy.blank("en")
doc = nlp(text)

# Two overlapping spans stored under the "sc" key, given by token offsets.
doc.spans["sc"] = [
    Span(doc, 3, 6, "ORG"),
    Span(doc, 5, 6, "GPE"),
]

html_doc = displacy.render(doc, style="span")
# Collapse the markup onto a single line before handing it to dcc.Markdown.
html_doc = html_doc.replace("\n", " ")

app.layout = html.Div(dcc.Markdown([html_doc], dangerously_allow_html=True))

if __name__ == "__main__":
    # Launch the Dash development server with debug tooling enabled.
    # ``app.run`` is used because ``run_server`` is the deprecated name.
    app.run(debug=True)
Minimal examples
Example 1 - Entity recognizer with multiple text areas
In this example we show the entity recognizer with multiple text areas. It uses pattern matching callbacks to update,
add and/or delete text areas.
import dash
from dash import Dash, dcc, html, Input, Output, State, MATCH
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
# spaCy pipeline shared by the callbacks in this example.
nlp = spacy.load("en_core_web_sm")

app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Full-width page: title banner, an "add" button, and an initially empty
# container (id "pattern-match-container") that holds the text-input cards.
app.layout = dbc.Container(
    children=dbc.Row(
        children=dbc.Col(
            children=[
                html.H3(
                    children="Natural Language Processing with spaCy",
                    className="text-center bg-primary text-white p-2 mb-4",
                ),
                dbc.Button(
                    children="Add Text Input area",
                    id="pattern-match-add-card",
                    className="mb-3",
                    n_clicks=0,
                ),
                html.Div(
                    id="pattern-match-container",
                    className="mt-4",
                    children=[],
                ),
            ]
        )
    ),
    fluid=True,
)
def get_entities(input_text):
    """Return displaCy ``ent`` markup for the entities found in ``input_text``.

    Line breaks are replaced with spaces before parsing because new lines
    mess up the displaCy renderer.
    """
    single_line = input_text.replace("\n", " ")
    # NOTE(review): ``doc.ents`` (the entity spans) is passed rather than the
    # whole doc, so presumably only the entities are rendered, not the
    # surrounding text — confirm this is intended.
    entity_spans = nlp(single_line).ents
    return displacy.render(entity_spans, style="ent")
def make_card(n_clicks):
    """Build one text-input card whose component ids are keyed by ``n_clicks``.

    The card has a header (title plus a delete button) and a row holding a
    textarea next to a Markdown area. Every component id is a dict of the
    form ``{"type": <kind>, "index": n_clicks}`` so pattern-matching
    callbacks can address the pieces of a single card.
    """

    def component_id(kind):
        # Pattern-matching id shared by all components of this card.
        return {"type": kind, "index": n_clicks}

    delete_button = dbc.Button(
        "X",
        id=component_id("delete-card"),
        n_clicks=0,
        color="secondary",
    )
    header = dbc.CardHeader(
        [
            f"Text Input {n_clicks + 1} ",
            html.Div(delete_button, className="ms-auto"),
        ],
        className="hstack",
    )

    text_area = dcc.Textarea(
        id=component_id("text-input"),
        className="w-100",
    )
    entity_view = dcc.Markdown(
        id=component_id("entities"),
        dangerously_allow_html=True,
    )
    body = dbc.Row([dbc.Col(text_area), dbc.Col(entity_view)])

    return dbc.Card(
        [header, body],
        className="m-1",
        id=component_id("card"),
    )
@app.callback(
    Output("pattern-match-container", "children"),
    Input("pattern-match-add-card", "n_clicks"),
    State("pattern-match-container", "children"),
)
def add_card(n_clicks, cards):
    """Return the container's children with one more card at the end.

    Args:
        n_clicks: Click count of the "add" button; used as the new card's index.
        cards: Current children of ``pattern-match-container``.
    """
    return cards + [make_card(n_clicks)]
@app.callback(
    Output({"type": "card", "index": MATCH}, "style"),
    Input({"type": "delete-card", "index": MATCH}, "n_clicks"),
    prevent_initial_call=True,
)
def remove_card(_):
    """Hide the card whose delete button was clicked.

    The card is hidden with ``display: none``; it is not removed from the
    container's children.
    """
    hidden_style = dict(display="none")
    return hidden_style
@app.callback(
    Output({"type": "entities", "index": MATCH}, "children"),
    Input({"type": "text-input", "index": MATCH}, "value"),
)
def update_figure(text_input):
    """Refresh a card's entity markup from the text in that card's textarea.

    Returns ``dash.no_update`` while the textarea has no value yet, otherwise
    the markup produced by ``get_entities``.
    """
    return dash.no_update if text_input is None else get_entities(text_input)
if __name__ == "__main__":
    # Launch the Dash development server with debug tooling enabled.
    # ``app.run`` is used because ``run_server`` is the deprecated name.
    app.run(debug=True)
Example 2 - dcc.Dropdown with spaCy color-coded entity labels
This example creates a dropdown with the color-coded entity labels. It uses components in the options prop of the dcc.Dropdown.
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy.displacy.render import DEFAULT_LABEL_COLORS
nlp = spacy.load("en_core_web_sm")

app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# One dropdown option per displaCy entity label. Each option's label is a
# component (an html.Div) coloured with that label's default displaCy colour.
# Style keys are camelCased, as Dash expects for ``style`` dictionaries.
options = [
    {
        "label": html.Div(
            [label],
            style={
                "backgroundColor": color,
                "fontWeight": "bold",
                "padding": 5,
                "borderRadius": "0.35em",
            },
        ),
        "value": label,
    }
    for label, color in DEFAULT_LABEL_COLORS.items()
]

# Multi-select dropdown with a "Select All" button attached to it.
label_dropdown = dbc.InputGroup(
    [
        dcc.Dropdown(
            options,
            multi=True,
            id="label-dropdown",
            style={"width": 400},
        ),
        dbc.Button("Select All", id="all"),
    ]
)

app.layout = dbc.Container(label_dropdown)
@app.callback(Output("label-dropdown", "value"), Input("all", "n_clicks"))
def select_all_labels(_):
    """Select every entity label in the dropdown.

    The click count is ignored; the return value is always the full list of
    keys of ``DEFAULT_LABEL_COLORS``.
    """
    # NOTE(review): no prevent_initial_call is set, so this presumably also
    # runs on page load and preselects everything — confirm that is intended.
    return list(DEFAULT_LABEL_COLORS)
if __name__ == "__main__":
    # Launch the Dash development server with debug tooling enabled.
    # ``app.run`` is used because ``run_server`` is the deprecated name.
    app.run(debug=True)
Example 3 - Entity recognizer with visualizer and data table
This example shows the details of the entity recognizer in a DataTable.
from dash import Dash, dash_table, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import pandas as pd
# Text placed in the textarea when the page first loads.
initial_text = "I went to Seattle"

# Entity attributes read with getattr() for each table row.
NER_ATTRS = ["text", "start_char", "end_char", "label_"]

# Load the en_core_web_sm pipeline once at import time; the callback reuses it.
nlp = spacy.load("en_core_web_sm")

app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.SPACELAB],
)

# Table of entity details: one column per NER attribute plus "description".
table = dash_table.DataTable(
    id="table",
    columns=[
        {"name": column, "id": column}
        for column in [*NER_ATTRS, "description"]
    ],
    filter_action="native",
    sort_action="native",
    page_size=10,
    style_table={"overflowX": "auto"},
)

# Page layout: title banner, input textarea, a card for the highlighted
# entities (id "html"), and the details table underneath.
app.layout = dbc.Container(
    children=[
        html.H3(
            children="Natural Language Processing with spaCy",
            className="text-center bg-primary text-white p-4 mb-4",
        ),
        html.Div(children="Enter text to analyze"),
        dcc.Textarea(
            id="user-input",
            value=initial_text,
            className="w-100 mb-4",
        ),
        html.H4(children="Named Entities"),
        dbc.Card(
            children=dcc.Markdown(id="html", dangerously_allow_html=True),
            className="mb-5",
            body=True,
        ),
        html.Div(children=table),
    ],
)
@app.callback(
    Output("html", "children"),
    Output("table", "data"),
    Input("user-input", "value"),
)
def display(text):
    """Render the named entities of ``text`` and list them in the table.

    Args:
        text: Current value of the ``user-input`` textarea, or ``None``.

    Returns:
        A ``(markup, rows)`` pair: the displaCy ``ent`` markup for the
        Markdown component and the table records (one dict per entity with
        the ``NER_ATTRS`` values as strings plus a ``description``). ``rows``
        is ``None`` when no entities were found; both are ``None`` when
        ``text`` is ``None``.
    """
    if text is None:
        return None, None

    # new lines mess up the displacy renderer
    doc = nlp(text.replace("\n", " "))
    markup = displacy.render(doc, style="ent")

    rows = [
        [str(getattr(entity, attr)) for attr in NER_ATTRS]
        for entity in doc.ents
    ]
    if not rows:
        return markup, None

    frame = pd.DataFrame(rows, columns=NER_ATTRS)
    # spacy.explain gives the description for each entity label.
    frame["description"] = frame["label_"].apply(spacy.explain)
    return markup, frame.to_dict("records")
if __name__ == "__main__":
    # Launch the Dash development server with debug tooling enabled.
    # ``app.run`` is used because ``run_server`` is the deprecated name.
    app.run(debug=True)