spaCy natural language processing with Dash

spaCy is a popular natural language processing library that provides advanced features for tokenization, part-of-speech tagging, and dependency parsing. The spaCy docs on Visualizers show how to use spaCy with Jupyter notebooks and other frameworks, but don’t show how easy it is to use with Dash.

:tada: Here are the missing docs!

If you create something cool - please post it in show-and-tell, or add your example here!

Using spaCy natural language processing with Dash

1. Visualizing the dependency parse

The dependency visualizer, dep, shows part-of-speech tags and syntactic dependencies.

spacy-dependency-parse

from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import base64

# Load the small English pipeline once at import time; the callback reuses it.
nlp = spacy.load("en_core_web_sm")

# Text placed in the textarea when the page first loads.
initial_text = "I went to Seattle"

# The Bootstrap theme stylesheet backs the className utilities used in the layout.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])


def get_svg(svg: str, style: str = ""):
    """Wrap SVG markup in an ``<img>`` tag that embeds it as a base64 data URI.

    ``svg`` is the SVG source text and ``style`` is inserted verbatim into the
    tag's ``style`` attribute. The ``<img>`` element is returned as a string.
    """
    encoded = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    return (
        '<img src="data:image/svg+xml;base64,'
        + encoded
        + '" style="'
        + style
        + '"/>'
    )


# Page structure: title banner, free-text input, and a card whose Markdown
# component ("html") receives the rendered dependency parse from the callback.
title_banner = html.H3(
    "Natural Language Processing with spaCy",
    className="text-center bg-primary text-white p-2 mb-4",
)
text_box = dcc.Textarea(value=initial_text, id="user-input", className="w-100 mb-4")
parse_card = dbc.Card(
    dcc.Markdown(id="html", dangerously_allow_html=True),
    body=True,
    className="mb-5",
)

app.layout = dbc.Container(
    [
        title_banner,
        html.Div("Enter text to analyze"),
        text_box,
        html.H4("Dependency Parse and Part of Speech Tags"),
        parse_card,
    ],
)


@app.callback(Output("html", "children"), Input("user-input", "value"))
def display(text):
    """Render the dependency parse of every sentence in ``text``.

    Returns a string of ``<img>`` tags, one per sentence, each embedding the
    displaCy SVG for that sentence. Returns ``None`` when there is no text to
    parse, which leaves the Markdown component empty.
    """
    # The callback has a single Output, so it must return a single value
    # (not a tuple).
    if not text:
        return None

    # new lines mess up the displacy renderer
    doc = nlp(text.replace("\n", " "))

    # Render each sentence separately and keep all of them (returning from
    # inside the loop would show only the first sentence). The blank line
    # between tags puts each diagram in its own block.
    return "\n\n".join(
        get_svg(displacy.render(sent, style="dep")) for sent in doc.sents
    )


if __name__ == "__main__":
    # `run_server` is the legacy entry point (removed in Dash 3); `run` is the
    # supported way to start the development server.
    app.run(debug=True)


2. Visualizing the entity recognizer

The entity visualizer, ent, highlights named entities and their labels in a text.

named-entities


from dash import Dash, dcc, html
import spacy
from spacy import displacy
app = Dash(__name__)

text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
# The "ent" style marks up the named entities found in the doc as HTML.
html_doc = displacy.render(doc, style="ent")


# dangerously_allow_html lets the Markdown component display that raw HTML.
app.layout = html.Div(dcc.Markdown([html_doc], dangerously_allow_html=True))


if __name__ == "__main__":
    # `run_server` is the legacy entry point (removed in Dash 3); `run` is the
    # supported way to start the development server.
    app.run(debug=True)



3. Visualizing spans

The span visualizer, span, highlights overlapping spans in a text.

spacy-span


from dash import Dash, dcc, html
import spacy
from spacy.tokens import Span
from spacy import displacy
app = Dash(__name__)

text = "Welcome to the Bank of China."
# A blank English pipeline only tokenizes; the spans are added by hand below.
nlp = spacy.blank("en")
doc = nlp(text)

# Two overlapping spans given by token offsets: "Bank of China" and "China".
doc.spans["sc"] = [
    Span(doc, 3, 6, "ORG"),
    Span(doc, 5, 6, "GPE"),
]

html_doc = displacy.render(doc, style="span")
# Collapse the rendered markup onto one line before handing it to Markdown.
html_doc = html_doc.replace("\n", " ")


# dangerously_allow_html lets the Markdown component display that raw HTML.
app.layout = html.Div(dcc.Markdown([html_doc], dangerously_allow_html=True))


if __name__ == "__main__":
    # `run_server` is the legacy entry point (removed in Dash 3); `run` is the
    # supported way to start the development server.
    app.run(debug=True)



Minimal examples

Example 1 - Entity recognizer with multiple text areas

In this example we show the entity recognizer with multiple text areas. It uses pattern matching callbacks to update,
add and/or delete text areas.

spacy-ent-app

import dash
from dash import Dash, dcc, html, Input, Output, State, MATCH

import dash_bootstrap_components as dbc
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Static page shell. The cards themselves are appended to
# "pattern-match-container" by the add_card callback.
page_title = html.H3(
    "Natural Language Processing with spaCy",
    className="text-center bg-primary text-white p-2 mb-4",
)
add_card_button = dbc.Button(
    "Add Text Input area",
    id="pattern-match-add-card",
    n_clicks=0,
    className="mb-3",
)
card_container = html.Div(
    id="pattern-match-container", children=[], className="mt-4"
)

app.layout = dbc.Container(
    dbc.Row(dbc.Col([page_title, add_card_button, card_container])),
    fluid=True,
)


def get_entities(input_text):
    """Return displaCy's entity-highlighted HTML for ``input_text``."""
    # new lines mess up the displacy renderer
    doc = nlp(input_text.replace("\n", " "))

    # Render the whole Doc so the entities are highlighted in context.
    # Passing doc.ents would render each entity span as its own stand-alone
    # document and drop the surrounding text.
    return displacy.render(doc, style="ent")


def make_card(n_clicks):
    """Build one text-input card whose component ids are keyed by ``n_clicks``.

    The card has a header (title plus an "X" button) and a row with a textarea
    next to a Markdown component. Every id is a dict of the form
    ``{"type": ..., "index": n_clicks}`` so the pattern-matching callbacks can
    pair the components of the same card.
    """

    def _id(kind):
        # Pattern-matching id shared by all components of this card.
        return {"type": kind, "index": n_clicks}

    close_button = dbc.Button(
        "X", id=_id("delete-card"), n_clicks=0, color="secondary"
    )
    header = dbc.CardHeader(
        [
            f"Text Input {n_clicks + 1} ",
            html.Div(close_button, className="ms-auto"),
        ],
        className="hstack",
    )

    text_area = dcc.Textarea(id=_id("text-input"), className="w-100")
    entity_view = dcc.Markdown(id=_id("entities"), dangerously_allow_html=True)
    body = dbc.Row([dbc.Col(text_area), dbc.Col(entity_view)])

    return dbc.Card([header, body], className="m-1", id=_id("card"))


@app.callback(
    Output("pattern-match-container", "children"),
    Input("pattern-match-add-card", "n_clicks"),
    State("pattern-match-container", "children"),
)
def add_card(n_clicks, cards):
    """Append a new card, indexed by the current click count, to the container.

    ``cards`` is the container's current list of children; the same list is
    returned with the new card added at the end.
    """
    cards += [make_card(n_clicks)]
    return cards


@app.callback(
    Output({"type": "card", "index": MATCH}, "style"),
    Input({"type": "delete-card", "index": MATCH}, "n_clicks"),
    prevent_initial_call=True,
)
def remove_card(_):
    """Hide the card whose "X" button was clicked.

    The card stays in the layout; only its ``style`` is set so that it is no
    longer displayed.
    """
    hidden = dict(display="none")
    return hidden


@app.callback(
    Output({"type": "entities", "index": MATCH}, "children"),
    Input({"type": "text-input", "index": MATCH}, "value"),
)
def update_figure(text_input):
    """Show the entity markup for the textarea that shares this card's index.

    When the textarea has no value yet, the Markdown component is left as is.
    """
    return dash.no_update if text_input is None else get_entities(text_input)


if __name__ == "__main__":
    # `run_server` is the legacy entry point (removed in Dash 3); `run` is the
    # supported way to start the development server.
    app.run(debug=True)



Example 2 - dcc.Dropdown with spaCy color-coded entity labels

This example creates a dropdown with the color-coded entity labels. It uses components in the options prop of the dcc.Dropdown.

ent-label-dropdown


from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy

from spacy.displacy.render import DEFAULT_LABEL_COLORS

nlp = spacy.load("en_core_web_sm")


app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# Styling shared by every option; only the background colour differs per label.
pill_style = {
    "font-weight": "bold",
    "padding": 5,
    "border-radius": "0.35em",
}

# One dropdown option per displaCy entity label, shown as a coloured Div.
options = [
    {
        "label": html.Div([name], style={"background-color": shade, **pill_style}),
        "value": name,
    }
    for name, shade in DEFAULT_LABEL_COLORS.items()
]


label_dropdown = dbc.InputGroup(
    [
        dcc.Dropdown(options, multi=True, id="label-dropdown", style={"width": 400}),
        dbc.Button("Select All", id="all"),
    ]
)

app.layout = dbc.Container(label_dropdown)


@app.callback(
    Output("label-dropdown", "value"),
    Input("all", "n_clicks"),
)
def select_all_labels(_):
    """Set the dropdown's value to every label in ``DEFAULT_LABEL_COLORS``.

    NOTE(review): the decorator does not set ``prevent_initial_call``, so this
    presumably also runs when the page loads - confirm that is intended.
    """
    return [*DEFAULT_LABEL_COLORS]


if __name__ == "__main__":
    # `run_server` is the legacy entry point (removed in Dash 3); `run` is the
    # supported way to start the development server.
    app.run(debug=True)



Example 3 - Entity recognizer with visualizer and data table

This example shows the details of the entity recognizer in a DataTable.

ent-with table

from dash import Dash, dash_table, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import pandas as pd

nlp = spacy.load("en_core_web_sm")

initial_text = "I went to Seattle"
# Entity attributes read from each span and shown as table columns.
NER_ATTRS = ["text", "start_char", "end_char", "label_"]


app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# The table shows the entity attributes plus a "description" column.
table_columns = [{"name": col, "id": col} for col in [*NER_ATTRS, "description"]]

table = dash_table.DataTable(
    id="table",
    columns=table_columns,
    filter_action="native",
    sort_action="native",
    page_size=10,
    style_table={"overflowX": "auto"},
)

# Page structure: title banner, free-text input, a card whose Markdown
# component ("html") receives the entity markup, and the entity table.
title_banner = html.H3(
    "Natural Language Processing with spaCy",
    className="text-center bg-primary text-white p-4 mb-4",
)
text_box = dcc.Textarea(value=initial_text, id="user-input", className="w-100 mb-4")
entity_card = dbc.Card(
    dcc.Markdown(id="html", dangerously_allow_html=True),
    body=True,
    className="mb-5",
)

app.layout = dbc.Container(
    [
        title_banner,
        html.Div("Enter text to analyze"),
        text_box,
        html.H4("Named Entities"),
        entity_card,
        html.Div(table),
    ],
)


@app.callback(
    Output("html", "children"), Output("table", "data"), Input("user-input", "value")
)
def display(text):
    """Highlight the named entities in ``text`` and list them in the table.

    Returns a pair: displaCy's entity HTML for the Markdown component, and a
    list of row dicts (one per entity) for the DataTable. The table data is
    ``None`` when no entities are found, and both values are ``None`` when
    the textarea value is ``None``.
    """
    if text is None:
        return None, None

    # new lines mess up the displacy renderer
    doc = nlp(text.replace("\n", " "))
    markup = displacy.render(doc, style="ent")

    # Keep the attributes' native types: the character offsets stay numeric,
    # so the table's native sort orders them as numbers rather than as text.
    rows = []
    for ent in doc.ents:
        row = {attr: getattr(ent, attr) for attr in NER_ATTRS}
        row["description"] = spacy.explain(ent.label_)
        rows.append(row)

    return markup, rows or None


if __name__ == "__main__":
    # `run_server` is the legacy entry point (removed in Dash 3); `run` is the
    # supported way to start the development server.
    app.run(debug=True)
5 Likes

Nice @AnnMarieW! It never gets boring here on the forums…

3 Likes