Set PCA loadings (aka arrows) in a 3d Scatter plot

I’m trying to plot a PCA in 3D.
For those who don’t know a PCA is simply plotted as a scatterplot and annotated with arrows that represents some feature of the analyzed objects, with different lengths based on how important that feature is.

This is a 2D example


import numpy as np
import pandas as pd
from scipy.stats import norm
import plotly.express as px
import plotly.graph_objects as go

def make_pca_plot(feature_df, features,  
                  arrowsize = 1,
                  arrowhead = 1,
                  arrowscale = 6,
                  ):

    pca = PCA()
    pca.fit(feature_df[features])
    components = pca.fit_transform(feature_df[features])
    loadings = pca.components_.T * np.sqrt(pca.explained_variance_)

    fig = px.scatter(components, x=0, y=1)

    for i, feature in enumerate(features):
        fig.add_annotation(
            ax=0, ay=0,
            axref="x", ayref="y",
            x=loadings[i, 0]*arrowscale,
            y=loadings[i, 1]*arrowscale,
            showarrow=True,
            arrowsize=arrowsize,
            arrowhead=arrowhead,
            xanchor="right",
            yanchor="top"
        )
        fig.add_annotation(
            x=loadings[i, 0]*arrowscale,
            y=loadings[i, 1]*arrowscale,
            ax=0, ay=0,
            xanchor="center",
            yanchor="bottom",
            text=feature,
            yshift=5,
        )
    fig.update_layout(title='Total explained variance PC1+PC2: {}'.format(round(pca.explained_variance_ratio_[0:2].cumsum()[-1],2)))
    return fig

np.random.seed(10)
testdf = pd.DataFrame({
    'A' : np.random.rand(100), 
    'B' : norm(0,1).rvs(100),
    'C' : norm(2,1).rvs(100),
    'D' : norm(2,1).rvs(100),
})

make_pca_plot(testdf, testdf.columns)

As you can see there are arrows that start from the origin of the axes and end in set position depending on the loading variable value for that feature.

I want to obtain the same result in a 3D scatter plot, but I can’t manage to do that. Specifically i do not understand how to set the start of the arrow on the origin.

So far I obtained this

def make_3D_pca_plot(feature_df, features,  
                  arrowsize = 1,
                  arrowhead = 1,
                  arrowscale = 6,
                  ):

    pca = PCA()
    pca.fit(feature_df[features])
    components = pca.fit_transform(feature_df[features])
    loadings = pca.components_.T * np.sqrt(pca.explained_variance_)

    fig = px.scatter_3d(components, x=0, y=1, z=2)

    fig.update_layout(
        scene = dict(
            annotations=[
                dict(
                    # ax=0, ay=0, 
                    showarrow = True,
                    arrowsize=arrowsize,
                    arrowhead=arrowhead,
                    x = loadings[i, 0]*arrowscale,
                    y = loadings[i, 1]*arrowscale,
                    z = loadings[i, 2]*arrowscale,
                    xanchor="center",
                    yanchor="bottom",
                    text = feature,
                    yshift=5,
                )
            for i, feature in enumerate(features)]
        )
    )
    fig.update_layout(title='Total explained variance PC 1+2+3: {}'.format(round(pca.explained_variance_ratio_[0:3].cumsum()[-1],2)))
    return fig

make_3D_pca_plot(testdf, testdf.columns[0:3],  
                  arrowscale=4
                  )

image

if i uncomment the ax or ay parameter the arrows disappear.
Also you can tweak arrowscale to see what is happening to the annotations.

HI @Mirk0_98, I actually never used annotitions in 3D. Does the example 3D Annotations help?

Instead of using annotations you could use additional scatter3d traces. Here I did this in 2D but it’s going to be pretty similar in 3D: