I’m trying to plot a PCA in 3D.
For those who don’t know a PCA is simply plotted as a scatterplot and annotated with arrows that represents some feature of the analyzed objects, with different lengths based on how important that feature is.
This is a 2D example
import numpy as np
import pandas as pd
from scipy.stats import norm
import plotly.express as px
import plotly.graph_objects as go
def make_pca_plot(feature_df, features,
arrowsize = 1,
arrowhead = 1,
arrowscale = 6,
):
pca = PCA()
pca.fit(feature_df[features])
components = pca.fit_transform(feature_df[features])
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
fig = px.scatter(components, x=0, y=1)
for i, feature in enumerate(features):
fig.add_annotation(
ax=0, ay=0,
axref="x", ayref="y",
x=loadings[i, 0]*arrowscale,
y=loadings[i, 1]*arrowscale,
showarrow=True,
arrowsize=arrowsize,
arrowhead=arrowhead,
xanchor="right",
yanchor="top"
)
fig.add_annotation(
x=loadings[i, 0]*arrowscale,
y=loadings[i, 1]*arrowscale,
ax=0, ay=0,
xanchor="center",
yanchor="bottom",
text=feature,
yshift=5,
)
fig.update_layout(title='Total explained variance PC1+PC2: {}'.format(round(pca.explained_variance_ratio_[0:2].cumsum()[-1],2)))
return fig
np.random.seed(10)
testdf = pd.DataFrame({
'A' : np.random.rand(100),
'B' : norm(0,1).rvs(100),
'C' : norm(2,1).rvs(100),
'D' : norm(2,1).rvs(100),
})
make_pca_plot(testdf, testdf.columns)
As you can see there are arrows that start from the origin of the axes and end in set position depending on the loading
variable value for that feature.
I want to obtain the same result in a 3D scatter plot, but I can’t manage to do that. Specifically i do not understand how to set the start of the arrow on the origin.
So far I obtained this
def make_3D_pca_plot(feature_df, features,
arrowsize = 1,
arrowhead = 1,
arrowscale = 6,
):
pca = PCA()
pca.fit(feature_df[features])
components = pca.fit_transform(feature_df[features])
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
fig = px.scatter_3d(components, x=0, y=1, z=2)
fig.update_layout(
scene = dict(
annotations=[
dict(
# ax=0, ay=0,
showarrow = True,
arrowsize=arrowsize,
arrowhead=arrowhead,
x = loadings[i, 0]*arrowscale,
y = loadings[i, 1]*arrowscale,
z = loadings[i, 2]*arrowscale,
xanchor="center",
yanchor="bottom",
text = feature,
yshift=5,
)
for i, feature in enumerate(features)]
)
)
fig.update_layout(title='Total explained variance PC 1+2+3: {}'.format(round(pca.explained_variance_ratio_[0:3].cumsum()[-1],2)))
return fig
make_3D_pca_plot(testdf, testdf.columns[0:3],
arrowscale=4
)
if i uncomment the ax
or ay
parameter the arrows disappear.
Also you can tweak arrowscale
to see what is happening to the annotations.