I’ve been trying to use different data than what’s offered on the heatmap/dendrogram Plotly page (https://plotly.com/python/dendrogram/), but I can’t figure out how to add different labels to the x-axis of the dendrogram. Every time I try, I get an “index out of range” error.
Here’s what I’m working with:
The DataFrame looks like this (about 400 rows and 12 columns):
fraud accountid V1 V2 V3 V4 V5 V6 fraud nonfraud fraud_rate fpr total
0 5534 -0.613696 3.698772 -5.534941 5.620486 1.649263 -2.335145 1.000000 0.000000 1.000000 0.000000 1.000000
338 3153 0.725646 2.300894 -5.329976 4.007683 -1.730411 -1.732193 1.000000 0.000000 1.000000 0.000000 1.000000
336 8073 -3.576362 3.299436 -7.460433 7.783634 -0.398549 -1.968441 1.000000 0.000000 1.000000 0.000000 1.000000
335 9375 -4.710529 8.636214 -15.496222 10.313349 -4.351341 -3.322689 1.000000 0.000000 1.000000 0.000000 1.000000
334 943 1.852889 1.069593 -1.776101 4.617410 0.770413 -0.400859 1.000000 0.000000 1.000000 0.000000 1.000000
333 9475 -2.207631 3.259076 -5.436365 3.684737 -3.066401 -0.671323 1.000000 0.000000 1.000000 0.000000 1.000000
332 6136 -0.758469 -0.045410 -0.168438 -1.313275 -1.901763 0.739433 1.000000 0.000000 1.000000 0.000000 1.000000
331 2470 -13.192671 12.785971 -9.906650 3.320337 -4.801176 5.760059 1.000000 0.000000 1.000000 0.000000 1.000000
330 2504 -3.240187 2.978122 -4.162314 3.869124 -3.645256 -0.126271 1.000000 0.000000 1.000000 0.000000 1.000000
329 7837 -14.179165 7.421370 -21.405836 11.927512 -7.974281 -2.202710 1.000000 0.000000 1.000000 0.000000 1.000000
328 9373 0.923764 0.344048 -2.880004 1.721680 -3.019565 -0.639736 1.000000 0.000000 1.000000 0.000000 1.000000
327 3460 1.159373 2.844795 -4.050680 4.777701 2.948980 -2.010361 1.000000 0.000000 1.000000 0.000000 1.000000
326 8978 -1.599457 2.607720 -2.987193 3.064156 -2.497914 -0.541103 1.000000 0.000000 1.000000 0.000000 1.000000
325 2283 -3.365319 2.426503 -3.752227 0.276017 -2.305870 -1.961578 1.000000 0.000000 1.000000 0.000000 1.000000
324 7284 -23.914101 13.765942 -25.733734 6.290918 -17.784824 -4.572498 1.000000 0.000000 1.000000 0.000000 1.000000
323 2126 -4.599447 2.762540 -4.656530 5.201403 -2.470388 -0.357618 1.000000 0.000000 1.000000 0.000000 1.000000
322 8069 -16.526507 8.584972 -18.649853 9.505594 -13.793819 -2.832404 1.000000 0.000000 1.000000 0.000000 1.000000
321 6003 -2.450367 2.107729 -5.140663 1.411304 -1.690780 -0.736427 1.000000 0.000000 1.000000 0.000000 1.000000
320 9374 -1.522305 1.505152 0.372364 2.286869 -0.526519 0.998593 1.000000 0.000000 1.000000 0.000000 1.000000
319 2504 -15.271362 8.326581 -22.338591 11.885313 -8.721334 -2.324307 1.000000 0.000000 1.000000 0.000000 1.000000
The code:
# Create dendrograms.
# Top dendrogram: built from fraud_df and used as the base figure that all
# later traces are added to.
fig = ff.create_dendrogram(fraud_df, orientation='bottom')
# Bind every trace of the top dendrogram to the secondary y axis (y2).
for top_trace in fig['data']:
    top_trace['yaxis'] = 'y2'
fig.show()
# Side dendrogram: same data as the top one, drawn with orientation='right'.
dendro_side = ff.create_dendrogram(fraud_df, orientation='right')
# Bind every trace of the side dendrogram to the secondary x axis (x2).
for side_trace in dendro_side['data']:
    side_trace['xaxis'] = 'x2'
dendro_side.show()
# Add Side Dendrogram Data to Figure
fig.add_traces(list(dendro_side['data']))
# Create Heatmap
# Tick labels of the side dendrogram's y axis (presumably in leaf order).
leaf_labels = dendro_side['layout']['yaxis']['ticktext']
# The labels are converted to ints because they are used below as positional
# indices into the distance matrix.
# NOTE(review): int() only works while the tick labels are numeric; verify
# this step if custom labels are ever passed to create_dendrogram.
dendro_leaves = [int(label) for label in leaf_labels]
# Condensed pairwise distances between the rows of fraud_df ...
data_dist = pdist(fraud_df)
# ... expanded to a full square matrix for the heatmap.
heat_data = squareform(data_dist)
# Reorder rows first, then columns, to follow the dendrogram leaves.
heat_data = heat_data[dendro_leaves, :][:, dendro_leaves]
heat_data
# Create the heatmap trace from the reordered distance matrix.
heat_trace = go.Heatmap(
    x=dendro_leaves,
    y=dendro_leaves,
    z=heat_data,
    showlegend=True,
    colorscale='plotly3',
)
# Overwrite the x/y coordinates with the tick positions of the two
# dendrograms (presumably so the heatmap cells line up with the leaves).
heat_trace.x = fig.layout.xaxis.tickvals
heat_trace.y = dendro_side.layout.yaxis.tickvals
heatmap = [heat_trace]
# Add Heatmap Data to Figure
fig.add_traces(heatmap)
# Edit Layout: overall figure size, legend, hover behaviour and title.
fig.update_layout(
    width=2000,
    height=1200,
    showlegend=True,
    hovermode='closest',
    title="Clustered Heatmap of Creditcard Fraud Data",
)

# Settings shared by all four axes: no mirror line, grid, axis line,
# zero line or tick marks.
bare_axis = {
    'mirror': False,
    'showgrid': False,
    'showline': False,
    'zeroline': False,
    'ticks': "",
}

fig.update_layout(
    # xaxis: the only axis that keeps its tick labels.
    xaxis={**bare_axis, 'domain': [.15, 1]},
    # xaxis2: the left 15% of the width, tick labels hidden.
    xaxis2={**bare_axis, 'domain': [0, .15], 'showticklabels': False},
    # yaxis: the lower 85% of the height, tick labels hidden.
    yaxis={**bare_axis, 'domain': [0, .85], 'showticklabels': False},
    # yaxis2: a strip near the top of the figure, tick labels hidden.
    yaxis2={**bare_axis, 'domain': [.825, .975], 'showticklabels': False},
)
# fig.show()
Result:
If I leave out the labels when creating the dendrogram figure, it works, but the x-axis shows the 400 rows of the DataFrame index. How do I get the x-axis to show the columns of the DataFrame instead?