Iβm trying to use Networkx 2.1 and Plotly 2.7.0 to generate a graph of a minimum spanning tree using a distance matrix as input. I am using Python 2.7.
The problem is that the edges of the nodes shown in the Plotly graph do not match the edgelist of the Networkx minimum spanning tree.
The distance matrix (csv file) has this format:
ST0_1,0,0,1109,1109,1091,1091,1125,1126,1126,1125,1127,1126,1127,1127,1125,1125,1125,1126,1126,1127,1127,1127,1127,1122,1130,1126,1127
ST0_2,0,0,1109,1109,1091,1091,1125,1126,1126,1125,1127,1126,1127,1127,1125,1125,1125,1126,1126,1127,1127,1127,1127,1122,1130,1126,1127
ST1_3,1109,1109,0,0,1107,1107,1047,1048,1048,1047,1049,1048,1049,1049,1049,1049,1049,1047,1049,1049,1047,1047,1047,1050,1050,1049,1049
ST1_4,1109,1109,0,0,1107,1107,1047,1048,1048,1047,1049,1048,1049,1049,1049,1049,1049,1047,1049,1049,1047,1047,1047,1050,1050,1049,1049
ST22_5,1091,1091,1107,1107,0,0,1100,1101,1101,1100,1102,1101,1102,1102,1101,1101,1101,1101,1100,1102,1101,1101,1102,1099,1103,1102,1102
ST22_6,1091,1091,1107,1107,0,0,1100,1101,1101,1100,1102,1101,1102,1102,1101,1101,1101,1101,1100,1102,1101,1101,1102,1099,1103,1102,1102
ST2,1125,1125,1047,1047,1100,1100,0,5,6,8,8,9,8,10,7,7,8,11,16,12,8,7,11,72,43,11,17
ST7,1126,1126,1048,1048,1101,1101,5,0,1,7,3,4,3,5,6,6,7,10,15,13,7,6,10,72,43,6,16
ST9,1126,1126,1048,1048,1101,1101,6,1,0,8,2,3,2,4,7,7,8,11,16,14,8,7,11,73,44,5,17
ST5,1125,1125,1047,1047,1100,1100,8,7,8,0,10,11,10,12,7,7,8,7,16,16,10,9,13,73,44,13,17
ST13,1127,1127,1049,1049,1102,1102,8,3,2,10,0,3,2,4,9,9,10,13,18,16,10,9,13,75,46,7,19
ST11,1126,1126,1048,1048,1101,1101,9,4,3,11,3,0,3,1,10,10,11,14,19,17,11,10,14,76,47,8,20
ST10,1127,1127,1049,1049,1102,1102,8,3,2,10,2,3,0,4,9,9,10,13,18,16,10,9,13,75,46,7,19
ST12,1127,1127,1049,1049,1102,1102,10,5,4,12,4,1,4,0,11,11,12,15,20,18,12,11,15,77,48,9,21
ST16_7,1125,1125,1049,1049,1101,1101,7,6,7,7,9,10,9,11,0,0,3,10,15,15,9,8,12,72,43,12,16
ST16_8,1125,1125,1049,1049,1101,1101,7,6,7,7,9,10,9,11,0,0,3,10,15,15,9,8,12,72,43,12,16
ST15,1125,1125,1049,1049,1101,1101,8,7,8,8,10,11,10,12,3,3,0,11,16,16,10,9,13,73,44,13,17
ST6,1126,1126,1047,1047,1101,1101,11,10,11,7,13,14,13,15,10,10,11,0,17,15,11,12,12,70,43,14,18
ST18,1126,1126,1049,1049,1100,1100,16,15,16,16,18,19,18,20,15,15,16,17,0,22,16,17,18,77,49,21,7
ST17,1127,1127,1049,1049,1102,1102,12,13,14,16,16,17,16,18,15,15,16,15,22,0,14,15,15,75,50,15,23
ST4,1127,1127,1047,1047,1101,1101,8,7,8,10,10,11,10,12,9,9,10,11,16,14,0,1,3,73,44,13,17
ST3,1127,1127,1047,1047,1101,1101,7,6,7,9,9,10,9,11,8,8,9,12,17,15,1,0,4,74,45,12,18
ST8,1127,1127,1047,1047,1102,1102,11,10,11,13,13,14,13,15,12,12,13,12,18,15,3,4,0,72,47,14,20
ST21,1122,1122,1050,1050,1099,1099,72,72,73,73,75,76,75,77,72,72,73,70,77,75,73,74,72,0,83,74,79
ST20,1130,1130,1050,1050,1103,1103,43,43,44,44,46,47,46,48,43,43,44,43,49,50,44,45,47,83,0,49,50
ST14,1126,1126,1049,1049,1102,1102,11,6,5,13,7,8,7,9,12,12,13,14,21,15,13,12,14,74,49,0,22
ST19,1127,1127,1049,1049,1102,1102,17,16,17,17,19,20,19,21,16,16,17,18,7,23,17,18,20,79,50,22,0
Here is the Python script:
import csv
import numpy as np
import networkx as nx
from plotly.offline import plot
import plotly.graph_objs as go
from networkx.drawing.nx_agraph import graphviz_layout
def read_dist_matrix(file_name):
""" Reads a distance matrix in PHYLIP format.
Requires: file_name is the name of a file that has the distance matrix
in PHYLIP format.
Ensures: distance matrix as numpy array and a list with the names/ids of
each sequence.
"""
with open(file_name) as f:
#first_line = f.readline().strip()
reader = csv.reader(f, delimiter=',')
names = list(zip(*reader))[0]
cols = tuple(list(range(1,len(names)+1)))
dist_matrix = np.loadtxt(file_name, delimiter=',',
usecols=cols)
return [dist_matrix, names]
def dist_to_graph(matrix, names):
""" Converts numpy array representing distance matrix to graph.
Requires: matrix as a numpy array. names as a list with the names
of each row/column.
Ensures: networkx graph labeled with the right names.
"""
G = nx.from_numpy_matrix(matrix)
nodes = list(G.nodes())
names_dict = {}
for n in range(len(nodes)):
names_dict[nodes[n]] = names[n]
G = nx.relabel_nodes(G, names_dict)
return G
ola = read_dist_matrix("out.csv")
ola_g = dist_to_graph(ola[0], ola[1])
mst = nx.minimum_spanning_tree(ola_g, algorithm='prim')
pos = graphviz_layout(mst, prog='neato')
labels = list(mst.nodes.keys())
X = []
Y = []
for k in pos:
X.append(pos[k][0])
Y.append(pos[k][1])
tracer = go.Scatter(x=X, y=Y,
mode='lines',
line=go.Line(color='#888', width=2),
hoverinfo = 'none',
showlegend=False)
tracer_marker = go.Scatter(x=X, y=Y,
mode='markers+text',
text=labels,
textposition='top',
marker = go.Marker(size= 15,
line = dict(width = 2)),
hoverinfo = 'none',
showlegend=False)
layout = dict(title='Test', showlegend=False,
xaxis=dict(title='',
titlefont=dict(size=20),
showgrid=False,
zeroline=False,
showline=False,
ticks='',
showticklabels=False,
showspikes=False),
yaxis=dict(title='',
titlefont=dict(size=20),
showgrid=False,
zeroline=False,
showline=False,
ticks='',
showticklabels=False,
showspikes=False)
)
fig = dict(data=[tracer, tracer_marker], layout=layout)
plot(fig, filename='./test_plotly.html', auto_open=True, show_link=False)
This is a screenshot of the output HTML:
If we run the command mst.edges(data=True)
we see that the nodes ST7 and ST5 are connected but in the Plotly graph they arenβt.
A similar question has been asked here, but it was on R:
Can anyone help me with this?