I’m trying to analyze a large graph dataset using the RAPIDS cuGraph minimum spanning tree function. When I run my code, it raises a RuntimeError. RAPIDS and CUDA are both the latest versions.
The graph is undirected and about 27 GB in size. I am using four NVIDIA A100 80GB PCIe GPUs.
My code:
### Convert KG2 into undirected graph
# Define column names
cols = ['subject', 'object', 'predicate']
# Load the TSV file into a cuDF DataFrame.
# The separator has to be the tab escape '\t'; a bare 't' would split every
# row on the letter "t" and corrupt the node identifiers.
gdf = cudf.read_csv('graph.tsv', names=cols, delimiter='\t')
# Convert node identifiers to strings (if they are not already)
gdf['subject'] = gdf['subject'].astype(str)
gdf['object'] = gdf['object'].astype(str)
# Create a cuGraph Graph object (cugraph.Graph() with no arguments is used
# here as the undirected graph)
G = cugraph.Graph()
# Create the graph from the cuDF DataFrame's edgelist; the 'predicate' column
# is not passed on, so the graph carries no edge attribute from the file
G.from_cudf_edgelist(gdf, source='subject', destination='object')
# List of vertexes
lst_nodes = ['C:7045767', 'G:0007271', 'G:0035249', 'G:0005947', 'G:0004129',
             'C:26519', 'U:C0132173', 'G:0018393', 'G:0006979', 'H:0025464',
             'G:0007613', 'C:64645', 'P:000010173', 'G:0014055', 'G:0061535',
             'G:0150076', 'M:0001152', 'H:0002185', 'M:0004975', 'E:0005816',
             'C:60425']
# Check for existing nodes in the graph
existing_nodes = G.nodes().to_pandas().tolist()
missing_nodes = set(lst_nodes) - set(existing_nodes)
# Print missing nodes
if missing_nodes:
    print(f"Some nodes in lst_nodes are not in the graph: {missing_nodes}")
else:
    print("All nodes are present in the graph.")
def compute_subgraph_mst(lst_nodes):
    """Compute an MST restricted to the edges that touch ``lst_nodes``.

    The function reads the module-level graph ``G``. It first computes the
    minimum spanning tree of the whole graph, keeps only the MST edges whose
    source or destination is in ``lst_nodes``, builds a new graph from those
    edges, and returns the minimum spanning tree of that smaller graph.

    Args:
        lst_nodes: Vertex identifiers to keep, in the same form as the
            vertices of ``G``.

    Returns:
        Whatever ``cugraph.minimum_spanning_tree`` returns for the filtered
        graph (a ``cugraph.Graph`` when given a cuGraph graph).
    """
    # Compute the MST of the entire graph.
    # NOTE(review): this is the call reported as failing with
    # cudaErrorIllegalAddress; the changes below do not address that failure
    # and only fix the steps that follow it.
    mst = cugraph.minimum_spanning_tree(G)

    # For cuGraph input the result is a Graph, not a DataFrame, so it cannot
    # be indexed with a boolean mask; pull the edge list out first.
    # Assumes the edge list exposes 'src'/'dst' columns, as the original
    # filter did -- TODO confirm against the installed cuGraph version.
    mst_edges = mst.view_edge_list()

    # Keep only the MST edges with at least one endpoint in lst_nodes.
    touches_selection = (
        mst_edges['src'].isin(lst_nodes) | mst_edges['dst'].isin(lst_nodes)
    )
    subgraph_edges = mst_edges[touches_selection][['src', 'dst']]

    # Build the new graph straight from the cuDF edge list. The column names
    # are given explicitly because they are not the loader's defaults, and
    # staying in cuDF avoids a round trip through pandas and Python lists.
    subgraph_g = cugraph.Graph()
    subgraph_g.from_cudf_edgelist(
        subgraph_edges, source='src', destination='dst'
    )

    # Compute the MST of the subgraph
    return cugraph.minimum_spanning_tree(subgraph_g)
# Drop any requested nodes that are absent from the graph (reporting the
# remaining ones), then compute the subgraph MST for the nodes that are left.
if missing_nodes:
    lst_nodes = [
        candidate for candidate in lst_nodes
        if candidate not in missing_nodes
    ]
    print(f"Proceeding with nodes: {lst_nodes}")
subgraph_mst = compute_subgraph_mst(lst_nodes)
Output:
Some nodes in lst_nodes are not in the graph: {'E:0005816', 'C:60425'}
Proceeding with nodes: ['P:7045767', 'G:0007271', 'G:0035249', 'G:0005947', 'G:0004129', 'C:26519', 'U:C0132173', 'G:0018393', 'G:0006979', 'H:0025464', 'G:0007613', 'C:64645', 'P:000010173', 'G:0014055', 'G:0061535', 'G:0150076', 'M:0001152', 'H:0002185', 'M:0004975']
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[3], line 64
62 lst_nodes = [node for node in lst_nodes if node not in missing_nodes]
63 print(f"Proceeding with nodes: {lst_nodes}")
---> 64 subgraph_mst = compute_subgraph_mst(lst_nodes)
Cell In[3], line 38, in compute_subgraph_mst(lst_nodes)
36 def compute_subgraph_mst(lst_nodes):
37 # Compute the MST of the entire graph
---> 38 mst = cugraph.minimum_spanning_tree(G)
40 # Filter the MST to include only the subgraph with lst_nodes
41 subgraph = mst[mst['src'].isin(lst_nodes) | mst['dst'].isin(lst_nodes)]
File /scratch/USERNAME/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cugraph/tree/minimum_spanning_tree.py:105, in minimum_spanning_tree(G, weight, algorithm, ignore_nan)
103 return cugraph_to_nx(mst)
104 else:
--> 105 return _minimum_spanning_tree_subgraph(G)
File /scratch/USERNAME/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cugraph/tree/minimum_spanning_tree.py:26, in _minimum_spanning_tree_subgraph(G)
24 if G.is_directed():
25 raise ValueError("input graph must be undirected")
---> 26 mst_df = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)
27 if G.renumbered:
28 mst_df = G.unrenumber(mst_df, "src")
File minimum_spanning_tree_wrapper.pyx:71, in cugraph.tree.minimum_spanning_tree_wrapper.minimum_spanning_tree()
File minimum_spanning_tree_wrapper.pyx:52, in cugraph.tree.minimum_spanning_tree_wrapper.mst_double()
RuntimeError: parallel_for: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered