graphrag/graphrag/index/operations/layout_graph/methods/zero.py
Nathan Evans 61b3d6d56a
Migrate helper verbs (#1248)
* Remove genid

* Move snapshot_rows

* Move snapshot

* Delete spread_json

* Delete unzip

* Delete zip

* Move unpack_graph

* Move compute_edge_combined_degree

* Delete create_graph

* Delete concat

* Delete text replace

* Delete text_translate

* Move text_split

* Inline aggregate override

* Move cluster_graph

* Move merge_graphs

* Semver

* Move text_chunk

* Move layout_graph and fix some __init__s

* Move extract_covariates

* Rename text_split -> split_text

* Move extract_entities

* Move summarize_descriptions

* Rename text_chunk -> chunk_text

* Move community report creation

* Remove verb-level packing operators

* Streamline some naming

* Streamline param name/order

* Move mock LLM data to tests

* Fixed missed rename

* Update some strategy refs

* Rename run_gi

* Inject mock responses into integ test config
2024-10-09 13:46:44 -07:00

64 lines
1.9 KiB
Python

# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing run and _create_node_position methods definitions."""
import logging
import traceback
from typing import Any
import networkx as nx
from graphrag.index.graph.visualization import (
GraphLayout,
NodePosition,
get_zero_positions,
)
from graphrag.index.typing import ErrorHandlerFn
# TODO: Handle this more elegantly, e.g. make the node attributes used for
# "size" and "cluster" configurable.
# We could also add a boolean flag to choose whether node sizes or clusters are used.
log = logging.getLogger(__name__)
def run(
    graph: nx.Graph,
    _args: dict[str, Any],
    on_error: ErrorHandlerFn,
) -> GraphLayout:
    """Compute a layout for ``graph`` via ``get_zero_positions``.

    A cluster value and a size value are read from each node's attributes and
    forwarded to ``get_zero_positions``. If that call raises, the error is
    logged and reported through ``on_error``, and a fallback layout is
    returned instead of propagating the exception.

    Args:
        graph: The graph whose nodes are laid out.
        _args: Not used by this layout method.
        on_error: Callback invoked with the exception, the formatted
            traceback and ``None`` when ``get_zero_positions`` fails.

    Returns:
        The result of ``get_zero_positions``; on failure, a list with one
        ``NodePosition`` per node at (0, 0) with size 0 and the node's
        cluster rendered as a string.
    """
    nodes = list(graph.nodes)

    node_clusters = []
    node_sizes = []
    for node_id in nodes:
        attrs = graph.nodes[node_id]
        # "cluster" wins over "community"; -1 when neither attribute is set.
        node_clusters.append(attrs.get("cluster", attrs.get("community", -1)))
        # "degree" wins over "size"; 0 when neither attribute is set.
        node_sizes.append(attrs.get("degree", attrs.get("size", 0)))

    # Only forward the optional arguments when the graph actually has nodes.
    additional_args: dict[str, Any] = {}
    if node_clusters:
        additional_args["node_categories"] = node_clusters
    if node_sizes:
        additional_args["node_sizes"] = node_sizes

    try:
        return get_zero_positions(node_labels=nodes, **additional_args)
    except Exception as e:
        log.exception("Error running zero-position")
        on_error(e, traceback.format_exc(), None)
        # Best-effort fallback: rather than failing the caller, return a
        # layout with every node at (0, 0). node_clusters was built with one
        # entry per node, so zip pairs each label with its own cluster.
        return [
            NodePosition(x=0, y=0, label=label, size=0, cluster=str(cluster))
            for label, cluster in zip(nodes, node_clusters)
        ]