diff --git a/.semversioner/next-release/patch-20240910212339849025.json b/.semversioner/next-release/patch-20240910212339849025.json new file mode 100644 index 00000000..d11690df --- /dev/null +++ b/.semversioner/next-release/patch-20240910212339849025.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Deep copy txt units on local search to avoid race conditions" +} diff --git a/graphrag/query/structured_search/local_search/mixed_context.py b/graphrag/query/structured_search/local_search/mixed_context.py index 21eac2e8..e0608e4b 100644 --- a/graphrag/query/structured_search/local_search/mixed_context.py +++ b/graphrag/query/structured_search/local_search/mixed_context.py @@ -3,6 +3,7 @@ """Algorithms to build context data for local search prompt.""" import logging +from copy import deepcopy from typing import Any import pandas as pd @@ -319,7 +320,7 @@ class LocalSearchMixedContext(LocalContextBuilder): for text_id in entity.text_unit_ids or []: if text_id not in text_unit_ids_set and text_id in self.text_units: text_unit_ids_set.add(text_id) - selected_unit = self.text_units[text_id] + selected_unit = deepcopy(self.text_units[text_id]) num_relationships = count_relationships( selected_unit, entity, self.relationships )