From 482246528d47fecd6076818333288b02a120d59e Mon Sep 17 00:00:00 2001 From: Ha Trinh <90728133+ha2trinh@users.noreply.github.com> Date: Mon, 5 Aug 2024 22:31:36 +0000 Subject: [PATCH] fix json parsing logic and warning message (#833) * fix json parsing logic and warning message * amended warning message --------- Co-authored-by: Alonso Guevara --- .../next-release/patch-20240805211407192658.json | 4 ++++ .../structured_search/global_search/search.py | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 .semversioner/next-release/patch-20240805211407192658.json diff --git a/.semversioner/next-release/patch-20240805211407192658.json b/.semversioner/next-release/patch-20240805211407192658.json new file mode 100644 index 00000000..16767910 --- /dev/null +++ b/.semversioner/next-release/patch-20240805211407192658.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Change json parsing error in the map step of global search to warning" +} diff --git a/graphrag/query/structured_search/global_search/search.py b/graphrag/query/structured_search/global_search/search.py index c87bb425..12dc45fe 100644 --- a/graphrag/query/structured_search/global_search/search.py +++ b/graphrag/query/structured_search/global_search/search.py @@ -192,7 +192,9 @@ class GlobalSearch(BaseSearch): # parse search response json processed_response = self.parse_search_response(search_response) except ValueError: - log.exception("Error parsing search response json") + log.warning( + "Warning: Error parsing search response json - skipping this batch" + ) processed_response = [] return SearchResult( @@ -230,15 +232,19 @@ class GlobalSearch(BaseSearch): """ search_response, _j = try_parse_json_object(search_response) if _j == {}: - return [{"answer": "not avaliable", "score": 0}] + return [{"answer": "", "score": 0}] + + parsed_elements = json.loads(search_response).get("points") + if not parsed_elements or not isinstance(parsed_elements, list): + return [{"answer": "", "score": 0}] - parsed_elements = json.loads(search_response)["points"] return [ { "answer": element["description"], "score": int(element["score"]), } for element in parsed_elements + if "description" in element and "score" in element ] async def _reduce_response( @@ -277,6 +283,9 @@ class GlobalSearch(BaseSearch): if len(filtered_key_points) == 0 and not self.allow_general_knowledge: # return no data answer if no key points are found + log.warning( + "Warning: All map responses have score 0 (i.e., no relevant information found from the dataset), returning a canned 'I do not know' answer. You can try enabling `allow_general_knowledge` to encourage the LLM to incorporate relevant general knowledge, at the risk of increasing hallucinations." + ) return SearchResult( response=NO_DATA_ANSWER, context_data="",