mirror of https://github.com/microsoft/graphrag.git
synced 2026-01-14 09:07:20 +08:00

Improve and cleanup logging output of indexing (#1144)

parent aa5b426f1d
commit 594084f156
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Improve logging."
+}
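This new file is a semversioner change note recording that the commit warrants a patch-level release. As a rough sketch of how release tooling can fold such notes into the next version bump (the directory path matches the repo's convention, but the aggregation logic here is an assumption, not graphrag's tooling):

```python
import json
from pathlib import Path

# Hedged sketch: gather pending change notes like the one above and pick
# the strongest bump. Precedence rule (major > minor > patch) is assumed.
PRECEDENCE = {"major": 0, "minor": 1, "patch": 2}

def next_bump(changes_dir: str = ".semversioner/next-release") -> str:
    notes = [json.loads(p.read_text()) for p in Path(changes_dir).glob("*.json")]
    if not notes:
        return "none"
    return min((n["type"] for n in notes), key=PRECEDENCE.__getitem__)
```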
@@ -74,7 +74,7 @@ NODE2VEC_WINDOW_SIZE = 2
 NODE2VEC_ITERATIONS = 3
 NODE2VEC_RANDOM_SEED = 597832
 REPORTING_TYPE = ReportingType.file
-REPORTING_BASE_DIR = "output/${timestamp}/reports"
+REPORTING_BASE_DIR = "output/${timestamp}/logs"
 SNAPSHOTS_GRAPHML = False
 SNAPSHOTS_RAW_ENTITIES = False
 SNAPSHOTS_TOP_LEVEL_NODES = False
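The default reporting directory moves from ".../reports" to ".../logs", which better describes its contents. The `${timestamp}` token is substituted per indexing run; a minimal sketch of how such a templated path can be resolved (the helper name and timestamp format are assumptions, not graphrag's internals):

```python
from datetime import datetime
from string import Template

REPORTING_BASE_DIR = "output/${timestamp}/logs"

def resolve_base_dir(template: str) -> str:
    """Hypothetical helper: expand ${timestamp} with the run's start time."""
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")  # format is an assumption
    return Template(template).substitute(timestamp=ts)

print(resolve_base_dir(REPORTING_BASE_DIR))  # e.g. output/20240101-120000/logs
```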
@@ -54,15 +54,12 @@ embeddings:
     # max_retry_wait: {defs.LLM_MAX_RETRY_WAIT}
     # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
     # concurrent_requests: {defs.LLM_CONCURRENT_REQUESTS} # the number of parallel inflight requests that may be made

 chunks:
   size: {defs.CHUNK_SIZE}
   overlap: {defs.CHUNK_OVERLAP}
   group_by_columns: [{",".join(defs.CHUNK_GROUP_BY_COLUMNS)}] # by default, we don't allow chunks to cross documents

 input:
   type: {defs.INPUT_TYPE.value} # or blob
   file_type: {defs.INPUT_FILE_TYPE.value} # or csv
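The template's `chunks` section controls how input documents are split before extraction. To make the size/overlap semantics concrete, here is a minimal sliding-window chunker, an illustration only: graphrag's actual chunker operates on tiktoken tokens and is more involved.

```python
def chunk_tokens(tokens: list[str], size: int, overlap: int) -> list[list[str]]:
    """Sliding-window chunking: each chunk starts size - overlap after the last."""
    step = size - overlap
    return [tokens[i : i + size] for i in range(0, max(len(tokens) - overlap, 1), step)]

words = "the quick brown fox jumps over the lazy dog".split()
print(chunk_tokens(words, size=4, overlap=2))
# [['the', 'quick', 'brown', 'fox'], ['brown', 'fox', 'jumps', 'over'], ...]
```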
@@ -77,7 +77,6 @@ def load_llm_embeddings(
             raise ValueError(msg)
         if cache is not None:
             cache = cache.child(name)

         return loaders[llm_type]["load"](on_error, cache, llm_config or {})

     msg = f"Unknown LLM type {llm_type}"
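`load_llm_embeddings` dispatches through a `loaders` registry keyed by LLM type and namespaces the cache per component via `cache.child(name)`. A stripped-down sketch of that registry pattern, with simplified stand-in names rather than graphrag's exact types:

```python
from typing import Any

# Illustrative only: mirrors the loaders[llm_type]["load"](...) dispatch and
# the ValueError fallback visible in the hunk above.
loaders: dict[str, dict[str, Any]] = {
    "openai_embedding": {
        "load": lambda on_error, cache, config: ("embedding-llm", config),
    },
}

def load_llm_embeddings(name: str, llm_type: str, cache: Any, config: dict | None = None):
    if llm_type in loaders:
        if cache is not None:
            cache = cache.child(name)  # namespace cache entries per component
        return loaders[llm_type]["load"](None, cache, config or {})
    msg = f"Unknown LLM type {llm_type}"
    raise ValueError(msg)
```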
@@ -78,7 +78,7 @@ class BlobWorkflowCallbacks(NoopWorkflowCallbacks):
         blob_client = self._blob_service_client.get_blob_client(
             self._container_name, self._blob_name
         )
-        blob_client.append_block(json.dumps(log, ensure_ascii=False) + "\n")
+        blob_client.append_block(json.dumps(log, indent=4, ensure_ascii=False) + "\n")

         # update the blob's block count
         self._num_blocks += 1
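The only change here is `indent=4`: each record appended to the log blob is now pretty-printed rather than written as a single line. The difference in output:

```python
import json

log = {"type": "error", "data": "Entity Extraction Error"}
print(json.dumps(log, ensure_ascii=False))            # old: one line per record
print(json.dumps(log, indent=4, ensure_ascii=False))  # new: multi-line, human-readable
```

This trades machine-friendliness (one JSON object per line) for readability when a person inspects the log.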
@@ -42,6 +42,7 @@ class FileWorkflowCallbacks(NoopWorkflowCallbacks):
                     "source": str(cause),
                     "details": details,
                 },
+                indent=4,
                 ensure_ascii=False,
             )
             + "\n"
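The same `indent=4` change lands in the file-based callbacks, so error records in the log file now span multiple lines and naive line-by-line parsing no longer works. A hedged sketch (not part of this commit) of reading such a file back with `json.JSONDecoder.raw_decode`:

```python
import json

def read_log_entries(text: str) -> list[dict]:
    """Decode back-to-back pretty-printed JSON objects from one log file."""
    decoder = json.JSONDecoder()
    entries, pos = [], 0
    while pos < len(text):
        # skip whitespace/newlines between records
        while pos < len(text) and text[pos].isspace():
            pos += 1
        if pos >= len(text):
            break
        obj, end = decoder.raw_decode(text, pos)
        entries.append(obj)
        pos = end
    return entries
```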
@@ -24,7 +24,7 @@ def load_pipeline_reporter(
     config: PipelineReportingConfig | None, root_dir: str | None
 ) -> WorkflowCallbacks:
     """Create a reporter for the given pipeline config."""
-    config = config or PipelineFileReportingConfig(base_dir="reports")
+    config = config or PipelineFileReportingConfig(base_dir="logs")

     match config.type:
         case ReportingType.file:
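With the fallback `base_dir` now "logs", calling the factory without an explicit config writes under `<root_dir>/logs`. A hedged usage sketch based only on the signature shown above; the import path and `on_error` parameters are assumptions from the surrounding diff:

```python
from graphrag.index.reporting import load_pipeline_reporter  # path assumed

# config=None now falls back to "<root_dir>/logs" instead of "<root_dir>/reports"
callbacks = load_pipeline_reporter(config=None, root_dir="./ragtest")
callbacks.on_error("something failed", cause=None, stack=None, details=None)
```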
@@ -7,6 +7,7 @@ import traceback
 from abc import ABC, abstractmethod
 from typing import Generic, TypeVar

+from openai import RateLimitError
 from typing_extensions import Unpack

 from graphrag.llm.types import (
@@ -52,6 +53,10 @@ class BaseLLM(ABC, LLM[TIn, TOut], Generic[TIn, TOut]):
         try:
             output = await self._execute_llm(input, **kwargs)
             return LLMOutput(output=output)
+        except RateLimitError:
+            # for improved readability, do not log rate limit exceptions,
+            # they are logged/handled elsewhere
+            raise
         except Exception as e:
             stack_trace = traceback.format_exc()
             if self._on_error:
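The new clause relies on Python matching `except` blocks in order: `RateLimitError` is re-raised before the generic handler can log it, keeping routine retry noise out of the error log while every other exception still reaches `_on_error`. A self-contained illustration of that ordering, using a stand-in exception class rather than the real `openai.RateLimitError`:

```python
class RateLimitError(Exception):  # stand-in for openai.RateLimitError
    pass

def invoke(fn):
    try:
        return fn()
    except RateLimitError:
        # matched first: propagate silently; retry logic elsewhere handles it
        raise
    except Exception as e:
        print(f"logged: {e}")  # only non-rate-limit errors are logged here
        raise

def flaky():
    raise RateLimitError("429 Too Many Requests")

try:
    invoke(flaky)
except RateLimitError:
    print("rate limit propagated without being logged")
```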