[bug fix] Fix community_report config not working in settings.yaml (#405)

* fix community_report config not working in settings.yaml

* add semversioner

* fix unit test env vars: rename GRAPHRAG_COMMUNITY_REPORT_* to GRAPHRAG_COMMUNITY_REPORTS_*

---------

Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
This commit is contained in:
Kylin 2024-07-09 12:48:02 +08:00 committed by GitHub
parent f46d5d5743
commit e2572c7fab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 11 additions and 7 deletions

View File

@ -0,0 +1,4 @@
{
"type": "patch",
"description": "fix community_report doesn't work in settings.yaml"
}

View File

@ -151,8 +151,8 @@ These settings control the data input used by the pipeline. Any settings with a
| `GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION` | The claim_description prompting argument to utilize. | `string` | optional | "Any claims or facts that could be relevant to threat analysis." |
| `GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE` | The claim extraction prompt to utilize. | `string` | optional | `None` |
| `GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS` | The maximum number of redrives (gleanings) to invoke when extracting claims in a loop. | `int` | optional | 0 |
| `GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE` | The community report extraction prompt to utilize. | `string` | optional | `None` |
| `GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH` | The maximum number of tokens to generate per community report. | `int` | optional | 1500 |
| `GRAPHRAG_COMMUNITY_REPORTS_PROMPT_FILE` | The community reports extraction prompt to utilize. | `string` | optional | `None` |
| `GRAPHRAG_COMMUNITY_REPORTS_MAX_LENGTH` | The maximum number of tokens to generate per community report. | `int` | optional | 1500 |
## Storage

View File

@ -427,7 +427,7 @@ def create_graphrag_config(
community_report_config = values.get("community_reports") or {}
with (
reader.envvar_prefix(Section.community_report),
reader.envvar_prefix(Section.community_reports),
reader.use(community_report_config),
):
community_reports_model = CommunityReportsConfig(
@ -583,7 +583,7 @@ class Section(str, Enum):
cache = "CACHE"
chunk = "CHUNK"
claim_extraction = "CLAIM_EXTRACTION"
community_report = "COMMUNITY_REPORT"
community_reports = "COMMUNITY_REPORTS"
embedding = "EMBEDDING"
entity_extraction = "ENTITY_EXTRACTION"
graphrag = "GRAPHRAG"

View File

@ -111,7 +111,7 @@ claim_extraction:
description: "{defs.CLAIM_DESCRIPTION}"
max_gleanings: {defs.CLAIM_MAX_GLEANINGS}
community_report:
community_reports:
## llm: override the global llm settings for this task
## parallelization: override the global parallelization settings for this task
## async_mode: override the global async_mode settings for this task

View File

@ -92,8 +92,8 @@ ALL_ENV_VARS = {
"GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION": "test 123",
"GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS": "5000",
"GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE": "tests/unit/config/prompt-a.txt",
"GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH": "23456",
"GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE": "tests/unit/config/prompt-b.txt",
"GRAPHRAG_COMMUNITY_REPORTS_MAX_LENGTH": "23456",
"GRAPHRAG_COMMUNITY_REPORTS_PROMPT_FILE": "tests/unit/config/prompt-b.txt",
"GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS": "17",
"GRAPHRAG_EMBEDDING_BATCH_SIZE": "1000000",
"GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS": "12",