Merge remote-tracking branch 'origin/main' into custom_vector_store_schema

Gaudy Blanco 2025-09-18 20:05:51 -06:00
commit f6d923ea62
7 changed files with 33 additions and 59 deletions

View File

@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Remove hard-coded community rate limiter."
+}

View File

@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Fix multi-index search."
+}

View File

@@ -65,11 +65,10 @@ deployment_name: <azure_model_deployment_name>
 ```
 #### Using Managed Auth on Azure
-To use managed auth, add an additional value to your model config and comment out or remove the api_key line:
+To use managed auth, edit the auth_type in your model config and *remove* the api_key line:
 ```yaml
 auth_type: azure_managed_identity # Default auth_type is api_key
 # api_key: ${GRAPHRAG_API_KEY}
 ```
 You will also need to login with [az login](https://learn.microsoft.com/en-us/cli/azure/authenticate-azure-cli) and select the subscription with your endpoint.
@@ -116,4 +115,4 @@ Please refer to [Query Engine](query/overview.md) docs for detailed information
 - For more details about configuring GraphRAG, see the [configuration documentation](config/overview.md).
 - To learn more about Initialization, refer to the [Initialization documentation](config/init.md).
 - For more details about using the CLI, refer to the [CLI documentation](cli.md).
-- Check out our [visualization guide](visualization_guide.md) for a more interactive experience in debugging and exploring the knowledge graph.
+- Check out our [visualization guide](visualization_guide.md) for a more interactive experience in debugging and exploring the knowledge graph.
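
For readers unfamiliar with managed auth: the `azure_managed_identity` setting defers credential discovery to Azure's identity chain instead of a static key. A minimal sketch of the equivalent flow outside GraphRAG, assuming the standard `azure-identity` and `openai` packages; the endpoint and API version are placeholders, and this is not GraphRAG's internal code:

```python
# Illustrative only: what managed-identity auth amounts to, using the public
# azure-identity and openai packages. Endpoint/API version are placeholders.
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from openai import AzureOpenAI

# DefaultAzureCredential tries `az login`, environment variables, managed
# identity, etc., in order -- no api_key needed anywhere.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

client = AzureOpenAI(
    azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
    azure_ad_token_provider=token_provider,  # takes the place of api_key
    api_version="2024-06-01",  # placeholder
)
```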

View File

@@ -97,9 +97,19 @@ def create_cosmosdb_cache(**kwargs) -> PipelineCache:
     return JsonPipelineCache(storage)
 
 
+def create_noop_cache(**_kwargs) -> PipelineCache:
+    """Create a no-op cache implementation."""
+    return NoopPipelineCache()
+
+
+def create_memory_cache(**kwargs) -> PipelineCache:
+    """Create a memory cache implementation."""
+    return InMemoryCache(**kwargs)
+
+
 # --- register built-in cache implementations ---
-CacheFactory.register(CacheType.none.value, NoopPipelineCache)
-CacheFactory.register(CacheType.memory.value, InMemoryCache)
+CacheFactory.register(CacheType.none.value, create_noop_cache)
+CacheFactory.register(CacheType.memory.value, create_memory_cache)
 CacheFactory.register(CacheType.file.value, create_file_cache)
 CacheFactory.register(CacheType.blob.value, create_blob_cache)
 CacheFactory.register(CacheType.cosmosdb.value, create_cosmosdb_cache)
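
This hunk swaps direct class registration for factory functions, so everything handed to `CacheFactory.register` is now uniformly a callable that accepts config kwargs and returns a `PipelineCache`. A hypothetical sketch of plugging a custom type into the same hook; the `"custom"` type name is invented, and the import paths are assumptions based on the modules this hunk touches:

```python
# Hypothetical sketch: registering a custom cache through the factory hook
# shown above. The "custom" type name is invented; import paths are assumed.
from graphrag.cache.factory import CacheFactory
from graphrag.cache.memory_pipeline_cache import InMemoryCache
from graphrag.cache.pipeline_cache import PipelineCache


def create_custom_cache(**kwargs) -> PipelineCache:
    """Factory callable: receives config kwargs, returns a PipelineCache."""
    # A real implementation would build its own PipelineCache subclass here;
    # we delegate to the built-in in-memory cache purely for illustration.
    return InMemoryCache(**kwargs)


CacheFactory.register("custom", create_custom_cache)
```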

View File

@@ -16,7 +16,6 @@ from graphrag.index.operations.summarize_communities.typing import (
     Finding,
     StrategyConfig,
 )
-from graphrag.index.utils.rate_limiter import RateLimiter
 from graphrag.language_model.manager import ModelManager
 from graphrag.language_model.protocol.base import ChatModel
@@ -51,8 +50,6 @@ async def _run_extractor(
     level: int,
     args: StrategyConfig,
 ) -> CommunityReport | None:
-    # RateLimiter
-    rate_limiter = RateLimiter(rate=1, per=60)
     extractor = CommunityReportsExtractor(
         model,
         extraction_prompt=args.get("extraction_prompt", None),
@@ -63,7 +60,6 @@ async def _run_extractor(
     )
     try:
-        await rate_limiter.acquire()
         results = await extractor(input)
         report = results.structured_output
         if report is None:

View File

@@ -1,40 +0,0 @@
-# Copyright (c) 2024 Microsoft Corporation.
-# Licensed under the MIT License
-
-"""Rate limiter utility."""
-
-import asyncio
-import time
-
-
-class RateLimiter:
-    """
-    The original TpmRpmLLMLimiter strategy did not account for minute-based rate limiting when scheduled.
-
-    The RateLimiter was introduced to ensure that the CommunityReportsExtractor could be scheduled to adhere to rate configurations on a per-minute basis.
-    """
-
-    # TODO: RateLimiter scheduled: using asyncio for async_mode
-
-    def __init__(self, rate: int, per: int):
-        self.rate = rate
-        self.per = per
-        self.allowance = rate
-        self.last_check = time.monotonic()
-
-    async def acquire(self):
-        """Acquire a token from the rate limiter."""
-        current = time.monotonic()
-        elapsed = current - self.last_check
-        self.last_check = current
-        self.allowance += elapsed * (self.rate / self.per)
-        if self.allowance > self.rate:
-            self.allowance = self.rate
-        if self.allowance < 1.0:
-            sleep_time = (1.0 - self.allowance) * (self.per / self.rate)
-            await asyncio.sleep(sleep_time)
-            self.allowance = 0.0
-        else:
-            self.allowance -= 1.0
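
The deleted class is a classic token bucket: `acquire()` refills `allowance` at `rate / per` tokens per second, sleeps when less than one token is available, and otherwise spends a token. With the hard-coded `(rate=1, per=60)` from `_run_extractor` this throttled community reports to roughly one per minute, which is what the changelog entry above removes. A small behavioral sketch, assuming the class body above is in scope and scaling the constants down to `(1, 2)` so it finishes in seconds:

```python
# Behavioral sketch of the deleted RateLimiter (assumed in scope), scaled
# down from the hard-coded rate=1, per=60 to rate=1, per=2 for a quick run.
import asyncio
import time


async def demo() -> None:
    limiter = RateLimiter(rate=1, per=2)
    start = time.monotonic()
    for i in range(3):
        await limiter.acquire()
        print(f"call {i} at t={time.monotonic() - start:.1f}s")
    # Expected output, roughly: t=0.0s, t=2.0s, t=2.0s. The third call is
    # immediate because refill accrued during the forced sleep is credited
    # back on the next acquire() -- a quirk of this implementation.


asyncio.run(demo())
```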

View File

@@ -190,57 +190,58 @@ def update_context_data(
     """
     updated_context_data = {}
     for key in context_data:
+        entries = context_data[key].to_dict(orient="records")
         updated_entry = []
         if key == "reports":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     index_name=links["community_reports"][int(entry["id"])][
                         "index_name"
                     ],
                     index_id=links["community_reports"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "entities":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     entity=entry["entity"].split("-")[0],
                     index_name=links["entities"][int(entry["id"])]["index_name"],
                     index_id=links["entities"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "relationships":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     source=entry["source"].split("-")[0],
                     target=entry["target"].split("-")[0],
                     index_name=links["relationships"][int(entry["id"])]["index_name"],
                     index_id=links["relationships"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "claims":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     entity=entry["entity"].split("-")[0],
                     index_name=links["covariates"][int(entry["id"])]["index_name"],
                     index_id=links["covariates"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "sources":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     index_name=links["text_units"][int(entry["id"])]["index_name"],
                     index_id=links["text_units"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         updated_context_data[key] = updated_entry
     return updated_context_data
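
Two idioms carry this refactor: `to_dict(orient="records")` converts each DataFrame to row dicts once (iterating a DataFrame directly yields column labels, not rows, which is presumably the bug behind the "Fix multi-index search" changelog entry above), and `dict(entry, **extras)` copies a row while adding keys, replacing the `{k: entry[k] for k in entry}` rebuild. A minimal self-contained sketch on invented data:

```python
# Minimal sketch of the two idioms above, on invented data.
import pandas as pd

reports = pd.DataFrame({"id": [0, 1], "title": ["A", "B"]})
links = {0: {"index_name": "idx-a", "id": 7}, 1: {"index_name": "idx-b", "id": 9}}

# Iterating `reports` directly would yield "id", "title" (column labels);
# to_dict(orient="records") yields one plain dict per row instead.
entries = reports.to_dict(orient="records")

# dict(entry, **extras) copies each row dict and appends the link fields.
updated = [
    dict(
        entry,
        index_name=links[int(entry["id"])]["index_name"],
        index_id=links[int(entry["id"])]["id"],
    )
    for entry in entries
]
print(updated)
# [{'id': 0, 'title': 'A', 'index_name': 'idx-a', 'index_id': 7},
#  {'id': 1, 'title': 'B', 'index_name': 'idx-b', 'index_id': 9}]
```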