mirror of https://github.com/microsoft/graphrag.git
synced 2026-01-14 09:07:20 +08:00

Merge remote-tracking branch 'origin/main' into custom_vector_store_schema

commit f6d923ea62
@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Remove hard-coded community rate limiter."
+}
@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Fix multi-index search."
+}
@ -65,11 +65,10 @@ deployment_name: <azure_model_deployment_name>
 ```
 
 #### Using Managed Auth on Azure
-To use managed auth, add an additional value to your model config and comment out or remove the api_key line:
+To use managed auth, edit the auth_type in your model config and *remove* the api_key line:
 
 ```yaml
 auth_type: azure_managed_identity # Default auth_type is api_key
-# api_key: ${GRAPHRAG_API_KEY}
 ```
 
 You will also need to login with [az login](https://learn.microsoft.com/en-us/cli/azure/authenticate-azure-cli) and select the subscription with your endpoint.
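As a quick sanity check that the `az login` session can actually mint a token for an Azure OpenAI endpoint, the sketch below uses the azure-identity package and the Cognitive Services token scope. This illustrates what managed-identity auth does under the hood; it is not GraphRAG code and the scope shown is an assumption based on standard Azure OpenAI usage.

```python
# Illustration only (not graphrag code): after `az login`, confirm that
# DefaultAzureCredential can obtain a token for the Cognitive Services
# scope that Azure OpenAI endpoints accept.
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()
token = credential.get_token("https://cognitiveservices.azure.com/.default")
print("Token acquired; expires at (epoch seconds):", token.expires_on)
```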
@ -116,4 +115,4 @@ Please refer to [Query Engine](query/overview.md) docs for detailed information
 - For more details about configuring GraphRAG, see the [configuration documentation](config/overview.md).
 - To learn more about Initialization, refer to the [Initialization documentation](config/init.md).
 - For more details about using the CLI, refer to the [CLI documentation](cli.md).
-- Check out our [visualization guide](visualization_guide.md) for a more interactive experience in debugging and exploring the knowledge graph.
+- Check out our [visualization guide](visualization_guide.md) for a more interactive experience in debugging and exploring the knowledge graph.
graphrag/cache/factory.py (vendored, 14 changed lines)
@ -97,9 +97,19 @@ def create_cosmosdb_cache(**kwargs) -> PipelineCache:
     return JsonPipelineCache(storage)
 
 
+def create_noop_cache(**_kwargs) -> PipelineCache:
+    """Create a no-op cache implementation."""
+    return NoopPipelineCache()
+
+
+def create_memory_cache(**kwargs) -> PipelineCache:
+    """Create a memory cache implementation."""
+    return InMemoryCache(**kwargs)
+
+
 # --- register built-in cache implementations ---
-CacheFactory.register(CacheType.none.value, NoopPipelineCache)
-CacheFactory.register(CacheType.memory.value, InMemoryCache)
+CacheFactory.register(CacheType.none.value, create_noop_cache)
+CacheFactory.register(CacheType.memory.value, create_memory_cache)
 CacheFactory.register(CacheType.file.value, create_file_cache)
 CacheFactory.register(CacheType.blob.value, create_blob_cache)
 CacheFactory.register(CacheType.cosmosdb.value, create_cosmosdb_cache)
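The point of switching from registered classes to registered factory functions is that every entry in the registry now has the same `(**kwargs) -> PipelineCache` call shape, regardless of how the underlying constructor looks. A self-contained sketch of the pattern follows; `Cache`, `MemoryCache`, and this `CacheFactory` are illustrative stand-ins, not graphrag's real types.

```python
# Minimal stand-alone sketch of the factory-function registration pattern.
from typing import Callable


class Cache:
    """Stand-in for the PipelineCache protocol."""


class MemoryCache(Cache):
    def __init__(self, **kwargs: object) -> None:
        self.options = kwargs


class CacheFactory:
    _registry: dict[str, Callable[..., Cache]] = {}

    @classmethod
    def register(cls, key: str, factory: Callable[..., Cache]) -> None:
        # Register a callable, not a class, so every entry is invoked the
        # same way no matter what its constructor signature is.
        cls._registry[key] = factory

    @classmethod
    def create(cls, key: str, **kwargs: object) -> Cache:
        return cls._registry[key](**kwargs)


def create_memory_cache(**kwargs: object) -> Cache:
    """Create a memory cache implementation."""
    return MemoryCache(**kwargs)


CacheFactory.register("memory", create_memory_cache)
cache = CacheFactory.create("memory", max_items=100)
```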
@ -16,7 +16,6 @@ from graphrag.index.operations.summarize_communities.typing import (
     Finding,
     StrategyConfig,
 )
-from graphrag.index.utils.rate_limiter import RateLimiter
 from graphrag.language_model.manager import ModelManager
 from graphrag.language_model.protocol.base import ChatModel
 
@ -51,8 +50,6 @@ async def _run_extractor(
     level: int,
     args: StrategyConfig,
 ) -> CommunityReport | None:
-    # RateLimiter
-    rate_limiter = RateLimiter(rate=1, per=60)
     extractor = CommunityReportsExtractor(
         model,
         extraction_prompt=args.get("extraction_prompt", None),
@ -63,7 +60,6 @@ async def _run_extractor(
     )
 
     try:
-        await rate_limiter.acquire()
         results = await extractor(input)
         report = results.structured_output
         if report is None:
@ -1,40 +0,0 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Rate limiter utility."""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
|
||||
class RateLimiter:
|
||||
"""
|
||||
The original TpmRpmLLMLimiter strategy did not account for minute-based rate limiting when scheduled.
|
||||
|
||||
The RateLimiter was introduced to ensure that the CommunityReportsExtractor could be scheduled to adhere to rate configurations on a per-minute basis.
|
||||
"""
|
||||
|
||||
# TODO: RateLimiter scheduled: using asyncio for async_mode
|
||||
|
||||
def __init__(self, rate: int, per: int):
|
||||
self.rate = rate
|
||||
self.per = per
|
||||
self.allowance = rate
|
||||
self.last_check = time.monotonic()
|
||||
|
||||
async def acquire(self):
|
||||
"""Acquire a token from the rate limiter."""
|
||||
current = time.monotonic()
|
||||
elapsed = current - self.last_check
|
||||
self.last_check = current
|
||||
self.allowance += elapsed * (self.rate / self.per)
|
||||
|
||||
if self.allowance > self.rate:
|
||||
self.allowance = self.rate
|
||||
|
||||
if self.allowance < 1.0:
|
||||
sleep_time = (1.0 - self.allowance) * (self.per / self.rate)
|
||||
await asyncio.sleep(sleep_time)
|
||||
self.allowance = 0.0
|
||||
else:
|
||||
self.allowance -= 1.0
|
||||
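For context on what is being deleted: `acquire()` implements a token bucket that refills at `rate / per` tokens per second and is capped at `rate`, so the hard-coded `RateLimiter(rate=1, per=60)` admitted roughly one community-report call per minute. A usage sketch (not repo code) of how the extractor drove it:

```python
# Sketch (not repo code): driving the removed RateLimiter. With
# rate=1, per=60 the bucket refills one token per minute, so every
# acquire() after the first sleeps for about a minute.
import asyncio


async def main() -> None:
    limiter = RateLimiter(rate=1, per=60)
    for i in range(3):
        await limiter.acquire()  # blocks until a token is available
        print(f"request {i} admitted")


asyncio.run(main())
```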
@ -190,57 +190,58 @@ def update_context_data(
     """
     updated_context_data = {}
     for key in context_data:
+        entries = context_data[key].to_dict(orient="records")
         updated_entry = []
         if key == "reports":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     index_name=links["community_reports"][int(entry["id"])][
                         "index_name"
                     ],
                     index_id=links["community_reports"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "entities":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     entity=entry["entity"].split("-")[0],
                     index_name=links["entities"][int(entry["id"])]["index_name"],
                     index_id=links["entities"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "relationships":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     source=entry["source"].split("-")[0],
                     target=entry["target"].split("-")[0],
                     index_name=links["relationships"][int(entry["id"])]["index_name"],
                     index_id=links["relationships"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "claims":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     entity=entry["entity"].split("-")[0],
                     index_name=links["covariates"][int(entry["id"])]["index_name"],
                     index_id=links["covariates"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         if key == "sources":
             updated_entry = [
                 dict(
-                    {k: entry[k] for k in entry},
+                    entry,
                     index_name=links["text_units"][int(entry["id"])]["index_name"],
                     index_id=links["text_units"][int(entry["id"])]["id"],
                 )
-                for entry in context_data[key]
+                for entry in entries
             ]
         updated_context_data[key] = updated_entry
     return updated_context_data
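The fix hinges on two idioms. `context_data[key]` is evidently a pandas DataFrame here (hence the `to_dict` call), and iterating a DataFrame directly yields its column labels rather than its rows, which is consistent with the "Fix multi-index search" changelog entry above; `to_dict(orient="records")` produces the row dicts the loop expects. Second, `dict(entry, **extra)` merges the link metadata into each record, replacing the verbose `{k: entry[k] for k in entry}` copy. A standalone illustration with hypothetical data:

```python
# Standalone illustration (hypothetical data, not graphrag code) of the
# two idioms in the fix: DataFrame.to_dict(orient="records") turns rows
# into plain dicts, and dict(entry, **extra) merges link metadata in.
import pandas as pd

links = {
    0: {"index_name": "index-a", "id": "10"},
    1: {"index_name": "index-b", "id": "11"},
}
df = pd.DataFrame({"id": ["0", "1"], "entity": ["ACME-0", "ACME-1"]})

entries = df.to_dict(orient="records")
# entries == [{"id": "0", "entity": "ACME-0"}, {"id": "1", "entity": "ACME-1"}]

updated = [
    dict(
        entry,
        entity=entry["entity"].split("-")[0],  # strip the index suffix
        index_name=links[int(entry["id"])]["index_name"],
        index_id=links[int(entry["id"])]["id"],
    )
    for entry in entries
]
print(updated)
# [{'id': '0', 'entity': 'ACME', 'index_name': 'index-a', 'index_id': '10'}, ...]
```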