mirror of
https://github.com/microsoft/graphrag.git
synced 2026-02-04 18:22:44 +08:00
Some checks are pending
Python CI / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python CI / python-ci (ubuntu-latest, 3.11) (push) Waiting to run
Python CI / python-ci (windows-latest, 3.10) (push) Waiting to run
Python CI / python-ci (windows-latest, 3.11) (push) Waiting to run
Python Integration Tests / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python Integration Tests / python-ci (windows-latest, 3.10) (push) Waiting to run
Python Notebook Tests / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python Notebook Tests / python-ci (windows-latest, 3.10) (push) Waiting to run
Python Smoke Tests / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python Smoke Tests / python-ci (windows-latest, 3.10) (push) Waiting to run
* Simplify Factory interface * Migrate CacheFactory to standard base class * Migrate LoggerFactory to standard base class * Migrate StorageFactory to standard base class * Migrate VectorStoreFactory to standard base class * Update vector store example notebook * Delete notebook outputs * Move default providers into factories * Move retry/limit tests into integ * Split language model factories * Set smoke test tpm/rpm * Fix factory integ tests * Add method to smoke test, switch text to 'fast' * Fix text smoke config for fast workflow * Add new workflows to text smoke test * Convert input readers to a proper factory * Remove covariates from fast smoke test * Update docs for input factory * Bump smoke runtime * Even longer runtime * min-csv timeout * Remove unnecessary lambdas
84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
# Copyright (c) 2024 Microsoft Corporation.
|
|
# Licensed under the MIT License
|
|
|
|
from graphrag.config.enums import InputFileType
|
|
from graphrag.config.models.input_config import InputConfig
|
|
from graphrag.config.models.storage_config import StorageConfig
|
|
from graphrag.index.input.factory import InputReaderFactory
|
|
from graphrag.utils.api import create_storage_from_config
|
|
|
|
|
|
async def test_csv_loader_one_file():
    """Read the ``one-csv`` fixture directory through the CSV input reader.

    Builds an ``InputConfig`` for CSV files matching ``.*\\.csv$``, creates the
    storage and reader from it, and checks the shape and first title of the
    table returned by ``read_files()``.
    """
    input_config = InputConfig(
        storage=StorageConfig(
            base_dir="tests/unit/indexing/input/data/one-csv",
        ),
        file_type=InputFileType.csv,
        file_pattern=".*\\.csv$",
    )
    input_storage = create_storage_from_config(input_config.storage)

    # Resolve the reader for the configured file type, then load the files.
    csv_reader = InputReaderFactory().create(
        input_config.file_type,
        {"storage": input_storage, "config": input_config},
    )
    loaded = await csv_reader.read_files()

    assert loaded.shape == (2, 4)
    # No title_column is configured; the expected title is "input.csv"
    # (presumably the source file name -- confirm against the reader).
    assert loaded["title"].iloc[0] == "input.csv"
|
|
|
|
|
|
async def test_csv_loader_one_file_with_title():
    """Read the ``one-csv`` fixture with ``title_column="title"`` configured.

    Same setup as the plain one-file case, but the config names a title
    column; the first row's title is then expected to be ``"Hello"``.
    """
    fixture_dir = StorageConfig(
        base_dir="tests/unit/indexing/input/data/one-csv",
    )
    input_config = InputConfig(
        storage=fixture_dir,
        file_type=InputFileType.csv,
        file_pattern=".*\\.csv$",
        title_column="title",
    )
    input_storage = create_storage_from_config(input_config.storage)

    reader_factory = InputReaderFactory()
    csv_reader = reader_factory.create(
        input_config.file_type,
        {"storage": input_storage, "config": input_config},
    )
    loaded = await csv_reader.read_files()

    assert loaded.shape == (2, 4)
    first_title = loaded["title"].iloc[0]
    assert first_title == "Hello"
|
|
|
|
|
|
async def test_csv_loader_one_file_with_metadata():
    """Read the ``one-csv`` fixture with a title column and metadata columns.

    The config sets ``title_column="title"`` and ``metadata=["title"]``. The
    loaded table is expected to have one more column than the non-metadata
    cases (shape ``(2, 5)``), and the first row's ``metadata`` value is
    expected to equal ``{"title": "Hello"}``.
    """
    config = InputConfig(
        storage=StorageConfig(
            base_dir="tests/unit/indexing/input/data/one-csv",
        ),
        file_type=InputFileType.csv,
        file_pattern=".*\\.csv$",
        title_column="title",
        metadata=["title"],
    )
    storage = create_storage_from_config(config.storage)
    documents = (
        await InputReaderFactory()
        .create(config.file_type, {"storage": storage, "config": config})
        .read_files()
    )
    assert documents.shape == (2, 5)
    # Use positional access, matching the sibling tests: ``[0]`` on a column is
    # a label lookup and only works if the index happens to contain label 0.
    assert documents["metadata"].iloc[0] == {"title": "Hello"}
|
|
|
|
|
|
async def test_csv_loader_multiple_files():
    """Read the ``multiple-csvs`` fixture directory through the CSV reader.

    Every file matching ``.*\\.csv$`` under the fixture directory is loaded;
    the resulting table is expected to have shape ``(4, 4)``.
    """
    input_config = InputConfig(
        storage=StorageConfig(
            base_dir="tests/unit/indexing/input/data/multiple-csvs",
        ),
        file_type=InputFileType.csv,
        file_pattern=".*\\.csv$",
    )
    input_storage = create_storage_from_config(input_config.storage)

    csv_reader = InputReaderFactory().create(
        input_config.file_type,
        {"storage": input_storage, "config": input_config},
    )
    loaded = await csv_reader.read_files()

    expected_shape = (4, 4)
    assert loaded.shape == expected_shape
|