mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 00:57:23 +08:00
72 lines
2.3 KiB
Python
72 lines
2.3 KiB
Python
# Copyright (c) 2024 Microsoft Corporation.
|
|
# Licensed under the MIT License
|
|
|
|
from graphrag.index.input.input_config import InputConfig
|
|
from graphrag.index.input.input_file_type import InputFileType
|
|
from graphrag.index.input.input_reader_factory import (
|
|
create_input_reader,
|
|
)
|
|
from graphrag_storage import StorageConfig, create_storage
|
|
|
|
|
|
async def test_csv_loader_one_file():
    """Read the ``one-csv`` fixture directory with no title column configured.

    Checks that the loaded result has shape ``(2, 4)`` and that the first
    ``title`` value is ``"input.csv"``.
    """
    storage_config = StorageConfig(
        base_dir="tests/unit/indexing/input/data/one-csv",
    )
    input_config = InputConfig(
        storage=storage_config,
        file_type=InputFileType.Csv,
        file_pattern=".*\\.csv$",
    )
    # The storage backend is created from the settings held on the input config.
    csv_reader = create_input_reader(
        input_config,
        create_storage(input_config.storage),
    )
    loaded = await csv_reader.read_files()

    assert loaded.shape == (2, 4)
    # NOTE(review): presumably the file name serves as the title when no
    # title_column is given -- confirm against the reader implementation.
    assert loaded["title"].iloc[0] == "input.csv"
|
|
|
|
|
|
async def test_csv_loader_one_file_with_title():
    """Read the ``one-csv`` fixture directory with ``title_column="title"``.

    Checks that the loaded result has shape ``(2, 4)`` and that the first
    ``title`` value is ``"Hello"``.
    """
    storage_config = StorageConfig(
        base_dir="tests/unit/indexing/input/data/one-csv",
    )
    input_config = InputConfig(
        storage=storage_config,
        file_type=InputFileType.Csv,
        file_pattern=".*\\.csv$",
        title_column="title",
    )
    # The storage backend is created from the settings held on the input config.
    csv_reader = create_input_reader(
        input_config,
        create_storage(input_config.storage),
    )
    loaded = await csv_reader.read_files()

    assert loaded.shape == (2, 4)
    # NOTE(review): "Hello" is presumably the first row's value in the CSV's
    # "title" column -- verify against the fixture file.
    assert loaded["title"].iloc[0] == "Hello"
|
|
|
|
|
|
async def test_csv_loader_one_file_with_metadata():
    """Read the ``one-csv`` fixture with a title column and ``metadata=["title"]``.

    Checks that the loaded result has shape ``(2, 5)`` and that the
    ``metadata`` entry at label ``0`` equals ``{"title": "Hello"}``.
    """
    storage_config = StorageConfig(
        base_dir="tests/unit/indexing/input/data/one-csv",
    )
    input_config = InputConfig(
        storage=storage_config,
        file_type=InputFileType.Csv,
        file_pattern=".*\\.csv$",
        title_column="title",
        metadata=["title"],
    )
    # The storage backend is created from the settings held on the input config.
    csv_reader = create_input_reader(
        input_config,
        create_storage(input_config.storage),
    )
    loaded = await csv_reader.read_files()

    # One column more than the tests that do not request metadata.
    assert loaded.shape == (2, 5)
    # Label-based lookup (not .iloc) is kept exactly as the test was written.
    assert loaded["metadata"][0] == {"title": "Hello"}
|
|
|
|
|
|
async def test_csv_loader_multiple_files():
    """Read the ``multiple-csvs`` fixture directory.

    Checks that the loaded result has shape ``(4, 4)``.
    """
    storage_config = StorageConfig(
        base_dir="tests/unit/indexing/input/data/multiple-csvs",
    )
    input_config = InputConfig(
        storage=storage_config,
        file_type=InputFileType.Csv,
        file_pattern=".*\\.csv$",
    )
    # The storage backend is created from the settings held on the input config.
    csv_reader = create_input_reader(
        input_config,
        create_storage(input_config.storage),
    )
    loaded = await csv_reader.read_files()

    assert loaded.shape == (4, 4)
|