// dify/web/app/components/datasets/create/step-two/index.spec.tsx
// 2026-01-09 10:21:18 +08:00
//
// 2198 lines
// 70 KiB
// TypeScript

import type { Model } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { DataSourceProvider, NotionPage } from '@/models/common'
import type {
CrawlOptions,
CrawlResultItem,
CustomFile,
FileIndexingEstimateResponse,
FullDocumentDetail,
PreProcessingRule,
Rules,
} from '@/models/datasets'
import type { RetrievalConfig } from '@/types/app'
import { act, fireEvent, render, renderHook, screen } from '@testing-library/react'
import { ConfigurationMethodEnum, ModelStatusEnum, ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { ChunkingMode, DataSourceType, ProcessMode } from '@/models/datasets'
import { RETRIEVE_METHOD } from '@/types/app'
import { PreviewPanel } from './components/preview-panel'
import { StepTwoFooter } from './components/step-two-footer'
import {
DEFAULT_MAXIMUM_CHUNK_LENGTH,
DEFAULT_OVERLAP,
DEFAULT_SEGMENT_IDENTIFIER,
defaultParentChildConfig,
IndexingType,
useDocumentCreation,
useIndexingConfig,
useIndexingEstimate,
usePreviewState,
useSegmentationState,
} from './hooks'
import escape from './hooks/escape'
import unescape from './hooks/unescape'
// ============================================
// Mock external dependencies
// ============================================
// Mock dataset detail context
// Baseline dataset fixture. `typeof mockDataset` is reused further down to type
// `mockCurrentDataset` and the selector state of the mocked context hook.
const mockDataset = {
id: 'test-dataset-id',
doc_form: ChunkingMode.text,
data_source_type: DataSourceType.FILE,
embedding_model: 'text-embedding-ada-002',
embedding_model_provider: 'openai',
// Retrieval settings: semantic search, reranking off, top 3 results, score threshold off.
retrieval_model_dict: {
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: false,
reranking_model: { reranking_provider_name: '', reranking_model_name: '' },
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
} as RetrievalConfig,
}
// Dataset handed to selectors by the mocked context; starts out as `null` and is
// declared with `let` so it can be reassigned.
let mockCurrentDataset: typeof mockDataset | null = null
// Spy exposed to selectors as `mutateDatasetRes`.
const mockMutateDatasetRes = vi.fn()
vi.mock('@/context/dataset-detail', () => {
  type MockDatasetDetailState = {
    dataset: typeof mockDataset | null
    mutateDatasetRes: () => void
  }
  // The state object is rebuilt on every call, so the current value of
  // `mockCurrentDataset` is always the one the selector sees.
  function useDatasetDetailContextWithSelector(selector: (state: MockDatasetDetailState) => unknown) {
    const state: MockDatasetDetailState = {
      dataset: mockCurrentDataset,
      mutateDatasetRes: mockMutateDatasetRes,
    }
    return selector(state)
  }
  return { useDatasetDetailContextWithSelector }
})
// Note: @/context/i18n is globally mocked in vitest.setup.ts, no need to mock here
// Note: @/hooks/use-breakpoints uses real import
// Mock model hooks
// Embedding model list returned as `data` by the mocked `useModelList` hook.
const mockEmbeddingModelList = [
{ provider: 'openai', model: 'text-embedding-ada-002' },
{ provider: 'cohere', model: 'embed-english-v3.0' },
]
// Default embedding model returned as `data` by the mocked `useDefaultModel` hook.
const mockDefaultEmbeddingModel = { provider: { provider: 'openai' }, model: 'text-embedding-ada-002' }
// Model[] type structure for rerank model list (simplified mock)
// A single active Cohere provider exposing one active rerank model; returned as
// `modelList` by the mocked `useModelListAndDefaultModelAndCurrentProviderAndModel` hook.
const mockRerankModelList: Model[] = [{
provider: 'cohere',
icon_small: { en_US: 'cohere-icon', zh_Hans: 'cohere-icon' },
label: { en_US: 'Cohere', zh_Hans: 'Cohere' },
models: [{
model: 'rerank-english-v3.0',
label: { en_US: 'Rerank English v3.0', zh_Hans: 'Rerank English v3.0' },
model_type: ModelTypeEnum.rerank,
features: [],
fetch_from: ConfigurationMethodEnum.predefinedModel,
status: ModelStatusEnum.active,
model_properties: {},
load_balancing_enabled: false,
}],
status: ModelStatusEnum.active,
}]
// Default rerank model reported by the mocked model-provider hook.
const mockRerankDefaultModel = { provider: { provider: 'cohere' }, model: 'rerank-english-v3.0' }
// Value the mocked hook reports as `currentModel`; suites reset it to `true` in `beforeEach`.
let mockIsRerankDefaultModelValid = true
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => {
  // All values are read at call time so per-test changes to the flag are honoured.
  const useModelListAndDefaultModelAndCurrentProviderAndModel = () => {
    return {
      modelList: mockRerankModelList,
      defaultModel: mockRerankDefaultModel,
      currentModel: mockIsRerankDefaultModelValid,
    }
  }
  const useModelList = () => {
    return { data: mockEmbeddingModelList }
  }
  const useDefaultModel = () => {
    return { data: mockDefaultEmbeddingModel }
  }
  return {
    useModelListAndDefaultModelAndCurrentProviderAndModel,
    useModelList,
    useDefaultModel,
  }
})
// Mock service hooks
// Spy that records the URL passed to the mocked default-process-rule mutation.
const mockFetchDefaultProcessRuleMutate = vi.fn()
vi.mock('@/service/knowledge/use-create-dataset', () => {
  type DefaultProcessRulePayload = {
    rules: Rules
    limits: { indexing_max_segmentation_tokens_length: number }
  }
  type MutateOptions = { onSuccess?: (data: unknown) => void }

  // Shape shared by the three indexing-estimate hooks: idle, no data, fresh spies per call.
  const createIdleEstimateMutation = () => ({
    mutate: vi.fn(),
    data: undefined,
    isIdle: true,
    isPending: false,
    reset: vi.fn(),
  })

  // Builds a mutation whose `mutateAsync` creates a new payload on each call,
  // reports it through `onSuccess` (when given) and resolves with it.
  const createResolvedMutation = (buildPayload: () => unknown) => ({
    mutateAsync: vi.fn().mockImplementation(async (_params: unknown, options?: MutateOptions) => {
      const payload = buildPayload()
      options?.onSuccess?.(payload)
      return payload
    }),
    isPending: false,
  })

  // Records the requested URL, then synchronously delivers a canned rule set to `onSuccess`.
  const useFetchDefaultProcessRule = ({ onSuccess }: { onSuccess: (data: DefaultProcessRulePayload) => void }) => {
    const mutate = (url: string) => {
      mockFetchDefaultProcessRuleMutate(url)
      const payload: DefaultProcessRulePayload = {
        rules: {
          segmentation: { separator: '\\n', max_tokens: 500, chunk_overlap: 50 },
          pre_processing_rules: [
            { id: 'remove_extra_spaces', enabled: true },
            { id: 'remove_urls_emails', enabled: false },
          ],
          parent_mode: 'paragraph',
          subchunk_segmentation: { separator: '\\n', max_tokens: 256 },
        },
        limits: { indexing_max_segmentation_tokens_length: 4000 },
      }
      onSuccess(payload)
    }
    return { mutate, isPending: false }
  }

  return {
    useFetchDefaultProcessRule,
    useFetchFileIndexingEstimateForFile: () => createIdleEstimateMutation(),
    useFetchFileIndexingEstimateForNotion: () => createIdleEstimateMutation(),
    useFetchFileIndexingEstimateForWeb: () => createIdleEstimateMutation(),
    useCreateFirstDocument: () => createResolvedMutation(() => ({ dataset: { id: 'new-dataset-id' } })),
    useCreateDocument: () => createResolvedMutation(() => ({ document: { id: 'new-doc-id' } })),
    getNotionInfo: vi.fn().mockReturnValue([{ workspace_id: 'ws-1', pages: [{ page_id: 'page-1' }] }]),
    getWebsiteInfo: vi.fn().mockReturnValue({ provider: 'jinaReader', job_id: 'job-123', urls: ['https://test.com'] }),
  }
})
vi.mock('@/service/knowledge/use-dataset', () => {
  // Each hook call hands back a fresh no-op spy as the invalidation callback.
  const useInvalidDatasetList = () => {
    return vi.fn()
  }
  return { useInvalidDatasetList }
})
// Mock amplitude tracking (external service)
vi.mock('@/app/components/base/amplitude', () => {
  // Replaced with a spy so these tests never call the real tracker.
  const trackEvent = vi.fn()
  return { trackEvent }
})
// Note: @/app/components/base/toast - uses real import (base component)
// Note: @/app/components/datasets/common/check-rerank-model - uses real import
// Note: @/app/components/base/float-right-container - uses real import (base component)
// Mock checkShowMultiModalTip - requires complex model list structure
vi.mock('@/app/components/datasets/settings/utils', () => {
  // Always answers `false`, whatever arguments it receives.
  const checkShowMultiModalTip = () => {
    return false
  }
  return { checkShowMultiModalTip }
})
// ============================================
// Test data factories
// ============================================
/**
 * Builds a `CustomFile` fixture describing a 1 KiB PDF.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns The merged object, cast to `CustomFile`.
 */
const createMockFile = (overrides?: Partial<CustomFile>): CustomFile => {
  const defaults = {
    id: 'file-1',
    name: 'test-file.pdf',
    extension: 'pdf',
    size: 1024,
    type: 'application/pdf',
    // Evaluated on every call, so each fixture carries its creation timestamp.
    lastModified: Date.now(),
  }
  return { ...defaults, ...overrides } as CustomFile
}
/**
 * Builds a `NotionPage` fixture.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns The merged object, cast to `NotionPage`.
 */
const createMockNotionPage = (overrides?: Partial<NotionPage>): NotionPage => {
  const defaults = {
    page_id: 'notion-page-1',
    page_name: 'Test Notion Page',
    page_icon: null,
    type: 'page',
  }
  return { ...defaults, ...overrides } as NotionPage
}
/**
 * Builds a `CrawlResultItem` fixture for a crawled website page.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns The merged object, cast to `CrawlResultItem`.
 */
const createMockWebsitePage = (overrides?: Partial<CrawlResultItem>): CrawlResultItem => {
  const defaults = {
    source_url: 'https://example.com/page1',
    title: 'Test Website Page',
    description: 'Test description',
    markdown: '# Test Content',
  }
  return { ...defaults, ...overrides } as CrawlResultItem
}
/**
 * Builds a `FullDocumentDetail` fixture: a text-mode document with a file, a
 * Notion page, a website page and a general-mode process rule attached.
 *
 * @param overrides - Top-level fields that replace the defaults (shallow merge).
 * @returns The merged object, cast to `FullDocumentDetail`.
 */
const createMockDocumentDetail = (overrides?: Partial<FullDocumentDetail>): FullDocumentDetail => {
  const defaults = {
    id: 'doc-1',
    doc_form: ChunkingMode.text,
    doc_language: 'English',
    file: { id: 'file-1', name: 'test.pdf', extension: 'pdf' },
    notion_page: createMockNotionPage(),
    website_page: createMockWebsitePage(),
    dataset_process_rule: {
      mode: ProcessMode.general,
      rules: {
        segmentation: { separator: '\\n\\n', max_tokens: 1024, chunk_overlap: 50 },
        pre_processing_rules: [{ id: 'remove_extra_spaces', enabled: true }],
      },
    },
  }
  return { ...defaults, ...overrides } as FullDocumentDetail
}
/**
 * Builds a `Rules` fixture with paragraph parent mode and two pre-processing rules.
 *
 * @param overrides - Top-level fields that replace the defaults (shallow merge,
 *   so a nested object such as `segmentation` is replaced as a whole).
 * @returns The merged rules.
 */
const createMockRules = (overrides?: Partial<Rules>): Rules => {
  const defaults: Rules = {
    segmentation: { separator: '\\n\\n', max_tokens: 1024, chunk_overlap: 50 },
    pre_processing_rules: [
      { id: 'remove_extra_spaces', enabled: true },
      { id: 'remove_urls_emails', enabled: false },
    ],
    parent_mode: 'paragraph',
    subchunk_segmentation: { separator: '\\n', max_tokens: 512 },
  }
  return { ...defaults, ...overrides }
}
/**
 * Builds a `FileIndexingEstimateResponse` fixture with one QA pair and one
 * preview chunk carrying two child chunks.
 *
 * @param overrides - Top-level fields that replace the defaults (shallow merge).
 * @returns The merged estimate.
 */
const createMockEstimate = (overrides?: Partial<FileIndexingEstimateResponse>): FileIndexingEstimateResponse => {
  const defaults: FileIndexingEstimateResponse = {
    total_segments: 10,
    total_nodes: 10,
    tokens: 5000,
    total_price: 0.01,
    currency: 'USD',
    qa_preview: [{ question: 'Q1', answer: 'A1' }],
    preview: [{ content: 'Chunk 1 content', child_chunks: ['Child 1', 'Child 2'] }],
  }
  return { ...defaults, ...overrides }
}
// ============================================
// Utility Functions Tests (escape/unescape)
// ============================================
describe('escape utility', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Asserts `escape(input)` equals `expected` for each [input, expected] pair, in order.
  const expectEscaped = (cases: Array<[unknown, string]>) => {
    for (const [input, expected] of cases)
      expect(escape(input as string)).toBe(expected)
  }

  // Control characters and single quotes become their backslash form; anything
  // that is not a non-empty string yields ''.
  describe('escape function', () => {
    it('should return empty string for null/undefined input', () => {
      expectEscaped([
        [null, ''],
        [undefined, ''],
        ['', ''],
      ])
    })
    it('should escape newline characters', () => {
      expectEscaped([
        ['\n', '\\n'],
        ['\r', '\\r'],
        ['\n\r', '\\n\\r'],
      ])
    })
    it('should escape tab characters', () => {
      expectEscaped([['\t', '\\t']])
    })
    it('should escape other special characters', () => {
      expectEscaped([
        ['\0', '\\0'],
        ['\b', '\\b'],
        ['\f', '\\f'],
        ['\v', '\\v'],
      ])
    })
    it('should escape single quotes', () => {
      expectEscaped([['\'', '\\\'']])
    })
    it('should handle mixed content', () => {
      expectEscaped([['Hello\nWorld\t!', 'Hello\\nWorld\\t!']])
    })
    it('should not escape regular characters', () => {
      expectEscaped([
        ['Hello World', 'Hello World'],
        ['abc123', 'abc123'],
      ])
    })
    it('should return empty string for non-string input', () => {
      expectEscaped([
        [123, ''],
        [{}, ''],
      ])
    })
  })
})
describe('unescape utility', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Asserts `unescape(input)` equals `expected` for each [input, expected] pair, in order.
  const expectUnescaped = (cases: Array<[string, string]>) => {
    for (const [input, expected] of cases)
      expect(unescape(input)).toBe(expected)
  }

  // Backslash sequences (named, hex, octal, unicode) are turned back into the
  // characters they denote; plain text passes through untouched.
  describe('unescape function', () => {
    it('should unescape newline characters', () => {
      expectUnescaped([
        ['\\n', '\n'],
        ['\\r', '\r'],
      ])
    })
    it('should unescape tab characters', () => {
      expectUnescaped([['\\t', '\t']])
    })
    it('should unescape other special characters', () => {
      expectUnescaped([
        ['\\0', '\0'],
        ['\\b', '\b'],
        ['\\f', '\f'],
        ['\\v', '\v'],
      ])
    })
    it('should unescape single and double quotes', () => {
      expectUnescaped([
        ['\\\'', '\''],
        ['\\"', '"'],
      ])
    })
    it('should unescape backslash', () => {
      expectUnescaped([['\\\\', '\\']])
    })
    it('should unescape hex sequences', () => {
      expectUnescaped([
        ['\\x41', 'A'], // 0x41 = 65 = 'A'
        ['\\x5A', 'Z'], // 0x5A = 90 = 'Z'
      ])
    })
    it('should unescape short hex (2-digit) sequences', () => {
      // Short hex format: \xNN (2 hexadecimal digits)
      expectUnescaped([
        ['\\xA5', '¥'], // Yen sign
        ['\\x7F', '\x7F'], // Delete character
        ['\\x00', '\x00'], // Null character via hex
      ])
    })
    it('should unescape octal sequences', () => {
      expectUnescaped([
        ['\\101', 'A'], // Octal 101 = 65 = 'A'
        ['\\132', 'Z'], // Octal 132 = 90 = 'Z'
        ['\\7', '\x07'], // Single digit octal
      ])
    })
    it('should unescape unicode sequences', () => {
      expectUnescaped([
        ['\\u0041', 'A'],
        ['\\u{41}', 'A'],
      ])
    })
    it('should unescape Python-style unicode', () => {
      expectUnescaped([['\\U00000041', 'A']])
    })
    it('should handle mixed content', () => {
      expectUnescaped([['Hello\\nWorld\\t!', 'Hello\nWorld\t!']])
    })
    it('should not modify regular text', () => {
      expectUnescaped([['Hello World', 'Hello World']])
    })
  })
})
// ============================================
// useSegmentationState Hook Tests
// ============================================
describe('useSegmentationState', () => {
beforeEach(() => {
vi.clearAllMocks()
})
// Initial state: values exposed before any setter has been called
describe('Initial State', () => {
  it('should initialize with default values', () => {
    const { result: hook } = renderHook(() => useSegmentationState())
    const {
      segmentationType,
      segmentIdentifier,
      maxChunkLength,
      overlap,
      rules,
      parentChildConfig,
    } = hook.current
    expect(segmentationType).toBe(ProcessMode.general)
    expect(segmentIdentifier).toBe(DEFAULT_SEGMENT_IDENTIFIER)
    expect(maxChunkLength).toBe(DEFAULT_MAXIMUM_CHUNK_LENGTH)
    expect(overlap).toBe(DEFAULT_OVERLAP)
    expect(rules).toEqual([])
    expect(parentChildConfig).toEqual(defaultParentChildConfig)
  })
  it('should initialize with custom segmentation type', () => {
    const { result: hook } = renderHook(() =>
      useSegmentationState({ initialSegmentationType: ProcessMode.parentChild }),
    )
    const { segmentationType } = hook.current
    expect(segmentationType).toBe(ProcessMode.parentChild)
  })
})
// Plain setters: the new value must be visible on the hook after the update
describe('State Management', () => {
  // Mounts the hook with no options and returns its live `result` ref.
  const mountHook = () => renderHook(() => useSegmentationState()).result

  it('should update segmentation type', () => {
    const hook = mountHook()
    act(() => {
      hook.current.setSegmentationType(ProcessMode.parentChild)
    })
    expect(hook.current.segmentationType).toBe(ProcessMode.parentChild)
  })
  it('should update max chunk length', () => {
    const hook = mountHook()
    act(() => {
      hook.current.setMaxChunkLength(2048)
    })
    expect(hook.current.maxChunkLength).toBe(2048)
  })
  it('should update overlap', () => {
    const hook = mountHook()
    act(() => {
      hook.current.setOverlap(100)
    })
    expect(hook.current.overlap).toBe(100)
  })
  it('should update rules', () => {
    const hook = mountHook()
    const nextRules: PreProcessingRule[] = [{ id: 'test', enabled: true }]
    act(() => {
      hook.current.setRules(nextRules)
    })
    expect(hook.current.rules).toEqual(nextRules)
  })
})
// setSegmentIdentifier: stores the escaped form and falls back to the default on empty input
describe('setSegmentIdentifier', () => {
  type SetIdentifierArgs = Parameters<ReturnType<typeof useSegmentationState>['setSegmentIdentifier']>

  // Mounts the hook, forwards `args` unchanged to the setter and returns the stored identifier.
  const identifierAfter = (...args: SetIdentifierArgs) => {
    const hook = renderHook(() => useSegmentationState()).result
    act(() => {
      hook.current.setSegmentIdentifier(...args)
    })
    return hook.current.segmentIdentifier
  }

  it('should escape special characters', () => {
    expect(identifierAfter('\n\n')).toBe('\\n\\n')
  })
  it('should use default when empty and canEmpty is false', () => {
    expect(identifierAfter('')).toBe(DEFAULT_SEGMENT_IDENTIFIER)
  })
  it('should allow empty when canEmpty is true', () => {
    expect(identifierAfter('', true)).toBe('')
  })
})
// toggleRule: flips exactly the addressed rule
describe('toggleRule', () => {
  // Mounts the hook and seeds it with rule1 (enabled) and rule2 (disabled).
  const mountWithTwoRules = () => {
    const hook = renderHook(() => useSegmentationState()).result
    act(() => {
      hook.current.setRules([
        { id: 'rule1', enabled: true },
        { id: 'rule2', enabled: false },
      ])
    })
    return hook
  }
  // Current `enabled` flag of the rule with the given id (undefined when absent).
  const enabledOf = (hook: ReturnType<typeof mountWithTwoRules>, ruleId: string) =>
    hook.current.rules.find(rule => rule.id === ruleId)?.enabled

  it('should toggle rule enabled state', () => {
    const hook = mountWithTwoRules()
    act(() => {
      hook.current.toggleRule('rule1')
    })
    expect(enabledOf(hook, 'rule1')).toBe(false)
    expect(enabledOf(hook, 'rule2')).toBe(false)
  })
  it('should not affect other rules', () => {
    const hook = mountWithTwoRules()
    act(() => {
      hook.current.toggleRule('rule2')
    })
    expect(enabledOf(hook, 'rule1')).toBe(true)
    expect(enabledOf(hook, 'rule2')).toBe(true)
  })
})
// Parent-child config: delimiters are stored escaped, lengths and context mode as given
describe('Parent-Child Configuration', () => {
  // Mounts the hook with no options and returns its live `result` ref.
  const mountHook = () => renderHook(() => useSegmentationState()).result

  it('should update parent config delimiter with truthy value', () => {
    const hook = mountHook()
    act(() => {
      hook.current.updateParentConfig('delimiter', '\n\n\n')
    })
    const { parent } = hook.current.parentChildConfig
    expect(parent.delimiter).toBe('\\n\\n\\n')
  })
  it('should update parent config delimiter with empty value', () => {
    const hook = mountHook()
    act(() => {
      hook.current.updateParentConfig('delimiter', '')
    })
    const { parent } = hook.current.parentChildConfig
    expect(parent.delimiter).toBe('')
  })
  it('should update parent config maxLength', () => {
    const hook = mountHook()
    act(() => {
      hook.current.updateParentConfig('maxLength', 2048)
    })
    const { parent } = hook.current.parentChildConfig
    expect(parent.maxLength).toBe(2048)
  })
  it('should update child config delimiter with truthy value', () => {
    const hook = mountHook()
    act(() => {
      hook.current.updateChildConfig('delimiter', '\n')
    })
    const { child } = hook.current.parentChildConfig
    expect(child.delimiter).toBe('\\n')
  })
  it('should update child config delimiter with empty value', () => {
    const hook = mountHook()
    act(() => {
      hook.current.updateChildConfig('delimiter', '')
    })
    const { child } = hook.current.parentChildConfig
    expect(child.delimiter).toBe('')
  })
  it('should update child config maxLength', () => {
    const hook = mountHook()
    act(() => {
      hook.current.updateChildConfig('maxLength', 256)
    })
    const { child } = hook.current.parentChildConfig
    expect(child.maxLength).toBe(256)
  })
  it('should set chunk for context mode', () => {
    const hook = mountHook()
    act(() => {
      hook.current.setChunkForContext('full-doc')
    })
    const { chunkForContext } = hook.current.parentChildConfig
    expect(chunkForContext).toBe('full-doc')
  })
})
// resetToDefaults: restores values from the stored default config, if one was set
describe('resetToDefaults', () => {
  // Mounts the hook with no options and returns its live `result` ref.
  const mountHook = () => renderHook(() => useSegmentationState()).result

  it('should reset to default config when available', () => {
    const hook = mountHook()
    // Move away from the defaults and register a default config (1024 / 50 via createMockRules)
    act(() => {
      hook.current.setMaxChunkLength(2048)
      hook.current.setOverlap(100)
      hook.current.setDefaultConfig(createMockRules())
    })
    act(() => {
      hook.current.resetToDefaults()
    })
    // Values now come from the registered default config
    const { maxChunkLength, overlap, parentChildConfig } = hook.current
    expect(maxChunkLength).toBe(1024)
    expect(overlap).toBe(50)
    expect(parentChildConfig).toEqual(defaultParentChildConfig)
  })
  it('should only reset parentChildConfig when no default config', () => {
    const hook = mountHook()
    // Move away from the defaults, but never register a default config
    act(() => {
      hook.current.setMaxChunkLength(2048)
      hook.current.setOverlap(100)
      hook.current.setChunkForContext('full-doc')
    })
    act(() => {
      hook.current.resetToDefaults()
    })
    const { maxChunkLength, overlap, parentChildConfig } = hook.current
    // Nothing to restore from, so the scalar values are left alone
    expect(maxChunkLength).toBe(2048)
    expect(overlap).toBe(100)
    // parentChildConfig goes back to its default regardless
    expect(parentChildConfig).toEqual(defaultParentChildConfig)
  })
})
// applyConfigFromRules: copies a Rules object into the general or parent-child state
describe('applyConfigFromRules', () => {
  // Mounts the hook, applies `rules` with the given second argument and returns the resulting state.
  const stateAfterApplying = (rules: Rules, isHierarchical: boolean) => {
    const hook = renderHook(() => useSegmentationState()).result
    act(() => {
      hook.current.applyConfigFromRules(rules, isHierarchical)
    })
    return hook.current
  }

  it('should apply general config from rules', () => {
    const state = stateAfterApplying(
      createMockRules({
        segmentation: { separator: '---', max_tokens: 512, chunk_overlap: 25 },
      }),
      false,
    )
    expect(state.maxChunkLength).toBe(512)
    expect(state.overlap).toBe(25)
  })
  it('should apply hierarchical config from rules', () => {
    const state = stateAfterApplying(
      createMockRules({
        parent_mode: 'paragraph',
        subchunk_segmentation: { separator: '\n', max_tokens: 256 },
      }),
      true,
    )
    const { chunkForContext, child } = state.parentChildConfig
    expect(chunkForContext).toBe('paragraph')
    expect(child.maxLength).toBe(256)
  })
  it('should apply full hierarchical parent-child config from rules', () => {
    const state = stateAfterApplying(
      createMockRules({
        segmentation: { separator: '\n\n', max_tokens: 1024, chunk_overlap: 50 },
        parent_mode: 'full-doc',
        subchunk_segmentation: { separator: '\n', max_tokens: 128 },
      }),
      true,
    )
    const { parent, child, chunkForContext } = state.parentChildConfig
    // Parent settings are taken from `segmentation`
    expect(parent.delimiter).toBe('\\n\\n')
    expect(parent.maxLength).toBe(1024)
    // Child settings are taken from `subchunk_segmentation`
    expect(child.delimiter).toBe('\\n')
    expect(child.maxLength).toBe(128)
    // Context mode is taken from `parent_mode`
    expect(chunkForContext).toBe('full-doc')
  })
})
// getProcessRule: builds the process-rule payload for a given chunking mode
describe('getProcessRule', () => {
  // Mounts the hook with defaults and returns the process rule for `mode`.
  const processRuleFor = (mode: ChunkingMode) => {
    const hook = renderHook(() => useSegmentationState()).result
    return hook.current.getProcessRule(mode)
  }

  it('should return general process rule', () => {
    const { mode, rules } = processRuleFor(ChunkingMode.text)
    expect(mode).toBe(ProcessMode.general)
    expect(rules.segmentation.max_tokens).toBe(DEFAULT_MAXIMUM_CHUNK_LENGTH)
  })
  it('should return hierarchical process rule for parent-child', () => {
    const { mode, rules } = processRuleFor(ChunkingMode.parentChild)
    expect(mode).toBe('hierarchical')
    expect(rules.parent_mode).toBe('paragraph')
    expect(rules.subchunk_segmentation).toBeDefined()
  })
})
})
// ============================================
// useIndexingConfig Hook Tests
// ============================================
describe('useIndexingConfig', () => {
beforeEach(() => {
vi.clearAllMocks()
mockIsRerankDefaultModelValid = true
})
// Tests for initial state
// Note: the hook syncs state in a useEffect, so each assertion is retried via vi.waitFor
describe('Initial State', () => {
  // Retries until the hook's `indexType` equals `expected`.
  const expectIndexTypeToSettle = (hook: { current: { indexType: unknown } }, expected: IndexingType) =>
    vi.waitFor(() => {
      expect(hook.current.indexType).toBe(expected)
    })

  it('should initialize with QUALIFIED when API key is set', async () => {
    const { result: hook } = renderHook(() =>
      useIndexingConfig({ isAPIKeySet: true, hasSetIndexType: false }),
    )
    await expectIndexTypeToSettle(hook, IndexingType.QUALIFIED)
  })
  it('should initialize with ECONOMICAL when API key is not set', async () => {
    const { result: hook } = renderHook(() =>
      useIndexingConfig({ isAPIKeySet: false, hasSetIndexType: false }),
    )
    await expectIndexTypeToSettle(hook, IndexingType.ECONOMICAL)
  })
  it('should use initial index type when provided', async () => {
    const { result: hook } = renderHook(() =>
      useIndexingConfig({
        isAPIKeySet: false,
        hasSetIndexType: true,
        initialIndexType: IndexingType.QUALIFIED,
      }),
    )
    await expectIndexTypeToSettle(hook, IndexingType.QUALIFIED)
  })
})
// Tests for state setters
describe('State Management', () => {
  it('should update index type', async () => {
    const { result } = renderHook(() =>
      useIndexingConfig({ isAPIKeySet: true, hasSetIndexType: false }),
    )
    // Wait for the concrete settled value instead of `toBeDefined()`: a defined-check
    // passes on the first poll and proves nothing about the sync effect. QUALIFIED is the
    // value the Initial State test asserts for these same options.
    await vi.waitFor(() => {
      expect(result.current.indexType).toBe(IndexingType.QUALIFIED)
    })
    act(() => {
      result.current.setIndexType(IndexingType.ECONOMICAL)
    })
    // The explicit setter must win over the synced initial value
    expect(result.current.indexType).toBe(IndexingType.ECONOMICAL)
  })
  it('should update embedding model', async () => {
    const { result } = renderHook(() =>
      useIndexingConfig({ isAPIKeySet: true, hasSetIndexType: false }),
    )
    // NOTE(review): the settled embedding model is not visible from this file, so only
    // definedness is awaited here — tighten once the expected default is confirmed.
    await vi.waitFor(() => {
      expect(result.current.embeddingModel).toBeDefined()
    })
    act(() => {
      result.current.setEmbeddingModel({ provider: 'cohere', model: 'embed-v3' })
    })
    expect(result.current.embeddingModel).toEqual({ provider: 'cohere', model: 'embed-v3' })
  })
  it('should update retrieval config', async () => {
    const { result } = renderHook(() =>
      useIndexingConfig({ isAPIKeySet: true, hasSetIndexType: false }),
    )
    // NOTE(review): same caveat as above — the synced default retrieval config is not
    // visible here, so only definedness is awaited.
    await vi.waitFor(() => {
      expect(result.current.retrievalConfig).toBeDefined()
    })
    const newConfig: RetrievalConfig = {
      search_method: RETRIEVE_METHOD.hybrid,
      reranking_enable: true,
      reranking_model: { reranking_provider_name: 'cohere', reranking_model_name: 'rerank-v3' },
      top_k: 5,
      score_threshold_enabled: true,
      score_threshold: 0.7,
    }
    act(() => {
      result.current.setRetrievalConfig(newConfig)
    })
    expect(result.current.retrievalConfig).toEqual(newConfig)
  })
})
// Tests for getIndexingTechnique
describe('getIndexingTechnique', () => {
  it('should return initial type when set', async () => {
    const { result } = renderHook(() =>
      useIndexingConfig({
        isAPIKeySet: true,
        hasSetIndexType: true,
        initialIndexType: IndexingType.ECONOMICAL,
      }),
    )
    // The preset type is reported even though an API key is set
    await vi.waitFor(() => {
      expect(result.current.getIndexingTechnique()).toBe(IndexingType.ECONOMICAL)
    })
  })
  it('should return current type when no initial type', async () => {
    const { result } = renderHook(() =>
      useIndexingConfig({ isAPIKeySet: true, hasSetIndexType: false }),
    )
    // Wait for the concrete settled value instead of `toBeDefined()`: a defined-check
    // passes on the first poll and proves nothing about the sync effect. QUALIFIED is the
    // value the Initial State suite asserts for these same options.
    await vi.waitFor(() => {
      expect(result.current.indexType).toBe(IndexingType.QUALIFIED)
    })
    act(() => {
      result.current.setIndexType(IndexingType.ECONOMICAL)
    })
    // With no preset type, the technique follows the current index type
    expect(result.current.getIndexingTechnique()).toBe(IndexingType.ECONOMICAL)
  })
})
// initialRetrievalConfig: a caller-supplied config must not be replaced by the synced default
describe('initialRetrievalConfig', () => {
  it('should skip retrieval config sync when initialRetrievalConfig is provided', async () => {
    const initialRetrievalConfig: RetrievalConfig = {
      search_method: RETRIEVE_METHOD.hybrid,
      reranking_enable: true,
      reranking_model: { reranking_provider_name: 'custom', reranking_model_name: 'custom-model' },
      top_k: 10,
      score_threshold_enabled: true,
      score_threshold: 0.8,
    }
    const { result: hook } = renderHook(() =>
      useIndexingConfig({
        isAPIKeySet: true,
        hasSetIndexType: false,
        initialRetrievalConfig,
      }),
    )
    await vi.waitFor(() => {
      expect(hook.current.retrievalConfig).toBeDefined()
    })
    // The provided values are still in place after the wait
    const { search_method: searchMethod, top_k: topK } = hook.current.retrievalConfig
    expect(searchMethod).toBe(RETRIEVE_METHOD.hybrid)
    expect(topK).toBe(10)
  })
})
})
// ============================================
// usePreviewState Hook Tests
// ============================================
describe('usePreviewState', () => {
beforeEach(() => {
vi.clearAllMocks()
})
// Shared hook options for this suite: FILE data source with one file, one Notion
// page and one website page. Built once when the suite is collected, so every
// test reads the same fixture objects; tests spread it to override single fields.
const defaultOptions = {
dataSourceType: DataSourceType.FILE,
files: [createMockFile()],
notionPages: [createMockNotionPage()],
websitePages: [createMockWebsitePage()],
}
// Initial state: which item is pre-selected for preview
describe('Initial State', () => {
  it('should initialize with first file for FILE data source', () => {
    const { result: hook } = renderHook(() => usePreviewState(defaultOptions))
    const [firstFile] = defaultOptions.files
    expect(hook.current.previewFile).toEqual(firstFile)
  })
  it('should initialize with first notion page for NOTION data source', () => {
    const { result: hook } = renderHook(() =>
      usePreviewState({ ...defaultOptions, dataSourceType: DataSourceType.NOTION }),
    )
    const [firstNotionPage] = defaultOptions.notionPages
    expect(hook.current.previewNotionPage).toEqual(firstNotionPage)
  })
  it('should initialize with document detail when provided', () => {
    const documentDetail = createMockDocumentDetail()
    const { result: hook } = renderHook(() =>
      usePreviewState({
        ...defaultOptions,
        documentDetail,
        datasetId: 'test-id',
      }),
    )
    // The document's own file is expected rather than the first entry of `files`
    expect(hook.current.previewFile).toEqual(documentDetail.file)
  })
})
// getPreviewPickerItems: list shown in the preview picker for each data source
describe('getPreviewPickerItems', () => {
  // Mounts the hook (optionally overriding the data source) and returns the picker items.
  const pickerItemsFor = (dataSourceType?: DataSourceType) => {
    const { result: hook } = renderHook(() =>
      usePreviewState(
        dataSourceType === undefined
          ? defaultOptions
          : { ...defaultOptions, dataSourceType },
      ),
    )
    return hook.current.getPreviewPickerItems()
  }

  it('should return files for FILE data source', () => {
    expect(pickerItemsFor()).toEqual(defaultOptions.files)
  })
  it('should return mapped notion pages for NOTION data source', () => {
    const [first] = pickerItemsFor(DataSourceType.NOTION)
    expect(first).toEqual({
      id: 'notion-page-1',
      name: 'Test Notion Page',
      extension: 'md',
    })
  })
  it('should return mapped website pages for WEB data source', () => {
    const [first] = pickerItemsFor(DataSourceType.WEB)
    expect(first).toEqual({
      id: 'https://example.com/page1',
      name: 'Test Website Page',
      extension: 'md',
    })
  })
  it('should return empty array for unknown data source', () => {
    expect(pickerItemsFor('unknown' as DataSourceType)).toEqual([])
  })
})
// getPreviewPickerValue: currently selected picker entry for each data source
describe('getPreviewPickerValue', () => {
  // Mounts the hook with `overrides` layered over the shared options and returns the picker value.
  const pickerValueFor = (overrides?: Partial<typeof defaultOptions>) => {
    const { result: hook } = renderHook(() =>
      usePreviewState(overrides ? { ...defaultOptions, ...overrides } : defaultOptions),
    )
    return hook.current.getPreviewPickerValue()
  }

  it('should return file value for FILE data source', () => {
    expect(pickerValueFor()).toEqual(defaultOptions.files[0])
  })
  it('should return mapped notion page value for NOTION data source', () => {
    const notionPage = createMockNotionPage({ page_id: 'page-123', page_name: 'My Page' })
    const value = pickerValueFor({
      dataSourceType: DataSourceType.NOTION,
      notionPages: [notionPage],
    })
    expect(value).toEqual({
      id: 'page-123',
      name: 'My Page',
      extension: 'md',
    })
  })
  it('should return mapped website page value for WEB data source', () => {
    const websitePage = createMockWebsitePage({ source_url: 'https://test.com', title: 'Test Title' })
    const value = pickerValueFor({
      dataSourceType: DataSourceType.WEB,
      websitePages: [websitePage],
    })
    expect(value).toEqual({
      id: 'https://test.com',
      name: 'Test Title',
      extension: 'md',
    })
  })
  it('should return empty value for unknown data source', () => {
    const value = pickerValueFor({ dataSourceType: 'unknown' as DataSourceType })
    expect(value).toEqual({ id: '', name: '', extension: '' })
  })
  it('should handle undefined notion page gracefully', () => {
    // No pages at all: id and name are blank, the extension stays 'md'
    const value = pickerValueFor({
      dataSourceType: DataSourceType.NOTION,
      notionPages: [],
    })
    expect(value).toEqual({
      id: '',
      name: '',
      extension: 'md',
    })
  })
  it('should handle undefined website page gracefully', () => {
    // No pages at all: id and name are blank, the extension stays 'md'
    const value = pickerValueFor({
      dataSourceType: DataSourceType.WEB,
      websitePages: [],
    })
    expect(value).toEqual({
      id: '',
      name: '',
      extension: 'md',
    })
  })
})
// handlePreviewChange: selecting a picker entry updates the preview target for the active source
describe('handlePreviewChange', () => {
  it('should update preview file for FILE data source', () => {
    const secondFile = createMockFile({ id: 'file-2', name: 'second.pdf' })
    const files = [createMockFile(), secondFile]
    const { result: hook } = renderHook(() =>
      usePreviewState({ ...defaultOptions, files }),
    )
    act(() => {
      hook.current.handlePreviewChange({ id: 'file-2', name: 'second.pdf' })
    })
    // The selected picker entry itself becomes the preview file
    expect(hook.current.previewFile).toEqual({ id: 'file-2', name: 'second.pdf' })
  })
  it('should update preview notion page for NOTION data source', () => {
    const secondPage = createMockNotionPage({ page_id: 'notion-page-2', page_name: 'Second Page' })
    const notionPages = [createMockNotionPage(), secondPage]
    const { result: hook } = renderHook(() =>
      usePreviewState({ ...defaultOptions, dataSourceType: DataSourceType.NOTION, notionPages }),
    )
    act(() => {
      hook.current.handlePreviewChange({ id: 'notion-page-2', name: 'Second Page' })
    })
    const selectedPageId = hook.current.previewNotionPage?.page_id
    expect(selectedPageId).toBe('notion-page-2')
  })
  it('should update preview website page for WEB data source', () => {
    const secondPage = createMockWebsitePage({ source_url: 'https://example.com/page2', title: 'Second Page' })
    const websitePages = [createMockWebsitePage(), secondPage]
    const { result: hook } = renderHook(() =>
      usePreviewState({ ...defaultOptions, dataSourceType: DataSourceType.WEB, websitePages }),
    )
    act(() => {
      hook.current.handlePreviewChange({ id: 'https://example.com/page2', name: 'Second Page' })
    })
    const selectedUrl = hook.current.previewWebsitePage?.source_url
    expect(selectedUrl).toBe('https://example.com/page2')
  })
})
})
// ============================================
// useDocumentCreation Hook Tests
// ============================================
describe('useDocumentCreation', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Baseline hook options: one uploaded file, no Notion or website sources.
  const defaultOptions = {
    dataSourceType: DataSourceType.FILE,
    files: [createMockFile()],
    notionPages: [] as NotionPage[],
    notionCredentialId: '',
    websitePages: [] as CrawlResultItem[],
  }

  type DocumentCreation = ReturnType<typeof useDocumentCreation>
  type ValidateInput = Parameters<DocumentCreation['validateParams']>[0]

  /**
   * Returns a fresh semantic-search retrieval config with reranking disabled.
   * A new object is created on every call so tests never share a reference.
   */
  const createRetrievalConfig = (overrides: Partial<RetrievalConfig> = {}): RetrievalConfig => ({
    search_method: RETRIEVE_METHOD.semantic,
    reranking_enable: false,
    reranking_model: { reranking_provider_name: '', reranking_model_name: '' },
    top_k: 3,
    score_threshold_enabled: false,
    score_threshold: 0.5,
    ...overrides,
  })

  /**
   * Calls buildCreationParams with the arguments shared by every test in this
   * suite: text chunking, English, general process rule, default retrieval
   * config, the OpenAI embedding model and the QUALIFIED index type.
   */
  const buildDefaultCreationParams = (creation: DocumentCreation) =>
    creation.buildCreationParams(
      ChunkingMode.text,
      'English',
      { mode: ProcessMode.general, rules: createMockRules() },
      createRetrievalConfig(),
      { provider: 'openai', model: 'text-embedding-ada-002' },
      IndexingType.QUALIFIED,
    )

  /** Builds the default creation params and runs executeCreation inside act(). */
  const executeDefaultCreation = async (creation: DocumentCreation) => {
    const params = buildDefaultCreationParams(creation)
    await act(async () => {
      await creation.executeCreation(params!, IndexingType.QUALIFIED, createRetrievalConfig())
    })
  }

  /**
   * Renders the hook with defaultOptions and runs validateParams on a payload
   * that is valid by default; each test overrides only the fields it needs.
   */
  const validateWith = (overrides: Partial<ValidateInput> = {}) => {
    const { result } = renderHook(() => useDocumentCreation(defaultOptions))
    return result.current.validateParams({
      segmentationType: 'general',
      maxChunkLength: 1000,
      limitMaxChunkLength: 4000,
      overlap: 50,
      indexType: IndexingType.QUALIFIED,
      embeddingModel: { provider: 'openai', model: 'text-embedding-ada-002' },
      rerankModelList: [],
      retrievalConfig: createRetrievalConfig(),
      ...overrides,
    })
  }

  // Tests for validateParams
  describe('validateParams', () => {
    it('should return false when overlap exceeds max chunk length', () => {
      const isValid = validateWith({ maxChunkLength: 100, overlap: 200 })
      expect(isValid).toBe(false)
    })

    it('should return false when max chunk length exceeds limit', () => {
      const isValid = validateWith({ maxChunkLength: 5000 })
      expect(isValid).toBe(false)
    })

    it('should return true for valid params', () => {
      const isValid = validateWith()
      expect(isValid).toBe(true)
    })
  })

  // Tests for buildCreationParams
  describe('buildCreationParams', () => {
    it('should build params for file upload', () => {
      const { result } = renderHook(() => useDocumentCreation(defaultOptions))

      const params = buildDefaultCreationParams(result.current)

      expect(params).toBeDefined()
      expect(params?.doc_form).toBe(ChunkingMode.text)
      expect(params?.doc_language).toBe('English')
      expect(params?.data_source?.type).toBe(DataSourceType.FILE)
    })

    it('should build params for setting mode', () => {
      const documentDetail = createMockDocumentDetail()
      const { result } = renderHook(() =>
        useDocumentCreation({
          ...defaultOptions,
          isSetting: true,
          documentDetail,
        }),
      )

      const params = buildDefaultCreationParams(result.current)

      expect(params?.original_document_id).toBe(documentDetail.id)
    })

    it('should build params for notion_import data source', () => {
      const { result } = renderHook(() =>
        useDocumentCreation({
          ...defaultOptions,
          dataSourceType: DataSourceType.NOTION,
          notionPages: [createMockNotionPage()],
          notionCredentialId: 'notion-cred-123',
        }),
      )

      const params = buildDefaultCreationParams(result.current)

      expect(params).toBeDefined()
      expect(params?.data_source?.type).toBe(DataSourceType.NOTION)
      expect(params?.data_source?.info_list.notion_info_list).toBeDefined()
    })

    it('should build params for website_crawl data source', () => {
      const { result } = renderHook(() =>
        useDocumentCreation({
          ...defaultOptions,
          dataSourceType: DataSourceType.WEB,
          websitePages: [createMockWebsitePage()],
          websiteCrawlProvider: 'jinaReader' as DataSourceProvider,
          websiteCrawlJobId: 'job-123',
          crawlOptions: { max_depth: 2 } as CrawlOptions,
        }),
      )

      const params = buildDefaultCreationParams(result.current)

      expect(params).toBeDefined()
      expect(params?.data_source?.type).toBe(DataSourceType.WEB)
      expect(params?.data_source?.info_list.website_info_list).toBeDefined()
    })
  })

  // Tests for validateParams edge cases
  describe('validateParams - additional cases', () => {
    it('should return false when embedding model is missing for QUALIFIED index type', () => {
      const isValid = validateWith({
        maxChunkLength: 500,
        embeddingModel: { provider: '', model: '' },
        rerankModelList: mockRerankModelList,
      })
      expect(isValid).toBe(false)
    })

    it('should return false when rerank model is required but not selected', () => {
      // isReRankModelSelected returns false when:
      // - indexMethod === 'high_quality' (IndexingType.QUALIFIED)
      // - reranking_enable === true
      // - rerankModelSelected === false (model not found in list)
      const isValid = validateWith({
        maxChunkLength: 500,
        rerankModelList: [], // Empty list means model won't be found
        retrievalConfig: createRetrievalConfig({
          reranking_enable: true, // Reranking enabled
          reranking_model: {
            reranking_provider_name: 'nonexistent',
            reranking_model_name: 'nonexistent-model',
          },
        }),
      })
      expect(isValid).toBe(false)
    })
  })

  // Tests for executeCreation
  describe('executeCreation', () => {
    it('should call createFirstDocumentMutation when datasetId is not provided', async () => {
      const mockOnStepChange = vi.fn()
      const mockUpdateIndexingTypeCache = vi.fn()
      const mockUpdateResultCache = vi.fn()
      const mockUpdateRetrievalMethodCache = vi.fn()
      const mockOnSave = vi.fn()
      const { result } = renderHook(() =>
        useDocumentCreation({
          ...defaultOptions,
          datasetId: undefined,
          onStepChange: mockOnStepChange,
          updateIndexingTypeCache: mockUpdateIndexingTypeCache,
          updateResultCache: mockUpdateResultCache,
          updateRetrievalMethodCache: mockUpdateRetrievalMethodCache,
          onSave: mockOnSave,
        }),
      )

      await executeDefaultCreation(result.current)

      // Only the step advance is asserted here; the cache callbacks are wired
      // in but not checked by this test.
      expect(mockOnStepChange).toHaveBeenCalledWith(1)
    })

    it('should call createDocumentMutation when datasetId is provided', async () => {
      const mockOnStepChange = vi.fn()
      const { result } = renderHook(() =>
        useDocumentCreation({
          ...defaultOptions,
          datasetId: 'existing-dataset-id',
          onStepChange: mockOnStepChange,
        }),
      )

      await executeDefaultCreation(result.current)

      expect(mockOnStepChange).toHaveBeenCalledWith(1)
    })

    it('should call onSave when in setting mode', async () => {
      const mockOnSave = vi.fn()
      const documentDetail = createMockDocumentDetail()
      const { result } = renderHook(() =>
        useDocumentCreation({
          ...defaultOptions,
          datasetId: 'existing-dataset-id',
          isSetting: true,
          documentDetail,
          onSave: mockOnSave,
        }),
      )

      await executeDefaultCreation(result.current)

      expect(mockOnSave).toHaveBeenCalled()
    })
  })

  // Tests for validatePreviewParams
  describe('validatePreviewParams', () => {
    it('should return true for valid max chunk length', () => {
      const { result } = renderHook(() => useDocumentCreation(defaultOptions))
      const isValid = result.current.validatePreviewParams(1000)
      expect(isValid).toBe(true)
    })

    it('should return false when max chunk length exceeds maximum', () => {
      const { result } = renderHook(() => useDocumentCreation(defaultOptions))
      const isValid = result.current.validatePreviewParams(10000)
      expect(isValid).toBe(false)
    })
  })
})
// ============================================
// useIndexingEstimate Hook Tests
// ============================================
describe('useIndexingEstimate', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Baseline options: FILE source, text chunking, QUALIFIED indexing and a
  // general process rule. Notion / website previews are pre-populated so the
  // individual tests only have to switch the data source type.
  const defaultOptions = {
    dataSourceType: DataSourceType.FILE,
    currentDocForm: ChunkingMode.text,
    docLanguage: 'English',
    files: [createMockFile()],
    previewNotionPage: createMockNotionPage(),
    notionCredentialId: '',
    previewWebsitePage: createMockWebsitePage(),
    indexingTechnique: IndexingType.QUALIFIED,
    processRule: { mode: ProcessMode.general, rules: createMockRules() },
  }

  type EstimateOptions = Parameters<typeof useIndexingEstimate>[0]

  /** Renders the hook with defaultOptions merged with the given overrides. */
  const renderEstimateHook = (overrides: Partial<EstimateOptions>) =>
    renderHook(() => useIndexingEstimate({ ...defaultOptions, ...overrides }))

  // Tests for initial state
  describe('Initial State', () => {
    it('should initialize with idle state', () => {
      const { result } = renderHook(() => useIndexingEstimate(defaultOptions))
      const { isIdle, isPending, estimate } = result.current

      expect(isIdle).toBe(true)
      expect(isPending).toBe(false)
      expect(estimate).toBeUndefined()
    })
  })

  // Tests for fetchEstimate
  describe('fetchEstimate', () => {
    it('should have fetchEstimate function', () => {
      const { result } = renderHook(() => useIndexingEstimate(defaultOptions))
      expect(typeof result.current.fetchEstimate).toBe('function')
    })

    it('should have reset function', () => {
      const { result } = renderHook(() => useIndexingEstimate(defaultOptions))
      expect(typeof result.current.reset).toBe('function')
    })

    it('should call fetchEstimate for FILE data source', () => {
      const { result } = renderEstimateHook({
        dataSourceType: DataSourceType.FILE,
        previewFileName: 'test-file.pdf',
      })

      act(() => {
        result.current.fetchEstimate()
      })

      // fetchEstimate should be callable without error
      expect(result.current.fetchEstimate).toBeDefined()
    })

    it('should call fetchEstimate for NOTION data source', () => {
      const { result } = renderEstimateHook({
        dataSourceType: DataSourceType.NOTION,
        previewNotionPage: createMockNotionPage(),
        notionCredentialId: 'cred-123',
      })

      act(() => {
        result.current.fetchEstimate()
      })

      expect(result.current.fetchEstimate).toBeDefined()
    })

    it('should call fetchEstimate for WEB data source', () => {
      const { result } = renderEstimateHook({
        dataSourceType: DataSourceType.WEB,
        previewWebsitePage: createMockWebsitePage(),
        websiteCrawlProvider: 'jinaReader' as DataSourceProvider,
        websiteCrawlJobId: 'job-123',
        crawlOptions: { max_depth: 2 } as CrawlOptions,
      })

      act(() => {
        result.current.fetchEstimate()
      })

      expect(result.current.fetchEstimate).toBeDefined()
    })
  })

  // Tests for getCurrentMutation based on data source type
  describe('Data Source Selection', () => {
    // Each case only checks that a mutation object is exposed and that it is
    // still idle right after the hook has been rendered.

    it('should use file query for FILE data source', () => {
      const { result } = renderEstimateHook({ dataSourceType: DataSourceType.FILE })

      expect(result.current.currentMutation).toBeDefined()
      expect(result.current.isIdle).toBe(true)
    })

    it('should use notion query for NOTION data source', () => {
      const { result } = renderEstimateHook({ dataSourceType: DataSourceType.NOTION })

      expect(result.current.currentMutation).toBeDefined()
      expect(result.current.isIdle).toBe(true)
    })

    it('should use website query for WEB data source', () => {
      const { result } = renderEstimateHook({
        dataSourceType: DataSourceType.WEB,
        websiteCrawlProvider: 'jinaReader' as DataSourceProvider,
        websiteCrawlJobId: 'job-123',
      })

      expect(result.current.currentMutation).toBeDefined()
      expect(result.current.isIdle).toBe(true)
    })
  })
})
// ============================================
// StepTwoFooter Component Tests
// ============================================
describe('StepTwoFooter', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Creation mode (not setting), not creating, with no-op mock callbacks.
  const defaultProps = {
    isSetting: false,
    isCreating: false,
    onPrevious: vi.fn(),
    onCreate: vi.fn(),
    onCancel: vi.fn(),
  }

  /** Renders the footer with defaultProps, optionally overriding some props. */
  const renderFooter = (overrides: Partial<typeof defaultProps> = {}) =>
    render(<StepTwoFooter {...defaultProps} {...overrides} />)

  // Tests for rendering
  describe('Rendering', () => {
    it('should render without crashing', () => {
      renderFooter()

      // Should render Previous and Next buttons with correct text
      expect(screen.getByText(/previousStep/i)).toBeInTheDocument()
      expect(screen.getByText(/nextStep/i)).toBeInTheDocument()
    })

    it('should render Previous and Next buttons when not in setting mode', () => {
      renderFooter()

      expect(screen.getByText(/previousStep/i)).toBeInTheDocument()
      expect(screen.getByText(/nextStep/i)).toBeInTheDocument()
    })

    it('should render Save and Cancel buttons when in setting mode', () => {
      renderFooter({ isSetting: true })

      expect(screen.getByText(/save/i)).toBeInTheDocument()
      expect(screen.getByText(/cancel/i)).toBeInTheDocument()
    })
  })

  // Tests for user interactions
  describe('User Interactions', () => {
    it('should call onPrevious when Previous button is clicked', () => {
      const onPrevious = vi.fn()
      renderFooter({ onPrevious })

      fireEvent.click(screen.getByText(/previousStep/i))

      expect(onPrevious).toHaveBeenCalledTimes(1)
    })

    it('should call onCreate when Next/Save button is clicked', () => {
      const onCreate = vi.fn()
      renderFooter({ onCreate })

      fireEvent.click(screen.getByText(/nextStep/i))

      expect(onCreate).toHaveBeenCalledTimes(1)
    })

    it('should call onCancel when Cancel button is clicked in setting mode', () => {
      const onCancel = vi.fn()
      renderFooter({ isSetting: true, onCancel })

      fireEvent.click(screen.getByText(/cancel/i))

      expect(onCancel).toHaveBeenCalledTimes(1)
    })
  })

  // Tests for loading state
  describe('Loading State', () => {
    // NOTE(review): both cases assert a class that looks like a static
    // Tailwind variant; confirm it is really tied to isCreating, otherwise
    // these checks would pass without a loading state as well.

    it('should show loading state on Next button when creating', () => {
      renderFooter({ isCreating: true })

      const nextButton = screen.getByText(/nextStep/i).closest('button')

      // Button has disabled:btn-disabled class which handles the loading state
      expect(nextButton).toHaveClass('disabled:btn-disabled')
    })

    it('should show loading state on Save button when creating in setting mode', () => {
      renderFooter({ isSetting: true, isCreating: true })

      const saveButton = screen.getByText(/save/i).closest('button')

      // Button has disabled:btn-disabled class which handles the loading state
      expect(saveButton).toHaveClass('disabled:btn-disabled')
    })
  })
})
// ============================================
// PreviewPanel Component Tests
// ============================================
describe('PreviewPanel', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Baseline props: desktop layout, FILE source, text chunking, no estimate
  // yet (idle), and a single PDF available in the document picker.
  const defaultProps = {
    isMobile: false,
    dataSourceType: DataSourceType.FILE,
    currentDocForm: ChunkingMode.text,
    estimate: undefined as FileIndexingEstimateResponse | undefined,
    parentChildConfig: defaultParentChildConfig,
    isSetting: false,
    pickerFiles: [{ id: 'file-1', name: 'test.pdf', extension: 'pdf' }],
    pickerValue: { id: 'file-1', name: 'test.pdf', extension: 'pdf' },
    isIdle: true,
    isPending: false,
    onPickerChange: vi.fn(),
  }

  // Tests for rendering
  describe('Rendering', () => {
    it('should render without crashing', () => {
      render(<PreviewPanel {...defaultProps} />)
      // Check for the preview header title text
      expect(screen.getByText('datasetCreation.stepTwo.preview')).toBeInTheDocument()
    })

    it('should render idle state when isIdle is true', () => {
      render(<PreviewPanel {...defaultProps} isIdle={true} />)
      expect(screen.getByText(/previewChunkTip/i)).toBeInTheDocument()
    })

    it('should render loading skeleton when isPending is true', () => {
      render(<PreviewPanel {...defaultProps} isIdle={false} isPending={true} />)
      // The skeleton itself is not queried; this only verifies that the idle
      // tip is no longer shown while the estimate is pending.
      expect(screen.queryByText(/previewChunkTip/i)).not.toBeInTheDocument()
    })
  })

  // Tests for different doc forms
  describe('Preview Content', () => {
    it('should render text preview when docForm is text', () => {
      const estimate = createMockEstimate()
      render(
        <PreviewPanel
          {...defaultProps}
          isIdle={false}
          estimate={estimate}
          currentDocForm={ChunkingMode.text}
        />,
      )
      expect(screen.getByText('Chunk 1 content')).toBeInTheDocument()
    })

    it('should render QA preview when docForm is qa', () => {
      const estimate = createMockEstimate()
      render(
        <PreviewPanel
          {...defaultProps}
          isIdle={false}
          estimate={estimate}
          currentDocForm={ChunkingMode.qa}
        />,
      )
      expect(screen.getByText('Q1')).toBeInTheDocument()
      expect(screen.getByText('A1')).toBeInTheDocument()
    })

    it('should show chunk count badge for non-QA doc form', () => {
      const estimate = createMockEstimate({ total_segments: 25 })
      render(
        <PreviewPanel
          {...defaultProps}
          isIdle={false}
          estimate={estimate}
          currentDocForm={ChunkingMode.text}
        />,
      )
      expect(screen.getByText(/25/)).toBeInTheDocument()
    })

    it('should render parent-child preview when docForm is parentChild', () => {
      const estimate = createMockEstimate({
        preview: [
          { content: 'Parent chunk content', child_chunks: ['Child 1', 'Child 2', 'Child 3'] },
        ],
      })
      render(
        <PreviewPanel
          {...defaultProps}
          isIdle={false}
          estimate={estimate}
          currentDocForm={ChunkingMode.parentChild}
          parentChildConfig={{
            ...defaultParentChildConfig,
            chunkForContext: 'paragraph',
          }}
        />,
      )
      // Should render parent chunk label
      expect(screen.getByText('Chunk-1')).toBeInTheDocument()
      // Should render child chunks
      expect(screen.getByText('Child 1')).toBeInTheDocument()
      expect(screen.getByText('Child 2')).toBeInTheDocument()
      expect(screen.getByText('Child 3')).toBeInTheDocument()
    })

    it('should limit child chunks when chunkForContext is full-doc', () => {
      // FULL_DOC_PREVIEW_LENGTH is 50, so we need more than 50 chunks to test the limit
      const manyChildChunks = Array.from({ length: 60 }, (_, i) => `ChildChunk${i + 1}`)
      const estimate = createMockEstimate({
        preview: [{ content: 'Parent content', child_chunks: manyChildChunks }],
      })
      render(
        <PreviewPanel
          {...defaultProps}
          isIdle={false}
          estimate={estimate}
          currentDocForm={ChunkingMode.parentChild}
          parentChildConfig={{
            ...defaultParentChildConfig,
            chunkForContext: 'full-doc',
          }}
        />,
      )
      // Should render parent chunk
      expect(screen.getByText('Chunk-1')).toBeInTheDocument()
      // full-doc mode limits to FULL_DOC_PREVIEW_LENGTH (50)
      expect(screen.getByText('ChildChunk1')).toBeInTheDocument()
      expect(screen.getByText('ChildChunk50')).toBeInTheDocument()
      // Should not render beyond the limit
      expect(screen.queryByText('ChildChunk51')).not.toBeInTheDocument()
    })

    it('should render multiple parent chunks in parent-child mode', () => {
      const estimate = createMockEstimate({
        preview: [
          { content: 'Parent 1', child_chunks: ['P1-C1'] },
          { content: 'Parent 2', child_chunks: ['P2-C1'] },
        ],
      })
      render(
        <PreviewPanel
          {...defaultProps}
          isIdle={false}
          estimate={estimate}
          currentDocForm={ChunkingMode.parentChild}
        />,
      )
      expect(screen.getByText('Chunk-1')).toBeInTheDocument()
      expect(screen.getByText('Chunk-2')).toBeInTheDocument()
      expect(screen.getByText('P1-C1')).toBeInTheDocument()
      expect(screen.getByText('P2-C1')).toBeInTheDocument()
    })
  })

  // Tests for picker
  describe('Document Picker', () => {
    // The title states what is actually asserted: no selection is simulated
    // here, so the callback must stay untouched after the initial render.
    it('should not call onPickerChange before a document is selected', () => {
      const onPickerChange = vi.fn()
      render(<PreviewPanel {...defaultProps} onPickerChange={onPickerChange} />)
      // The picker interaction would be tested through the actual component
      expect(onPickerChange).not.toHaveBeenCalled()
    })
  })
})
// ============================================
// Edge Cases Tests
// ============================================
describe('Edge Cases', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  describe('Empty/Null Values', () => {
    /** Renders usePreviewState with every source list empty. */
    const renderWithNoSources = (dataSourceType: DataSourceType) =>
      renderHook(() =>
        usePreviewState({
          dataSourceType,
          files: [],
          notionPages: [],
          websitePages: [],
        }),
      )

    it('should handle empty files array in usePreviewState', () => {
      const { result } = renderWithNoSources(DataSourceType.FILE)
      expect(result.current.previewFile).toBeUndefined()
    })

    it('should handle empty notion pages array', () => {
      const { result } = renderWithNoSources(DataSourceType.NOTION)
      expect(result.current.previewNotionPage).toBeUndefined()
    })

    it('should handle empty website pages array', () => {
      const { result } = renderWithNoSources(DataSourceType.WEB)
      expect(result.current.previewWebsitePage).toBeUndefined()
    })
  })

  describe('Boundary Conditions', () => {
    // Each case writes one extreme value through a setter and reads it back
    // from the hook state.

    it('should handle very large chunk length', () => {
      const hook = renderHook(() => useSegmentationState())

      act(() => {
        hook.result.current.setMaxChunkLength(999999)
      })

      expect(hook.result.current.maxChunkLength).toBe(999999)
    })

    it('should handle zero overlap', () => {
      const hook = renderHook(() => useSegmentationState())

      act(() => {
        hook.result.current.setOverlap(0)
      })

      expect(hook.result.current.overlap).toBe(0)
    })

    it('should handle special characters in segment identifier', () => {
      const hook = renderHook(() => useSegmentationState())

      act(() => {
        hook.result.current.setSegmentIdentifier('<<>>')
      })

      expect(hook.result.current.segmentIdentifier).toBe('<<>>')
    })
  })

  describe('Callback Stability', () => {
    it('should maintain stable setSegmentIdentifier reference', () => {
      const hook = renderHook(() => useSegmentationState())
      const setterBeforeRerender = hook.result.current.setSegmentIdentifier

      hook.rerender()

      expect(hook.result.current.setSegmentIdentifier).toBe(setterBeforeRerender)
    })

    it('should maintain stable toggleRule reference', () => {
      const hook = renderHook(() => useSegmentationState())
      const toggleBeforeRerender = hook.result.current.toggleRule

      hook.rerender()

      expect(hook.result.current.toggleRule).toBe(toggleBeforeRerender)
    })

    it('should maintain stable getProcessRule reference', () => {
      const hook = renderHook(() => useSegmentationState())

      // Update some state to trigger re-render
      act(() => {
        hook.result.current.setMaxChunkLength(2048)
      })
      hook.rerender()

      // getProcessRule depends on state, so it may change but should remain a function
      expect(typeof hook.result.current.getProcessRule).toBe('function')
    })
  })
})
// ============================================
// Integration Scenarios
// ============================================
describe('Integration Scenarios', () => {
  beforeEach(() => {
    vi.clearAllMocks()
    mockCurrentDataset = null
  })

  describe('Document Creation Flow', () => {
    it('should build and validate params for file upload workflow', () => {
      const uploadedFiles = [createMockFile()]
      const segmentation = renderHook(() => useSegmentationState())
      const creation = renderHook(() =>
        useDocumentCreation({
          dataSourceType: DataSourceType.FILE,
          files: uploadedFiles,
          notionPages: [],
          notionCredentialId: '',
          websitePages: [],
        }),
      )

      // Build params, feeding the process rule produced by the segmentation
      // hook into the creation hook.
      const processRule = segmentation.result.current.getProcessRule(ChunkingMode.text)
      const params = creation.result.current.buildCreationParams(
        ChunkingMode.text,
        'English',
        processRule,
        {
          search_method: RETRIEVE_METHOD.semantic,
          reranking_enable: false,
          reranking_model: { reranking_provider_name: '', reranking_model_name: '' },
          top_k: 3,
          score_threshold_enabled: false,
          score_threshold: 0.5,
        },
        { provider: 'openai', model: 'text-embedding-ada-002' },
        IndexingType.QUALIFIED,
      )

      expect(params).toBeDefined()
      expect(params?.data_source?.info_list.file_info_list?.file_ids).toContain('file-1')
    })

    it('should handle parent-child document form', () => {
      const hook = renderHook(() => useSegmentationState())

      act(() => {
        const state = hook.result.current
        state.setSegmentationType(ProcessMode.parentChild)
        state.setChunkForContext('full-doc')
        state.updateParentConfig('maxLength', 2048)
        state.updateChildConfig('maxLength', 512)
      })

      const { mode, rules } = hook.result.current.getProcessRule(ChunkingMode.parentChild)

      expect(mode).toBe('hierarchical')
      expect(rules.parent_mode).toBe('full-doc')
      expect(rules.segmentation.max_tokens).toBe(2048)
      expect(rules.subchunk_segmentation?.max_tokens).toBe(512)
    })
  })

  describe('Preview Flow', () => {
    it('should handle preview file change flow', () => {
      const candidateFiles = [
        createMockFile({ id: 'file-1', name: 'first.pdf' }),
        createMockFile({ id: 'file-2', name: 'second.pdf' }),
      ]
      const hook = renderHook(() =>
        usePreviewState({
          dataSourceType: DataSourceType.FILE,
          files: candidateFiles,
          notionPages: [],
          websitePages: [],
        }),
      )

      // Initial state
      expect(hook.result.current.getPreviewPickerValue().name).toBe('first.pdf')

      // Change preview
      act(() => {
        hook.result.current.handlePreviewChange({ id: 'file-2', name: 'second.pdf' })
      })

      expect(hook.result.current.previewFile).toEqual({ id: 'file-2', name: 'second.pdf' })
    })
  })

  describe('Escape/Unescape Round Trip', () => {
    /** Runs a string through escape() and then unescape(). */
    const roundTrip = (input: string) => unescape(escape(input))

    it('should preserve original string through escape/unescape', () => {
      const original = '\n\n'
      expect(roundTrip(original)).toBe(original)
    })

    it('should handle complex strings without backslashes', () => {
      // This string contains control characters but no literal backslashes.
      const original = 'Hello\nWorld\t!\r\n'
      expect(roundTrip(original)).toBe(original)
    })

    it('should document behavior for strings with existing backslashes', () => {
      // When the original string already contains backslash sequences,
      // escape/unescape are not perfectly symmetric because escape()
      // does not escape backslashes.
      const original = 'Hello\\nWorld'
      const restored = roundTrip(original)

      // The unescaped value interprets "\n" as a newline, so it differs from the original.
      expect(restored).toBe('Hello\nWorld')
      expect(restored).not.toBe(original)
    })
  })
})