dify/web/app/components/datasets/create/embedding-process/rule-detail.tsx
Coding On Star 98df99b0ca
Some checks are pending
autofix.ci / autofix (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
feat(embedding-process): implement embedding process components and polling logic (#30622)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-09 10:21:27 +08:00

134 lines
5.1 KiB
TypeScript

import type { FC } from 'react'
import type { ProcessRuleResponse } from '@/models/datasets'
import Image from 'next/image'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import { ProcessMode } from '@/models/datasets'
import { RETRIEVE_METHOD } from '@/types/app'
import { indexMethodIcon, retrievalIcon } from '../icons'
import { IndexingType } from '../step-two'
/** Props accepted by the `RuleDetail` summary component. */
type RuleDetailProps = {
  /** Process rule payload to summarize; when absent the rule fields render as '-'. */
  sourceData?: ProcessRuleResponse
  /** Indexing technique identifier; compared against `IndexingType.ECONOMICAL`. */
  indexingType?: string
  /** Retrieval method used when resolving the retrieval label and icon. */
  retrievalMethod?: RETRIEVE_METHOD
}
/**
 * Maps a pre-processing rule id to the translation key of its display name.
 * The keys are resolved with the `datasetCreation` namespace by the component.
 */
const PRE_PROCESSING_RULE_KEYS = {
  // Collapse redundant whitespace
  remove_extra_spaces: 'stepTwo.removeExtraSpaces',
  // Strip URLs and e-mail addresses
  remove_urls_emails: 'stepTwo.removeUrlEmails',
  // Drop stop words
  remove_stopwords: 'stepTwo.removeStopwords',
} as const
/**
 * Icon shown next to each retrieval method.
 * Declared as `Partial` so a lookup may miss; callers supply their own fallback.
 */
const RETRIEVAL_ICON_MAP: Partial<Record<RETRIEVE_METHOD, string>> = {
  // Methods with a dedicated icon
  [RETRIEVE_METHOD.semantic]: retrievalIcon.vector,
  [RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid,
  // Text/keyword based methods all share the full-text icon
  [RETRIEVE_METHOD.fullText]: retrievalIcon.fullText,
  [RETRIEVE_METHOD.invertedIndex]: retrievalIcon.fullText,
  [RETRIEVE_METHOD.keywordSearch]: retrievalIcon.fullText,
}
/**
 * Type guard: true when `value` is a primitive number.
 * This is a plain `typeof` check, so `NaN` and `Infinity` also pass.
 */
function isNumber(value: unknown): value is number {
  return typeof value === 'number'
}
/**
 * Read-only summary of the rules used to process a document: chunking mode,
 * segment length, text cleaning, index mode and retrieval setting.
 *
 * - `sourceData`: process rule payload; while its `mode` is missing, the three
 *   segmentation fields render as '-'.
 * - `indexingType`: when equal to `IndexingType.ECONOMICAL`, the economical index
 *   label/icon is shown and the retrieval row is forced to keyword search.
 * - `retrievalMethod`: retrieval method for non-economical indexing; falls back
 *   to semantic search when omitted.
 */
const RuleDetail: FC<RuleDetailProps> = ({ sourceData, indexingType, retrievalMethod }) => {
  const { t } = useTranslation()

  // Declaration order here is the render order of the segmentation rows.
  const segmentationRuleLabels = {
    mode: t('embedding.mode', { ns: 'datasetDocuments' }),
    segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
    textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
  }
  type SegmentationField = keyof typeof segmentationRuleLabels

  // Translates a pre-processing rule id; returns undefined for unknown ids.
  const getRuleName = useCallback((key: string): string | undefined => {
    // Own-property guard: `key` is an arbitrary runtime string, and inherited
    // names such as 'constructor' would otherwise resolve to truthy non-string
    // values on the lookup table and be handed to `t`.
    if (!Object.prototype.hasOwnProperty.call(PRE_PROCESSING_RULE_KEYS, key))
      return undefined
    const translationKey = PRE_PROCESSING_RULE_KEYS[key as keyof typeof PRE_PROCESSING_RULE_KEYS]
    return t(translationKey, { ns: 'datasetCreation' })
  }, [t])

  // Chunking mode label: "custom" for general mode, otherwise
  // "hierarchical · <parent mode>".
  const getModeValue = useCallback((): string => {
    if (!sourceData?.mode)
      return '-'

    if (sourceData.mode === ProcessMode.general)
      return t('embedding.custom', { ns: 'datasetDocuments' })

    const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
      ? t('parentMode.paragraph', { ns: 'dataset' })
      : t('parentMode.fullDoc', { ns: 'dataset' })

    return `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${parentModeLabel}`
  }, [sourceData, t])

  // Max token limits: a single value for general mode, parent + child values
  // otherwise. Non-numeric limits are shown as '-'.
  const getSegmentLengthValue = useCallback((): string | number => {
    if (!sourceData?.mode)
      return '-'

    const parentLimit = sourceData.rules?.segmentation?.max_tokens
    const maxTokens = isNumber(parentLimit) ? parentLimit : '-'

    if (sourceData.mode === ProcessMode.general)
      return maxTokens

    const childLimit = sourceData.rules?.subchunk_segmentation?.max_tokens
    const childMaxTokens = isNumber(childLimit) ? childLimit : '-'

    return `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
  }, [sourceData, t])

  // Comma-joined names of the enabled pre-processing rules, or '-' when none
  // of them resolves to a known, non-empty name.
  const getTextCleaningValue = useCallback((): string => {
    if (!sourceData?.mode)
      return '-'

    const enabledRules = sourceData.rules?.pre_processing_rules?.filter(rule => rule.enabled) ?? []
    const ruleNames = enabledRules
      .map(rule => getRuleName(rule.id))
      .filter((name): name is string => typeof name === 'string' && name !== '')

    return ruleNames.length > 0 ? ruleNames.join(',') : '-'
  }, [sourceData, getRuleName])

  // Keyed by the label object's own keys so a label without a getter (or the
  // reverse) is a compile-time error instead of a runtime `undefined()` call.
  const fieldValueGetters: Record<SegmentationField, () => string | number> = {
    mode: getModeValue,
    segmentLength: getSegmentLengthValue,
    textCleaning: getTextCleaningValue,
  }
  const segmentationFields = Object.keys(segmentationRuleLabels) as SegmentationField[]

  const isEconomical = indexingType === IndexingType.ECONOMICAL
  const indexMethodIconSrc = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
  const indexModeLabel = t(`stepTwo.${isEconomical ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' })

  // Economical indexing always retrieves by keyword search, whatever
  // `retrievalMethod` was passed in.
  const effectiveRetrievalMethod = isEconomical ? 'keyword_search' : (retrievalMethod ?? 'semantic_search')
  const retrievalLabel = t(`retrieval.${effectiveRetrievalMethod}.title`, { ns: 'dataset' })
  // Resolve the icon from the same effective method as the label, so the
  // economical row does not pair a keyword-search label with another method's icon.
  const iconMethod = isEconomical
    ? RETRIEVE_METHOD.keywordSearch
    : (retrievalMethod ?? RETRIEVE_METHOD.semantic)
  const retrievalIconSrc = RETRIEVAL_ICON_MAP[iconMethod] ?? retrievalIcon.vector

  return (
    <div className="flex flex-col gap-1">
      {segmentationFields.map(field => (
        <FieldInfo
          key={field}
          label={segmentationRuleLabels[field]}
          displayedValue={String(fieldValueGetters[field]())}
        />
      ))}
      <FieldInfo
        label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
        displayedValue={indexModeLabel}
        valueIcon={<Image className="size-4" src={indexMethodIconSrc} alt="" />}
      />
      <FieldInfo
        label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
        displayedValue={retrievalLabel}
        valueIcon={<Image className="size-4" src={retrievalIconSrc} alt="" />}
      />
    </div>
  )
}
export default RuleDetail