mirror of
https://github.com/langgenius/dify.git
synced 2026-01-13 21:57:48 +08:00
feat(embedding-process): implement embedding process components and polling logic (#30622)
Some checks are pending
autofix.ci / autofix (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Some checks are pending
autofix.ci / autofix (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
This commit is contained in:
parent
9848823dcd
commit
98df99b0ca
1562
web/app/components/datasets/create/embedding-process/index.spec.tsx
Normal file
1562
web/app/components/datasets/create/embedding-process/index.spec.tsx
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,47 +1,29 @@
|
||||
import type { FC } from 'react'
|
||||
import type {
|
||||
DataSourceInfo,
|
||||
FullDocumentDetail,
|
||||
IndexingStatusResponse,
|
||||
LegacyDataSourceInfo,
|
||||
ProcessRuleResponse,
|
||||
} from '@/models/datasets'
|
||||
import type { FullDocumentDetail } from '@/models/datasets'
|
||||
import type { RETRIEVE_METHOD } from '@/types/app'
|
||||
import {
|
||||
RiArrowRightLine,
|
||||
RiCheckboxCircleFill,
|
||||
RiErrorWarningFill,
|
||||
RiLoader2Fill,
|
||||
RiTerminalBoxLine,
|
||||
} from '@remixicon/react'
|
||||
import Image from 'next/image'
|
||||
import Link from 'next/link'
|
||||
import { useRouter } from 'next/navigation'
|
||||
import * as React from 'react'
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Divider from '@/app/components/base/divider'
|
||||
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
|
||||
import NotionIcon from '@/app/components/base/notion-icon'
|
||||
import Tooltip from '@/app/components/base/tooltip'
|
||||
import PriorityLabel from '@/app/components/billing/priority-label'
|
||||
import { Plan } from '@/app/components/billing/type'
|
||||
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
|
||||
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
|
||||
import { useProviderContext } from '@/context/provider-context'
|
||||
import { useDatasetApiAccessUrl } from '@/hooks/use-api-access-url'
|
||||
import { DataSourceType, ProcessMode } from '@/models/datasets'
|
||||
import { fetchIndexingStatusBatch as doFetchIndexingStatus } from '@/service/datasets'
|
||||
import { useProcessRule } from '@/service/knowledge/use-dataset'
|
||||
import { useInvalidDocumentList } from '@/service/knowledge/use-document'
|
||||
import { RETRIEVE_METHOD } from '@/types/app'
|
||||
import { sleep } from '@/utils'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import DocumentFileIcon from '../../common/document-file-icon'
|
||||
import { indexMethodIcon, retrievalIcon } from '../icons'
|
||||
import { IndexingType } from '../step-two'
|
||||
import IndexingProgressItem from './indexing-progress-item'
|
||||
import RuleDetail from './rule-detail'
|
||||
import UpgradeBanner from './upgrade-banner'
|
||||
import { useIndexingStatusPolling } from './use-indexing-status-polling'
|
||||
import { createDocumentLookup } from './utils'
|
||||
|
||||
type Props = {
|
||||
type EmbeddingProcessProps = {
|
||||
datasetId: string
|
||||
batchId: string
|
||||
documents?: FullDocumentDetail[]
|
||||
@ -49,333 +31,121 @@ type Props = {
|
||||
retrievalMethod?: RETRIEVE_METHOD
|
||||
}
|
||||
|
||||
const RuleDetail: FC<{
|
||||
sourceData?: ProcessRuleResponse
|
||||
indexingType?: string
|
||||
retrievalMethod?: RETRIEVE_METHOD
|
||||
}> = ({ sourceData, indexingType, retrievalMethod }) => {
|
||||
// Status header component
|
||||
const StatusHeader: FC<{ isEmbedding: boolean, isCompleted: boolean }> = ({
|
||||
isEmbedding,
|
||||
isCompleted,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const segmentationRuleMap = {
|
||||
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
|
||||
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
|
||||
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
|
||||
}
|
||||
|
||||
const getRuleName = (key: string) => {
|
||||
if (key === 'remove_extra_spaces')
|
||||
return t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' })
|
||||
|
||||
if (key === 'remove_urls_emails')
|
||||
return t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' })
|
||||
|
||||
if (key === 'remove_stopwords')
|
||||
return t('stepTwo.removeStopwords', { ns: 'datasetCreation' })
|
||||
}
|
||||
|
||||
const isNumber = (value: unknown) => {
|
||||
return typeof value === 'number'
|
||||
}
|
||||
|
||||
const getValue = useCallback((field: string) => {
|
||||
let value: string | number | undefined = '-'
|
||||
const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens)
|
||||
? sourceData.rules.segmentation.max_tokens
|
||||
: value
|
||||
const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens)
|
||||
? sourceData.rules.subchunk_segmentation.max_tokens
|
||||
: value
|
||||
switch (field) {
|
||||
case 'mode':
|
||||
value = !sourceData?.mode
|
||||
? value
|
||||
: sourceData.mode === ProcessMode.general
|
||||
? (t('embedding.custom', { ns: 'datasetDocuments' }) as string)
|
||||
: `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${sourceData?.rules?.parent_mode === 'paragraph'
|
||||
? t('parentMode.paragraph', { ns: 'dataset' })
|
||||
: t('parentMode.fullDoc', { ns: 'dataset' })}`
|
||||
break
|
||||
case 'segmentLength':
|
||||
value = !sourceData?.mode
|
||||
? value
|
||||
: sourceData.mode === ProcessMode.general
|
||||
? maxTokens
|
||||
: `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
|
||||
break
|
||||
default:
|
||||
value = !sourceData?.mode
|
||||
? value
|
||||
: sourceData?.rules?.pre_processing_rules?.filter(rule =>
|
||||
rule.enabled).map(rule => getRuleName(rule.id)).join(',')
|
||||
break
|
||||
}
|
||||
return value
|
||||
}, [sourceData])
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-1">
|
||||
{Object.keys(segmentationRuleMap).map((field) => {
|
||||
return (
|
||||
<FieldInfo
|
||||
key={field}
|
||||
label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
|
||||
displayedValue={String(getValue(field))}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
<FieldInfo
|
||||
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
|
||||
displayedValue={t(`stepTwo.${indexingType === IndexingType.ECONOMICAL ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' }) as string}
|
||||
valueIcon={(
|
||||
<Image
|
||||
className="size-4"
|
||||
src={
|
||||
indexingType === IndexingType.ECONOMICAL
|
||||
? indexMethodIcon.economical
|
||||
: indexMethodIcon.high_quality
|
||||
}
|
||||
alt=""
|
||||
/>
|
||||
)}
|
||||
/>
|
||||
<FieldInfo
|
||||
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
|
||||
// displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
|
||||
displayedValue={t(`retrieval.${indexingType === IndexingType.ECONOMICAL ? 'keyword_search' : retrievalMethod ?? 'semantic_search'}.title`, { ns: 'dataset' })}
|
||||
valueIcon={(
|
||||
<Image
|
||||
className="size-4"
|
||||
src={
|
||||
retrievalMethod === RETRIEVE_METHOD.fullText
|
||||
? retrievalIcon.fullText
|
||||
: retrievalMethod === RETRIEVE_METHOD.hybrid
|
||||
? retrievalIcon.hybrid
|
||||
: retrievalIcon.vector
|
||||
}
|
||||
alt=""
|
||||
/>
|
||||
)}
|
||||
/>
|
||||
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
|
||||
{isEmbedding && (
|
||||
<>
|
||||
<RiLoader2Fill className="size-4 animate-spin" />
|
||||
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
|
||||
</>
|
||||
)}
|
||||
{isCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
|
||||
/**
 * Footer action row rendered below the embedding summary.
 *
 * Renders a link to `apiReferenceUrl` (opened in a new tab) and a primary
 * button that invokes `onNavToDocuments` when clicked.
 */
const ActionButtons: FC<{
  apiReferenceUrl: string
  onNavToDocuments: () => void
}> = (props) => {
  const { apiReferenceUrl, onNavToDocuments } = props
  const { t } = useTranslation()
  const navToLabel = t('stepThree.navTo', { ns: 'datasetCreation' })

  return (
    <div className="mt-6 flex items-center gap-x-2 py-2">
      <Link
        href={apiReferenceUrl}
        target="_blank"
        rel="noopener noreferrer"
      >
        <Button className="w-fit gap-x-0.5 px-3">
          <RiTerminalBoxLine className="size-4" />
          <span className="px-0.5">Access the API</span>
        </Button>
      </Link>
      <Button className="w-fit gap-x-0.5 px-3" variant="primary" onClick={onNavToDocuments}>
        <span className="px-0.5">{navToLabel}</span>
        <RiArrowRightLine className="size-4 stroke-current stroke-1" />
      </Button>
    </div>
  )
}
|
||||
|
||||
const EmbeddingProcess: FC<EmbeddingProcessProps> = ({
|
||||
datasetId,
|
||||
batchId,
|
||||
documents = [],
|
||||
indexingType,
|
||||
retrievalMethod,
|
||||
}) => {
|
||||
const { enableBilling, plan } = useProviderContext()
|
||||
|
||||
const getFirstDocument = documents[0]
|
||||
|
||||
const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])
|
||||
const fetchIndexingStatus = async () => {
|
||||
const status = await doFetchIndexingStatus({ datasetId, batchId })
|
||||
setIndexingStatusDetail(status.data)
|
||||
return status.data
|
||||
}
|
||||
|
||||
const [isStopQuery, setIsStopQuery] = useState(false)
|
||||
const isStopQueryRef = useRef(isStopQuery)
|
||||
useEffect(() => {
|
||||
isStopQueryRef.current = isStopQuery
|
||||
}, [isStopQuery])
|
||||
const stopQueryStatus = () => {
|
||||
setIsStopQuery(true)
|
||||
}
|
||||
|
||||
const startQueryStatus = async () => {
|
||||
if (isStopQueryRef.current)
|
||||
return
|
||||
|
||||
try {
|
||||
const indexingStatusBatchDetail = await fetchIndexingStatus()
|
||||
const isCompleted = indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail.indexing_status))
|
||||
if (isCompleted) {
|
||||
stopQueryStatus()
|
||||
return
|
||||
}
|
||||
await sleep(2500)
|
||||
await startQueryStatus()
|
||||
}
|
||||
catch {
|
||||
await sleep(2500)
|
||||
await startQueryStatus()
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
setIsStopQuery(false)
|
||||
startQueryStatus()
|
||||
return () => {
|
||||
stopQueryStatus()
|
||||
}
|
||||
}, [])
|
||||
|
||||
// get rule
|
||||
const { data: ruleDetail } = useProcessRule(getFirstDocument?.id)
|
||||
|
||||
const router = useRouter()
|
||||
const invalidDocumentList = useInvalidDocumentList()
|
||||
const navToDocumentList = () => {
|
||||
const apiReferenceUrl = useDatasetApiAccessUrl()
|
||||
|
||||
// Polling hook for indexing status
|
||||
const { statusList, isEmbedding, isEmbeddingCompleted } = useIndexingStatusPolling({
|
||||
datasetId,
|
||||
batchId,
|
||||
})
|
||||
|
||||
// Get process rule for the first document
|
||||
const firstDocumentId = documents[0]?.id
|
||||
const { data: ruleDetail } = useProcessRule(firstDocumentId)
|
||||
|
||||
// Document lookup utilities - memoized for performance
|
||||
const documentLookup = useMemo(
|
||||
() => createDocumentLookup(documents),
|
||||
[documents],
|
||||
)
|
||||
|
||||
const handleNavToDocuments = () => {
|
||||
invalidDocumentList()
|
||||
router.push(`/datasets/${datasetId}/documents`)
|
||||
}
|
||||
const apiReferenceUrl = useDatasetApiAccessUrl()
|
||||
|
||||
const isEmbedding = useMemo(() => {
|
||||
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
|
||||
}, [indexingStatusBatchDetail])
|
||||
const isEmbeddingCompleted = useMemo(() => {
|
||||
return indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail?.indexing_status || ''))
|
||||
}, [indexingStatusBatchDetail])
|
||||
|
||||
const getSourceName = (id: string) => {
|
||||
const doc = documents.find(document => document.id === id)
|
||||
return doc?.name
|
||||
}
|
||||
const getFileType = (name?: string) => name?.split('.').pop() || 'txt'
|
||||
const getSourcePercent = (detail: IndexingStatusResponse) => {
|
||||
const completedCount = detail.completed_segments || 0
|
||||
const totalCount = detail.total_segments || 0
|
||||
if (totalCount === 0)
|
||||
return 0
|
||||
const percent = Math.round(completedCount * 100 / totalCount)
|
||||
return percent > 100 ? 100 : percent
|
||||
}
|
||||
const getSourceType = (id: string) => {
|
||||
const doc = documents.find(document => document.id === id)
|
||||
return doc?.data_source_type as DataSourceType
|
||||
}
|
||||
|
||||
const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
|
||||
return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object'
|
||||
}
|
||||
|
||||
const getIcon = (id: string) => {
|
||||
const doc = documents.find(document => document.id === id)
|
||||
const info = doc?.data_source_info
|
||||
if (info && isLegacyDataSourceInfo(info))
|
||||
return info.notion_page_icon
|
||||
return undefined
|
||||
}
|
||||
const isSourceEmbedding = (detail: IndexingStatusResponse) =>
|
||||
['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
|
||||
const showUpgradeBanner = enableBilling && plan.type !== Plan.team
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="flex flex-col gap-y-3">
|
||||
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
|
||||
{isEmbedding && (
|
||||
<>
|
||||
<RiLoader2Fill className="size-4 animate-spin" />
|
||||
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
|
||||
</>
|
||||
)}
|
||||
{isEmbeddingCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
|
||||
</div>
|
||||
{
|
||||
enableBilling && plan.type !== Plan.team && (
|
||||
<div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
|
||||
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
|
||||
<ZapFast className="h-4 w-4 text-[#FB6514]" />
|
||||
</div>
|
||||
<div className="mx-3 grow text-[13px] font-medium text-gray-700">
|
||||
{t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })}
|
||||
</div>
|
||||
<UpgradeBtn loc="knowledge-speed-up" />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
<StatusHeader isEmbedding={isEmbedding} isCompleted={isEmbeddingCompleted} />
|
||||
|
||||
{showUpgradeBanner && <UpgradeBanner />}
|
||||
|
||||
<div className="flex flex-col gap-0.5 pb-2">
|
||||
{indexingStatusBatchDetail.map(indexingStatusDetail => (
|
||||
<div
|
||||
key={indexingStatusDetail.id}
|
||||
className={cn(
|
||||
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
|
||||
indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
|
||||
)}
|
||||
>
|
||||
{isSourceEmbedding(indexingStatusDetail) && (
|
||||
<div
|
||||
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
|
||||
style={{ width: `${getSourcePercent(indexingStatusDetail)}%` }}
|
||||
/>
|
||||
)}
|
||||
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
|
||||
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && (
|
||||
<DocumentFileIcon
|
||||
size="sm"
|
||||
className="shrink-0"
|
||||
name={getSourceName(indexingStatusDetail.id)}
|
||||
extension={getFileType(getSourceName(indexingStatusDetail.id))}
|
||||
/>
|
||||
)}
|
||||
{getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && (
|
||||
<NotionIcon
|
||||
className="shrink-0"
|
||||
type="page"
|
||||
src={getIcon(indexingStatusDetail.id)}
|
||||
/>
|
||||
)}
|
||||
<div className="flex w-0 grow items-center gap-1" title={getSourceName(indexingStatusDetail.id)}>
|
||||
<div className="system-xs-medium truncate text-text-secondary">
|
||||
{getSourceName(indexingStatusDetail.id)}
|
||||
</div>
|
||||
{
|
||||
enableBilling && (
|
||||
<PriorityLabel className="ml-0" />
|
||||
)
|
||||
}
|
||||
</div>
|
||||
{isSourceEmbedding(indexingStatusDetail) && (
|
||||
<div className="shrink-0 text-xs text-text-secondary">{`${getSourcePercent(indexingStatusDetail)}%`}</div>
|
||||
)}
|
||||
{indexingStatusDetail.indexing_status === 'error' && (
|
||||
<Tooltip
|
||||
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
|
||||
offset={4}
|
||||
popupContent={indexingStatusDetail.error}
|
||||
>
|
||||
<span>
|
||||
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
|
||||
</span>
|
||||
</Tooltip>
|
||||
)}
|
||||
{indexingStatusDetail.indexing_status === 'completed' && (
|
||||
<RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
{statusList.map(detail => (
|
||||
<IndexingProgressItem
|
||||
key={detail.id}
|
||||
detail={detail}
|
||||
name={documentLookup.getName(detail.id)}
|
||||
sourceType={documentLookup.getSourceType(detail.id)}
|
||||
notionIcon={documentLookup.getNotionIcon(detail.id)}
|
||||
enableBilling={enableBilling}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<Divider type="horizontal" className="my-0 bg-divider-subtle" />
|
||||
|
||||
<RuleDetail
|
||||
sourceData={ruleDetail}
|
||||
indexingType={indexingType}
|
||||
retrievalMethod={retrievalMethod}
|
||||
/>
|
||||
</div>
|
||||
<div className="mt-6 flex items-center gap-x-2 py-2">
|
||||
<Link
|
||||
href={apiReferenceUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
<Button
|
||||
className="w-fit gap-x-0.5 px-3"
|
||||
>
|
||||
<RiTerminalBoxLine className="size-4" />
|
||||
<span className="px-0.5">Access the API</span>
|
||||
</Button>
|
||||
</Link>
|
||||
<Button
|
||||
className="w-fit gap-x-0.5 px-3"
|
||||
variant="primary"
|
||||
onClick={navToDocumentList}
|
||||
>
|
||||
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
|
||||
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<ActionButtons
|
||||
apiReferenceUrl={apiReferenceUrl}
|
||||
onNavToDocuments={handleNavToDocuments}
|
||||
/>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
@ -0,0 +1,120 @@
|
||||
import type { FC } from 'react'
|
||||
import type { IndexingStatusResponse } from '@/models/datasets'
|
||||
import {
|
||||
RiCheckboxCircleFill,
|
||||
RiErrorWarningFill,
|
||||
} from '@remixicon/react'
|
||||
import NotionIcon from '@/app/components/base/notion-icon'
|
||||
import Tooltip from '@/app/components/base/tooltip'
|
||||
import PriorityLabel from '@/app/components/billing/priority-label'
|
||||
import { DataSourceType } from '@/models/datasets'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import DocumentFileIcon from '../../common/document-file-icon'
|
||||
import { getFileType, getSourcePercent, isSourceEmbedding } from './utils'
|
||||
|
||||
/** Props accepted by `IndexingProgressItem`. */
type IndexingProgressItemProps = {
  /** Indexing status record for one document; drives the progress bar and status icon. */
  detail: IndexingStatusResponse
  /** Document name shown in the row and used as its `title` tooltip. */
  name?: string
  /** Data source type used to pick the leading icon (file or Notion). */
  sourceType?: DataSourceType
  /** Icon source forwarded to `NotionIcon` for Notion documents. */
  notionIcon?: string
  /** When truthy, a `PriorityLabel` is rendered next to the name. */
  enableBilling?: boolean
}
|
||||
|
||||
/**
 * Trailing status icon for a document row.
 *
 * - `'completed'` renders a success check mark.
 * - `'error'` renders a warning icon wrapped in a tooltip showing `error`.
 * - any other status renders nothing.
 */
const StatusIcon: FC<{ status: string, error?: string }> = ({ status, error }) => {
  switch (status) {
    case 'completed':
      return <RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />

    case 'error':
      return (
        <Tooltip
          popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
          offset={4}
          popupContent={error}
        >
          <span>
            <RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
          </span>
        </Tooltip>
      )

    default:
      return null
  }
}
|
||||
|
||||
/**
 * Leading icon for a document row, chosen by data source type.
 *
 * File sources get a `DocumentFileIcon` whose extension is derived from
 * `name` via `getFileType`; Notion sources get a `NotionIcon` using
 * `notionIcon` as its source. Any other (or missing) type renders nothing.
 */
const SourceTypeIcon: FC<{
  sourceType?: DataSourceType
  name?: string
  notionIcon?: string
}> = ({ sourceType, name, notionIcon }) => {
  const isFileSource = sourceType === DataSourceType.FILE
  const isNotionSource = sourceType === DataSourceType.NOTION

  if (isFileSource) {
    const extension = getFileType(name)
    return (
      <DocumentFileIcon
        size="sm"
        className="shrink-0"
        name={name}
        extension={extension}
      />
    )
  }

  if (isNotionSource)
    return <NotionIcon className="shrink-0" type="page" src={notionIcon} />

  return null
}
|
||||
|
||||
/**
 * One row of the indexing progress list.
 *
 * Shows the source icon, the document name, an optional priority label and a
 * trailing status icon. While `isSourceEmbedding(detail)` is true, a progress
 * fill and a percentage (from `getSourcePercent(detail)`) are rendered as well.
 * Rows whose status is `'error'` get a destructive background.
 */
const IndexingProgressItem: FC<IndexingProgressItemProps> = (props) => {
  const { detail, name, sourceType, notionIcon, enableBilling } = props
  const showProgress = isSourceEmbedding(detail)
  const progressPercent = getSourcePercent(detail)
  const hasFailed = detail.indexing_status === 'error'

  const rowClassName = cn(
    'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
    hasFailed && 'bg-state-destructive-hover-alt',
  )

  return (
    <div className={rowClassName}>
      {showProgress && (
        <div
          className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
          style={{ width: `${progressPercent}%` }}
        />
      )}
      <div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
        <SourceTypeIcon sourceType={sourceType} name={name} notionIcon={notionIcon} />
        <div className="flex w-0 grow items-center gap-1" title={name}>
          <div className="system-xs-medium truncate text-text-secondary">
            {name}
          </div>
          {enableBilling && <PriorityLabel className="ml-0" />}
        </div>
        {showProgress && (
          <div className="shrink-0 text-xs text-text-secondary">{`${progressPercent}%`}</div>
        )}
        <StatusIcon status={detail.indexing_status} error={detail.error} />
      </div>
    </div>
  )
}
|
||||
|
||||
export default IndexingProgressItem
|
||||
@ -0,0 +1,133 @@
|
||||
import type { FC } from 'react'
|
||||
import type { ProcessRuleResponse } from '@/models/datasets'
|
||||
import Image from 'next/image'
|
||||
import { useCallback } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
|
||||
import { ProcessMode } from '@/models/datasets'
|
||||
import { RETRIEVE_METHOD } from '@/types/app'
|
||||
import { indexMethodIcon, retrievalIcon } from '../icons'
|
||||
import { IndexingType } from '../step-two'
|
||||
|
||||
/** Props accepted by `RuleDetail`. */
type RuleDetailProps = {
  /** Process rule of the document; when absent (or without a mode) the rule fields display '-'. */
  sourceData?: ProcessRuleResponse
  /** Indexing type; compared against `IndexingType.ECONOMICAL` to pick labels and icons. */
  indexingType?: string
  /** Configured retrieval method; used for the retrieval label and icon. */
  retrievalMethod?: RETRIEVE_METHOD
}
|
||||
|
||||
/**
 * Maps a pre-processing rule id to the translation key of its display name.
 * Rule ids that are not listed here have no display name.
 */
const PRE_PROCESSING_RULE_KEYS = {
  remove_extra_spaces: 'stepTwo.removeExtraSpaces',
  remove_urls_emails: 'stepTwo.removeUrlEmails',
  remove_stopwords: 'stepTwo.removeStopwords',
} as const
|
||||
|
||||
/**
 * Maps a retrieval method to the icon shown next to its label.
 * Typed as `Partial` so a method without an entry yields `undefined`
 * and the consumer can apply its own fallback.
 */
const RETRIEVAL_ICON_MAP: Partial<Record<RETRIEVE_METHOD, string>> = {
  // Vector-based retrieval
  [RETRIEVE_METHOD.semantic]: retrievalIcon.vector,
  // Hybrid retrieval
  [RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid,
  // Text/keyword based methods all share the full-text icon
  [RETRIEVE_METHOD.fullText]: retrievalIcon.fullText,
  [RETRIEVE_METHOD.invertedIndex]: retrievalIcon.fullText,
  [RETRIEVE_METHOD.keywordSearch]: retrievalIcon.fullText,
}
|
||||
|
||||
/**
 * Type guard that narrows `value` to `number`.
 * Relies on `typeof` only, so `NaN` and `Infinity` are accepted as numbers.
 */
function isNumber(value: unknown): value is number {
  return typeof value === 'number'
}
|
||||
|
||||
/**
 * Read-only summary of how a document is processed.
 *
 * Renders five `FieldInfo` rows: chunking mode, segment length, text cleaning
 * rules, index mode and retrieval setting. The three rule rows display '-'
 * when `sourceData` has no mode.
 *
 * In economical indexing the retrieval row always describes keyword search,
 * regardless of `retrievalMethod`; both the label and the icon follow that
 * effective method so they cannot disagree.
 */
const RuleDetail: FC<RuleDetailProps> = ({ sourceData, indexingType, retrievalMethod }) => {
  const { t } = useTranslation()

  const segmentationRuleLabels = {
    mode: t('embedding.mode', { ns: 'datasetDocuments' }),
    segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
    textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
  }
  // Keys of the label map; the value getters below are keyed by the same set
  // so a label can never exist without a matching getter.
  type SegmentationField = keyof typeof segmentationRuleLabels

  // Translated display name of a pre-processing rule, or undefined for unknown ids.
  const getRuleName = useCallback((key: string): string | undefined => {
    const translationKey = PRE_PROCESSING_RULE_KEYS[key as keyof typeof PRE_PROCESSING_RULE_KEYS]
    return translationKey ? t(translationKey, { ns: 'datasetCreation' }) : undefined
  }, [t])

  // "Custom" for general mode, otherwise "Hierarchical · <parent mode>".
  const getModeValue = useCallback((): string => {
    if (!sourceData?.mode)
      return '-'

    if (sourceData.mode === ProcessMode.general)
      return t('embedding.custom', { ns: 'datasetDocuments' })

    const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
      ? t('parentMode.paragraph', { ns: 'dataset' })
      : t('parentMode.fullDoc', { ns: 'dataset' })

    return `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${parentModeLabel}`
  }, [sourceData, t])

  // Max tokens for general mode; parent and child max tokens otherwise.
  // Non-numeric values are shown as '-'.
  const getSegmentLengthValue = useCallback((): string | number => {
    if (!sourceData?.mode)
      return '-'

    const maxTokens = isNumber(sourceData.rules?.segmentation?.max_tokens)
      ? sourceData.rules.segmentation.max_tokens
      : '-'

    if (sourceData.mode === ProcessMode.general)
      return maxTokens

    const childMaxTokens = isNumber(sourceData.rules?.subchunk_segmentation?.max_tokens)
      ? sourceData.rules.subchunk_segmentation.max_tokens
      : '-'

    return `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
  }, [sourceData, t])

  // Comma-joined names of the enabled pre-processing rules; rules without a
  // known display name are skipped, and '-' is shown when none remain.
  const getTextCleaningValue = useCallback((): string => {
    if (!sourceData?.mode)
      return '-'

    const enabledRules = sourceData.rules?.pre_processing_rules?.filter(rule => rule.enabled) || []
    const ruleNames = enabledRules
      .map((rule) => {
        const name = getRuleName(rule.id)
        return typeof name === 'string' ? name : ''
      })
      .filter(name => name)
    return ruleNames.length > 0 ? ruleNames.join(',') : '-'
  }, [sourceData, getRuleName])

  const fieldValueGetters: Record<SegmentationField, () => string | number> = {
    mode: getModeValue,
    segmentLength: getSegmentLengthValue,
    textCleaning: getTextCleaningValue,
  }
  const segmentationFields = Object.keys(segmentationRuleLabels) as SegmentationField[]

  const isEconomical = indexingType === IndexingType.ECONOMICAL
  const indexMethodIconSrc = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
  const indexModeLabel = t(`stepTwo.${isEconomical ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' })

  const effectiveRetrievalMethod = isEconomical ? 'keyword_search' : (retrievalMethod ?? 'semantic_search')
  const retrievalLabel = t(`retrieval.${effectiveRetrievalMethod}.title`, { ns: 'dataset' })
  // Resolve the icon from the same effective method as the label. Looking it
  // up by the raw prop would pair the keyword-search label with whatever icon
  // `retrievalMethod` happens to map to in economical mode.
  const iconRetrievalMethod = isEconomical ? RETRIEVE_METHOD.keywordSearch : retrievalMethod
  const retrievalIconSrc = (iconRetrievalMethod ? RETRIEVAL_ICON_MAP[iconRetrievalMethod] : undefined)
    ?? retrievalIcon.vector

  return (
    <div className="flex flex-col gap-1">
      {segmentationFields.map(field => (
        <FieldInfo
          key={field}
          label={segmentationRuleLabels[field]}
          displayedValue={String(fieldValueGetters[field]())}
        />
      ))}
      <FieldInfo
        label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
        displayedValue={indexModeLabel}
        valueIcon={<Image className="size-4" src={indexMethodIconSrc} alt="" />}
      />
      <FieldInfo
        label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
        displayedValue={retrievalLabel}
        valueIcon={<Image className="size-4" src={retrievalIconSrc} alt="" />}
      />
    </div>
  )
}
|
||||
|
||||
export default RuleDetail
|
||||
@ -0,0 +1,22 @@
|
||||
import type { FC } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
|
||||
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
|
||||
|
||||
/**
 * Banner with a short upgrade message and an `UpgradeBtn`.
 * Takes no props; the message is the translated
 * `plansCommon.documentProcessingPriorityUpgrade` string.
 */
const UpgradeBanner: FC = () => {
  const { t } = useTranslation()
  const upgradeMessage = t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })

  const icon = (
    <div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
      <ZapFast className="h-4 w-4 text-[#FB6514]" />
    </div>
  )

  return (
    <div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
      {icon}
      <div className="mx-3 grow text-[13px] font-medium text-gray-700">
        {upgradeMessage}
      </div>
      <UpgradeBtn loc="knowledge-speed-up" />
    </div>
  )
}
|
||||
|
||||
export default UpgradeBanner
|
||||
@ -0,0 +1,90 @@
|
||||
import type { IndexingStatusResponse } from '@/models/datasets'
|
||||
import { useEffect, useRef, useState } from 'react'
|
||||
import { fetchIndexingStatusBatch } from '@/service/datasets'
|
||||
|
||||
/** Delay before the next status request is scheduled, in milliseconds. */
const POLLING_INTERVAL = 2500

/** Indexing statuses treated as finished; polling stops once every item has one of these. */
const COMPLETED_STATUSES = [
  'completed',
  'error',
  'paused',
] as const

/**
 * Indexing statuses that denote work still in progress.
 * NOTE(review): not referenced in the visible part of this module — confirm where it is consumed.
 */
const EMBEDDING_STATUSES = [
  'indexing',
  'splitting',
  'parsing',
  'cleaning',
  'waiting',
] as const
|
||||
|
||||
/** Input of `useIndexingStatusPolling`: identifies the batch to poll. */
type IndexingStatusPollingParams = {
  /** Dataset id forwarded to the status request. */
  datasetId: string
  /** Batch id forwarded to the status request. */
  batchId: string
}

/** Output of `useIndexingStatusPolling`. */
type IndexingStatusPollingResult = {
  /** Most recent status list received from the server (empty before the first response). */
  statusList: IndexingStatusResponse[]
  /** True when at least one item is still in an embedding status. */
  isEmbedding: boolean
  /** True when the list is non-empty and every item has a completed status. */
  isEmbeddingCompleted: boolean
}
|
||||
|
||||
/** Returns true when `status` is one of the values in `COMPLETED_STATUSES`. */
const isStatusCompleted = (status: string): boolean => {
  // View the literal tuple as plain strings so no cast is needed on `status`
  const terminalStatuses: readonly string[] = COMPLETED_STATUSES
  return terminalStatuses.includes(status)
}
|
||||
|
||||
/**
 * Returns true when no item in `statusList` is still in a non-completed status.
 * An empty list therefore also yields true.
 */
const isAllCompleted = (statusList: IndexingStatusResponse[]): boolean => {
  const hasPending = statusList.some(item => !isStatusCompleted(item.indexing_status))
  return !hasPending
}
|
||||
|
||||
/**
 * Custom hook for polling indexing status with automatic stop on completion.
 * Handles the polling lifecycle and provides derived states for UI rendering.
 *
 * Polling starts when the effect runs (on mount and whenever `datasetId` or
 * `batchId` changes), repeats every `POLLING_INTERVAL` ms, and stops as soon as
 * `isAllCompleted` reports true for a response. A failed request is ignored
 * and retried on the next tick.
 *
 * Each effect run owns its polling loop through a unique token stored in a
 * ref. A request that resolves after its run was cleaned up (unmount,
 * dependency change, or the StrictMode mount/unmount/mount cycle) is dropped,
 * so it can neither write stale data into state nor restart a loop that no
 * cleanup would ever clear.
 *
 * Takes the `datasetId` and `batchId` forwarded to `fetchIndexingStatusBatch`.
 * Returns the latest status list plus the `isEmbedding` and
 * `isEmbeddingCompleted` flags derived from it.
 */
export const useIndexingStatusPolling = ({
  datasetId,
  batchId,
}: IndexingStatusPollingParams): IndexingStatusPollingResult => {
  const [statusList, setStatusList] = useState<IndexingStatusResponse[]>([])
  // Token of the effect run that currently owns polling; `null` when polling is stopped
  const activeRunRef = useRef<symbol | null>(null)

  useEffect(() => {
    // Claim ownership for this run; any earlier run stops matching from here on
    const runToken = Symbol('indexing-status-polling')
    activeRunRef.current = runToken
    let timeoutId: ReturnType<typeof setTimeout> | null = null

    const isActive = (): boolean => activeRunRef.current === runToken

    const stopPolling = (): void => {
      // Only release ownership if a newer run has not already taken over
      if (isActive())
        activeRunRef.current = null
    }

    const poll = async (): Promise<void> => {
      if (!isActive())
        return

      try {
        const response = await fetchIndexingStatusBatch({ datasetId, batchId })

        // This run may have been cleaned up while the request was in flight
        if (!isActive())
          return

        setStatusList(response.data)

        // NOTE: `isAllCompleted([])` is true, so an empty response also ends polling
        if (isAllCompleted(response.data)) {
          stopPolling()
          return
        }
      }
      catch {
        // Continue polling on error
      }

      if (isActive()) {
        timeoutId = setTimeout(() => {
          poll()
        }, POLLING_INTERVAL)
      }
    }

    poll()

    return () => {
      stopPolling()
      if (timeoutId)
        clearTimeout(timeoutId)
    }
  }, [datasetId, batchId])

  const isEmbedding = statusList.some(item =>
    EMBEDDING_STATUSES.includes(item?.indexing_status as typeof EMBEDDING_STATUSES[number]),
  )

  // An empty list (nothing fetched yet) must not be reported as completed
  const isEmbeddingCompleted = statusList.length > 0 && isAllCompleted(statusList)

  return {
    statusList,
    isEmbedding,
    isEmbeddingCompleted,
  }
}
|
||||
@ -0,0 +1,64 @@
|
||||
import type {
|
||||
DataSourceInfo,
|
||||
DataSourceType,
|
||||
FullDocumentDetail,
|
||||
IndexingStatusResponse,
|
||||
LegacyDataSourceInfo,
|
||||
} from '@/models/datasets'
|
||||
|
||||
/** Indexing statuses that count as "embedding still in progress". */
const EMBEDDING_STATUSES = [
  'indexing',
  'splitting',
  'parsing',
  'cleaning',
  'waiting',
] as const
|
||||
|
||||
/**
 * Type guard for legacy data source info, recognised by its `upload_file` property.
 *
 * Returns false for `null` / `undefined` input. Otherwise returns true when
 * `typeof upload_file` is `'object'`; because `typeof null === 'object'`, a
 * `null` `upload_file` passes the guard as well.
 */
export const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
  if (info == null)
    return false

  const { upload_file: uploadFile } = info as LegacyDataSourceInfo
  return typeof uploadFile === 'object'
}
|
||||
|
||||
/**
 * Check if a status indicates the source is being embedded, i.e. whether
 * `detail.indexing_status` is one of the values in `EMBEDDING_STATUSES`.
 */
export const isSourceEmbedding = (detail: IndexingStatusResponse): boolean => {
  // View the literal tuple as plain strings for the membership test
  const inProgressStatuses: readonly string[] = EMBEDDING_STATUSES
  return inProgressStatuses.includes(detail.indexing_status as string)
}
|
||||
|
||||
/**
 * Calculate the progress percentage for a document.
 *
 * Returns `completed_segments / total_segments` as a rounded integer
 * percentage, capped at 100. Missing or zero totals yield 0.
 */
export const getSourcePercent = (detail: IndexingStatusResponse): number => {
  const total = detail.total_segments || 0
  // Nothing to divide by: report no progress
  if (total === 0)
    return 0

  const done = detail.completed_segments || 0
  const rounded = Math.round(done * 100 / total)
  return rounded > 100 ? 100 : rounded
}
|
||||
|
||||
/**
 * Get file extension from filename, defaults to 'txt'.
 *
 * The extension is the text after the last `.` in `name`. The default is
 * returned when `name` is missing or empty, when it contains no `.` at all
 * (so `"README"` is not mistaken for an extension), or when nothing follows
 * the last `.`.
 */
export const getFileType = (name?: string): string => {
  if (!name)
    return 'txt'

  const lastDotIndex = name.lastIndexOf('.')
  // No dot means no extension; do not return the whole filename
  if (lastDotIndex === -1)
    return 'txt'

  return name.slice(lastDotIndex + 1) || 'txt'
}
|
||||
|
||||
/**
 * Document lookup utilities - provides document info by ID from a list.
 *
 * Indexes `documents` by `id` once and returns accessors that resolve a
 * document, or a single field of it, from that index. Every accessor returns
 * `undefined` for an unknown id.
 */
export const createDocumentLookup = (documents: FullDocumentDetail[]) => {
  const byId = new Map<FullDocumentDetail['id'], FullDocumentDetail>()
  // When ids repeat, later entries overwrite earlier ones
  for (const entry of documents)
    byId.set(entry.id, entry)

  const getDocument = (id: string) => byId.get(id)

  const getName = (id: string) => getDocument(id)?.name

  const getSourceType = (id: string) => getDocument(id)?.data_source_type as DataSourceType | undefined

  // The icon is only read from legacy-shaped data source info
  const getNotionIcon = (id: string) => {
    const sourceInfo = getDocument(id)?.data_source_info
    return sourceInfo && isLegacyDataSourceInfo(sourceInfo)
      ? sourceInfo.notion_page_icon
      : undefined
  }

  return {
    getDocument,
    getName,
    getSourceType,
    getNotionIcon,
  }
}
|
||||
Loading…
Reference in New Issue
Block a user