mirror of
https://github.com/langgenius/dify.git
synced 2026-02-10 13:04:25 +08:00
**Problem:**
The telemetry system had unnecessary abstraction layers and bad practices
introduced by the last 3 commits that added the gateway implementation:
- TelemetryFacade class wrapper around emit() function
- String literals instead of SignalType enum
- Dictionary mapping enum → string instead of enum → enum
- Unnecessary ENTERPRISE_TELEMETRY_GATEWAY_ENABLED feature flag
- Duplicate guard checks scattered across files
- Non-thread-safe TelemetryGateway singleton pattern
- Missing guard in ops_trace_task.py causing RuntimeError spam
**Solution:**
1. Deleted TelemetryFacade - replaced with thin emit() function in core/telemetry/__init__.py
2. Added SignalType enum ('trace' | 'metric_log') to enterprise/telemetry/contracts.py
3. Replaced CASE_TO_TRACE_TASK_NAME dict with CASE_TO_TRACE_TASK: dict[TelemetryCase, TraceTaskName]
4. Deleted is_gateway_enabled() and _emit_legacy() - using existing ENTERPRISE_ENABLED + ENTERPRISE_TELEMETRY_ENABLED instead
5. Extracted _should_drop_ee_only_event() helper to eliminate duplicate checks
6. Moved TelemetryGateway singleton to ext_enterprise_telemetry.py:
- Init once in init_app() for thread-safety
- Access via get_gateway() function
7. Re-added guard to ops_trace_task.py to prevent RuntimeError when EE=OFF but CE tracing is enabled
8. Updated 11 caller files to import 'emit as telemetry_emit' instead of 'TelemetryFacade'
**Result:**
- 322 net lines deleted (533 removed, 211 added)
- All 91 tests pass
- Thread-safe singleton pattern
- Cleaner API surface: from TelemetryFacade.emit() to telemetry_emit()
- Proper enum usage throughout
- No RuntimeError spam in EE=OFF + CE=ON scenario
76 lines
2.7 KiB
Python
76 lines
2.7 KiB
Python
import json
|
|
import logging
|
|
|
|
from celery import shared_task
|
|
from flask import current_app
|
|
|
|
from core.ops.entities.config_entity import OPS_FILE_PATH, OPS_TRACE_FAILED_KEY
|
|
from core.ops.entities.trace_entity import trace_info_info_map
|
|
from core.rag.models.document import Document
|
|
from extensions.ext_redis import redis_client
|
|
from extensions.ext_storage import storage
|
|
from models.model import Message
|
|
from models.workflow import WorkflowRun
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@shared_task(queue="ops_trace")
def process_trace_tasks(file_info):
    """
    Async process trace tasks
    Usage: process_trace_tasks.delay(tasks_data)

    Reads the JSON trace payload stored at
    ``{OPS_FILE_PATH}{app_id}/{file_id}.json``, rebuilds the nested
    ``message_data`` / ``workflow_data`` / ``documents`` entries into their
    model objects, and forwards the resulting trace info to the enterprise
    telemetry exporter (when enabled) and to the app's configured trace
    instance (when one exists).

    Any failure while loading, rebuilding or dispatching the payload is
    logged with its traceback and counted under
    ``{OPS_TRACE_FAILED_KEY}_{app_id}`` in Redis. The payload file is deleted
    from storage in every case; a failed delete is only logged.

    :param file_info: mapping providing the ``app_id`` and ``file_id`` keys
        that locate the payload file.
    """
    # Imported lazily inside the task, as in the original implementation.
    from core.ops.ops_trace_manager import OpsTraceManager

    app_id = file_info.get("app_id")
    file_id = file_info.get("file_id")
    file_path = f"{OPS_FILE_PATH}{app_id}/{file_id}.json"

    try:
        # Loading and hydration live inside the try block so that a corrupt or
        # unreadable payload is still counted as a failure and the file is
        # still removed by the ``finally`` clause instead of leaking in storage.
        file_data = json.loads(storage.load(file_path))
        trace_info = file_data.get("trace_info")
        trace_info_type = file_data.get("trace_info_type")
        trace_instance = OpsTraceManager.get_ops_trace_instance(app_id)

        # Rebuild the serialized sub-objects before constructing the typed trace info.
        if trace_info.get("message_data"):
            trace_info["message_data"] = Message.from_dict(data=trace_info["message_data"])
        if trace_info.get("workflow_data"):
            trace_info["workflow_data"] = WorkflowRun.from_dict(data=trace_info["workflow_data"])
        if trace_info.get("documents"):
            trace_info["documents"] = [Document.model_validate(doc) for doc in trace_info["documents"]]

        # Unknown trace types are passed on as the plain dict, unchanged.
        trace_type = trace_info_info_map.get(trace_info_type)
        if trace_type:
            trace_info = trace_type(**trace_info)

        from extensions.ext_enterprise_telemetry import is_enabled as is_ee_telemetry_enabled

        if is_ee_telemetry_enabled():
            from enterprise.telemetry.enterprise_trace import EnterpriseOtelTrace

            # Enterprise export is best-effort: a failure here must not prevent
            # the app's own trace instance from receiving the trace below.
            try:
                EnterpriseOtelTrace().trace(trace_info)
            except Exception:
                logger.warning("Enterprise trace failed for app_id: %s", app_id, exc_info=True)

        # No trace instance for this app: nothing further to dispatch.
        if trace_instance:
            with current_app.app_context():
                trace_instance.trace(trace_info)

        logger.info("Processing trace tasks success, app_id: %s", app_id)
    except Exception:
        # logger.exception records the failure at ERROR level with the traceback.
        logger.exception("Processing trace tasks failed, app_id: %s", app_id)
        failed_key = f"{OPS_TRACE_FAILED_KEY}_{app_id}"
        redis_client.incr(failed_key)
    finally:
        # Always try to remove the payload file; a cleanup failure is logged only.
        try:
            storage.delete(file_path)
        except Exception as e:
            logger.warning(
                "Failed to delete trace file %s for app_id %s: %s",
                file_path,
                app_id,
                e,
            )
|