# zydi-web/test_history/deep.py

import io
import os
import json
import base64
import requests
import re
from PIL import Image
from datetime import datetime, timedelta
from decord import VideoReader, cpu

# Endpoint of the SiliconFlow chat-completions API used for frame analysis.
SILICONFLOW_URL = "https://api.siliconflow.cn/v1/chat/completions"
# SECURITY: a credential committed to source control should be treated as
# leaked. Prefer supplying the key through the SILICONFLOW_API_KEY environment
# variable; the literal below is kept only as a backward-compatible fallback
# and should be rotated and removed.
API_KEY = (
    os.environ.get("SILICONFLOW_API_KEY")
    or "sk-ytxabphvgxrjbvnqiwercjyrabvlukwddqsmvnqnvwuazamd"
)

class MediaAnalysisSystem:
    """Sample frames from a video and ask a vision-language model to describe them.

    The pipeline is: decode the video bytes, pick a small number of evenly
    spread frames, shrink them, send them together with a fixed prompt to the
    SiliconFlow chat-completions endpoint, and run a keyword scan over the
    model's free-text answer.
    """

    def __init__(self, max_num_frames=5, min_num_frames=3):
        """Configure how many frames are sent per video.

        Args:
            max_num_frames: Upper bound on the number of sampled frames
                (default 5).
            min_num_frames: Number of frames to aim for when the video is too
                short to yield that many one-per-second samples (default 3).
        """
        self.MAX_NUM_FRAMES = max_num_frames
        self.MIN_NUM_FRAMES = min_num_frames

    def _select_frame_indices(self, total_frames, avg_fps):
        """Return the frame indices to extract from a video.

        Roughly one frame per second is taken as the candidate set. If that
        yields more than MAX_NUM_FRAMES, the candidates are thinned out
        uniformly; if it yields fewer than MIN_NUM_FRAMES (very short clip),
        frames are instead picked uniformly from the whole clip.

        Args:
            total_frames: Number of frames in the video.
            avg_fps: Average frames per second reported by the decoder.

        Returns:
            A list of ascending frame indices; empty when the video has no
            frames.
        """
        if total_frames <= 0:
            return []

        upper = max(1, int(self.MAX_NUM_FRAMES))
        lower = max(1, min(int(self.MIN_NUM_FRAMES), upper))

        # A reported fps below 0.5 rounds to 0, which range() rejects as a step.
        step = max(1, round(avg_fps))
        candidates = list(range(0, total_frames, step))

        if len(candidates) < lower:
            # Clip is shorter than `lower` seconds: sample from every frame.
            candidates = list(range(total_frames))
            target = min(lower, total_frames)
        else:
            target = min(len(candidates), upper)

        if len(candidates) > target:
            # Take the midpoint of each of `target` equal slices; integer
            # arithmetic avoids float rounding at slice boundaries.
            count = len(candidates)
            candidates = [
                candidates[(2 * i + 1) * count // (2 * target)]
                for i in range(target)
            ]
        return candidates

    def encode_video(self, video_data):
        """Decode video bytes and return a few down-scaled frames.

        Args:
            video_data: Raw bytes of the video file.

        Returns:
            A list of PIL images, each fitted within 600x600 and round-tripped
            through JPEG (quality 85).

        Raises:
            ValueError: If the video contains no frames.
        """
        video_file = io.BytesIO(video_data)
        vr = VideoReader(video_file, ctx=cpu(0))
        frame_idx = self._select_frame_indices(len(vr), vr.get_avg_fps())
        if not frame_idx:
            raise ValueError("Video contains no decodable frames")

        frames = vr.get_batch(frame_idx).asnumpy()

        # Reduce image size and quality to keep the request payload small.
        compressed_frames = []
        for array in frames:
            frame = Image.fromarray(array.astype('uint8'))
            # thumbnail() resizes in place and preserves the aspect ratio.
            frame.thumbnail((600, 600), Image.Resampling.LANCZOS)
            buffered = io.BytesIO()
            frame.save(buffered, format="JPEG", quality=85)
            buffered.seek(0)
            compressed_frames.append(Image.open(buffered))

        print(f'处理后的帧数: {len(compressed_frames)}')
        return compressed_frames

    def process_video(self, video_data, object_name):
        """Analyse one video and return the model answer plus extracted keywords.

        Args:
            video_data: Raw bytes of the video file.
            object_name: Name used in log and error messages.

        Returns:
            A dict with keys "original_answer" (model text), "extracted_info"
            (result of extract_info) and "num_frames" (frames sent).

        Raises:
            ValueError: If video_data is empty.
            Exception: Any failure of the API request or of reading its
                response is logged and re-raised.
        """
        if not video_data:
            raise ValueError(f"Empty video data for {object_name}")
        print(f"Processing video: {object_name}, data size: {len(video_data)} bytes")
        frames = self.encode_video(video_data)

        # Build the content of a single request message.
        messages = [{
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": """Please analyze these images as a time series in detail, including the following aspects:
                    1. Exact count of people in the scene
                    2. Individual behavior analysis for each person
                    3. Facial expression recognition and emotional state assessment
                    4. Overall scene and environment detailed description
                    5. Interactions between people
                    6. Detailed environmental conditions description
                    7. Items and furniture appearing in the environment
                    8. Any suspicious or abnormal activities
                    9. Personnel specific characteristics (estimated age range, gender, clothing)
                    10. Movement patterns and directions of people
                    11. Carried items or objects
                    12. Group dynamics and gathering situations
                    13. Video timestamp analysis (if available)"""
                }
            ]
        }]

        # Attach every frame to the same message so the model sees the sequence.
        for frame in frames:
            # Encode as JPEG so the bytes match the image/jpeg data URL below.
            base64_image = self.image_to_base64(frame, image_format="JPEG")
            messages[0]["content"].append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}",
                    "detail": "auto"
                }
            })

        try:
            response = self._make_api_request(messages)
            answer = response["choices"][0]["message"]["content"]
            extracted_info = self.extract_info(answer)

            return {
                "original_answer": answer,
                "extracted_info": extracted_info,
                "num_frames": len(frames),
            }

        except Exception as e:
            print(f"API请求失败: {str(e)}")
            raise

    def _make_api_request(self, messages):
        """POST the chat messages to SiliconFlow and return the decoded JSON.

        Args:
            messages: Chat messages in the request format built by
                process_video.

        Returns:
            The parsed JSON response body.

        Raises:
            Exception: If the HTTP status code is not 200.
        """
        payload = {
            "model": "deepseek-ai/deepseek-vl2",
            "messages": messages,
            "stream": False,
            "max_tokens": 1024,
            "temperature": 0.7,
            "top_p": 0.7,
            "top_k": 50,
            "frequency_penalty": 0.5,
            "n": 1,
            "response_format": {"type": "text"}
        }

        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json"
        }

        response = requests.post(
            SILICONFLOW_URL,
            json=payload,
            headers=headers,
            timeout=60  # 60-second timeout for the whole request
        )

        if response.status_code != 200:
            raise Exception(f"Siliconflow API 错误: {response.status_code}")

        return response.json()

    @staticmethod
    def image_to_base64(image, image_format="PNG"):
        """Serialise a PIL image and return it as a base64 string.

        Args:
            image: PIL image to encode.
            image_format: Target format name; defaults to "PNG" (the historic
                behaviour). "JPEG"/"JPG" are saved with quality 85, after
                converting to RGB when the mode is not JPEG-compatible.

        Returns:
            The base64-encoded image bytes as a str.
        """
        fmt = image_format.upper()
        save_kwargs = {}
        if fmt in ("JPEG", "JPG"):
            fmt = "JPEG"
            save_kwargs["quality"] = 85
            # JPEG cannot store alpha or palette modes.
            if image.mode not in ("L", "RGB", "CMYK"):
                image = image.convert("RGB")
        buffered = io.BytesIO()
        image.save(buffered, format=fmt, **save_kwargs)
        return base64.b64encode(buffered.getvalue()).decode()

    @staticmethod
    def extract_time_from_filename(object_name):
        """Derive a (start, end) time window from a "YYYYMMDD_HHMMSS..." name.

        Args:
            object_name: File name or path; only the base name is inspected.

        Returns:
            A (start_time, end_time) tuple of datetimes where end_time is
            start_time + 10 seconds. When the name cannot be parsed, the
            current time is used as start_time.
        """
        filename = os.path.basename(object_name)

        try:
            parts = filename.split('_')
            time_str = parts[0] + '_' + parts[1].split('.')[0]
            start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S")
        except (ValueError, IndexError):
            # IndexError: the name contains no underscore at all.
            print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。")
            start_time = datetime.now()
        return start_time, start_time + timedelta(seconds=10)

    @staticmethod
    def extract_info(answer):
        """Scan the model's answer for known keywords.

        Keyword lists are matched as plain, case-sensitive substrings; the
        environment and the people count are matched case-insensitively.

        Args:
            answer: Free-text answer returned by the model.

        Returns:
            A dict with keys "environment" (first matching environment or
            None), "num_people" (int, or None when no people pattern matched;
            0 means people were mentioned but no count could be parsed) and
            the lists "actions", "objects", "furniture", "emotions",
            "features" (each without duplicates, in list order).
        """
        info = {
            "environment": None,
            "num_people": None,
            "actions": [],
            "objects": [],
            "furniture": [],
            "emotions": [],
            "features": []
        }

        environments = ["office", "indoor", "outdoor", "meeting room"]
        lowered_answer = answer.lower()
        for env in environments:
            if env.lower() in lowered_answer:
                info["environment"] = env
                break

        # Ordered from most to least specific; the first pattern that matches
        # decides the count.
        people_patterns = [
            r'(\d+)\s*(person|people|individual|staff|user|child|adult|female|male)',
            r'(one|two|three|four|five|six|seven|eight|nine|ten)\s*(person|people|individual|staff|user|child|adult|female|male)',
            r'(a|few)\s*(person|people|individual|staff|user|child|adult|female|male)',
            r'several\s*(person|people|individual|staff|user|child|adult|female|male)?',
            r'(male|female)',
            r'(adult|minor|youth|elderly)\s*(person|group)',
            r'(employee|worker|student|customer|audience|visitor|passenger)',
            r'(crowd|public|mass|people)',
            r'(men|women|old|young|adult|child)'
        ]
        num_word_to_digit = {
            'two': 2, 'three': 3, 'four': 4, 'five': 5,
            'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10
        }
        for pattern in people_patterns:
            match = re.search(pattern, answer, re.IGNORECASE)
            if not match:
                continue
            # group(1) is None when "several" matched without a following noun.
            token = (match.group(1) or "").lower()
            if token.isdigit():
                info["num_people"] = int(token)
            elif token in ('a', 'one', 'an'):
                info["num_people"] = 1
            else:
                info["num_people"] = num_word_to_digit.get(token, 0)
            break

        actions = ["sleeping", "sitting", "eating", "standing", "falling", "dancing", "squatting", "crouching", "turning", "falling down", "lying down", "turning around", "jumping", "lying", "sleeping", "talking", "waking up", "reading", "writing", "studying", "using phone", "dining", "moving things", "sightseeing", "walking", "strolling", "reading", "writing", "using phone", "using computer", "studying", "working", "using laptop", "eating", "drinking", "organizing"]
        emotions = ["happy", "angry", "sad", "surprised", "scared", "disgusted", "calm", "relaxed", "neutral", "focused", "thinking"]
        objects = ["water bottle", "office supplies", "documents", "computer", "fan", "mouse", "keyboard", "tissue", "book", "pen", "bag", "box", "water cup", "cup", "mug", "glass", "folder", "backpack", "bookshelf", "filing cabinet", "phone"]
        furniture = ["chair", "desk", "coffee table", "filing cabinet", "bed", "sofa", "cabinet", "shelf", "camera", "cushion", "office chair", "TV", "whiteboard", "monitor", "storage rack", "file rack"]
        features = ["wearing glasses", "not wearing glasses", "long hair", "short hair", "long hair", "short hair", "wearing hat", "not wearing hat", "wearing mask", "not wearing mask", "male", "female", "fat", "thin", "tall", "short", "man", "woman", "adult"]

        def found(keywords):
            # dict.fromkeys drops repeated keywords while keeping list order,
            # so a keyword listed twice is reported only once.
            return [kw for kw in dict.fromkeys(keywords) if kw in answer]

        info["actions"] = found(actions)
        info["objects"] = found(objects)
        info["furniture"] = found(furniture)
        info["features"] = found(features)
        info["emotions"] = found(emotions)

        return info

# Module-level MediaAnalysisSystem instance, created at import time.
# NOTE(review): nothing in this file reads it (main() builds its own instance);
# presumably other modules import it — confirm before removing.
media_analysis_system = MediaAnalysisSystem()

class MediaAnalysisError(Exception):
    """Raised for media-analysis failures such as a missing or empty input folder."""

def process_video_folder(system, folder_path, output_path=None):
    """Analyse every supported video in a folder and save the results as JSON.

    The JSON file is rewritten after each video, so partial results survive an
    interruption. A failure on one video is recorded under that video's entry
    and does not stop the batch.

    Args:
        system: Object exposing process_video(video_data, object_name).
        folder_path: Directory containing the videos (.mp4, .avi, .mov, .mkv).
        output_path: Directory for the JSON report; created if missing.
            Defaults to the current working directory.

    Returns:
        A dict mapping each video file name to
        {"video_analysis": {"deepseek-vl2": <result or {"error": message}>}}.

    Raises:
        MediaAnalysisError: If folder_path is not a directory or contains no
            supported video files.
    """
    # Supported video formats.
    valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    results = {}

    # isdir (not exists) so that a path to a regular file is rejected here
    # instead of failing later inside os.listdir.
    if not os.path.isdir(folder_path):
        raise MediaAnalysisError(f"错误：文件夹 '{folder_path}' 不存在")

    if output_path is None:
        output_path = os.getcwd()
    else:
        os.makedirs(output_path, exist_ok=True)

    # Sorted for a deterministic processing order; sub-directories that happen
    # to carry a video extension are skipped.
    video_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.splitext(name)[1].lower() in valid_extensions
        and os.path.isfile(os.path.join(folder_path, name))
    )

    if not video_files:
        raise MediaAnalysisError(f"错误：在文件夹 '{folder_path}' 中未找到支持的视频文件")

    print(f"\n找到 {len(video_files)} 个视频文件，开始处理...\n")

    # Build the output file name from the folder name and the current time.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = os.path.basename(os.path.normpath(folder_path))
    output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json")

    def save_results():
        # Rewrite the whole report so the file on disk is always valid JSON.
        with open(output_file, 'w', encoding='utf-8') as out:
            json.dump(results, out, ensure_ascii=False, indent=2)

    for i, video_file in enumerate(video_files, 1):
        video_path = os.path.join(folder_path, video_file)
        print(f"正在处理 ({i}/{len(video_files)}): {video_file}")

        try:
            # Close the video file before the (slow) analysis call.
            with open(video_path, "rb") as video_fh:
                video_data = video_fh.read()
            result = system.process_video(video_data, video_file)
            results[video_file] = {
                "video_analysis": {
                    "deepseek-vl2": result
                }
            }
            save_results()
            print(f"✓ 成功处理并保存: {video_file}")
        except Exception as e:
            print(f"✗ 处理失败 {video_file}: {str(e)}")
            results[video_file] = {
                "video_analysis": {
                    "deepseek-vl2": {"error": str(e)}
                }
            }
            # Persist the failure too, so the report reflects every video.
            save_results()

    print(f"\n所有分析结果已保存到: {output_file}")
    return results

# NOTE(review): this repeats the MediaAnalysisError definition that appears
# earlier in the file and rebinds the name; one of the two can be dropped.
class MediaAnalysisError(Exception):
    """Error type for media-analysis failures reported to the user."""

def main():
    """Prompt for input/output folders, analyse the videos and print a summary.

    Errors are reported on stdout instead of being propagated.
    """
    try:
        system = MediaAnalysisSystem()

        # Ask for the folder holding the videos and where to store the report.
        folder_path = input("请输入视频文件夹路径: ").strip()
        output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip()

        # An empty answer means "use the current directory" (signalled by None).
        output_path = output_path if output_path else None

        results = process_video_folder(system, folder_path, output_path)

        # Each entry is wrapped as {"video_analysis": {"deepseek-vl2": ...}};
        # the "error" key, when present, lives in the innermost dict, so it
        # must be looked up there rather than on the wrapper.
        success_count = sum(
            1 for r in results.values()
            if "error" not in r.get("video_analysis", {}).get("deepseek-vl2", {})
        )
        print(f"\n处理完成！成功: {success_count}/{len(results)}")

    except MediaAnalysisError as e:
        print(f"\n错误: {str(e)}")
    except Exception as e:
        print(f"\n未预期的错误: {str(e)}")

if __name__ == "__main__":
    main()