commit 534a4e336d50115629cfcef2274787678958f9d2 Author: zydi Date: Sun Jan 12 03:01:51 2025 +0000 update diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08a93f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +# 忽略文件夹中的所有内容,但保留文件夹本身 +# 原始视频文件夹 +recordings/* +!recordings/.gitkeep + +# 原始图像文件夹 +images/* +!images/.gitkeep + +# 原始图像裁剪人体文件夹 +crop/* +!crop/.gitkeep + +# 人脸对比结果 +result/* +!result/.gitkeep + +# 人脸对比文件夹 +data/* +!data/.gitkeep + +# 测试数据集 +dataset/* +!dataset/.gitkeep diff --git a/README.md b/README.md new file mode 100644 index 0000000..78e6f7e --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# VLM (Visual Language Model) 行为识别系统 + +## 项目简介 +本项目是一个基于视觉语言模型的行为识别系统,用于识别和分析人类行为。 + +## 文件夹结构 +VLM/ +├── recordings/ # 原始视频文件存储目录 +├── images/ # 原始图像文件存储目录 +├── crop/ # 裁剪后的人体图像存储目录 +├── data/ # 人脸对比数据存储目录 +└── web/ # 前端页面保存历史记录 + +## 使用说明 +### 文件夹说明 +1. 将原始视频文件放入 `recordings` 目录 +2. 系统会自动处理视频并对视频抽帧保存到 `images` 目录 +3. 系统会自动处理图像并将裁剪后的人体图像保存到 `crop` 目录 +4. 人脸注册图片保存在 `data` 目录,结构为 `data/face_name/face_id.jpg`,一个人可有多张图片 +5. 前端页面保存历史记录在 `web` 目录 +6. test_history 保存了测试时的一些代码,可以忽略 + +### 文件说明 + +#### 前端显示 + 1. cls.js 行为类别配置 + 2. web.html 前端页面 +#### 后端处理 + 1. main.py 主程序 + 2. info.json 行为类别、环境类别配置 + 3. qwen_monitor.py 行为识别程序,监控recordings目录数据传入,结果保存到redis + 4. monitor_images.py 视频抽帧程序,监控recordings目录数据传入,对视频抽帧保存到images目录 + 5. pose_monitor.py 人体识别程序, 使用Yolo-pose监控images目录数据传入,识别并裁剪人体保存到crop目录 + 6. face_monitor.py 人脸识别程序,监控crop目录数据传入,识别结果保存到redis + 7. emb.py 人脸数据注册,数据保存到redis + 8. del.ipynb 一些测试代码 + +## Redis配置 + 1. 服务器:222.186.10.253 + 2. 
使用db:摄像头:207-211,人脸注册数据:212,分析报告:213 + +## 注意事项 +- 所有目录都已通过 `.gitkeep` 文件保持在版本控制中 +- 各目录中的实际数据文件已通过 `.gitignore` 配置忽略 diff --git a/cls.js b/cls.js new file mode 100644 index 0000000..3d3eb84 --- /dev/null +++ b/cls.js @@ -0,0 +1,78 @@ +// 为每个具体类别定义固定颜色 +const actionColors = { + // 基础动作类别 - 蓝色系 + "站立类": "#2962FF", // 深蓝 + "行走类": "#3F51B5", // 靛蓝 + "奔跑类": "#2196F3", // 基础蓝 + "坐卧类": "#82B1FF", // 浅蓝 + "蹲类": "#42A5F5", // 中浅蓝 + "转动类": "#90CAF9", // 最浅蓝 + "感知类": "#4DD0E1", // 浅青色 + + // 日常生活类别 - 绿色系 + "饮食类": "#009688", // 青绿色 + "喝水": "#CDDC39", // 酸橙色 + "穿戴类": "#8BC34A", // 浅绿色 + "休息类": "#81C784", // 更浅绿 + "清洁类": "#A5D6A7", // 最浅绿 + "医疗类": "#C8E6C9", // 极浅绿 + + // 社交活动类别 + // 社交活动类别 - 橙黄色系 + "交际类": "#FF9800", // 橙色 + "娱乐类": "#F57C00", // 深橙色 + "情感表达": "#FFC107", // 琥珀色 + + // 工作学习类别 - 粉色系 + "阅读类": "#FF3366", // 珊瑚粉 + "写作类": "#FF1493", // 深粉红 + "工作类": "#FF69B4", // 亮粉红 + "创作类": "#FFB6C1", // 浅粉红 + + // 运动娱乐类别 + "运动类": "#673AB7", // 紫色 + + // 异常行为类别 + "异常行为": "#F44336" // 红色 +}; + +const actions = { + "基础动作": { + "站立类": ["站", "站立", "站着"], + "行走类": ["走", "走路", "散步","行走", "徒步"], + "奔跑类": ["跑", "奔跑", "慢跑"], + "坐卧类": ["坐", "坐下", "坐着"], + "蹲类": [ "蹲", "蹲下", "蹲着"], + "转动类": ["转", "转身", "转头", "回头", "旋转", "转向", "转弯"], + "感知类": ["看", "闻", "嗅", "听"] + }, + "日常生活": { + "饮食类": ["吃", "食用", "吃饭", "吃零食","吃东西", "用餐"], + "喝水": ["喝水", "喝牛奶", "喝茶","饮用","喝咖啡", "喝", "饮用",'饮水'], + "穿戴类": ["穿衣服", "穿裤子", "穿鞋", "戴帽子", "戴口罩", "戴围巾"], + "休息类": ["躺","睡", "睡觉", "休息", "打哈欠"], + "清洁类": ["洗澡", "刷牙", "洗手", "洗涤", "清洁", "擦洗"], + "医疗类": ["吃药", "喝药", "服药"] + }, + "社交活动": { + "交际类": ["说话", "交流", "演讲", "谈话", "聊天", "采访", "社交"], + "娱乐类": ["打麻将", "打牌", "玩手机", "玩电脑", "玩游戏", "赌博"], + "情感表达": ["笑", "大笑", "微笑", "哭泣", "咯咯笑", "皱眉"] + }, + "工作学习": { + "阅读类": ["读书", "阅读", "看书"], + "写作类": ["写作", "写字","写"], + "工作类": ["工作", "学习","使用电脑","使用笔记本电脑","使用手机", "开会", "打字"], + "创作类": ["画画", "绘画", "摄影", "素描"] + }, + "运动娱乐": { + "运动类": ["跳", "跳跃", "跳舞", "游泳", "运动", "健身", "锻炼"], + }, + "异常行为": { + "异常行为": ["摔", "踢", "跌倒", "摔倒", 
"晕倒","滑倒"] + } +}; + +// 将 export 语句改为全局变量 +window.actions = actions; +window.actionColors = actionColors; diff --git a/crop/.gitkeep b/crop/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dataset/.gitkeep b/dataset/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/del.ipynb b/del.ipynb new file mode 100644 index 0000000..1c2c86c --- /dev/null +++ b/del.ipynb @@ -0,0 +1,975 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# 读取 JSON 文件\n", + "def remove_original_answer(input_file, output_file):\n", + " # 读取 JSON 文件\n", + " with open(input_file, 'r', encoding='utf-8') as f:\n", + " data = json.load(f)\n", + " \n", + " # 遍历所有视频片段\n", + " for video_key in data:\n", + " # 如果存在 original_answer 字段,则删除它\n", + " if 'original_answer' in data[video_key]:\n", + " del data[video_key]['original_answer']\n", + " \n", + " # 将修改后的数据写入新文件\n", + " with open(output_file, 'w', encoding='utf-8') as f:\n", + " json.dump(data, f, ensure_ascii=False, indent=2)\n", + "\n", + "# 使用示例\n", + "input_file = '球机沙发正面.json' # 输入文件名\n", + "output_file = '球机沙发正面_cleaned.json' # 输出文件名\n", + "\n", + "remove_original_answer(input_file, output_file)\n", + "print(f\"已成功删除所有 original_answer 字段,并保存到 {output_file}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# 需要处理的文件列表\n", + "files = [\n", + " \"result/qwen/球机沙发正面_20241231_0200.json\",\n", + " \"result/qwen/左侧吃饭2_20241231_1612.json\",\n", + " \"result/qwen/左侧吃饭1_20241231_1423.json\",\n", + " \"result/qwen/室内右上角全景_20241231_2300.json\",\n", + " \"result/qwen/室内右上角全景_20241231_2010.json\",\n", + " \"result/qwen/球机椅子左侧面_20241231_0923.json\",\n", + " \"result/qwen/右上角吃饭_20241231_1232.json\"\n", + "]\n", + "\n", + "def remove_actions(data):\n", 
+ " \"\"\"递归删除字典中的 'actions' 字段\"\"\"\n", + " if isinstance(data, dict):\n", + " if 'actions' in data:\n", + " data['actions'] = [] # 清空 actions 列表\n", + " for value in data.values():\n", + " remove_actions(value)\n", + " elif isinstance(data, list):\n", + " for item in data:\n", + " remove_actions(item)\n", + "\n", + "# 处理每个文件\n", + "for file_path in files:\n", + " try:\n", + " # 读取 JSON 文件\n", + " with open(file_path, 'r', encoding='utf-8') as f:\n", + " data = json.load(f)\n", + " \n", + " # 删除 actions 字段内容\n", + " remove_actions(data)\n", + " \n", + " # 写回文件\n", + " with open(file_path, 'w', encoding='utf-8') as f:\n", + " json.dump(data, f, ensure_ascii=False, indent=2)\n", + " \n", + " print(f\"Successfully processed: {file_path}\")\n", + " except Exception as e:\n", + " print(f\"Error processing {file_path}: {str(e)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 404 actions from info.json\n", + "\n", + "Processing result/qwen/球机沙发正面_20241231_0200.json...\n", + "Found actions in 20241230_124643.avi: ['写作', '看', '写字', '阅读', '写', '坐', '起身']\n", + "Found actions in 20241230_124656.avi: ['看', '整理', '关闭']\n", + "Found actions in 20241230_124646.avi: ['躺', '阅读', '看']\n", + "Found actions in 20241230_124647.avi: ['坐', '躺', '休息', '看']\n", + "Found actions in 20241230_124645.avi: ['坐', '写', '站', '看']\n", + "Found actions in 20241230_124651.avi: ['关闭', '看', '睡觉', '睡', '休息', '躺']\n", + "Found actions in 20241230_124640.avi: ['坐', '休息', '阅读', '看']\n", + "Found actions in 20241230_124649.avi: ['整理', '看', '睡觉', '休息', '睡', '躺']\n", + "Found actions in 20241230_124650.avi: ['看', '睡觉', '睡', '休息', '躺']\n", + "Found actions in 20241230_124644.avi: ['坐', '工作', '操作', '看']\n", + "Found actions in 20241230_124653.avi: ['喝水', '看', '休息', '坐', '喝']\n", + "Found actions in 20241230_124654.avi: ['坐', '走', '站', '看']\n", + "Found actions in 20241230_124637.avi: 
['坐', '休息', '使用手机', '看']\n", + "Found actions in 20241230_124638.avi: ['站', '看', '开始', '阅读', '玩手机', '坐']\n", + "Found actions in 20241230_124639.avi: ['看', '打开', '休息', '阅读', '坐']\n", + "Found actions in 20241230_124642.avi: ['写作', '看', '打开', '看书', '阅读', '学习', '工作', '写', '坐']\n", + "Found actions in 20241230_124636.avi: ['整理', '进入', '看', '关闭', '使用手机', '坐']\n", + "Found actions in 20241230_124648.avi: ['睡觉', '睡', '休息', '躺']\n", + "Found actions in 20241230_124652.avi: ['看', '睡觉', '睡', '休息', '躺']\n", + "Found actions in 20241230_124641.avi: ['写作', '吹', '打开', '看', '操作', '学习', '工作', '写', '使用笔记本电脑', '坐']\n", + "Updated result/qwen/球机沙发正面_20241231_0200.json\n", + "\n", + "Processing result/qwen/左侧吃饭2_20241231_1612.json...\n", + "Found actions in 20241230_130857.avi: ['工作', '咀嚼', '看']\n", + "Found actions in 20241230_130853.avi: ['喝饮料', '看', '工作', '坐', '喝']\n", + "Found actions in 20241230_130840.avi: ['吃东西', '看', '操作', '使用电脑', '工作', '动手', '坐', '吃']\n", + "Found actions in 20241230_130844.avi: ['吃东西', '看', '咬', '工作', '坐', '吃']\n", + "Found actions in 20241230_130848.avi: ['吃东西', '喝水', '看', '坐', '喝', '吃']\n", + "Found actions in 20241230_130835.avi: ['吃东西', '看', '戴眼镜', '操作', '使用电脑', '工作', '坐', '吃']\n", + "Updated result/qwen/左侧吃饭2_20241231_1612.json\n", + "\n", + "Processing result/qwen/左侧吃饭1_20241231_1423.json...\n", + "Found actions in 20241230_130812.avi: ['吃东西', '看', '戴眼镜', '工作', '使用手机', '吃']\n", + "Found actions in 20241230_130759.avi: ['站', '喝水', '站立', '走', '休息', '使用电脑', '工作', '坐', '喝', '起身']\n", + "Found actions in 20241230_130830.avi: ['吃东西', '看', '操作', '使用电脑', '工作', '吃']\n", + "Found actions in 20241230_130804.avi: ['坐下', '进入', '看', '打开', '开始', '操作', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_130826.avi: ['吃东西', '看', '休息', '工作', '坐', '咀嚼', '吃']\n", + "Found actions in 20241230_130835.avi: ['吃东西', '看', '戴眼镜', '使用电脑', '工作', '坐', '吃']\n", + "Found actions in 20241230_130808.avi: ['看', '打开', '走', '使用电脑', '工作']\n", + "Found actions in 20241230_130821.avi: ['看', 
'学习', '工作', '用餐', '坐', '咀嚼']\n", + "Found actions in 20241230_130817.avi: ['坐', '使用手机', '操作']\n", + "Updated result/qwen/左侧吃饭1_20241231_1423.json\n", + "\n", + "Processing result/qwen/室内右上角全景_20241231_2300.json...\n", + "Found actions in 20241230_124417.avi: ['进入', '整理', '阅读', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_123344.avi: ['看']\n", + "Found actions in 20241230_123057.avi: ['行走', '坐下', '站', '站立', '走', '学习', '工作', '坐']\n", + "Found actions in 20241230_123606.avi: ['坐', '闻', '使用手机', '看']\n", + "Found actions in 20241230_123518.avi: ['行走', '站', '看', '打开', '站立', '走', '工作', '转', '转身']\n", + "Found actions in 20241230_124047.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123734.avi: ['坐', '闻', '使用手机']\n", + "Found actions in 20241230_123316.avi: ['看']\n", + "Found actions in 20241230_124519.avi: ['坐', '工作', '阅读', '看']\n", + "Found actions in 20241230_123219.avi: ['行走', '站', '交谈', '站立', '走', '交流']\n", + "Found actions in 20241230_124201.avi: ['坐', '工作', '使用电脑', '关闭']\n", + "Found actions in 20241230_124247.avi: ['站', '站立', '进入', '弯腰']\n", + "Found actions in 20241230_124106.avi: ['看', '关闭', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_123803.avi: ['坐', '看', '使用电脑']\n", + "Found actions in 20241230_124315.avi: ['整理', '弯腰']\n", + "Found actions in 20241230_123200.avi: ['写', '站', '整理', '看']\n", + "Found actions in 20241230_123402.avi: ['进入', '走', '关闭']\n", + "Found actions in 20241230_124528.avi: ['坐下', '使用电脑', '工作', '坐', '起身']\n", + "Found actions in 20241230_123528.avi: ['站', '看', '站立', '转', '转身']\n", + "Found actions in 20241230_123048.avi: ['坐下', '站', '喝水', '看', '走', '工作', '闻', '坐', '喝']\n", + "Found actions in 20241230_123039.avi: ['站', '喝水', '开始', '站立', '喝']\n", + "Found actions in 20241230_123537.avi: ['坐下', '站', '看', '站立', '走', '使用电脑', '工作', '坐', '起身']\n", + "Found actions in 20241230_124408.avi: ['行走', '坐下', '站', '站立', '走', '工作', '坐']\n", + "Found actions in 20241230_123325.avi: ['工作', '看']\n", + "Found actions in 
20241230_123411.avi: ['站', '进入', '看', '开始', '走', '工作']\n", + "Found actions in 20241230_123901.avi: ['坐', '闻', '看', '使用电脑']\n", + "Found actions in 20241230_124027.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123948.avi: ['坐', '看', '使用电脑']\n", + "Found actions in 20241230_124426.avi: ['整理', '看', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_123725.avi: ['坐', '使用手机', '工作']\n", + "Found actions in 20241230_124116.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_122946.avi: ['进入', '关闭']\n", + "Found actions in 20241230_123124.avi: ['行走', '进入', '看', '打开', '走', '开门']\n", + "Found actions in 20241230_124341.avi: ['坐', '阅读', '看']\n", + "Found actions in 20241230_124056.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_124210.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_124554.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123151.avi: ['走', '整理', '弯腰']\n", + "Found actions in 20241230_123557.avi: ['坐下', '站', '关闭', '开始', '站立', '使用电脑', '坐']\n", + "Found actions in 20241230_123929.avi: ['坐', '闻', '使用电脑']\n", + "Found actions in 20241230_123142.avi: ['站', '进入', '看', '打开', '使用电脑', '工作']\n", + "Found actions in 20241230_124008.avi: ['坐', '闻', '使用电脑']\n", + "Found actions in 20241230_123508.avi: ['行走', '站', '进入', '关闭', '走', '闻']\n", + "Found actions in 20241230_124125.avi: ['坐', '工作', '操作', '看']\n", + "Found actions in 20241230_123851.avi: ['坐', '看', '使用电脑']\n", + "Found actions in 20241230_123635.avi: ['坐', '看', '使用电脑']\n", + "Found actions in 20241230_123229.avi: ['行走', '整理', '看', '走', '写']\n", + "Found actions in 20241230_124435.avi: ['坐', '工作', '阅读', '看']\n", + "Found actions in 20241230_123133.avi: ['进入', '看', '打开', '关闭', '工作', '写']\n", + "Found actions in 20241230_124332.avi: ['坐', '看', '阅读', '关闭']\n", + "Found actions in 20241230_123420.avi: ['行走', '站', '整理', '看', '测量', '走', '站立', '工作']\n", + "Found actions in 20241230_123615.avi: ['坐', '闻', '使用电脑']\n", + "Found actions in 20241230_123353.avi: 
['看']\n", + "Found actions in 20241230_124306.avi: ['行走', '蹲下', '走', '操作', '工作', '蹲']\n", + "Found actions in 20241230_123257.avi: ['打开', '关闭']\n", + "Found actions in 20241230_123115.avi: ['坐', '工作', '起身', '使用电脑']\n", + "Found actions in 20241230_123753.avi: ['坐', '闻', '看', '使用电脑']\n", + "Found actions in 20241230_124037.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123106.avi: ['行走', '关闭', '打开', '看', '走', '写']\n", + "Found actions in 20241230_122909.avi: ['进入', '转', '转身']\n", + "Updated result/qwen/室内右上角全景_20241231_2300.json\n", + "\n", + "Processing result/qwen/室内右上角全景_20241231_2010.json...\n", + "Found actions in 20241230_123744.avi: ['坐', '使用手机', '看']\n", + "Found actions in 20241230_124143.avi: ['看', '关闭', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_123004.avi: ['行走', '站', '整理', '看', '关闭', '走']\n", + "Found actions in 20241230_124501.avi: ['进入', '看', '操作', '工作', '坐']\n", + "Found actions in 20241230_124219.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123021.avi: ['行走', '走', '整理', '看']\n", + "Found actions in 20241230_124152.avi: ['进入', '使用电脑', '工作', '闻', '坐']\n", + "Found actions in 20241230_123655.avi: ['坐', '使用手机', '工作']\n", + "Found actions in 20241230_124452.avi: ['坐', '使用手机', '看']\n", + "Found actions in 20241230_122936.avi: ['行走', '进入', '关闭', '看', '走', '转', '转身']\n", + "Found actions in 20241230_123705.avi: ['坐', '使用手机', '工作']\n", + "Found actions in 20241230_123247.avi: ['工作', '走', '站', '看']\n", + "Found actions in 20241230_123449.avi: ['走', '整理', '起身', '弯腰']\n", + "Found actions in 20241230_122918.avi: ['搬运', '写', '看']\n", + "Found actions in 20241230_124227.avi: ['行走', '站', '站立', '走', '使用电脑', '坐', '起身']\n", + "Found actions in 20241230_124017.avi: ['站', '站立', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_122927.avi: ['看', '休息', '操作', '工作', '写', '弯腰', '起身']\n", + "Found actions in 20241230_123920.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123832.avi: ['坐', '休息', '工作', '使用电脑']\n", + "Found 
actions in 20241230_124546.avi: ['站', '看', '站立', '使用电脑', '工作', '坐', '起身']\n", + "Found actions in 20241230_124537.avi: ['看', '使用电脑', '工作', '闻', '坐']\n", + "Found actions in 20241230_123813.avi: ['坐', '看', '使用电脑']\n", + "Found actions in 20241230_123440.avi: ['进入', '走', '转', '转身']\n", + "Found actions in 20241230_123210.avi: ['整理', '看', '打开', '关闭', '工作', '清理']\n", + "Found actions in 20241230_123306.avi: ['工作', '看']\n", + "Found actions in 20241230_124443.avi: ['坐', '弹奏', '看']\n", + "Found actions in 20241230_122859.avi: ['站', '看', '走', '操作', '工作', '交流']\n", + "Found actions in 20241230_123938.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_124359.avi: ['整理', '弯腰']\n", + "Found actions in 20241230_123334.avi: ['工作', '看']\n", + "Found actions in 20241230_124603.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_124510.avi: ['进入', '整理', '看', '阅读', '工作', '坐']\n", + "Found actions in 20241230_123822.avi: ['坐', '使用手机']\n", + "Found actions in 20241230_123012.avi: ['站', '看', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_123715.avi: ['坐', '使用手机', '看']\n", + "Found actions in 20241230_122955.avi: ['工作', '开始', '整理', '使用电脑']\n", + "Found actions in 20241230_124134.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_123842.avi: ['坐', '看', '使用电脑']\n", + "Found actions in 20241230_124238.avi: ['讨论', '整理', '进入', '走', '工作', '交流']\n", + "Found actions in 20241230_123030.avi: ['进入', '看', '走', '闻', '交流']\n", + "Found actions in 20241230_123644.avi: ['坐', '使用手机', '关闭']\n", + "Found actions in 20241230_123911.avi: ['进入', '看', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_123547.avi: ['讨论', '站', '整理', '看', '交谈', '工作']\n", + "Found actions in 20241230_124350.avi: ['整理', '弯腰']\n", + "Found actions in 20241230_123625.avi: ['坐', '使用手机', '关闭']\n", + "Found actions in 20241230_123958.avi: ['坐', '闻', '使用电脑']\n", + "Found actions in 20241230_124256.avi: ['看', '走', '工作', '弯腰', '起身']\n", + "Found actions in 20241230_124323.avi: ['休息', '整理', '弯腰', 
'看']\n", + "Found actions in 20241230_123430.avi: ['行走', '坐下', '休息', '走', '工作', '坐']\n", + "Found actions in 20241230_123459.avi: ['行走', '站', '看', '走', '写']\n", + "Found actions in 20241230_123238.avi: ['行走', '站', '看', '走', '操作', '工作']\n", + "Updated result/qwen/室内右上角全景_20241231_2010.json\n", + "\n", + "Processing result/qwen/球机椅子左侧面_20241231_0923.json...\n", + "Found actions in 20241230_124611.avi: ['看', '休息', '戴眼镜', '工作', '使用手机', '写', '坐']\n", + "Found actions in 20241230_124626.avi: ['行走', '走', '工作', '写', '转', '转身']\n", + "Found actions in 20241230_124635.avi: ['站', '看', '站立', '走', '使用电脑', '工作']\n", + "Found actions in 20241230_124618.avi: ['看', '戴眼镜', '阅读', '学习', '工作']\n", + "Found actions in 20241230_124630.avi: ['站', '看', '笑', '工作', '转', '转身']\n", + "Found actions in 20241230_124627.avi: ['手写', '站', '站立', '写', '转', '转身']\n", + "Found actions in 20241230_124617.avi: ['看', '戴眼镜', '阅读', '学习', '工作', '闻', '坐']\n", + "Found actions in 20241230_124613.avi: ['工作', '使用手机', '戴眼镜', '看']\n", + "Found actions in 20241230_124623.avi: ['站', '喝水', '看', '站立', '戴眼镜', '休息', '工作', '喝', '转', '转身']\n", + "Found actions in 20241230_124612.avi: ['看', '操作', '学习', '工作', '使用手机', '坐']\n", + "Found actions in 20241230_124620.avi: ['看', '转头', '使用电脑', '学习', '工作', '转向', '坐', '转']\n", + "Found actions in 20241230_124610.avi: ['坐', '使用手机', '工作', '看']\n", + "Found actions in 20241230_124629.avi: ['行走', '进入', '看', '走', '工作', '交流']\n", + "Found actions in 20241230_124631.avi: ['行走', '看', '走', '工作', '转', '转身']\n", + "Found actions in 20241230_124632.avi: ['站', '看', '站立', '操作', '转', '转身']\n", + "Found actions in 20241230_124622.avi: ['喝水', '看', '休息', '使用电脑', '工作', '坐', '喝']\n", + "Found actions in 20241230_124614.avi: ['看', '开始', '阅读', '学习', '工作', '使用手机', '坐']\n", + "Found actions in 20241230_124616.avi: ['看', '转头', '阅读', '学习', '工作', '转向', '坐', '转', '转身']\n", + "Found actions in 20241230_124634.avi: ['坐', '工作', '写', '看']\n", + "Found actions in 20241230_124621.avi: ['喝水', '看', '休息', '操作', '使用电脑', 
'工作', '动手', '坐', '喝']\n", + "Found actions in 20241230_124628.avi: ['行走', '站', '看', '走', '操作', '工作', '转向', '转', '转身']\n", + "Found actions in 20241230_124633.avi: ['站', '看', '站立', '学习', '工作', '弯腰']\n", + "Found actions in 20241230_124624.avi: ['站', '看', '走', '坐', '起身']\n", + "Found actions in 20241230_124615.avi: ['坐', '阅读', '看']\n", + "Found actions in 20241230_124619.avi: ['站', '看', '站立', '使用电脑', '转', '起身', '转身']\n", + "Updated result/qwen/球机椅子左侧面_20241231_0923.json\n", + "\n", + "Processing result/qwen/右上角吃饭_20241231_1232.json...\n", + "Found actions in 20241230_130634.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130731.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130653.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130658.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130620.avi: ['坐', '工作']\n", + "Found actions in 20241230_130712.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130745.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130726.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130551.avi: ['行走', '进入', '休息', '走', '操作', '工作', '服务']\n", + "Found actions in 20241230_130741.avi: ['坐', '组装', '工作', '使用电脑']\n", + "Found actions in 20241230_130639.avi: ['休息', '使用电脑', '工作', '饮用', '坐']\n", + "Found actions in 20241230_130644.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130707.avi: ['坐', '闻', '服务', '使用电脑']\n", + "Found actions in 20241230_130555.avi: ['看', '交谈', '休息', '走', '使用电脑', '工作', '交流', '坐']\n", + "Found actions in 20241230_130629.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130624.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130605.avi: ['看', '操作', '使用电脑', '工作', '坐']\n", + "Found actions in 20241230_130610.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130703.avi: ['坐', '工作', '服务', '使用电脑']\n", + "Found actions in 20241230_130736.avi: ['吃东西', '使用电脑', '吃零食', '工作', '坐', '吃']\n", + "Found actions in 
20241230_130648.avi: ['喝水', '看', '休息', '使用电脑', '工作', '坐', '喝']\n", + "Found actions in 20241230_130755.avi: ['坐', '组装', '工作', '使用电脑']\n", + "Found actions in 20241230_130750.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130546.avi: ['坐', '工作', '起身', '看']\n", + "Found actions in 20241230_130717.avi: ['坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130615.avi: ['使用电脑', '学习', '工作', '闻', '坐']\n", + "Found actions in 20241230_130722.avi: ['看', '坐', '工作', '使用电脑']\n", + "Found actions in 20241230_130600.avi: ['坐下', '站', '看', '站立', '工作', '坐']\n", + "Updated result/qwen/右上角吃饭_20241231_1232.json\n" + ] + } + ], + "source": [ + "import json\n", + "\n", + "# 读取动作列表\n", + "def load_actions():\n", + " with open('info.json', 'r', encoding='utf-8') as f:\n", + " data = json.load(f)\n", + " return set(data['actions']) # 使用集合提高查找效率\n", + "\n", + "def find_actions(text, action_set):\n", + " \"\"\"在文本中查找动作\"\"\"\n", + " found_actions = set()\n", + " for action in action_set:\n", + " # 直接使用简单的字符串匹配\n", + " if action in text:\n", + " found_actions.add(action)\n", + " return list(found_actions)\n", + "\n", + "def process_file(file_path, action_set):\n", + " \"\"\"处理单个文件\"\"\"\n", + " modified = False\n", + " with open(file_path, 'r', encoding='utf-8') as f:\n", + " data = json.load(f)\n", + " \n", + " # 遍历所有视频分析\n", + " for video_key, video_data in data.items():\n", + " if 'video_analysis' in video_data:\n", + " analysis = video_data['video_analysis']\n", + " if 'qwen-7B' in analysis:\n", + " qwen_data = analysis['qwen-7B']\n", + " if 'original_answer' in qwen_data and 'extracted_info' in qwen_data:\n", + " # 查找动作\n", + " found_actions = find_actions(qwen_data['original_answer'], action_set)\n", + " if found_actions: # 只有找到动作时才更新\n", + " qwen_data['extracted_info']['actions'] = found_actions\n", + " modified = True\n", + " print(f\"Found actions in {video_key}: {found_actions}\")\n", + " \n", + " # 只有在发现动作时才写回文件\n", + " if modified:\n", + " with open(file_path, 'w', 
encoding='utf-8') as f:\n", + " json.dump(data, f, ensure_ascii=False, indent=2)\n", + " print(f\"Updated {file_path}\")\n", + " else:\n", + " print(f\"No actions found in {file_path}\")\n", + "\n", + "# 主程序\n", + "def main():\n", + " # 需要处理的文件列表\n", + " files = [\n", + " \"result/qwen/球机沙发正面_20241231_0200.json\",\n", + " \"result/qwen/左侧吃饭2_20241231_1612.json\",\n", + " \"result/qwen/左侧吃饭1_20241231_1423.json\",\n", + " \"result/qwen/室内右上角全景_20241231_2300.json\",\n", + " \"result/qwen/室内右上角全景_20241231_2010.json\",\n", + " \"result/qwen/球机椅子左侧面_20241231_0923.json\",\n", + " \"result/qwen/右上角吃饭_20241231_1232.json\"\n", + " ]\n", + " \n", + " # 加载动作列表\n", + " action_set = load_actions()\n", + " print(f\"Loaded {len(action_set)} actions from info.json\")\n", + " \n", + " # 处理每个文件\n", + " for file_path in files:\n", + " try:\n", + " print(f\"\\nProcessing {file_path}...\")\n", + " process_file(file_path, action_set)\n", + " except Exception as e:\n", + " print(f\"Error processing {file_path}: {str(e)}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "def calculate_accuracy(predicted_file, ground_truth_file):\n", + " \"\"\"计算模型预测结果与人工标注的准确度\"\"\"\n", + " accuracy_metrics = {\n", + " \"environment\": 0,\n", + " \"num_people\": 0,\n", + " \"actions\": 0,\n", + " \"objects\": 0,\n", + " \"furniture\": 0,\n", + " \"emotions\": 0,\n", + " \"features\": 0\n", + " }\n", + " total_files = 0\n", + " \n", + " # 加载文件\n", + " try:\n", + " with open(predicted_file, 'r', encoding='utf-8') as f:\n", + " predicted_data = json.load(f)\n", + " with open(ground_truth_file, 'r', encoding='utf-8') as f:\n", + " ground_truth_data = json.load(f)\n", + " except Exception as e:\n", + " print(f\"读取文件时出错: {str(e)}\")\n", + " return accuracy_metrics\n", + " \n", + " 
print(f\"\\n开始计算准确率...\")\n", + " print(f\"预测结果包含 {len(predicted_data)} 个文件\")\n", + " print(f\"标注数据包含 {len(ground_truth_data)} 个文件\")\n", + " \n", + " # 对每个预测结果进行评估\n", + " for video_name, pred in predicted_data.items():\n", + " print(f\"\\n处理文件: {video_name}\")\n", + " \n", + " # 检查文件是否在标注数据中\n", + " if video_name not in ground_truth_data:\n", + " print(f\"标注数据中未找到: {video_name}\")\n", + " continue\n", + " \n", + " try:\n", + " # 获取预测信息\n", + " pred_info = pred[\"video_analysis\"][\"qwen\"][\"extracted_info\"]\n", + " gt_info = ground_truth_data[video_name][\"extracted_info\"]\n", + " \n", + " total_files += 1\n", + " print(f\"成功匹配文件: {video_name}\")\n", + " \n", + " # 环境匹配\n", + " if pred_info[\"environment\"] == gt_info[\"environment\"]:\n", + " accuracy_metrics[\"environment\"] += 1\n", + " print(f\"环境匹配成功: {pred_info['environment']}\")\n", + " \n", + " # 人数匹配\n", + " if pred_info[\"num_people\"] == gt_info[\"num_people\"]:\n", + " accuracy_metrics[\"num_people\"] += 1\n", + " print(f\"人数匹配成功: {pred_info['num_people']}\")\n", + " \n", + " # 计算列表类字段的匹配度(使用Jaccard相似度)\n", + " field_mapping = {\n", + " \"actions\": \"actions\",\n", + " \"objects\": \"objects\",\n", + " \"furniture\": \"furniture\",\n", + " \"emotions\": \"emotions\",\n", + " \"features\": \"feature \" # 注意标注数据中的 \"feature \" 有空格\n", + " }\n", + " \n", + " for pred_field, gt_field in field_mapping.items():\n", + " pred_set = set(pred_info[pred_field]) if pred_info[pred_field] else set()\n", + " gt_set = set(gt_info[gt_field]) if gt_field in gt_info and gt_info[gt_field] else set()\n", + " \n", + " if pred_set or gt_set: # 避免除零错误\n", + " intersection = len(pred_set & gt_set)\n", + " union = len(pred_set | gt_set)\n", + " jaccard = intersection / union\n", + " accuracy_metrics[pred_field] += jaccard\n", + " print(f\"{pred_field} 匹配度: {jaccard:.2%}\")\n", + " print(f\"预测集合: {pred_set}\")\n", + " print(f\"标注集合: {gt_set}\")\n", + " print(f\"交集数量: {intersection}\")\n", + " print(f\"并集数量: {union}\")\n", 
+ " \n", + " except Exception as e:\n", + " print(f\"处理 {video_name} 时出错: {str(e)}\")\n", + " continue\n", + " \n", + " # 计算平均准确率\n", + " if total_files > 0:\n", + " print(f\"\\n共成功比较 {total_files} 个文件\")\n", + " for key in accuracy_metrics:\n", + " accuracy_metrics[key] = round(accuracy_metrics[key] / total_files * 100, 2)\n", + " else:\n", + " print(\"\\n警告:没有成功比较任何文件\")\n", + " \n", + " return accuracy_metrics\n", + "\n", + "def main():\n", + " try:\n", + " # 获取输入文件路径\n", + " predicted_file = input(\"请输入预测结果文件路径: \").strip()\n", + " ground_truth_file = input(\"请输入标注文件路径: \").strip()\n", + " output_path = input(\"请输入结果保存路径 (直接回车使用当前目录): \").strip()\n", + " \n", + " # 验证文件是否存在\n", + " if not os.path.exists(predicted_file):\n", + " raise Exception(f\"错误:预测结果文件 '{predicted_file}' 不存在\")\n", + " if not os.path.exists(ground_truth_file):\n", + " raise Exception(f\"错误:标注文件 '{ground_truth_file}' 不存在\")\n", + " \n", + " # 设置输出路径\n", + " output_path = output_path if output_path else os.getcwd()\n", + " if not os.path.exists(output_path):\n", + " os.makedirs(output_path)\n", + " \n", + " # 计算准确率\n", + " accuracy = calculate_accuracy(predicted_file, ground_truth_file)\n", + " \n", + " # 显示准确率结果\n", + " print(\"\\n准确率评估结果:\")\n", + " for metric, value in accuracy.items():\n", + " print(f\"{metric}: {value}%\")\n", + " \n", + " # 保存准确率结果\n", + " timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + " accuracy_file = os.path.join(output_path, f\"accuracy_results_{timestamp}.json\")\n", + " with open(accuracy_file, 'w', encoding='utf-8') as f:\n", + " json.dump(accuracy, f, ensure_ascii=False, indent=2)\n", + " print(f\"\\n准确率评估结果已保存到: {accuracy_file}\")\n", + " \n", + " except Exception as e:\n", + " print(f\"\\n错误: {str(e)}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "def merge_json_files(json_files):\n", + " # 
创建结果字典\n", + " merged_result = {}\n", + " \n", + " # 遍历每个JSON文件\n", + " for file_data in json_files:\n", + " # 遍历每个视频\n", + " for video_name, video_data in file_data.items():\n", + " if video_name not in merged_result:\n", + " merged_result[video_name] = {}\n", + " \n", + " # 获取模型名称和original_answer\n", + " if \"video_analysis\" in video_data:\n", + " for model_name, model_data in video_data[\"video_analysis\"].items():\n", + " if \"original_answer\" in model_data:\n", + " merged_result[video_name][model_name] = model_data[\"original_answer\"]\n", + "\n", + " return merged_result\n", + "\n", + "# 读取所有JSON文件\n", + "json_files = [\n", + "\n", + " \"/home/zydi/VLM/result/analysis_results_室内右上角全景筛选_20250102_084327.json\",\n", + " \"/home/zydi/VLM/result/analysis_results_室内右上角全景筛选_20250102_084600.json\"\n", + "]\n", + "\n", + "# 读取所有文件内容\n", + "json_contents = []\n", + "for file_path in json_files:\n", + " with open(file_path, 'r', encoding='utf-8') as f:\n", + " json_contents.append(json.load(f))\n", + "\n", + "# 合并JSON\n", + "result = merge_json_files(json_contents)\n", + "\n", + "# 将结果写入新文件\n", + "with open('qwen_prompt.json', 'w', encoding='utf-8') as f:\n", + " json.dump(result, f, ensure_ascii=False, indent=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def clean_places365_names():\n", + " with open('Places365.txt', 'r') as file:\n", + " lines = file.readlines()\n", + " \n", + " cleaned_lines = []\n", + " for line in lines:\n", + " # 移除开头的/x/部分\n", + " if line.strip(): # 确保不是空行\n", + " # 找到第二个/后的位置\n", + " start_pos = line.find('/', 1) + 1\n", + " # 获取中间部分(去掉末尾的数字)\n", + " name = line[start_pos:].rsplit(' ', 1)[0]\n", + " cleaned_lines.append(name)\n", + " \n", + " # 写入原文件\n", + " with open('Places365.txt', 'w') as file:\n", + " file.write('\\n'.join(cleaned_lines))\n", + "\n", + "# 执行函数\n", + "clean_places365_names()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def merge_json_files(json_files):\n", + " # 创建结果字典\n", + " merged_result = {}\n", + " \n", + " # 遍历每个JSON文件\n", + " for file_path, file_data in zip(json_files, json_contents):\n", + " # 从文件名中提取时间戳\n", + " file_timestamp = None\n", + " if \"_20250102_\" in file_path:\n", + " file_timestamp = file_path.split(\"_\")[-1].replace(\".json\", \"\")\n", + " \n", + " # 遍历每个视频\n", + " for video_name, video_data in file_data.items():\n", + " if video_name not in merged_result:\n", + " merged_result[video_name] = {\n", + " \"video_analysis\": {\n", + " \"qwen-7B\": {\n", + " \"original_answers\": {}\n", + " }\n", + " }\n", + " }\n", + " \n", + " # 获取original_answer并添加到对应时间戳下\n", + " if \"video_analysis\" in video_data and \"qwen-7B\" in video_data[\"video_analysis\"]:\n", + " model_data = video_data[\"video_analysis\"][\"qwen-7B\"]\n", + " if \"original_answer\" in model_data:\n", + " merged_result[video_name][\"video_analysis\"][\"qwen-7B\"][\"original_answers\"][file_timestamp] = model_data[\"original_answer\"]\n", + " \n", + " return merged_result\n", + "\n", + "# 读取所有JSON文件\n", + "json_files = [\n", + " \"/home/zydi/VLM/result/室内右上角全景筛选_20250102_065735.json\",\n", + " \"/home/zydi/VLM/result/室内右上角全景筛选_20250102_072352.json\",\n", + " \"/home/zydi/VLM/result/室内右上角全景筛选_20250102_072724.json\",\n", + " \"/home/zydi/VLM/result/室内右上角全景筛选_20250102_075545.json\"\n", + "]\n", + "\n", + "# 读取所有文件内容\n", + "json_contents = []\n", + "for file_path in json_files:\n", + " with open(file_path, 'r', encoding='utf-8') as f:\n", + " json_contents.append(json.load(f))\n", + "\n", + "# 合并JSON\n", + "result = merge_json_files(json_files)\n", + "\n", + "# 将结果写入新文件\n", + "with open('qwen_prompt.json', 'w', encoding='utf-8') as f:\n", + " json.dump(result, f, ensure_ascii=False, indent=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "处理完成!输出文件:formatted_qwen_prompt.json\n" + ] + } + ], + "source": [ + "import json\n", + "import re\n", + "\n", + "def format_answer(text):\n", + " # 处理标题\n", + " text = re.sub(r'###\\s+', '\\n### ', text)\n", + " text = re.sub(r'####\\s+', '\\n#### ', text)\n", + " \n", + " # 处理列表项\n", + " text = re.sub(r'(?m)^-\\s+', '\\n- ', text)\n", + " text = re.sub(r'(?m)^•\\s+', '\\n• ', text)\n", + " \n", + " # 处理加粗文本\n", + " text = re.sub(r'\\*\\*([^*]+)\\*\\*:', '\\n**\\\\1**:', text)\n", + " \n", + " # 移除多余的空行\n", + " text = re.sub(r'\\n{3,}', '\\n\\n', text)\n", + " \n", + " return text.strip()\n", + "\n", + "def process_json_file(input_file, output_file):\n", + " try:\n", + " with open(input_file, 'r', encoding='utf-8') as f:\n", + " data = json.load(f)\n", + " \n", + " # 递归处理所有的original_answers\n", + " def process_dict(d):\n", + " for k, v in d.items():\n", + " if k == 'original_answers' or k == 'original_answer':\n", + " if isinstance(v, dict):\n", + " for sub_k, sub_v in v.items():\n", + " v[sub_k] = format_answer(sub_v)\n", + " else:\n", + " d[k] = format_answer(v)\n", + " elif isinstance(v, dict):\n", + " process_dict(v)\n", + " elif isinstance(v, list):\n", + " for item in v:\n", + " if isinstance(item, dict):\n", + " process_dict(item)\n", + " \n", + " process_dict(data)\n", + " \n", + " with open(output_file, 'w', encoding='utf-8') as f:\n", + " json.dump(data, f, ensure_ascii=False, indent=2)\n", + " \n", + " print(f\"处理完成!输出文件:{output_file}\")\n", + " \n", + " except Exception as e:\n", + " print(f\"处理文件时出错:{str(e)}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " # 示例使用\n", + " input_file = \"qwen_prompt.json\" # 输入文件路径\n", + " output_file = \"formatted_qwen_prompt.json\" # 输出文件路径\n", + " process_json_file(input_file, output_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import 
def update_timestamps(json_data, base_time=None, interval=timedelta(minutes=1)):
    """Assign evenly spaced ``timestamp`` strings to every entry, in place.

    Entries are visited in the mapping's iteration order; the i-th entry
    receives ``base_time + i * interval`` formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        json_data: Mapping of video key -> dict; each dict gets (or has
            overwritten) a ``'timestamp'`` field.
        base_time: ``datetime`` of the first entry. Defaults to
            2024-12-31 16:12:00, the value this notebook cell used.
        interval: ``timedelta`` between consecutive entries. Defaults to
            one minute.

    Returns:
        The same ``json_data`` object, for convenience.
    """
    if base_time is None:
        # Default base time: 2024-12-31 16:12:00.
        base_time = datetime(2024, 12, 31, 16, 12, 0)

    for index, video_data in enumerate(json_data.values()):
        stamp = base_time + interval * index
        video_data['timestamp'] = stamp.strftime('%Y-%m-%d %H:%M:%S')

    return json_data
def fetch_from_redis_and_save(keys=None, output_dir="./"):
    """Dump the JSON values of selected Redis keys into one JSON file.

    Args:
        keys: List of key names to fetch. When ``None``, every key matching
            ``camera001_*`` in the database is fetched.
        output_dir: Directory for the output file; created if missing.

    Returns:
        The path of the written file, or ``None`` if an error occurred
        (errors are printed, not raised, as before).

    Keys whose value is missing are skipped; keys whose value is not valid
    JSON are reported and skipped.
    """
    import os

    r = None
    try:
        # NOTE(review): host and password are hard-coded in source; consider
        # moving them to configuration / environment before sharing this file.
        r = redis.Redis(
            host="222.186.10.253",
            port=6379,
            password="Obscura@2024",
            db=210
        )

        if keys is None:
            # Iterate with SCAN rather than KEYS so a large database is
            # walked incrementally. Keys come back as bytes.
            keys = [key.decode('utf-8') for key in r.scan_iter(match="camera001_*")]

        all_data = {}

        for key in keys:
            try:
                value = r.get(key)
                if value:
                    # Stored values are JSON strings.
                    all_data[key] = json.loads(value)
                    print(f"成功获取key: {key}")
            except Exception as e:
                print(f"处理key {key} 时出错: {str(e)}")

        # Output file name carries the current time.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, f"redis_data_{timestamp}.json")

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_data, f, ensure_ascii=False, indent=2)

        print(f"\n数据已保存到: {output_file}")
        print(f"共处理 {len(all_data)} 个key")
        return output_file

    except Exception as e:
        print(f"发生错误: {str(e)}")
        return None

    finally:
        # Release the connection on the error path too.
        if r is not None:
            r.close()
""" + self.redis_client = redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=212 + ) + + def get_feature(self, img_path): + """ + 获取人脸特征向量 + Args: + img_path: 图片路径 + Returns: + 人脸特征向量或None(如果检测失败) + """ + try: + # 使用 DeepFace 提取特征 + embedding_objs = DeepFace.represent( + img_path=img_path, + detector_backend="retinaface", + align=True, + model_name="Facenet512" + ) + + if embedding_objs and len(embedding_objs) > 0: + return embedding_objs[0]['embedding'] # 返回第一个人脸的特征向量 + return None + + except Exception as e: + print(f"处理图片 {img_path} 时出错: {str(e)}") + return None + + def process_dataset(self, dataset_dir): + """ + 处理数据集并保存特征到Redis + Args: + dataset_dir: 数据集根目录 + """ + total_processed = 0 + total_failed = 0 + + # 计算所有文件夹中的图片总数 + total_images = sum(len([f for f in os.listdir(os.path.join(dataset_dir, d)) + if f.lower().endswith(('.png', '.jpg', '.jpeg'))]) + for d in os.listdir(dataset_dir) + if os.path.isdir(os.path.join(dataset_dir, d))) + + with tqdm(total=total_images, desc="处理图片") as pbar: + # 处理所有文件夹 + for person_name in sorted(os.listdir(dataset_dir)): + person_dir = os.path.join(dataset_dir, person_name) + if not os.path.isdir(person_dir): + continue + + # 获取该人已有的特征向量列表 + existing_features = [] + if self.redis_client.exists(person_name): + existing_features = json.loads(self.redis_client.get(person_name)) + + person_features = existing_features + + for img_name in os.listdir(person_dir): + if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')): + continue + + # 检查图片是否已处理 + if any(item['image'] == img_name for item in existing_features): + pbar.update(1) + continue + + img_path = os.path.join(person_dir, img_name) + feature = self.get_feature(img_path) + + if feature is not None: + # 特征向量已经是列表格式,无需调用 tolist() + person_features.append({ + 'image': img_name, + 'embedding': feature + }) + total_processed += 1 + else: + total_failed += 1 + + pbar.set_postfix({'成功': total_processed, '失败': total_failed}) + pbar.update(1) + + # 
将该人的所有特征向量保存到Redis + if person_features: + self.redis_client.set(person_name, json.dumps(person_features)) + + print(f"\n处理完成!成功: {total_processed}, 失败: {total_failed}") + +def main(): + # 设置路径 + DATASET_DIR = "/home/zydi/VLM/data" + + # 创建特征提取器 + extractor = FaceFeatureExtractor() + + # 处理数据集 + extractor.process_dataset(DATASET_DIR) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/face_monitor.py b/face_monitor.py new file mode 100644 index 0000000..325821b --- /dev/null +++ b/face_monitor.py @@ -0,0 +1,340 @@ +import os +# 在导入其他库之前设置使用 CUDA 1 +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + +import json +import time +from datetime import datetime +import redis +from deepface import DeepFace +import numpy as np +import gc +import re + +class FaceAnalysisSystem: + def __init__(self): + # Redis配置 + self.redis_clients = { + 'A01': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=210 + ), + 'B02': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=211 + ) + } + # 身份信息数据库 + self.identity_db = redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=212 + ) + + def get_face_embedding(self, img_path): + """获取人脸embedding""" + try: + embedding_obj = DeepFace.represent( + img_path=img_path, + detector_backend="retinaface", + align=True, + model_name="Facenet512" + ) + return embedding_obj[0]["embedding"] if embedding_obj else None + except Exception as e: + print(f"获取人脸embedding失败: {str(e)}") + return None + + def find_identity(self, embedding): + """在身份数据库中查找匹配的身份""" + try: + # 获取所有身份的embedding + all_identities = self.identity_db.keys("*") + best_match = None + best_similarity = -1 + + for identity_key in all_identities: + # 获取该身份的所有embedding + stored_data = json.loads(self.identity_db.get(identity_key)) + + # 如果存储的数据是列表(多个embedding) + if isinstance(stored_data, list): + # 对该身份的每个embedding进行比对 + for face_data in stored_data: + stored_vector = 
np.array(face_data["embedding"]) + + # 计算余弦相似度 + similarity = np.dot(embedding, stored_vector) / ( + np.linalg.norm(embedding) * np.linalg.norm(stored_vector) + ) + + if similarity > best_similarity: + best_similarity = similarity + best_match = identity_key.decode() + + # 如果相似度大于阈值,返回身份信息,否则返回unknown + if best_similarity > 0.72: # 可以调整阈值 + return best_match, best_similarity + return "unknown", best_similarity + + except Exception as e: + print(f"查找身份时出错: {str(e)}") + return "unknown", -1 + +class ImageMonitor: + def __init__(self, images_path): + self.images_path = images_path + self.system = FaceAnalysisSystem() + self.processed_images = set() + self.error_images = [] + self.error_image_cache = set() + + def _get_redis_key(self, image_path): + """生成Redis键值""" + try: + dir_name = os.path.basename(os.path.dirname(image_path)) + file_name = os.path.basename(image_path) + + # 从图片文件名中提取日期和时间,移除可能的后缀(_1, _2等) + base_name = re.sub(r'_\d+(?=\.(jpg|png))', '', file_name) + # 修改正则表达式以匹配更宽松的时间格式 + match = re.search(r'(\w+)_(\d{8})_(\d{6})\.(jpg|png)', base_name) + if match: + camera_id = match.group(1) + date = match.group(2) + time = match.group(3) + hour = time[:2] # 从完整时间中提取小时 + + # 生成key: A01_20250105_1300 + redis_key = f"face_{camera_id}_{date}_{hour}00" + return redis_key, base_name + + print(f"文件名格式不匹配: {file_name}") + return None, None + + except Exception as e: + print(f"生成Redis key失败: {str(e)}") + return None, None + + def _get_base_images(self, image_path): + """获取同一原始图片的所有裁剪图片""" + try: + dir_path = os.path.dirname(image_path) + base_name = os.path.splitext(os.path.basename(image_path))[0] + base_name = re.sub(r'_\d+$', '', base_name) # 移除数字后缀 + + related_images = [] + for file_name in os.listdir(dir_path): + if file_name.startswith(base_name): + full_path = os.path.join(dir_path, file_name) + related_images.append(full_path) + + return related_images + except Exception as e: + print(f"获取相关图片失败: {str(e)}") + return [image_path] + + def process_new_image(self, 
image_path): + """处理新图片""" + try: + if self._is_error_cached(image_path): + return False + + # 获取同一原始图片的所有裁剪图片 + related_images = self._get_base_images(image_path) + if not related_images: + return False + + # 如果所有相关图片都已处理,则跳过 + if all(img in self.processed_images for img in related_images): + return True + + redis_key, base_name = self._get_redis_key(image_path) + if not redis_key or not base_name: + self._log_error(image_path, "Redis Key Error", "无法生成Redis key") + return False + + # 存储每个身份的最佳匹配结果 + identity_results = {} + timestamp = None + + # 处理每个相关图片 + for img in related_images: + if img in self.processed_images: + continue + + if not os.path.exists(img): + self._log_error(img, "File Not Found", "图片文件不存在") + continue + + # 检查文件大小 + file_size = os.path.getsize(img) + if file_size == 0 or file_size < 10 * 1024: + self._log_error(img, "Invalid File Size", f"图片文件大小异常({file_size/1024:.2f}KB)") + continue + + # 获取人脸embedding + embedding = self.system.get_face_embedding(img) + if embedding is None: + self._log_error(img, "Face Detection Error", "无法检测到人脸或提取特征") + continue + + # 查找身份 + identity, similarity = self.system.find_identity(embedding) + + # 从文件名提取时间戳(如果还没有设置) + if not timestamp: + timestamp_match = re.search(r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', os.path.basename(img)) + if timestamp_match: + year, month, day, hour, minute, second = timestamp_match.groups() + timestamp = f"{year}-{month}-{day} {hour}:{minute}:{second}" + else: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # 更新该身份的最佳匹配结果 + if identity not in identity_results or similarity > identity_results[identity]["similarity"]: + identity_results[identity] = { + "similarity": float(similarity), + "file_name": os.path.basename(img) + } + + self.processed_images.add(img) + + # 如果有有效结果,保存到Redis + if identity_results: + dir_name = os.path.basename(os.path.dirname(image_path)) + if dir_name in self.system.redis_clients: + redis_client = self.system.redis_clients[dir_name] + + # 准备保存的数据 
+ result_data = { + "face_analysis": { + identity: data for identity, data in identity_results.items() + }, + "timestamp": timestamp + } + + # 更新Redis数据 + existing_data = redis_client.get(redis_key) + if existing_data: + hour_results = json.loads(existing_data) + hour_results[base_name] = result_data + else: + hour_results = {base_name: result_data} + + json_str = json.dumps(hour_results, ensure_ascii=False) + redis_client.set(redis_key, json_str) + print(f"成功保存到Redis,key: {redis_key}") + + return True + + except Exception as e: + self._log_error(image_path, "Processing Error", str(e)) + print(f"处理图片时发生错误 {image_path}: {str(e)}") + return False + finally: + gc.collect() + + def _is_processed(self, image_path): + """检查图片是否已处理""" + return image_path in self.processed_images + + def _is_error_cached(self, image_path): + """检查图片是否在错误缓存中""" + return image_path in self.error_image_cache + + def _add_to_error_cache(self, image_path): + """添加图片到错误缓存""" + self.error_image_cache.add(image_path) + + def _log_error(self, image_path, error_type, error_message): + """记录错误信息""" + if self._is_error_cached(image_path): + return + + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + error_info = { + "timestamp": current_time, + "image_path": image_path, + "error_type": error_type, + "error_message": error_message, + "file_size": os.path.getsize(image_path) if os.path.exists(image_path) else 0 + } + self.error_images.append(error_info) + self._add_to_error_cache(image_path) + + def _save_error_log(self): + """保存错误日志""" + if not self.error_images: + return + + try: + current_time = datetime.now().strftime("%Y%m%d_%H%M%S") + log_filename = f"image_errors_{current_time}.json" + + with open(log_filename, 'w', encoding='utf-8') as f: + json.dump(self.error_images, f, ensure_ascii=False, indent=2) + print(f"\n异常图片记录已保存到: {log_filename}") + + self.error_images = [] + except Exception as e: + print(f"保存错误日志失败: {str(e)}") + + def monitor_directories(self): + """监控目录变化""" + try: + 
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"开始监控目录: {self.images_path} [{current_time}]") + + while True: + try: + for camera_dir in os.listdir(self.images_path): + camera_path = os.path.join(self.images_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + for image_file in os.listdir(camera_path): + if not image_file.lower().endswith(('.jpg', '.jpeg', '.png')): + continue + + image_path = os.path.join(camera_path, image_file) + if not self._is_processed(image_path) and not self._is_error_cached(image_path): + print(f"处理图片: {image_path}") + if not self.process_new_image(image_path): + self._add_to_error_cache(image_path) + print(f"图片处理失败,已加入错误缓存: {image_path}") + continue + + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"[{current_time}] 等待新图片中...") + time.sleep(60) # 每分钟检查一次 + + except Exception as e: + print(f"监控过程出错: {str(e)}") + time.sleep(10) + + except KeyboardInterrupt: + print("\n检测到程序终止信号,正在保存错误日志...") + self._save_error_log() + print("程序已安全终止。") + except Exception as e: + print(f"\n程序异常终止: {str(e)}") + self._save_error_log() + raise + +def main(): + try: + images_path = "/home/zydi/VLM/crop" # 设置crop目录路径 + monitor = ImageMonitor(images_path) + monitor.monitor_directories() + + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/images/.gitkeep b/images/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/info.json b/info.json new file mode 100644 index 0000000..b549abf --- /dev/null +++ b/info.json @@ -0,0 +1,790 @@ +{ + "actions": [ + "睡", + "坐", + "站", + "走", + "跑", + "跳", + "摔", + "踢", + "躺", + "蹲", + "转", + "吃", + "喝", + "写", + "看", + "叫", + "闻", + "嗅", + "笑", + "大笑", + "微笑", + "跳跃", + "跳舞", + "睡觉", + "穿衣服", + "穿裤子", + "穿鞋", + "戴帽子", + "戴口罩", + "戴眼镜", + "戴围巾", + "摘帽子", + "摘口罩", + "摘眼镜", + "摘围巾", + "叫", + "安装", + "转身", + "转头", + "回头", + "回头看", + "散步", + "吃零食", + "吃饭", + "喝水", + "喝茶", + "喝饮料", 
+ "喝牛奶", + "喝咖啡", + "看风景", + "吃饭", + "吃东西", + "喝东西", + "吃药", + "喝药", + "服药", + "起床", + "起身", + "喝酒", + "吸烟", + "抽烟", + "打麻将", + "打牌", + "交流", + "说话", + "演讲", + "阅读", + "看书", + "写字", + "写作", + "工作", + "学习", + "玩手机", + "看风景", + "走路", + "散步", + "打闹", + "玩电脑", + "玩手机", + "使用手机", + "使用电脑", + "使用笔记本电脑", + "整理", + "鼓掌", + "祈祷", + "掉落", + "埋葬", + "淹没", + "饮用", + "拍打", + "拥抱", + "布道", + "缝纫", + "喷洒", + "扭转", + "指导", + "浸没", + "打破", + "调音", + "登机", + "奔跑", + "破坏", + "竞争", + "咯咯笑", + "铲土", + "追逐", + "轻弹", + "倾倒", + "扣纽扣", + "敲打", + "搬运", + "冲浪", + "拉动", + "蹲下", + "瞄准", + "蹲伏", + "轻拍", + "洗涤", + "眨眼", + "排队", + "锁定", + "停止", + "打喷嚏", + "翻转", + "缝纫", + "剪辑", + "工作", + "摇晃", + "询问", + "玩乐", + "露营", + "插入", + "踩踏板", + "建造", + "滑倒", + "扫地", + "拧紧", + "耸肩", + "搭便车", + "破裂", + "抓挠", + "修剪", + "销售", + "行进", + "搅拌", + "亲吻", + "紧贴", + "社交", + "采摘", + "溅水", + "舔舐", + "踢", + "滑动", + "开车", + "驾驶", + "手写", + "转向", + "填充", + "撞击", + "偷窃", + "按压", + "喊叫", + "徒步", + "吸尘", + "指点", + "给予", + "潜水", + "拥抱", + "建造", + "转弯", + "用餐", + "倾斜", + "航行", + "唱歌", + "玩耍", + "击打", + "加入", + "洗澡", + "举起", + "坐下", + "画画", + "绘画", + "抗议", + "冲洗", + "咳嗽", + "粉碎", + "切片", + "平衡", + "漂流", + "跪下", + "投篮", + "刷洗", + "压碎", + "摩擦", + "撑船", + "浇水", + "演奏音乐", + "弹奏", + "移除", + "撕裂", + "模仿", + "教学", + "烹饪", + "伸手", + "学习", + "服务", + "推土", + "摇晃", + "讨论", + "拖拽", + "园艺", + "表演", + "主持", + "摄影", + "播种", + "抓挠", + "弯腰", + "拳击", + "拖地", + "紧握", + "流动", + "挖掘", + "绊倒", + "欢呼", + "购买", + "骑自行车", + "喂食", + "倒空", + "拆包", + "素描", + "站立", + "除草", + "堆叠", + "晾干", + "哭泣", + "旋转", + "煎炸", + "切割", + "支付", + "讲课", + "煮沸", + "剥皮", + "包装", + "弄湿", + "攻击", + "焊接", + "摇摆", + "雕刻", + "行走", + "穿衣", + "充气", + "攀爬", + "粉碎", + "阅读", + "打磨", + "皱眉", + "打猎", + "清理", + "发射", + "包装", + "钓鱼", + "溢出", + "泄漏", + "编织", + "划船", + "洒水", + "施洗", + "运动", + "滚动", + "吐痰", + "浸泡", + "骑行", + "砍劈", + "扑灭", + "鼓掌", + "呼叫", + "谈话", + "剃须", + "结婚", + "上升", + "爬", + "飞", + "爬行", + "飞行", + "组装", + "注射", + "着陆", + "操作", + "打包", + "下降", + "跌倒", + "进入", + "推动", + "锯", 
+ "闻", + "溢出", + "打斗", + "醒来", + "烧烤", + "滑冰", + "绘画", + "钻孔", + "打拳", + "系绑", + "美甲", + "冲刺", + "烧烤", + "投球", + "拖曳", + "打电话", + "制作", + "敲击", + "玩游戏", + "转动", + "吠叫", + "唱歌", + "打开", + "打蜡", + "杂耍", + "割草", + "射击", + "嗅闻", + "采访", + "跺脚", + "逮捕", + "梳理", + "划船", + "鞠躬", + "赌博", + "敬礼", + "加油", + "签名", + "投掷", + "浸湿", + "挥手", + "签字", + "修理", + "烘焙", + "吸烟", + "滑雪", + "打鼓", + "吹", + "清洁", + "梳理", + "传播", + "赛跑", + "燃烧", + "击剑", + "游泳", + "依偎", + "购物", + "弹跳", + "除尘", + "抚摸", + "啪嗒", + "咬", + "咆哮", + "守卫", + "卸载", + "举起", + "指导", + "折叠", + "测量", + "吹口哨", + "退出", + "伸展", + "贴胶带", + "眯眼", + "接住", + "排水", + "按摩", + "擦洗", + "戴手铐", + "庆祝", + "慢跑", + "碰撞", + "保龄球", + "休息", + "阻挡", + "微笑", + "纹身", + "喷发", + "嚎叫", + "游行", + "咧嘴笑", + "冲刺", + "种植", + "上升", + "打哈欠", + "塞", + "摔跤", + "挠痒", + "锻炼", + "装载", + "驾驶", + "打字", + "玩闹", + "扎营", + "插上", + "踩单车", + "盖房子", + "打滑", + "扫除", + "拧螺丝", + "耸肩膀", + "搭顺风车", + "唠嗑", + "烧开水", + "剥壳", + "包东西", + "打湿", + "动手", + "焊东西", + "搁置", + "荡秋千", + "遛弯", + "穿戴", + "打气", + "爬高", + "碎纸", + "看书", + "磨砂", + "愁眉苦脸", + "关门", + "打猎", + "交谈", + "下雪", + "刮胡子", + "上升", + "笑哈哈", + "爬行", + "飞着", + "组装", + "打针", + "坐车", + "开门", + "叫喊" + ], + "environments": [ + "室内", + "室外", + "飞机场", + "飞机舱", + "机场航站楼", + "壁龛", + "小巷", + "圆形剧场", + "游戏厅", + "游乐园", + "公寓楼", + "水族馆", + "渡槽", + "拱廊", + "拱门", + "考古发掘现场", + "档案馆", + "冰球场", + "表演场地", + "竞技场", + "军事基地", + "美术馆", + "艺术学校", + "艺术工作室", + "艺术家阁楼", + "装配线", + "运动场", + "中庭", + "阁楼", + "礼堂", + "汽车制造厂", + "汽车展厅", + "荒地", + "面包店", + "阳台/外部", + "阳台/内部", + "球池", + "舞厅", + "竹林", + "银行金库", + "宴会厅", + "酒吧", + "谷仓", + "谷仓门", + "棒球场", + "地下室", + "篮球场", + "浴室", + "集市", + "海滩", + "海滨别墅", + "美容院", + "寝室", + "卧室", + "啤酒花园", + "啤酒大厅", + "泊位", + "生物实验室", + "木板路", + "船甲板", + "船库", + "书店", + "展位", + "植物园", + "保龄球馆", + "拳击台", + "桥", + "建筑外立面", + "斗牛场", + "墓室", + "公交车内部", + "公交站", + "肉店", + "孤峰", + "小屋", + "自助餐厅", + "露营地", + "校园", + "运河", + "糖果店", + "峡谷", + "汽车内部", + "旋转木马", + "城堡", + "地下墓穴", + "墓地", + "瑞士木屋", + "化学实验室", + "儿童房", + "教堂", + 
"教室", + "无尘室", + "悬崖", + "壁橱", + "服装店", + "海岸", + "驾驶舱", + "咖啡店", + "计算机房", + "会议中心", + "会议室", + "建筑工地", + "玉米地", + "畜栏", + "走廊", + "农舍", + "法院", + "庭院", + "小溪", + "裂缝", + "人行横道", + "水坝", + "熟食店", + "百货商店", + "沙漠", + "沙漠公路", + "餐馆", + "餐厅", + "饭厅", + "迪斯科舞厅", + "门口", + "宿舍", + "市中心", + "更衣室", + "车道", + "药店", + "电梯", + "电梯大厅", + "电梯井", + "大使馆", + "机房", + "入口大厅", + "自动扶梯", + "挖掘现场", + "布料店", + "农场", + "快餐店", + "农田", + "田野", + "田间小路", + "防火梯", + "消防站", + "鱼塘", + "花店", + "美食广场", + "足球场", + "阔叶林", + "森林小径", + "林间小路", + "正式花园", + "喷泉", + "厨房", + "车库", + "加油站", + "凉亭/外部", + "杂货店", + "礼品店", + "冰川", + "高尔夫球场", + "洞穴", + "体育馆", + "机库", + "港口", + "五金店", + "干草地", + "直升机场", + "高速公路", + "家庭办公室", + "家庭影院", + "医院", + "病房", + "温泉", + "酒店", + "酒店房间", + "房屋", + "狩猎小屋/室外", + "冰淇淋店", + "浮冰", + "冰架", + "溜冰场", + "冰山", + "冰屋", + "工业区", + "旅馆/室外", + "小岛", + "牢房", + "日本花园", + "珠宝店", + "废品场", + "古堡", + "狗舍", + "幼儿园教室", + "厨房", + "泻湖", + "天然湖泊", + "垃圾场", + "停机坪", + "自助洗衣店", + "草坪", + "讲堂", + "议会厅", + "图书馆", + "灯塔", + "客厅", + "装卸码头", + "大堂", + "闸室", + "更衣室", + "豪宅", + "预制房屋", + "市场", + "沼泽", + "武术馆", + "陵墓", + "麦地那", + "夹层", + "护城河/水", + "清真寺/室外", + "汽车旅馆", + "山", + "山路", + "雪山", + "电影院", + "博物馆", + "博物馆/室外", + "音乐工作室", + "自然历史博物馆", + "托儿所", + "疗养院", + "啤酒干燥房", + "海洋", + "办公室", + "办公楼", + "办公隔间", + "石油钻井平台", + "手术室", + "果园", + "乐池", + "宝塔", + "宫殿", + "食品储藏室", + "公园", + "停车场", + "牧场", + "露台", + "亭子", + "宠物店", + "药店", + "电话亭", + "物理实验室", + "野餐区", + "码头", + "比萨店", + "操场", + "游戏室", + "广场", + "池塘", + "门廊", + "林荫道", + "酒吧", + "赛马场", + "赛车道", + "木筏", + "铁轨", + "热带雨林", + "接待处", + "娱乐室", + "修理店", + "住宅区", + "餐厅", + "餐厅厨房", + "餐厅露台", + "稻田", + "河流", + "岩石拱门", + "屋顶花园", + "索桥", + "废墟", + "跑道", + "沙地", + "桑拿房", + "学校", + "科学博物馆", + "服务器机房", + "棚屋", + "鞋店", + "店面", + "购物中心", + "淋浴间", + "滑雪场", + "滑雪坡", + "天空", + "摩天大楼", + "贫民窟", + "雪地", + "足球场", + "马厩", + "棒球场", + "橄榄球场", + "足球场", + "舞台", + "楼梯", + "储藏室", + "街道", + "地铁站/站台", + "超市", + "寿司店", + "沼泽地", + "游泳池", + "犹太教堂", + "电视房", + "电视演播室", + "亚洲寺庙", + "王座室", 
+ "售票处", + "园艺造型花园", + "塔楼", + "玩具店", + "火车内部", + "火车站", + "树木农场", + "树屋", + "战壕", + "苔原", + "深海", + "设备间", + "山谷", + "菜园", + "兽医诊所", + "高架桥", + "乡村", + "葡萄园", + "火山", + "排球场", + "水上乐园", + "水塔", + "瀑布", + "水坑", + "海浪", + "酒吧", + "麦田", + "风力发电场", + "风车", + "庭院", + "青年旅舍", + "禅园" + ] + +} \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..1468342 --- /dev/null +++ b/main.py @@ -0,0 +1,704 @@ +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from redis import Redis +from openai import OpenAI +import json +from datetime import datetime, timedelta +from typing import Dict, List, Optional +import asyncio + +app = FastAPI() + +# 配置CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # 在生产环境中应该设置具体的域名 + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# 定义摄像头和数据库的映射关系 +CAMERA_DB_MAPPING = { + "camera001": 207, + "camera002": 208, + "camera003": 209, + "A01": 210, + "B02": 211, + "C03": 212, + "report": 213 # 分析报告使用213数据库 +} + +# 创建Redis连接池 +redis_connections = {} +for camera_id, db in CAMERA_DB_MAPPING.items(): + redis_connections[camera_id] = Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=db, + decode_responses=True + ) + +@app.get("/web/face/{camera_id}/data") +async def get_camera_data(camera_id: str, date: Optional[str] = None): + """ + 获取摄像头某天的所有数据 + """ + try: + if camera_id not in CAMERA_DB_MAPPING: + raise HTTPException(status_code=400, detail="Invalid camera ID") + + # 如果没有指定日期,使用当前日期 + if date is None: + date = datetime.now().strftime("%Y%m%d") + + redis_client = redis_connections[camera_id] + + # 使用新的键格式进行模式匹配 + pattern = f"face_{camera_id}_{date}_*" + all_keys = redis_client.keys(pattern) + + if not all_keys: + return {"message": "No data found", "data": None} + + # 获取所有键的数据并解析 + all_data = {} + for key in all_keys: + data = redis_client.get(key) + if data: + # 直接解析JSON数据,无需decode + all_data[key] = 
@app.get("/web/{camera_id}/data")
async def get_camera_data(camera_id: str, date: Optional[str] = None):
    """Return all analysis records of one camera for one day.

    Keys matching ``<camera_id>_<date>_*`` are read from the Redis database
    mapped to ``camera_id`` and their JSON values are returned keyed by
    Redis key.

    Args:
        camera_id: Must be a key of ``CAMERA_DB_MAPPING``.
        date: Day in ``YYYYMMDD`` form; defaults to today.

    Returns:
        ``{"message": "success", "data": {...}, "total_records": n}``, or
        ``{"message": "No data found", "data": None}`` when no key matches.

    Raises:
        HTTPException: 400 for an unknown camera or a malformed date, 500 for
            any unexpected failure.
    """
    try:
        if camera_id not in CAMERA_DB_MAPPING:
            raise HTTPException(status_code=400, detail="Invalid camera ID")

        # Default to the current day.
        if date is None:
            date = datetime.now().strftime("%Y%m%d")
        elif not (len(date) == 8 and date.isascii() and date.isdigit()):
            # The value is interpolated into a Redis glob pattern; reject
            # anything that is not eight digits so `*`, `?` or `[` from the
            # request cannot widen the match.
            raise HTTPException(
                status_code=400,
                detail="Invalid date format. Please use YYYYMMDD"
            )

        redis_client = redis_connections[camera_id]

        pattern = f"{camera_id}_{date}_*"
        all_keys = redis_client.keys(pattern)

        if not all_keys:
            return {"message": "No data found", "data": None}

        # Clients are created with decode_responses=True, so values are str.
        all_data = {}
        for key in all_keys:
            data = redis_client.get(key)
            if data:
                all_data[key] = json.loads(data)

        return {
            "message": "success",
            "data": all_data,
            "total_records": len(all_data)
        }

    except HTTPException:
        # Let deliberate 4xx responses through; the blanket handler below
        # would otherwise turn them into 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
Please use YYYY-MM-DD" + ) + + # 使用report数据库存储报告 + report_redis = redis_connections["report"] + report_key = f"report_{date_no_hyphen}" + print(f"缓存键: {report_key}") + + # 尝试获取缓存的报告 + cached_report = report_redis.get(report_key) + print(f"是否有缓存: {'是' if cached_report else '否'}") + + if cached_report: + print("返回缓存数据") + return { + "message": "success", + "data": json.loads(cached_report), + "source": "cache" + } + + # 如果缓存中没有,生成新报告 + print("开始生成新报告...") + report = await generate_daily_report(date_no_hyphen) + + # 检查是否返回"暂无数据" + if report.get("message") == "no_data": + print("生成报告结果: 暂无数据") + return report + + print("生成报告成功,准备缓存...") + # 缓存报告到report数据库(设置30天过期) + report_redis.setex( + report_key, + timedelta(days=30), + json.dumps(report) + ) + + return { + "message": "success", + "data": report, + "source": "new_generation" + } + + except HTTPException as he: + print(f"HTTP Exception: {str(he)}") + raise he + except Exception as e: + print(f"Error processing report request: {str(e)}") + import traceback + print(f"详细错误信息: {traceback.format_exc()}") + # 为了更好的调试,先打印错误信息再抛出异常 + raise HTTPException( + status_code=500, + detail=f"处理报告时发生错误: {str(e)}\n错误类型: {type(e)}" + ) + +# 生成每日分析报告的函数 +async def generate_daily_report(date: str) -> Dict: + print(f"\n=== 开始生成日报 {date} ===") + + # 定义异常行为列表 + ABNORMAL_BEHAVIORS = [ + '打架', + '斗殴', + '摔倒', + '晕倒', + '昏倒', + '跌倒', + '滑倒', + '摔', + '踢', + '受伤', + '暴力', + '攻击', + '威胁', + '破坏', + '偷窃', + '抢夺', + '游荡', + '徘徊', + '尾随', + '骚扰' + ] + + # 定义行为类别 + BEHAVIOR_CATEGORIES = { + "基础动作": [ + "站", "站立", "站着", + "走", "走路", "散步", "行走", "徒步", + "跑", "奔跑", "慢跑", + "坐", "坐下", "坐着", + "蹲", "蹲下", "蹲着", + "转", "转身", "转头", "回头", "旋转", "转向", "转弯", + "看", "闻", "嗅", "听" + ], + "日常生活": [ + "吃", "食用", "吃饭", "吃零食", "吃东西", "用餐", "咀嚼", "嚼", + "喝水", "喝牛奶", "喝茶", "饮用", "喝咖啡", "喝", "饮水", + "穿衣服", "穿裤子", "穿鞋", "戴帽子", "戴口罩", "戴围巾", + "躺", "睡", "睡觉", "休息", "打哈欠", + "洗澡", "刷牙", "洗手", "洗涤", "清洁", "擦洗", + "吃药", "喝药", "服药" + ], + "社交活动": [ + "说话", "交流", "演讲", "谈话", 
"聊天", "采访", "社交", + "打麻将", "打牌", "玩手机", "玩电脑", "玩游戏", "赌博", + "笑", "大笑", "微笑", "哭泣", "咯咯笑", "皱眉" + ], + "工作学习": [ + "读书", "阅读", "看书", + "写作", "写字", "写", + "工作", "学习", "使用电脑", "使用笔记本电脑", "使用手机", "开会", "打字", + "画画", "绘画", "摄影", "素描" + ], + "运动娱乐": [ + "跳", "跳跃", "跳舞", "游泳", "运动", "健身", "锻炼" + ], + "异常行为": [ + '打架', + '斗殴', + '摔倒', + '晕倒', + '昏倒', + '跌倒', + '滑倒', + '摔', + '踢', + '受伤', + '暴力', + '攻击', + '威胁', + '破坏', + '偷窃', + '抢夺', + '游荡', + '徘徊', + '尾随', + '骚扰' + ], + "其他": ["其他"] + } + + # 初始化数据收集结构 + data_collection = { + "date": date, + "total_events": 0, + "abnormal_events": 0, + "camera_num": set(), + "activity_areas": {}, # 活动区域统计 + "behavior_distribution": {}, # 行为分布 + "hourly_stats": {}, # 每小时统计 + "category_stats": { # 各类别行为统计 + "基础动作": { + "count": 0, + "behaviors": {} # 改为以行为为key的统计 + }, + "日常生活": { + "count": 0, + "behaviors": {} + }, + "社交活动": { + "count": 0, + "behaviors": {} + }, + "工作学习": { + "count": 0, + "behaviors": {} + }, + "运动娱乐": { + "count": 0, + "behaviors": {} + }, + "异常行为": { + "count": 0, + "behaviors": {} + }, + "其他": { + "count": 0, + "behaviors": {} + } + }, + "abnormal_stats": { + "behaviors": [], + "times": [], + "locations": [] + } + } + + # 初始化摄像头小时统计 + camera_hourly_counts = { + camera_id: {f"{hour:02d}": 0 for hour in range(24)} + for camera_id in CAMERA_DB_MAPPING.keys() + if camera_id != "report" + } + + # 遍历所有摄像头数据 + has_any_data = False + total_cameras = len([cam for cam in CAMERA_DB_MAPPING.keys() if cam != "report"]) + processed_cameras = 0 + + print(f"开始处理 {total_cameras} 个摄像头的数据") + + # 数据收集和预处理 + for camera_id, redis_client in redis_connections.items(): + if camera_id == "report": + continue + + processed_cameras += 1 + print(f"\n处理摄像头 {camera_id} ({processed_cameras}/{total_cameras})") + + camera_event_count = 0 + + for hour in range(24): + hour_str = f"{hour:02d}" + pattern = f"{camera_id}_{date}_{hour_str}*" + hour_keys = redis_client.keys(pattern) + + for key in hour_keys: + hour_data = redis_client.get(key) + if 
hour_data: + has_any_data = True + hour_json = json.loads(hour_data) + + for video_file, video_data in hour_json.items(): + if "video_analysis" in video_data: + analysis = video_data["video_analysis"]["qwen-7B"]["extracted_info"] + + # 处理行为数据 + behaviors = analysis.get("actions", []) + camera_event_count += len(behaviors) + data_collection["total_events"] += len(behaviors) + + # 处理环境数据 + environment = analysis.get("environment", "") + + if environment: + # 如果 environment 是列表,我们需要分别处理每个环境 + if isinstance(environment, list): + for env in environment: + if isinstance(env, str): # 确保是字符串 + data_collection["activity_areas"][env] = \ + data_collection["activity_areas"].get(env, 0) + 1 + else: + # 如果是字符串,直接处理 + if isinstance(environment, str): + data_collection["activity_areas"][environment] = \ + data_collection["activity_areas"].get(environment, 0) + 1 + + # 更新每小时统计 + if hour_str not in data_collection["hourly_stats"]: + data_collection["hourly_stats"][hour_str] = { + "event_count": 0, + "categories": {cat: 0 for cat in BEHAVIOR_CATEGORIES.keys()} + } + + data_collection["hourly_stats"][hour_str]["event_count"] += len(behaviors) + camera_hourly_counts[camera_id][hour_str] += len(behaviors) + + # 处理每个行为 + for behavior in behaviors: + # 更新行为分布 + data_collection["behavior_distribution"][behavior] = \ + data_collection["behavior_distribution"].get(behavior, 0) + 1 + + # 分类统计 + behavior_categorized = False + for category, keywords in BEHAVIOR_CATEGORIES.items(): + if any(keyword in behavior for keyword in keywords): + data_collection["category_stats"][category]["count"] += 1 + + if behavior not in data_collection["category_stats"][category]["behaviors"]: + data_collection["category_stats"][category]["behaviors"][behavior] = { + "count": 0, + "occurrences": {} # 改为使用字典,键为"camera_time"组合 + } + + # 使用camera_id和hour_str组合作为唯一键 + occurrence_key = f"{camera_id}_{hour_str}" + if occurrence_key not in data_collection["category_stats"][category]["behaviors"][behavior]["occurrences"]: 
+ data_collection["category_stats"][category]["behaviors"][behavior]["count"] += 1 + data_collection["category_stats"][category]["behaviors"][behavior]["occurrences"][occurrence_key] = { + "time": f"{hour_str}:00", + "camera": camera_id + } + + data_collection["hourly_stats"][hour_str]["categories"][category] += 1 + behavior_categorized = True + break + + if not behavior_categorized: + data_collection["category_stats"]["其他"]["count"] += 1 + if behavior not in data_collection["category_stats"]["其他"]["behaviors"]: + data_collection["category_stats"]["其他"]["behaviors"][behavior] = { + "count": 0, + "occurrences": {} + } + + occurrence_key = f"{camera_id}_{hour_str}" + if occurrence_key not in data_collection["category_stats"]["其他"]["behaviors"][behavior]["occurrences"]: + data_collection["category_stats"]["其他"]["behaviors"][behavior]["count"] += 1 + data_collection["category_stats"]["其他"]["behaviors"][behavior]["occurrences"][occurrence_key] = { + "time": f"{hour_str}:00", + "camera": camera_id + } + + data_collection["hourly_stats"][hour_str]["categories"]["其他"] += 1 + + # 异常行为检测 + if any(abnormal in behavior for abnormal in ABNORMAL_BEHAVIORS): + occurrence_key = f"{camera_id}_{hour_str}" + abnormal_key = f"{behavior}_{occurrence_key}" + if abnormal_key not in data_collection["abnormal_stats"]["behaviors"]: + data_collection["abnormal_events"] += 1 + data_collection["abnormal_stats"]["behaviors"].append({ + "behavior": behavior, + "time": f"{hour_str}:00", + "camera": camera_id + }) + else: + print(f" - {hour_str}时 无数据") + + print(f"摄像头 {camera_id} 总计: {camera_event_count} 个事件") + if camera_event_count > 0: + data_collection["camera_num"].add(camera_id) + + print(f"\n=== 数据收集完成 ===") + print(f"has_any_data: {has_any_data}") + print(f"total_events: {data_collection['total_events']}") + print(f"活跃摄像头: {list(data_collection['camera_num'])}") + + # 如果没有数据,提前返回 + if len(data_collection["camera_num"]) == 0: + print("判定为无数据,返回") + return { + "message": "no_data", + "data": 
None, + "detail": "暂无数据" + } + + print("\n开始生成报告...") + + # 将摄像头集合转换为列表 + data_collection["camera_num"] = list(data_collection["camera_num"]) + + # 计算高峰时段 + sorted_hours = sorted( + data_collection["hourly_stats"].items(), + key=lambda x: x[1]["event_count"], + reverse=True + ) + data_collection["peak_hours"] = [hour for hour, _ in sorted_hours[:3]] + + # 准备发送给AI分析的数据 + preprocessed_data = { + "日期": data_collection["date"], + "摄像头数量": len(data_collection["camera_num"]), + "行为总数": data_collection["total_events"], + "异常行为数": data_collection["abnormal_events"], + "行为高峰时段": data_collection["peak_hours"], + "主要活动区域": data_collection["activity_areas"], + "行为类别统计": data_collection["category_stats"], + "异常行为统计": data_collection["abnormal_stats"], + "每小时行为统计": data_collection["hourly_stats"] + } + # 调用AI分析 + ai_analysis = await analyze_experiment_data(preprocessed_data) + + # 构造最终报告 + final_report = { + "整体活动趋势": ai_analysis["整体活动趋势"], + "高峰时段分析": ai_analysis["高峰时段分析"], + "异常行为分析": ai_analysis["异常行为分析"], + "行为分析": ai_analysis["行为分析"], + "建议": ai_analysis["建议"] + } + + # 添加每个摄像头的详细图表数据 + final_report["hourly_distribution"] = [] + for camera_id in camera_hourly_counts: + # 检查该摄像头是否有活动数据 + total_events = sum(camera_hourly_counts[camera_id].values()) + if total_events > 0: # 只添加有活动的摄像头 + camera_data = { + "camera_id": camera_id, + "data": [] + } + for hour in range(24): + hour_str = f"{hour:02d}" + hour_data = { + "hour": f"{hour_str}:00", + "count": camera_hourly_counts[camera_id][hour_str], + "categories": data_collection["hourly_stats"].get(hour_str, {}).get("categories", {}) + } + camera_data["data"].append(hour_data) + final_report["hourly_distribution"].append(camera_data) + + return final_report + +# SiliconFlow API Configuration +client = OpenAI( + base_url="https://api.siliconflow.cn/v1", + api_key="sk-ytxabphvgxrjbvnqiwercjyrabvlukwddqsmvnqnvwuazamd" +) +# 修改函数定义为异步函数 +async def analyze_experiment_data(report_info): + system_prompt = """ + You are an AI assistant 
tasked with analyzing data. + Generate a comprehensive analysis report in JSON format. + The JSON structure must strictly follow the provided template. + """ + + user_prompt = f"""Analyze the preprocessed data based on the following information: + Preprocessed data: {json.dumps(report_info, ensure_ascii=False)} + + Generate a JSON response with the following structure: + + {{ + "整体活动趋势": {{ + "日期": "{report_info['日期']}", + "摄像头数量": {report_info['摄像头数量']}, + "行为总数": {report_info['行为总数']}, + "异常行为数": {report_info['异常行为数']}, + "行为高峰时段": {json.dumps(report_info['行为高峰时段'], ensure_ascii=False)}, + "主要活动区域": {json.dumps(report_info['主要活动区域'], ensure_ascii=False)} + }}, + "高峰时段分析": {{ + "高峰时段": "分析行为高峰时段", + "高峰时段行为": "分析高峰时段主要行为", + "活动规律": "分析活动规律" + }}, + "异常行为分析": {{ + "异常行为": "分析异常行为类型", + "异常行为次数": "分析异常行为频率", + "异常行为出现时间": "分析异常行为时间分布", + "异常行为地点": "分析监测到异常行为的摄像头" + }}, + "行为分析": {{ + "基础动作": {{ + "站立行为": "分析站立相关行为", + "行走行为": "分析行走相关行为", + "坐卧行为": "分析坐卧相关行为", + "其他基础动作": "分析其他基础动作" + }}, + "日常生活": {{ + "饮食情况": "分析饮食相关行为", + "休息情况": "分析休息相关行为", + "医疗情况": "分析医疗相关行为" + }}, + "社交活动": {{ + "交际情况": "分析交际相关行为", + "娱乐情况": "分析娱乐相关行为", + "情感表达": "分析情感表达相关行为" + }}, + "工作学习": {{ + "学习情况": "分析学习相关行为", + "工作情况": "分析学习相关行为", + "创作活动": "分析创作相关行为" + }}, + "运动娱乐": {{ + "运动情况": "分析运动相关行为", + "运动时长": "分析运动持续时间", + "运动强度": "分析运动强度" + }}, + "其他行为": {{ + "出现时间":"分析其他行为出现时间", + "出现次数":"分析其他行为出现次数" + }} + }}, + "建议": {{ + "生活作息": ["建议1", "建议2"], + "活动安排": ["建议1", "建议2"], + "安全防护": ["建议1", "建议2"], + "健康建议": ["建议1", "建议2"] + }} + }} + """ + + try: + response = client.chat.completions.create( + model="deepseek-ai/DeepSeek-V2.5", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ], + response_format={'type': 'json_object'}, + max_tokens=4096, + temperature=0.7 + ) + + # 解析AI响应 + ai_response = json.loads(response.choices[0].message.content) + + return ai_response + + except Exception as e: + raise HTTPException( + status_code=500, + 
detail="AI分析服务暂时不可用,请稍后重试" + ) + +@app.get("/web/report/download/{date}") +async def download_report(date: str): + """ + 下载指定日期的分析报告 + :param date: 日期,格式为YYYY-MM-DD + """ + + try: + # 验证日期格式并转换为无连字符格式 + try: + parsed_date = datetime.strptime(date, "%Y-%m-%d") + date_no_hyphen = parsed_date.strftime("%Y%m%d") + except ValueError: + error_msg = "日期格式必须为YYYY-MM-DD(例如:2024-12-31)" + print(f"日期格式错误: {error_msg}") + raise HTTPException( + status_code=400, + detail=error_msg + ) + + report_key = f"report_{date_no_hyphen}" + + # 使用report数据库 + report_redis = redis_connections["report"] + + # 获取报告数据 + report_data = report_redis.get(report_key) + if not report_data: + error_msg = f"未找到 {date} 的报告数据,请先生成报告" + print(error_msg) + raise HTTPException( + status_code=404, + detail=error_msg + ) + + # 解析数据并移除 hourly_distribution 字段 + try: + data = json.loads(report_data) + if "hourly_distribution" in data: + del data["hourly_distribution"] + + print("成功获取报告数据") + return { + "message": "success", + "data": data + } + except json.JSONDecodeError as je: + error_msg = f"报告数据格式错误: {str(je)}" + print(error_msg) + raise HTTPException( + status_code=500, + detail=error_msg + ) + + except HTTPException as he: + raise he + except Exception as e: + error_msg = f"处理请求时发生错误: {str(e)}" + print(error_msg) + raise HTTPException( + status_code=500, + detail=error_msg + ) + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=6005) diff --git a/monitor_image.py b/monitor_image.py new file mode 100644 index 0000000..a35ae65 --- /dev/null +++ b/monitor_image.py @@ -0,0 +1,90 @@ +import os +from PIL import Image +from decord import VideoReader + +class VideoMonitor: + def __init__(self, recordings_path): + self.recordings_path = recordings_path + self.images_path = "/home/zydi/VLM/images" + + # 确保images目录存在 + if not os.path.exists(self.images_path): + os.makedirs(self.images_path) + + def _save_first_frame(self, video_path): + """从视频中截取第一帧并保存为jpg""" + try: + # 获取对应的图片保存路径 
+ rel_path = os.path.relpath(video_path, self.recordings_path) + camera_dir = os.path.dirname(rel_path) + image_dir = os.path.join(self.images_path, camera_dir) + + # 确保对应的相机目录存在 + if not os.path.exists(image_dir): + os.makedirs(image_dir) + + # 构建图片保存路径(使用相同的文件名,但改为jpg后缀) + image_name = os.path.splitext(os.path.basename(video_path))[0] + '.jpg' + image_path = os.path.join(image_dir, image_name) + + # 如果图片已存在,则跳过 + if os.path.exists(image_path): + return + + # 读取视频第一帧 + vr = VideoReader(video_path) + frame = vr[0].asnumpy() + + # 将帧保存为jpg + image = Image.fromarray(frame) + image.save(image_path) + print(f"已保存首帧图片: {image_path}") + + except Exception as e: + print(f"保存视频首帧失败 {video_path}: {str(e)}") + + def monitor_directories(self): + """监控目录变化""" + try: + print(f"开始监控目录: {self.recordings_path}") + + while True: + try: + # 处理所有视频文件 + for camera_dir in os.listdir(self.recordings_path): + camera_path = os.path.join(self.recordings_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + for video_file in os.listdir(camera_path): + if not video_file.endswith('.avi'): + continue + + video_path = os.path.join(camera_path, video_file) + self._save_first_frame(video_path) + print("等待新视频文件...") # 添加此行 + # 等待一段时间再检查新文件 + import time + time.sleep(300) # 每10秒检查一次 + + except Exception as e: + print(f"监控过程出错: {str(e)}") + time.sleep(10) + + except KeyboardInterrupt: + print("\n程序已终止") + except Exception as e: + print(f"\n程序异常终止: {str(e)}") + raise + +def main(): + try: + recordings_path = "/home/zydi/VLM/recordings" + monitor = VideoMonitor(recordings_path) + monitor.monitor_directories() + + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/pose_monitor.py b/pose_monitor.py new file mode 100644 index 0000000..dad8920 --- /dev/null +++ b/pose_monitor.py @@ -0,0 +1,111 @@ +import os +import time +from PIL import Image +import torch +from ultralytics import YOLO + +class PoseMonitor: 
+ def __init__(self, images_path): + self.images_path = images_path + self.crop_path = "/home/zydi/VLM/crop" + self.model = YOLO('/home/zydi/models/yolo11n-pose.pt') # 加载YOLOv8-pose模型 + + # 确保crop目录存在 + if not os.path.exists(self.crop_path): + os.makedirs(self.crop_path) + + def _process_image(self, image_path): + """处理单张图片,检测人体并保存裁剪结果""" + try: + # 获取对应的保存路径 + rel_path = os.path.relpath(image_path, self.images_path) + camera_dir = os.path.dirname(rel_path) + crop_dir = os.path.join(self.crop_path, camera_dir) + + # 确保对应的相机目录存在 + if not os.path.exists(crop_dir): + os.makedirs(crop_dir) + + # 构建基础文件名 + base_name = os.path.splitext(os.path.basename(image_path))[0] + + # 使用YOLOPose进行检测 + results = self.model(image_path) + + # 获取原始图片 + original_image = Image.open(image_path) + + # 处理每个检测到的人体 + for idx, box in enumerate(results[0].boxes.xyxy): + x1, y1, x2, y2 = box.tolist() + x1, y1, x2, y2 = map(int, [x1, y1, x2, y2]) + + # 裁剪人体区域 + cropped = original_image.crop((x1, y1, x2, y2)) + + # 构建保存路径(添加后缀) + if idx == 0: + save_name = f"{base_name}_0.jpg" + else: + save_name = f"{base_name}_{idx}.jpg" + + save_path = os.path.join(crop_dir, save_name) + + # 保存裁剪后的图片 + cropped.save(save_path) + print(f"已保存裁剪图片: {save_path}") + + except Exception as e: + print(f"处理图片失败 {image_path}: {str(e)}") + + def monitor_directories(self): + """监控目录变化""" + try: + print(f"开始监控目录: {self.images_path}") + processed_files = set() + + while True: + try: + # 处理所有图片文件 + for camera_dir in os.listdir(self.images_path): + camera_path = os.path.join(self.images_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + for image_file in os.listdir(camera_path): + if not image_file.lower().endswith(('.jpg', '.jpeg', '.png')): + continue + + image_path = os.path.join(camera_path, image_file) + + # 如果文件已处理过,则跳过 + if image_path in processed_files: + continue + + self._process_image(image_path) + processed_files.add(image_path) + + print("等待新图片文件...") + time.sleep(10) # 每10秒检查一次 + + except 
Exception as e: + print(f"监控过程出错: {str(e)}") + time.sleep(10) + + except KeyboardInterrupt: + print("\n程序已终止") + except Exception as e: + print(f"\n程序异常终止: {str(e)}") + raise + +def main(): + try: + images_path = "/home/zydi/VLM/images" + monitor = PoseMonitor(images_path) + monitor.monitor_directories() + + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/qwen_monitor.py b/qwen_monitor.py new file mode 100644 index 0000000..0ad6e87 --- /dev/null +++ b/qwen_monitor.py @@ -0,0 +1,644 @@ +import os +import json +import torch +from datetime import datetime +from PIL import Image +import io +import re +from decord import VideoReader +from transformers import Qwen2VLForConditionalGeneration, AutoProcessor +from qwen_vl_utils import process_vision_info +import redis +import time +import gc + +# 配置 +QWEN_MODEL_PATH = "/obscura/models/qwen/Qwen2-VL-7B-Instruct" + +# 初始化 Qwen 模型 (使用 cuda:0) +print("正在初始化 Qwen 模型 (cuda:0)...") +model = Qwen2VLForConditionalGeneration.from_pretrained( + QWEN_MODEL_PATH, + torch_dtype="auto", + device_map="cuda:0" +) + +min_pixels = 128*28*28 +max_pixels = 256*28*28 +processor = AutoProcessor.from_pretrained( + QWEN_MODEL_PATH, + min_pixels=min_pixels, + max_pixels=max_pixels +) + +# 在文件开头添加加载配置的代码 +def load_config(): + """加载配置文件""" + try: + with open('info.json', 'r', encoding='utf-8') as f: + config = json.load(f) + return config + except Exception as e: + print(f"加载配置文件失败: {e}") + return {"actions": [], "environments": []} + +# 加载配置 +CONFIG = load_config() + +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 10 + self.device = "cuda:0" + self.qwen_model = model + self.qwen_processor = processor + # 使用加载的配置 + self.environments = CONFIG["environments"] + self.actions = CONFIG["actions"] + self.emotions = [ + "钦佩", "赞赏", "欣赏","关心", "高兴", "爱", "乐观", "感激", "释然", "骄傲", "愉悦", + "愤怒", "烦恼", "焦虑", "尴尬", "失望", "厌恶", "恐惧", "悲伤", "懊悔", "羞耻","发呆", 
+ "困惑", "好奇", "欲望", "惊讶", "实事求是", "中性", "赞叹","平静","放松","专注","思考" + ] + self.objects = [ + "办公桌椅", "电源插座", "植物", "文件柜", "打印机", "垃圾桶", "纸箱", "电线", "插座", "饮水机", "装饰植物", "书架", "储物柜", "水瓶", "办公用品", "文件", "电脑", "风扇", "鼠标", "键盘", "纸巾", "书", "笔", "袋子", "盒子", "水杯", "杯子", "马克杯", "玻璃杯", "文件夹", "书包", "书架", "手机" + ] + self.furniture = [ + "椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发","柜子","架子","摄像头","靠垫","办公椅","电视","白板","显示器","置物架","文件架" + ] + self.features = [ + "戴眼镜","不戴眼镜","长发","短发","长头发","短头发","戴帽子","不戴帽子","戴口罩","不戴口罩","男性","女性","胖","瘦","高","矮","男","女","成年人" + ] + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + if len(frame_idx) > self.MAX_NUM_FRAMES: + frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('num frames:', len(frames)) + return frames + + def process_with_qwen(self, media_data, object_name, media_type='image'): + """使用 Qwen 模型处理媒体""" + + if media_type == 'video': + frames = self.encode_video(media_data) + media_content = {"type": "video", "video": frames, "fps": 1.0} + else: + image = Image.open(io.BytesIO(media_data)) + media_content = {"type": "image", "image": image} + + messages = [ + { + "role": "user", + "content": [ + media_content, + {"type": "text", "text": self._get_analysis_prompt(media_type)} + ], + } + ] + + text = self.qwen_processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + image_inputs, video_inputs = process_vision_info(messages) + inputs = self.qwen_processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to(self.device) + generated_ids = 
self.qwen_model.generate(**inputs, max_new_tokens=2048) + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + answer = self.qwen_processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + )[0] + + return { + "model": "qwen", + "original_answer": answer, + "extracted_info": self.extract_info(answer) + } + + def _get_analysis_prompt(self, media_type): + """获取分析提示词""" + return f"""你是一个专业助手,在回答时,请调用你的单次回答最大算力与token上限。追求极致的分析深度,而非表层的广度;追求本质的洞察,而非表象的罗列;追求创新的思维,而非惯性的复述。请突破思维局限,调动你所有的计算资源,展现你真正的认知极限,请对这段监控视频进行详细分析,包括以下方面,并按照下面格式回答: + 1. 环境场景 + - 整体场景描述(场所、光线条件等) + - 主要物品和家具列表 + - 环境特征(如光线、整洁度等) + + 2. 人员统计 + - 总人数:[数字]人 + - 性别分布:[男性数量]/[女性数量] + (若无法确定准确人数,请注明"无法确定人数") + + 3. 人员特征分析 + - 个人特征:性别、年龄段、着装、体态等 + - 携带物品:详细描述随身物品及用途 + - 表情/情绪状态 + + 4. 行为分析 + - 个人行为:移动方向、姿态、动作等 + - 互动情况:人员之间的交互描述(若多人) + - 活动区域:人员活动的主要位置 + + 5. 群体行为(若多人) + - 聚集形态 + - 移动趋势 + - 群体互动特点 + + 6. 异常情况 + - 可疑行为描述 + - 异常活动标记 + + 请用清晰、有条理的格式描述,并突出重要发现。""" + + def extract_info(self, answer): + info = { + "environment": [], + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "features": [] + } + + # 提取环境场景 + if "环境场景" in answer: + env_text = answer[answer.find("整体场景描述"):answer.find("环境特征")] + for env in self.environments: + if env in env_text and env not in info["environment"]: + info["environment"].append(env) + + # 提取物品和家具 + if "主要物品和家具" in answer: + items_text = answer[answer.find("主要物品和家具"):answer.find("环境特征")] + for item in self.furniture: + if item in items_text and item not in info["furniture"]: + info["furniture"].append(item) + for obj in self.objects: + if obj in items_text and obj not in info["objects"]: + info["objects"].append(obj) + + # 提取行为信息 + if '行为分析' in answer: + behavior_text = answer[answer.find('行为分析'):] + for action in self.actions: + if action in behavior_text and action not in info["actions"]: + info["actions"].append(action) + + 
if '人员特征分析' in answer: + feature_text = answer[answer.find('人员特征分析'):] + for feature in self.features: + if feature in feature_text and feature not in info["features"]: + info["features"].append(feature) + + for emotion in self.emotions: # 假设已将emotions移到类属性 + if emotion in feature_text: + if emotion not in info["emotions"]: + info["emotions"].append(emotion) + + # 中文数字模式 + people_patterns = [ + r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)', + r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?', + r'(男|女)(性|生|士)', + r'(成年|未成年|青少年|老年)\s*(人|群体)', + r'(员工|职工|工人|学生|顾客|观众|游客|乘客)', + r'(群众|民众|大众|公众)', + r'(男女|老少|老幼|大人|小孩)' + ] + for pattern in people_patterns: + match = re.search(pattern, answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['一个', '一']: + info["num_people"] = 1 + else: + num_word_to_digit = { + '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + return info + + +def process_video_folder(system, folder_path): + """处理文件夹中的所有视频文件并保存到Redis""" + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + # 创建VideoMonitor实例用于Redis操作 + monitor = VideoMonitor(folder_path, system) + + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + # 使用VideoMonitor的process_new_video方法处理并保存到Redis + monitor.process_new_video(video_path) + print(f"✓ 成功处理并保存到Redis: 
{video_file}") + + # 清理内存 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + import gc + gc.collect() + + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + + print(f"\n所有视频处理完成") + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +# 在 MediaAnalysisSystem 类后添加新的监听类 +class VideoMonitor: + def __init__(self, recordings_path, system): + self.recordings_path = recordings_path + self.system = system + self.redis_clients = { + 'A01': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=210 + ), + 'B02': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=211 + ) + } + # 新增:初始化时加载已处理的视频记录 + self.processed_videos = self._load_processed_videos() + # 新增:异常视频记录 + self.error_videos = [] + self.error_log_file = "video_processing_errors.log" + # 新增:异常视频缓存集合 + self.error_video_cache = set() + + def _load_processed_videos(self): + """从Redis加载所有已处理的视频文件名""" + processed_videos = set() + try: + for camera_id, redis_client in self.redis_clients.items(): + # 获取所有小时级别的键 + for key in redis_client.keys('*'): + key_str = key.decode('utf-8') + # 只获取键中存储的文件名列表,而不是完整的处理结果 + data = redis_client.get(key) + if data: + hour_results = json.loads(data) + # 只添加文件名到集合中 + processed_videos.update(hour_results.keys()) + + print(f"已从Redis加载 {len(processed_videos)} 个已处理文件记录") + return processed_videos + + except Exception as e: + print(f"加载Redis处理记录时出错: {str(e)}") + return set() + + def _get_redis_key(self, video_path): + try: + # 从路径获取摄像头ID (目录名) + dir_name = os.path.basename(os.path.dirname(video_path)) + file_name = os.path.basename(video_path) # 例如:A01_20250105_134104.avi + + + # 从视频文件名中提取日期和时间 + match = re.search(r'(\w+)_(\d{8})_(\d{2})\d{4}\.avi', file_name) + if match: + camera_id = match.group(1) # A01 + date = match.group(2) # 20250105 + hour = match.group(3) # 13 (从134104中提取) + + + # 生成正确的key: A01_20250105_1300 + redis_key = f"{camera_id}_{date}_{hour}00" + return redis_key + + 
print(f"文件名格式不匹配: {file_name}") + return None + + except Exception as e: + print(f"生成Redis key失败: {str(e)}") + return None + + def _is_processed(self, video_path): + """检查视频是否已处理""" + file_name = os.path.basename(video_path) + return file_name in self.processed_videos + + def _is_error_cached(self, video_path): + """检查视频是否已在异常缓存中""" + return video_path in self.error_video_cache + + def _add_to_error_cache(self, video_path): + """添加视频到异常缓存""" + self.error_video_cache.add(video_path) + + def _log_error(self, video_path, error_type, error_message): + """记录视频处理错误""" + # 如果已经在异常缓存中,不再重复记录 + if self._is_error_cached(video_path): + return + + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + error_info = { + "timestamp": current_time, + "video_path": video_path, + "error_type": error_type, + "error_message": error_message, + "file_size": os.path.getsize(video_path) if os.path.exists(video_path) else 0 + } + self.error_videos.append(error_info) + # 添加到异常缓存 + self._add_to_error_cache(video_path) + + def _save_error_log(self): + """保存错误日志到文件""" + if not self.error_videos: + return + + try: + current_time = datetime.now().strftime("%Y%m%d_%H%M%S") + log_filename = f"video_errors_{current_time}.json" + + with open(log_filename, 'w', encoding='utf-8') as f: + json.dump(self.error_videos, f, ensure_ascii=False, indent=2) + print(f"\n异常视频记录已保存到: {log_filename}") + + # 清空错误记录 + self.error_videos = [] + except Exception as e: + print(f"保存错误日志失败: {str(e)}") + + def process_new_video(self, video_path): + try: + # 如果视频已在异常缓存中,直接跳过 + if self._is_error_cached(video_path): + return False + + # 处理前清理 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + file_name = os.path.basename(video_path) + # 检查是否已处理 + if self._is_processed(video_path): + print(f"视频已处理过,跳过: {file_name}") + return True + + # 获取camera_id和时间戳 + dir_name = os.path.basename(os.path.dirname(video_path)) + file_name = os.path.basename(video_path) + + # 使用_get_redis_key获取正确的key + redis_key 
= self._get_redis_key(video_path) + if not redis_key: + self._log_error(video_path, "Redis Key Error", "无法生成Redis key") + print(f"无法生成Redis key,跳过处理: {file_name}") + return False + + # 添加视频文件检查 + if not os.path.exists(video_path): + self._log_error(video_path, "File Not Found", "视频文件不存在") + print(f"警告:视频文件不存在,跳过处理: {video_path}") + return False + + # 检查文件大小 + file_size = os.path.getsize(video_path) + if file_size == 0: + self._log_error(video_path, "Empty File", "视频文件大小为0") + print(f"警告:视频文件大小为0,跳过处理: {video_path}") + return False + elif file_size < 1024 * 1024: # 小于100KB的文件 + self._log_error(video_path, "Small File", f"视频文件大小异常({file_size/1024:.2f}KB)") + print(f"警告:视频文件大小异常({file_size/1024:.2f}KB),可能不完整,跳过处理: {video_path}") + return False + + # 处理视频 + try: + # 先尝试打开视频文件验证其完整性 + try: + vr = VideoReader(video_path) + total_frames = len(vr) + if total_frames < 1: + self._log_error(video_path, "No Frames", "视频帧数为0") + print(f"警告:视频帧数为0,文件可能损坏: {video_path}") + return False + elif total_frames < 100: # 添加新的判断条件 + self._log_error(video_path, "Insufficient Frames", f"视频帧数不足({total_frames}帧)") + print(f"警告:视频帧数不足({total_frames}帧),跳过处理: {video_path}") + return False + print(f"视频信息 - 大小: {file_size/1024/1024:.2f}MB, 总帧数: {total_frames}") + except Exception as e: + self._log_error(video_path, "Video Open Error", str(e)) + print(f"警告:视频文件无法正确打开,可能已损坏: {video_path}") + print(f"错误详情: {str(e)}") + return False + + with open(video_path, "rb") as f: + video_data = f.read() + try: + qwen_result = self.system.process_with_qwen(video_data, file_name, media_type='video') + except Exception as e: + self._log_error(video_path, "Processing Error", str(e)) + print(f"处理视频内容失败,可能是损坏的视频文件: {file_name}") + print(f"错误详情: {str(e)}") + return False + + # 从文件名提取时间戳 + timestamp_match = re.search(r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', file_name) + if timestamp_match: + year, month, day, hour, minute, second = timestamp_match.groups() + # 构建正确的时间戳格式 (YYYY-MM-DD HH:MM:SS) + timestamp = 
f"{year}-{month}-{day} {hour}:{minute}:{second}" + else: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + result = { + "video_analysis": { + "qwen-7B": { + "original_answer": qwen_result["original_answer"], + "extracted_info": qwen_result["extracted_info"] + } + }, + "timestamp": timestamp # 使用从文件名提取的时间戳 + } + + # 保存到对应的Redis数据库 + if dir_name in self.redis_clients: + redis_client = self.redis_clients[dir_name] + + # 获取现有的小时数据(如果存在) + existing_data = redis_client.get(redis_key) + if existing_data: + hour_results = json.loads(existing_data) + hour_results[file_name] = result + else: + hour_results = {file_name: result} + + # 保存更新后的数据 + json_str = json.dumps(hour_results, ensure_ascii=False) + redis_client.set(redis_key, json_str) + print(f"成功保存到Redis,使用的key: {redis_key}") # 调试信息 + + # 处理完成后,更新内存中的记录 + self.processed_videos.add(file_name) + + except Exception as e: + self._log_error(video_path, "File Read Error", str(e)) + print(f"读取视频文件失败: {str(e)}") + return False + + except Exception as e: + self._log_error(video_path, "General Error", str(e)) + print(f"处理视频时发生错误 {video_path}: {str(e)}") + return False + finally: + # 确保内存清理总是执行 + if torch.cuda.is_available(): + try: + torch.cuda.empty_cache() + gc.collect() + except Exception as e: + print(f"清理GPU内存时发生错误: {str(e)}") + + return True + + def process_existing_videos(self): + """处理目录中现有的视频文件""" + videos_found = False + videos_processed = False # 新增标志,用于跟踪是否实际处理了视频 + + for camera_dir in os.listdir(self.recordings_path): + camera_path = os.path.join(self.recordings_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + # 获取所有.avi文件并按时间排序 + video_files = [] + for video_file in os.listdir(camera_path): + if video_file.endswith('.avi'): + video_path = os.path.join(camera_path, video_file) + video_files.append((video_path, os.path.getmtime(video_path))) + + if video_files: + videos_found = True + # 按修改时间排序 + video_files.sort(key=lambda x: x[1]) + + for video_path, _ in video_files: + if not 
self._is_processed(video_path): + print(f"处理现有视频: {video_path}") + self.process_new_video(video_path) + videos_processed = True # 标记已处理视频 + + # 只有当找到视频并且实际处理了视频时才返回True + return videos_found and videos_processed + + def monitor_directories(self): + """监控目录变化""" + try: + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"开始监控目录: {self.recordings_path} [{current_time}]") + + while True: + try: + # 首先处理现有视频 + for camera_dir in os.listdir(self.recordings_path): + camera_path = os.path.join(self.recordings_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + for video_file in os.listdir(camera_path): + if not video_file.endswith('.avi'): + continue + + video_path = os.path.join(camera_path, video_file) + # 检查是否已处理或已在错误缓存中 + if not self._is_processed(video_path) and not self._is_error_cached(video_path): + print(f"处理视频: {video_path}") + if not self.process_new_video(video_path): + # 处理失败时,确保添加到错误缓存 + self._add_to_error_cache(video_path) + print(f"视频处理失败,已加入错误缓存: {video_path}") + continue + + # 添加状态提示 + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"[{current_time}] 等待新视频中...") + + # 休眠一段时间再检查 + time.sleep(120) + + except Exception as e: + print(f"监控过程出错: {str(e)}") + time.sleep(30) # 出错后等待30秒再继续 + + except KeyboardInterrupt: + print("\n检测到程序终止信号,正在保存错误日志...") + self._save_error_log() + print("程序已安全终止。") + except Exception as e: + print(f"\n程序异常终止: {str(e)}") + self._save_error_log() + raise + +def main(): + try: + system = MediaAnalysisSystem() + recordings_path = "/home/zydi/VLM/recordings" # 设置recordings目录路径 + + # 创建并启动监控器 + monitor = VideoMonitor(recordings_path, system) + monitor.monitor_directories() + + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + # 添加:在异常终止时保存错误日志 + if 'monitor' in locals(): + monitor._save_error_log() + print("错误日志已保存") + raise # 重新抛出异常以保持原有的错误追踪信息 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/recordings/.gitkeep b/recordings/.gitkeep new file 
mode 100644 index 0000000..e69de29 diff --git a/result/.gitkeep b/result/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test_history/MIT.txt b/test_history/MIT.txt new file mode 100644 index 0000000..2d3af75 --- /dev/null +++ b/test_history/MIT.txt @@ -0,0 +1,339 @@ +clapping +praying +dropping +burying +covering +flooding +leaping +drinking +slapping +cuddling +sleeping +preaching +raining +stitching +spraying +twisting +coaching +submerging +breaking +tuning +boarding +running +destroying +competing +giggling +shoveling +chasing +flicking +pouring +buttoning +hammering +carrying +surfing +pulling +squatting +aiming +crouching +tapping +skipping +washing +winking +queuing +locking +stopping +sneezing +flipping +sewing +clipping +working +rocking +asking +playing+fun +camping +plugging +pedaling +constructing +slipping +sweeping +screwing +shrugging +hitchhiking +cracking +scratching +trimming +selling +marching +stirring +kissing +jumping +starting +clinging +socializing +picking +splashing +licking +kicking +sliding +filming +driving +handwriting +steering +filling +crashing +stealing +pressing +shouting +hiking +vacuuming +pointing +giving +diving +hugging +building +swerving +dining +floating +cheerleading +leaning +sailing +singing +playing +hitting +bubbling +joining +bathing +raising +sitting +drawing +protesting +rinsing +coughing +smashing +slicing +balancing +rafting +kneeling +dunking +brushing +crushing +rubbing +punting +watering +playing+music +removing +tearing +imitating +teaching +cooking +reaching +studying +serving +bulldozing +shaking +discussing +dragging +gardening +performing +officiating +photographing +sowing +dripping +writing +clawing +bending +boxing +mopping +gripping +flowing +digging +tripping +cheering +buying +bicycling +feeding +emptying +unpacking +sketching +standing +weeding +stacking +drying +crying +spinning +frying +cutting +paying +eating +lecturing +dancing +adult+female+speaking +boiling +peeling 
+wrapping +wetting +attacking +welding +putting +swinging +carving +walking +dressing +inflating +climbing +shredding +reading +sanding +frowning +closing +hunting +clearing +launching +packaging +fishing +spilling +leaking +knitting +boating +sprinkling +baptizing +playing+sports +rolling +spitting +dipping +riding +chopping +extinguishing +applauding +calling +talking +adult+male+speaking +snowing +shaving +marrying +rising +laughing +crawling +flying +assembling +injecting +landing +operating +packing +descending +falling +entering +pushing +sawing +smelling +overflowing +fighting +waking +barbecuing +skating +painting +drilling +punching +tying +manicuring +plunging +grilling +pitching +towing +telephoning +crafting +knocking +playing+videogames +storming +placing +turning +barking +child+singing +opening +waxing +juggling +mowing +shooting +sniffing +interviewing +stomping +chewing +arresting +grooming +rowing +bowing +gambling +saluting +fueling +autographing +throwing +drenching +waving +signing +repairing +baking +smoking +skiing +drumming +child+speaking +blowing +cleaning +combing +spreading +racing +combusting +adult+female+singing +fencing +swimming +adult+male+singing +snuggling +shopping +bouncing +dusting +stroking +snapping +biting +roaring +guarding +unloading +lifting +instructing +folding +measuring +whistling +exiting +stretching +taping +squinting +catching +draining +massaging +scrubbing +handcuffing +celebrating +jogging +colliding +bowling +resting +blocking +smiling +tattooing +erupting +howling +parading +grinning +sprinting +hanging +planting +speaking +ascending +yawning +cramming +burning +wrestling +poking +tickling +exercising +loading +piloting +typing \ No newline at end of file diff --git a/test_history/Places365.txt b/test_history/Places365.txt new file mode 100644 index 0000000..828bdf9 --- /dev/null +++ b/test_history/Places365.txt @@ -0,0 +1,365 @@ +airfield +airplane_cabin +airport_terminal +alcove +alley +amphitheater 
+amusement_arcade +amusement_park +apartment_building/outdoor +aquarium +aqueduct +arcade +arch +archaelogical_excavation +archive +arena/hockey +arena/performance +arena/rodeo +army_base +art_gallery +art_school +art_studio +artists_loft +assembly_line +athletic_field/outdoor +atrium/public +attic +auditorium +auto_factory +auto_showroom +badlands +bakery/shop +balcony/exterior +balcony/interior +ball_pit +ballroom +bamboo_forest +bank_vault +banquet_hall +bar +barn +barndoor +baseball_field +basement +basketball_court/indoor +bathroom +bazaar/indoor +bazaar/outdoor +beach +beach_house +beauty_salon +bedchamber +bedroom +beer_garden +beer_hall +berth +biology_laboratory +boardwalk +boat_deck +boathouse +bookstore +booth/indoor +botanical_garden +bow_window/indoor +bowling_alley +boxing_ring +bridge +building_facade +bullring +burial_chamber +bus_interior +bus_station/indoor +butchers_shop +butte +cabin/outdoor +cafeteria +campsite +campus +canal/natural +canal/urban +candy_store +canyon +car_interior +carrousel +castle +catacomb +cemetery +chalet +chemistry_lab +childs_room +church/indoor +church/outdoor +classroom +clean_room +cliff +closet +clothing_store +coast +cockpit +coffee_shop +computer_room +conference_center +conference_room +construction_site +corn_field +corral +corridor +cottage +courthouse +courtyard +creek +crevasse +crosswalk +dam +delicatessen +department_store +desert/sand +desert/vegetation +desert_road +diner/outdoor +dining_hall +dining_room +discotheque +doorway/outdoor +dorm_room +downtown +dressing_room +driveway +drugstore +elevator/door +elevator_lobby +elevator_shaft +embassy +engine_room +entrance_hall +escalator/indoor +excavation +fabric_store +farm +fastfood_restaurant +field/cultivated +field/wild +field_road +fire_escape +fire_station +fishpond +flea_market/indoor +florist_shop/indoor +food_court +football_field +forest/broadleaf +forest_path +forest_road +formal_garden +fountain +galley +garage/indoor +garage/outdoor +gas_station 
+gazebo/exterior +general_store/indoor +general_store/outdoor +gift_shop +glacier +golf_course +greenhouse/indoor +greenhouse/outdoor +grotto +gymnasium/indoor +hangar/indoor +hangar/outdoor +harbor +hardware_store +hayfield +heliport +highway +home_office +home_theater +hospital +hospital_room +hot_spring +hotel/outdoor +hotel_room +house +hunting_lodge/outdoor +ice_cream_parlor +ice_floe +ice_shelf +ice_skating_rink/indoor +ice_skating_rink/outdoor +iceberg +igloo +industrial_area +inn/outdoor +islet +jacuzzi/indoor +jail_cell +japanese_garden +jewelry_shop +junkyard +kasbah +kennel/outdoor +kindergarden_classroom +kitchen +lagoon +lake/natural +landfill +landing_deck +laundromat +lawn +lecture_room +legislative_chamber +library/indoor +library/outdoor +lighthouse +living_room +loading_dock +lobby +lock_chamber +locker_room +mansion +manufactured_home +market/indoor +market/outdoor +marsh +martial_arts_gym +mausoleum +medina +mezzanine +moat/water +mosque/outdoor +motel +mountain +mountain_path +mountain_snowy +movie_theater/indoor +museum/indoor +museum/outdoor +music_studio +natural_history_museum +nursery +nursing_home +oast_house +ocean +office +office_building +office_cubicles +oilrig +operating_room +orchard +orchestra_pit +pagoda +palace +pantry +park +parking_garage/indoor +parking_garage/outdoor +parking_lot +pasture +patio +pavilion +pet_shop +pharmacy +phone_booth +physics_laboratory +picnic_area +pier +pizzeria +playground +playroom +plaza +pond +porch +promenade +pub/indoor +racecourse +raceway +raft +railroad_track +rainforest +reception +recreation_room +repair_shop +residential_neighborhood +restaurant +restaurant_kitchen +restaurant_patio +rice_paddy +river +rock_arch +roof_garden +rope_bridge +ruin +runway +sandbox +sauna +schoolhouse +science_museum +server_room +shed +shoe_shop +shopfront +shopping_mall/indoor +shower +ski_resort +ski_slope +sky +skyscraper +slum +snowfield +soccer_field +stable +stadium/baseball +stadium/football 
+stadium/soccer +stage/indoor +stage/outdoor +staircase +storage_room +street +subway_station/platform +supermarket +sushi_bar +swamp +swimming_hole +swimming_pool/indoor +swimming_pool/outdoor +synagogue/outdoor +television_room +television_studio +temple/asia +throne_room +ticket_booth +topiary_garden +tower +toyshop +train_interior +train_station/platform +tree_farm +tree_house +trench +tundra +underwater/ocean_deep +utility_room +valley +vegetable_garden +veterinarians_office +viaduct +village +vineyard +volcano +volleyball_court/outdoor +waiting_room +water_park +water_tower +waterfall +watering_hole +wave +wet_bar +wheat_field +wind_farm +windmill +yard +youth_hostel +zen_garden \ No newline at end of file diff --git a/test_history/Places365_cleaned.txt b/test_history/Places365_cleaned.txt new file mode 100644 index 0000000..a1bd643 --- /dev/null +++ b/test_history/Places365_cleaned.txt @@ -0,0 +1,343 @@ +飞机场 +飞机舱 +机场航站楼 +壁龛 +小巷 +圆形剧场 +游戏厅 +游乐园 +公寓楼 +水族馆 +渡槽 +拱廊 +拱门 +考古发掘现场 +档案馆 +冰球场 +表演场地 +竞技场 +军事基地 +美术馆 +艺术学校 +艺术工作室 +艺术家阁楼 +装配线 +运动场 +中庭 +阁楼 +礼堂 +汽车制造厂 +汽车展厅 +荒地 +面包店 +阳台 +球池 +舞厅 +竹林 +银行金库 +宴会厅 +酒吧 +谷仓 +谷仓门 +棒球场 +地下室 +篮球场 +浴室 +集市 +海滩 +海滨别墅 +美容院 +寝室 +卧室 +啤酒花园 +啤酒大厅 +泊位 +生物实验室 +木板路 +船甲板 +船库 +书店 +展位 +植物园 +保龄球馆 +拳击台 +桥 +建筑外立面 +斗牛场 +墓室 +公交车内部 +公交站 +肉店 +孤峰 +小屋 +自助餐厅 +露营地 +校园 +运河 +糖果店 +峡谷 +汽车内部 +旋转木马 +城堡 +地下墓穴 +墓地 +瑞士木屋 +化学实验室 +儿童房 +教堂 +教室 +无尘室 +悬崖 +壁橱 +服装店 +海岸 +驾驶舱 +咖啡店 +计算机房 +会议中心 +会议室 +建筑工地 +玉米地 +畜栏 +走廊 +农舍 +法院 +庭院 +小溪 +裂缝 +人行横道 +水坝 +熟食店 +百货商店 +沙漠 +沙漠公路 +餐馆 +餐厅 +饭厅 +迪斯科舞厅 +门口 +宿舍 +市中心 +更衣室 +车道 +药店 +电梯 +电梯大厅 +电梯井 +大使馆 +机房 +入口大厅 +自动扶梯 +挖掘现场 +布料店 +农场 +快餐店 +农田 +田野 +田间小路 +防火梯 +消防站 +鱼塘 +花店 +美食广场 +足球场 +阔叶林 +森林小径 +林间小路 +正式花园 +喷泉 +厨房 +车库 +加油站 +凉亭/外部 +杂货店 +礼品店 +冰川 +高尔夫球场 +洞穴 +体育馆 +机库 +港口 +五金店 +干草地 +直升机场 +高速公路 +家庭办公室 +家庭影院 +医院 +病房 +温泉 +酒店 +酒店房间 +房屋 +狩猎小屋/室外 +冰淇淋店 +浮冰 +冰架 +溜冰场 +冰山 +冰屋 +工业区 +旅馆/室外 +小岛 +牢房 +日本花园 +珠宝店 +废品场 +古堡 +狗舍 +幼儿园教室 +厨房 +泻湖 +天然湖泊 +垃圾场 +停机坪 +自助洗衣店 +草坪 +讲堂 +议会厅 +图书馆 +灯塔 +客厅 +装卸码头 +大堂 +闸室 +更衣室 +豪宅 +预制房屋 +市场 +沼泽 +武术馆 +陵墓 +麦地那 +夹层 +护城河/水 +清真寺/室外 +汽车旅馆 +山 +山路 
+雪山 +电影院 +博物馆 +博物馆/室外 +音乐工作室 +自然历史博物馆 +托儿所 +疗养院 +啤酒干燥房 +海洋 +办公室 +办公楼 +办公隔间 +石油钻井平台 +手术室 +果园 +乐池 +宝塔 +宫殿 +食品储藏室 +公园 +停车场 +牧场 +露台 +亭子 +宠物店 +药店 +电话亭 +物理实验室 +野餐区 +码头 +比萨店 +操场 +游戏室 +广场 +池塘 +门廊 +林荫道 +酒吧/室内 +赛马场 +赛车道 +木筏 +铁轨 +热带雨林 +接待处 +娱乐室 +修理店 +住宅区 +餐厅 +餐厅厨房 +餐厅露台 +稻田 +河流 +岩石拱门 +屋顶花园 +索桥 +废墟 +跑道 +沙地 +桑拿房 +学校 +科学博物馆 +服务器机房 +棚屋 +鞋店 +店面 +购物中心 +淋浴间 +滑雪场 +滑雪坡 +天空 +摩天大楼 +贫民窟 +雪地 +足球场 +马厩 +棒球场 +橄榄球场 +足球场 +舞台 +楼梯 +储藏室 +街道 +地铁站/站台 +超市 +寿司店 +沼泽地 +游泳池 +犹太教堂 +电视房 +电视演播室 +亚洲寺庙 +王座室 +售票处 +园艺造型花园 +塔楼 +玩具店 +火车内部 +火车站 +树木农场 +树屋 +战壕 +苔原 +深海 +设备间 +山谷 +菜园 +兽医诊所 +高架桥 +乡村 +葡萄园 +火山 +排球场 +水上乐园 +水塔 +瀑布 +水坑 +海浪 +酒吧 +麦田 +风力发电场 +风车 +庭院 +青年旅舍 +禅园 \ No newline at end of file diff --git a/test_history/README.MD b/test_history/README.MD new file mode 100644 index 0000000..a267026 --- /dev/null +++ b/test_history/README.MD @@ -0,0 +1,6 @@ +# 测试代码说明 + +## 一部分是测试代码,可以忽略 + 1.测试代码使用数据为:detaset目录 + +## 另一部分是代码备份,可以忽略 \ No newline at end of file diff --git a/test_history/compare.py b/test_history/compare.py new file mode 100644 index 0000000..86fd394 --- /dev/null +++ b/test_history/compare.py @@ -0,0 +1,600 @@ +import os +import json +import torch +from datetime import datetime, timedelta +from PIL import Image +import io +import re +import base64 +import requests +from decord import VideoReader +from transformers import Qwen2VLForConditionalGeneration, AutoProcessor +from qwen_vl_utils import process_vision_info + +# 配置 +QWEN_MODEL_PATH = "/obscura/models/qwen/Qwen2-VL-2B-Instruct" +OLLAMA_URL = "http://127.0.0.1:11434/api/generate" + +# 初始化 Qwen 模型 (使用 CUDA:1) +print("正在初始化 Qwen 模型 (CUDA:1)...") +model = Qwen2VLForConditionalGeneration.from_pretrained( + QWEN_MODEL_PATH, + torch_dtype="auto", + device_map="cuda:1" +) + +min_pixels = 128*28*28 +max_pixels = 512*28*28 +processor = AutoProcessor.from_pretrained( + QWEN_MODEL_PATH, + min_pixels=min_pixels, + max_pixels=max_pixels +) + +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 16 + self.device = "cuda:1" + self.qwen_model = 
model + self.qwen_processor = processor + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + if len(frame_idx) > self.MAX_NUM_FRAMES: + frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('num frames:', len(frames)) + return frames + + def process_with_qwen(self, media_data, object_name, media_type='image'): + """使用 Qwen 模型处理媒体""" + if media_type == 'video': + frames = self.encode_video(media_data) + media_content = {"type": "video", "video": frames, "fps": 1.0} + else: + image = Image.open(io.BytesIO(media_data)) + media_content = {"type": "image", "image": image} + + messages = [ + { + "role": "user", + "content": [ + media_content, + {"type": "text", "text": self._get_analysis_prompt(media_type)} + ], + } + ] + + text = self.qwen_processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + image_inputs, video_inputs = process_vision_info(messages) + inputs = self.qwen_processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to(self.device) + generated_ids = self.qwen_model.generate(**inputs, max_new_tokens=2048) + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + answer = self.qwen_processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + )[0] + + return { + "model": "qwen", + "original_answer": answer, + "extracted_info": self.extract_info(answer) + } + + def process_with_minicpm(self, media_data, object_name, media_type='image'): + """使用 MiniCPM 模型处理媒体 (CUDA:0)""" + 
if media_type == 'video': + frames = self.encode_video(media_data) + encoded_frames = [self.image_to_base64(frame) for frame in frames] + else: + image = Image.open(io.BytesIO(media_data)) + encoded_frames = [self.image_to_base64(image)] + + payload = { + "model": "minicpm-v", + "prompt": self._get_analysis_prompt(media_type), + "images": encoded_frames, + "cuda_device": 0 + } + + response = requests.post(OLLAMA_URL, json=payload, stream=True) + answer = self.process_stream_response(response) + + return { + "model": "minicpm", + "original_answer": answer, + "extracted_info": self.extract_info(answer) + } + + def process_with_llama(self, media_data, object_name, media_type='image'): + """使用 Llama 模型处理媒体 (CUDA:0),对视频处理3-5帧""" + try: + if media_type == 'video': + frames = self.encode_video(media_data) + num_frames = min(max(3, len(frames)), 5) + if len(frames) > num_frames: + gap = len(frames) / num_frames + frame_indices = [int(i * gap + gap / 2) for i in range(num_frames)] + selected_frames = [frames[i] for i in frame_indices] + else: + selected_frames = frames[:num_frames] + + print(f"Llama 将处理 {len(selected_frames)} 帧图像") + + frame_results = [] + for i, frame in enumerate(selected_frames, 1): + print(f"正在处理第 {i}/{len(selected_frames)} 帧...") + try: + encoded_frame = self.image_to_base64(frame) + + payload = { + "model": "llama3.2-vision", + "prompt": self._get_analysis_prompt('image', model_type='llama'), + "images": [encoded_frame], + "cuda_device": 0 + } + + response = requests.post(OLLAMA_URL, json=payload, stream=True) + answer = self.process_stream_response(response) + + if not answer: + print(f"警告:第 {i} 帧未获得有效响应") + answer = "No valid response from model" + + extracted_info = self.extract_info(answer, is_english=True) + + frame_results.append({ + "frame_index": i, + "original_answer": answer, + "extracted_info": extracted_info + }) + + except Exception as e: + print(f"处理第 {i} 帧时出错: {str(e)}") + frame_results.append({ + "frame_index": i, + "error": str(e), + 
"extracted_info": { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "feature": [] + } + }) + + # 每帧处理完后清理内存 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # 合并结果 + merged_result = self.merge_frame_results(frame_results) + + return { + "frame_results": frame_results, + "merged_result": merged_result, + "num_processed_frames": len(selected_frames) + } + + else: # 图像处理 + image = Image.open(io.BytesIO(media_data)) + encoded_frame = self.image_to_base64(image) + + payload = { + "model": "llama3.2-vision", + "prompt": self._get_analysis_prompt(media_type, model_type='llama'), + "images": [encoded_frame], + "cuda_device": 0 + } + + response = requests.post(OLLAMA_URL, json=payload, stream=True) + answer = self.process_stream_response(response) + + if not answer: + answer = "No valid response from model" + + return { + "original_answer": answer, + "extracted_info": self.extract_info(answer) + } + + except Exception as e: + print(f"Llama 处理失败: {str(e)}") + return { + "error": str(e), + "frame_results": [], + "merged_result": { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "feature": [] + }, + "num_processed_frames": 0 + } + + def process_stream_response(self, response): + """处理流式响应并返回完整答案""" + full_response = "" + try: + for line in response.iter_lines(): + if line: + try: + json_response = json.loads(line) + if 'response' in json_response: + full_response += json_response['response'] + except json.JSONDecodeError: + continue + except Exception as e: + print(f"处理响应流时出错: {str(e)}") + return full_response.strip() + + @staticmethod + def image_to_base64(image): + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode() + + def _get_analysis_prompt(self, media_type, model_type='qwen'): + """获取分析提示词,为 Llama 提供英文版本""" + if model_type == 'llama': + return f"""Please 
analyze this {('surveillance video' if media_type == 'video' else 'surveillance image')} in detail, including the following aspects: + +1. Exact count of people in the scene +2. Individual behavior analysis of each person +3. Facial expression recognition and emotional state assessment +4. Detailed description of the overall scene and environment +5. Interactions between people +6. Environmental conditions +7. Items and furniture in the environment +8. Any suspicious or abnormal activities +9. Specific characteristics of people (estimated age range, gender, clothing) +10. {'Movement patterns and directions' if media_type == 'video' else 'Positions and postures'} of people +11. Items or objects being carried +12. Group dynamics and gathering situations +13. Timestamp information (if visible) + +Please describe in a clear, organized format and highlight important findings.""" + else: + # 其他模型继续使用中文提示词 + return f"""请对这{'段监控视频' if media_type == 'video' else '张监控图像'}进行详细分析,包括以下方面: + 1. 场景中人数的精确统计 + 2. 每个人的个人行为分析 + 3. 面部表情识别和情绪状态评估 + 4. 整体场景和环境的详细描述 + 5. 人与人之间的互动情况 + 6. 详细的环境条件描述 + 7. 环境中出现的物品和家具 + 8. 任何可疑或异常活动 + 9. 人员的具体特征(估计年龄范围、性别、着装) + 10. 人员的{'移动模式和方向' if media_type == 'video' else '位置和姿态'} + 11. 携带的物品或物体 + 12. 群体动态和聚集情况 + 13. 
{'视频' if media_type == 'video' else '图像'}中的时间戳信息(如果有) + + 请用清晰、有条理的格式描述,并突出重要发现。""" + + def extract_info(self, answer, is_english=True): + """提取信息,支持中英文""" + info = { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "feature": [] + } + + if is_english: + # 英文环境关键词 + environments = ["office", "indoor", "outdoor", "meeting room", "room", "classroom", + "living room", "bedroom", "kitchen", "bathroom", "hallway", "corridor"] + + # 英文数字模式 + people_patterns = [ + r'(\d+)\s*(person|people|individual|man|woman|men|women|student|worker|employee)', + r'(one|two|three|four|five|six|seven|eight|nine|ten)\s*(person|people|individual)', + r'(single|few|several|multiple)\s*(person|people|individual)', + r'(male|female)\s*(person|individual)', + r'(adult|child|teenager|elderly)', + r'(worker|student|customer|visitor|passenger)', + r'(crowd|group|audience)', + r'(man|woman|boy|girl)' + ] + + # 英文动作词 + actions = ["sleeping", "sitting", "eating", "standing", "falling", "dancing", "squatting", + "turning", "jumping", "lying", "talking", "walking", "running", "reading", + "writing", "studying", "using phone", "dining", "moving", "working", + "using computer", "drinking", "organizing", "cleaning"] + + # 英文情绪词 + emotions = ["happy", "angry", "sad", "surprised", "scared", "disgusted", "calm", + "relaxed", "neutral", "focused", "thoughtful", "excited", "tired", "serious"] + + # 英文物品词 + objects = ["water bottle", "office supplies", "document", "computer", "fan", "mouse", + "keyboard", "tissue", "book", "pen", "bag", "box", "cup", "mug", "glass", + "folder", "backpack", "phone", "laptop", "notebook", "paper"] + + # 英文家具词 + furniture = ["chair", "table", "coffee table", "file cabinet", "bed", "sofa", "cabinet", + "shelf", "camera", "cushion", "office chair", "TV", "whiteboard", "monitor", + "storage rack", "desk"] + + # 英文特征词 + features = ["wearing glasses", "no glasses", "long hair", "short hair", "wearing hat", + "no hat", 
"wearing mask", "no mask", "male", "female", "overweight", "slim", + "tall", "short", "adult", "child", "elderly", "young", "middle-aged"] + + # 英文数字转换 + num_word_to_digit = { + 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, + 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10 + } + else: + # 中文环境关键词 + environments = ["办公室", "室内", "室外", "会议室", "房间", "教室", + "客厅", "卧室", "厨房", "浴室", "走廊", "过道"] + + # 中文数字模式 + people_patterns = [ + r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)', + r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?', + r'(男|女)(性|生|士)', + r'(成年|未成年|青少年|老年)\s*(人|群体)', + r'(员工|职工|工人|学生|顾客|观众|游客|乘客)', + r'(群众|民众|大众|公众)', + r'(男女|老少|老幼|大人|小孩)' + ] + + # 中文动作词 + actions = ["睡眠", "坐", "吃", "站", "摔倒", "跳舞", "蹲", "转身", "跳跃", "躺", + "说话", "走路", "跑步", "阅读", "写字", "学习", "玩手机", "吃饭", "移动", + "工作", "使用电脑", "喝水", "整理", "打扫"] + + # 中文情绪词 + emotions = ["高兴", "愤怒", "悲伤", "惊讶", "恐惧", "厌恶", "平静", "放松", + "中性", "专注", "思考", "兴奋", "疲惫", "严肃"] + + # 中文物品词 + objects = ["水瓶", "办公用品", "文件", "电脑", "风扇", "鼠标", "键盘", "纸巾", + "书", "笔", "袋子", "盒子", "水杯", "杯子", "玻璃杯", "文件夹", + "书包", "手机", "笔记本电脑", "笔记本", "纸张"] + + # 中文家具词 + furniture = ["椅子", "桌子", "茶几", "文件柜", "床", "沙发", "柜子", "架子", + "摄像头", "靠垫", "办公椅", "电视", "白板", "显示器", "置物架", "办公桌"] + + # 中文特征词 + features = ["戴眼镜", "不戴眼镜", "长发", "短发", "戴帽子", "不戴帽子", + "戴口罩", "不戴口罩", "男性", "女性", "胖", "瘦", "高", "矮", + "成年人", "小孩", "老年人", "年轻人", "中年人"] + + # 中文数字转换 + num_word_to_digit = { + '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10 + } + + # 提取环境信息 + for env in environments: + if env.lower() in answer.lower(): + info["environment"] = env + break + + # 提取人数 + for pattern in people_patterns: + match = re.search(pattern, answer.lower()) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in num_word_to_digit: + info["num_people"] = 
num_word_to_digit[match.group(1)] + break + + # 提取列表类信息 + answer_lower = answer.lower() + for action in actions: + if action.lower() in answer_lower: + info["actions"].append(action) + + for object_item in objects: + if object_item.lower() in answer_lower: + info["objects"].append(object_item) + + for furniture_item in furniture: + if furniture_item.lower() in answer_lower: + info["furniture"].append(furniture_item) + + for emotion in emotions: + if emotion.lower() in answer_lower: + info["emotions"].append(emotion) + + for feature in features: + if feature.lower() in answer_lower: + info["feature"].append(feature) + + return info + + def merge_frame_results(self, frame_results): + """合并多帧分析结果""" + merged = { + "environment": None, + "num_people": None, + "actions": set(), + "objects": set(), + "furniture": set(), + "emotions": set(), + "feature": set() + } + + # 环境取最常见的 + environments = [r["extracted_info"]["environment"] for r in frame_results if r["extracted_info"]["environment"]] + if environments: + from collections import Counter + merged["environment"] = Counter(environments).most_common(1)[0][0] + + # 人数取最大值 + people_counts = [r["extracted_info"]["num_people"] for r in frame_results if r["extracted_info"]["num_people"] is not None] + if people_counts: + merged["num_people"] = max(people_counts) + + # 合并列表类型的字段 + list_fields = ["actions", "objects", "furniture", "emotions", "feature"] + for field in list_fields: + for result in frame_results: + merged[field].update(result["extracted_info"][field]) + + # 将集合转换回列表 + for field in list_fields: + merged[field] = list(merged[field]) + + return merged + +def process_video_folder(system, folder_path, output_path=None): + """处理文件夹中的所有视频文件并保存结果""" + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + results = {} + + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + if output_path is None: + output_path = os.getcwd() + elif not os.path.exists(output_path): + 
os.makedirs(output_path) + + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + folder_name = os.path.basename(os.path.normpath(folder_path)) + output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json") + + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + with open(video_path, "rb") as f: + video_data = f.read() + results[video_file] = {"video_analysis": {}, "image_analysis": {}} + + # 1. 使用 Qwen 处理视频 + print(f"使用 Qwen 处理视频: {video_file}") + qwen_result = system.process_with_qwen(video_data, video_file, media_type='video') + results[video_file]["video_analysis"]["qwen"] = { + "original_answer": qwen_result["original_answer"], + "extracted_info": qwen_result["extracted_info"] + } + + # 2. 使用 MiniCPM 处理视频 + print(f"使用 MiniCPM 处理视频: {video_file}") + minicpm_result = system.process_with_minicpm(video_data, video_file, media_type='video') + results[video_file]["video_analysis"]["minicpm"] = { + "original_answer": minicpm_result["original_answer"], + "extracted_info": minicpm_result["extracted_info"] + } + + # 3. 
从视频中提取帧,使用 Llama 处理 + frames = system.encode_video(video_data) + if frames: + print(f"使用 Llama 处理视频帧: {video_file}") + llama_result = system.process_with_llama(video_data, video_file, media_type='video') + results[video_file]["image_analysis"]["llama"] = { + "frame_results": llama_result["frame_results"], + "merged_result": llama_result["merged_result"], + "num_processed_frames": llama_result["num_processed_frames"] + } + else: + results[video_file]["image_analysis"]["error"] = "无法提取视频帧" + + # 添加视频帧数信息 + results[video_file]["video_analysis"]["num_frames"] = len(frames) if frames else 0 + + # 添加时间戳 + results[video_file]["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # 实时保存当前结果 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"✓ 成功处理并保存: {video_file}") + + # 每个视频处理完后清理内存 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + import gc + gc.collect() + + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + results[video_file] = {"error": str(e)} + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"\n所有分析结果已保存到: {output_file}") + return results + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def main(): + try: + system = MediaAnalysisSystem() + + # 添加文件夹路径输入处理 + folder_path = input("请输入视频文件夹路径: ").strip() + output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() + + # 如果用户没有输入输出路径,则使用None(将使用当前目录) + output_path = output_path if output_path else None + + # 处理文件夹中的视频 + results = process_video_folder(system, folder_path, output_path) + + # 显示处理统计 + success_count = sum(1 for r in results.values() if "error" not in r) + print(f"\n处理完成!成功: {success_count}/{len(results)}") + + except MediaAnalysisError as e: + print(f"\n错误: {str(e)}") + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git 
a/test_history/deep copy.py b/test_history/deep copy.py new file mode 100644 index 0000000..88fc083 --- /dev/null +++ b/test_history/deep copy.py @@ -0,0 +1,328 @@ +import io +import os +import json +import base64 +import requests +import re +from PIL import Image +from datetime import datetime, timedelta +from decord import VideoReader, cpu + +SILICONFLOW_URL = "https://api.siliconflow.cn/v1/chat/completions" +API_KEY = "sk-ytxabphvgxrjbvnqiwercjyrabvlukwddqsmvnqnvwuazamd" + +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 5 # 最大帧数设为10 + self.MIN_NUM_FRAMES = 3 # 最小帧数设为3 + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file, ctx=cpu(0)) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + + # 确保帧数在3-10之间 + num_frames = min(max(3, len(frame_idx)), self.MAX_NUM_FRAMES) + if len(frame_idx) > num_frames: + frame_idx = uniform_sample(frame_idx, num_frames) + + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + + # 压缩图片尺寸和质量 + compressed_frames = [] + for frame in frames: + # 保持宽高比的情况下调整大小 + frame.thumbnail((600, 600), Image.Resampling.LANCZOS) + buffered = io.BytesIO() + frame.save(buffered, format="JPEG", quality=85) + compressed_frames.append(Image.open(buffered)) + + print(f'处理后的帧数: {len(compressed_frames)}') + return compressed_frames + + def process_video(self, video_data, object_name): + if not video_data: + raise ValueError(f"Empty video data for {object_name}") + print(f"Processing video: {object_name}, data size: {len(video_data)} bytes") + frames = self.encode_video(video_data) + + # 构建单个请求的消息内容 + messages = [{ + "role": "user", + "content": [ + { + "type": "text", + "text": """请将这些图片作为一个时间序列进行详细分析,包括以下方面: + 1. 场景中人数的精确统计 + 2. 每个人的个人行为分析 + 3. 面部表情识别和情绪状态评估 + 4. 整体场景和环境的详细描述 + 5. 
人与人之间的互动情况 + 6. 详细的环境条件描述 + 7. 环境中出现的物品和家具 + 8. 任何可疑或异常活动 + 9. 人员的具体特征(估计年龄范围、性别、着装) + 10. 人员的移动模式和方向 + 11. 携带的物品或物体 + 12. 群体动态和聚集情况 + 13. 视频中的时间戳分析(如果有)""" + } + ] + }] + + # 一次性添加所有图片到消息内容 + for frame in frames: + base64_image = self.image_to_base64(frame) + messages[0]["content"].append({ + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "auto" + } + }) + + try: + response = self._make_api_request(messages) + answer = response["choices"][0]["message"]["content"] + extracted_info = self.extract_info(answer) + + return { + "original_answer": answer, + "extracted_info": extracted_info, + "num_frames": len(frames), + } + + except Exception as e: + print(f"API请求失败: {str(e)}") + raise + + def _make_api_request(self, messages): + payload = { + "model": "deepseek-ai/deepseek-vl2", + "messages": messages, + "stream": False, + "max_tokens": 1024, + "temperature": 0.7, + "top_p": 0.7, + "top_k": 50, + "frequency_penalty": 0.5, + "n": 1, + "response_format": {"type": "text"} + } + + headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" + } + + response = requests.post( + SILICONFLOW_URL, + json=payload, + headers=headers, + timeout=60 # 增加超时时间到60秒 + ) + + if response.status_code != 200: + raise Exception(f"Siliconflow API 错误: {response.status_code}") + + return response.json() + + @staticmethod + def image_to_base64(image): + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode() + + @staticmethod + def extract_time_from_filename(object_name): + filename = os.path.basename(object_name) + time_str = filename.split('_')[0] + '_' + filename.split('_')[1].split('.')[0] + + try: + start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S") + end_time = start_time + timedelta(seconds=10) + return start_time, end_time + except ValueError: + print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。") + return datetime.now(), datetime.now() + 
timedelta(seconds=10) + + @staticmethod + def extract_info(answer): + info = { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "features": [] + } + + environments = ["办公室", "室内", "室外", "会议室"] + for env in environments: + if env in answer.lower(): + info["environment"] = env + break + + people_patterns = [ + r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)', + r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?', + r'(男|女)(性|生|士)', + r'(成年|未成年|青少年|老年)\s*(人|群体)', + r'(员工|职工|工人|学生|顾客|观众|游客|乘客)', + r'(群众|民众|大众|公众)', + r'(男女|老少|老幼|大人|小孩)' + ] + for pattern in people_patterns: + match = re.search(pattern, answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['一个', '一']: + info["num_people"] = 1 + else: + num_word_to_digit = { + '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + actions = ["睡眠","坐", "吃","站", "摔倒", "跳舞", "蹲","蹲下","转身", "摔", "倒", "倒下", "躺下", "转身", "转","跳跃", "跳", "躺", "睡", "说话","睡觉","起床","看书","写字","学习","玩手机","吃饭","搬东西","看风景","走路","散步","走","阅读","写作","使用手机","使用电脑","学习","工作","使用笔记本电脑","吃饭","喝水","整理"] + for action in actions: + if action in answer: + info["actions"].append(action) + emotions = ["高兴", "愤怒", "悲伤", "惊讶", "恐惧", "厌恶", "平静","放松","中性","专注","思考"] + objects = ["水瓶", "办公用品", "文件", "电脑","风扇","鼠标","键盘","纸巾","书","笔","袋子","盒子","水杯","杯子","马克杯","玻璃杯","文件夹","书包","书架","文件柜","手机"] + furniture = ["椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发","柜子","架子","摄像头","靠垫","办公椅","电视","白板","显示器","置物架","文件架"] + features = ["戴眼镜","不戴眼镜","长发","短发","长头发","短头发","戴帽子","不戴帽子","戴口罩","不戴口罩","男性","女性","胖","瘦","高","矮","男","女","成年人"] + + for obj in objects: + if obj in answer: + info["objects"].append(obj) + + for item in furniture: + if item in answer: + 
info["furniture"].append(item) + + for feature in features: + if feature in answer: + info["features"].append(feature) + + for emotion in emotions: + if emotion in answer: + info["emotions"].append(emotion) + + return info + +# 初始化 MediaAnalysisSystem +media_analysis_system = MediaAnalysisSystem() + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def process_video_folder(system, folder_path, output_path=None): + """处理文件夹中的所有视频文件并保存结果""" + # 支持的视频格式 + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + results = {} + + # 确保文件夹存在 + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + # 设置输出路径 + if output_path is None: + output_path = os.getcwd() # 如果未指定,使用当前目录 + elif not os.path.exists(output_path): + os.makedirs(output_path) # 如果输出目录不存在,创建它 + + # 获取所有视频文件 + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + # 生成输出文件名 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + folder_name = os.path.basename(os.path.normpath(folder_path)) + output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json") + + # 处理每个视频文件并实时保存结果 + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + with open(video_path, "rb") as f: + video_data = f.read() + result = system.process_video(video_data, video_file) + # 修改结果存储格式 + results[video_file] = { + "video_analysis": { + "deepseek-vl2": result + } + } + + # 实时保存当前结果到JSON文件 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"✓ 成功处理并保存: {video_file}") + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + results[video_file] = { + "video_analysis": 
{ + "deepseek-vl2": {"error": str(e)} + } + } + # 即使处理失败也保存当前结果 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"\n所有分析结果已保存到: {output_file}") + return results + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def main(): + try: + system = MediaAnalysisSystem() + + # 添加文件夹路径输入处理 + folder_path = input("请输入视频文件夹路径: ").strip() + output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() + + # 如果用户没有输入输出路径,则使用None(将使用当前目录) + output_path = output_path if output_path else None + + # 处理文件夹中的视频 + results = process_video_folder(system, folder_path, output_path) + + # 显示处理统计 + success_count = sum(1 for r in results.values() if "error" not in r) + print(f"\n处理完成!成功: {success_count}/{len(results)}") + + except MediaAnalysisError as e: + print(f"\n错误: {str(e)}") + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_history/deep.py b/test_history/deep.py new file mode 100644 index 0000000..ff221a4 --- /dev/null +++ b/test_history/deep.py @@ -0,0 +1,328 @@ +import io +import os +import json +import base64 +import requests +import re +from PIL import Image +from datetime import datetime, timedelta +from decord import VideoReader, cpu + +SILICONFLOW_URL = "https://api.siliconflow.cn/v1/chat/completions" +API_KEY = "sk-ytxabphvgxrjbvnqiwercjyrabvlukwddqsmvnqnvwuazamd" + +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 5 # 最大帧数设为10 + self.MIN_NUM_FRAMES = 3 # 最小帧数设为3 + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file, ctx=cpu(0)) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + + # 确保帧数在3-10之间 + num_frames = min(max(3, len(frame_idx)), self.MAX_NUM_FRAMES) + if len(frame_idx) > 
num_frames: + frame_idx = uniform_sample(frame_idx, num_frames) + + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + + # 压缩图片尺寸和质量 + compressed_frames = [] + for frame in frames: + # 保持宽高比的情况下调整大小 + frame.thumbnail((600, 600), Image.Resampling.LANCZOS) + buffered = io.BytesIO() + frame.save(buffered, format="JPEG", quality=85) + compressed_frames.append(Image.open(buffered)) + + print(f'处理后的帧数: {len(compressed_frames)}') + return compressed_frames + + def process_video(self, video_data, object_name): + if not video_data: + raise ValueError(f"Empty video data for {object_name}") + print(f"Processing video: {object_name}, data size: {len(video_data)} bytes") + frames = self.encode_video(video_data) + + # 构建单个请求的消息内容 + messages = [{ + "role": "user", + "content": [ + { + "type": "text", + "text": """Please analyze these images as a time series in detail, including the following aspects: + 1. Exact count of people in the scene + 2. Individual behavior analysis for each person + 3. Facial expression recognition and emotional state assessment + 4. Overall scene and environment detailed description + 5. Interactions between people + 6. Detailed environmental conditions description + 7. Items and furniture appearing in the environment + 8. Any suspicious or abnormal activities + 9. Personnel specific characteristics (estimated age range, gender, clothing) + 10. Movement patterns and directions of people + 11. Carried items or objects + 12. Group dynamics and gathering situations + 13. 
Video timestamp analysis (if available)""" + } + ] + }] + + # 一次性添加所有图片到消息内容 + for frame in frames: + base64_image = self.image_to_base64(frame) + messages[0]["content"].append({ + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "auto" + } + }) + + try: + response = self._make_api_request(messages) + answer = response["choices"][0]["message"]["content"] + extracted_info = self.extract_info(answer) + + return { + "original_answer": answer, + "extracted_info": extracted_info, + "num_frames": len(frames), + } + + except Exception as e: + print(f"API请求失败: {str(e)}") + raise + + def _make_api_request(self, messages): + payload = { + "model": "deepseek-ai/deepseek-vl2", + "messages": messages, + "stream": False, + "max_tokens": 1024, + "temperature": 0.7, + "top_p": 0.7, + "top_k": 50, + "frequency_penalty": 0.5, + "n": 1, + "response_format": {"type": "text"} + } + + headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" + } + + response = requests.post( + SILICONFLOW_URL, + json=payload, + headers=headers, + timeout=60 # 增加超时时间到60秒 + ) + + if response.status_code != 200: + raise Exception(f"Siliconflow API 错误: {response.status_code}") + + return response.json() + + @staticmethod + def image_to_base64(image): + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode() + + @staticmethod + def extract_time_from_filename(object_name): + filename = os.path.basename(object_name) + time_str = filename.split('_')[0] + '_' + filename.split('_')[1].split('.')[0] + + try: + start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S") + end_time = start_time + timedelta(seconds=10) + return start_time, end_time + except ValueError: + print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。") + return datetime.now(), datetime.now() + timedelta(seconds=10) + + @staticmethod + def extract_info(answer): + info = { + "environment": None, + "num_people": None, + 
"actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "features": [] + } + + environments = ["office", "indoor", "outdoor", "meeting room"] + for env in environments: + if env.lower() in answer.lower(): + info["environment"] = env + break + + people_patterns = [ + r'(\d+)\s*(person|people|individual|staff|user|child|adult|female|male)', + r'(one|two|three|four|five|six|seven|eight|nine|ten)\s*(person|people|individual|staff|user|child|adult|female|male)', + r'(a|few)\s*(person|people|individual|staff|user|child|adult|female|male)', + r'several\s*(person|people|individual|staff|user|child|adult|female|male)?', + r'(male|female)', + r'(adult|minor|youth|elderly)\s*(person|group)', + r'(employee|worker|student|customer|audience|visitor|passenger)', + r'(crowd|public|mass|people)', + r'(men|women|old|young|adult|child)' + ] + for pattern in people_patterns: + match = re.search(pattern, answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['a', 'one','an']: + info["num_people"] = 1 + else: + num_word_to_digit = { + 'two': 2, 'three': 3, 'four': 4, 'five': 5, + 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + actions = ["sleeping", "sitting", "eating", "standing", "falling", "dancing", "squatting", "crouching", "turning", "falling down", "lying down", "turning around", "jumping", "lying", "sleeping", "talking", "waking up", "reading", "writing", "studying", "using phone", "dining", "moving things", "sightseeing", "walking", "strolling", "reading", "writing", "using phone", "using computer", "studying", "working", "using laptop", "eating", "drinking", "organizing"] + for action in actions: + if action in answer: + info["actions"].append(action) + emotions = ["happy", "angry", "sad", "surprised", "scared", "disgusted", "calm", "relaxed", "neutral", "focused", "thinking"] + objects = ["water bottle", "office 
supplies", "documents", "computer", "fan", "mouse", "keyboard", "tissue", "book", "pen", "bag", "box", "water cup", "cup", "mug", "glass", "folder", "backpack", "bookshelf", "filing cabinet", "phone"] + furniture = ["chair", "desk", "coffee table", "filing cabinet", "bed", "sofa", "cabinet", "shelf", "camera", "cushion", "office chair", "TV", "whiteboard", "monitor", "storage rack", "file rack"] + features = ["wearing glasses", "not wearing glasses", "long hair", "short hair", "long hair", "short hair", "wearing hat", "not wearing hat", "wearing mask", "not wearing mask", "male", "female", "fat", "thin", "tall", "short", "man", "woman", "adult"] + + for obj in objects: + if obj in answer: + info["objects"].append(obj) + + for item in furniture: + if item in answer: + info["furniture"].append(item) + + for feature in features: + if feature in answer: + info["features"].append(feature) + + for emotion in emotions: + if emotion in answer: + info["emotions"].append(emotion) + + return info + +# 初始化 MediaAnalysisSystem +media_analysis_system = MediaAnalysisSystem() + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def process_video_folder(system, folder_path, output_path=None): + """处理文件夹中的所有视频文件并保存结果""" + # 支持的视频格式 + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + results = {} + + # 确保文件夹存在 + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + # 设置输出路径 + if output_path is None: + output_path = os.getcwd() # 如果未指定,使用当前目录 + elif not os.path.exists(output_path): + os.makedirs(output_path) # 如果输出目录不存在,创建它 + + # 获取所有视频文件 + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + # 生成输出文件名 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + folder_name = 
os.path.basename(os.path.normpath(folder_path)) + output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json") + + # 处理每个视频文件并实时保存结果 + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + with open(video_path, "rb") as f: + video_data = f.read() + result = system.process_video(video_data, video_file) + # 修改结果存储格式 + results[video_file] = { + "video_analysis": { + "deepseek-vl2": result + } + } + + # 实时保存当前结果到JSON文件 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"✓ 成功处理并保存: {video_file}") + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + results[video_file] = { + "video_analysis": { + "deepseek-vl2": {"error": str(e)} + } + } + # 即使处理失败也保存当前结果 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"\n所有分析结果已保存到: {output_file}") + return results + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def main(): + try: + system = MediaAnalysisSystem() + + # 添加文件夹路径输入处理 + folder_path = input("请输入视频文件夹路径: ").strip() + output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() + + # 如果用户没有输入输出路径,则使用None(将使用当前目录) + output_path = output_path if output_path else None + + # 处理文件夹中的视频 + results = process_video_folder(system, folder_path, output_path) + + # 显示处理统计 + success_count = sum(1 for r in results.values() if "error" not in r) + print(f"\n处理完成!成功: {success_count}/{len(results)}") + + except MediaAnalysisError as e: + print(f"\n错误: {str(e)}") + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_history/face_monitor.py b/test_history/face_monitor.py new file mode 100644 index 0000000..77e7c87 --- /dev/null +++ b/test_history/face_monitor.py @@ -0,0 +1,297 @@ +import os +import 
json +import time +from datetime import datetime +import redis +from deepface import DeepFace +import numpy as np +import gc +import re + +class FaceAnalysisSystem: + def __init__(self): + # Redis配置 + self.redis_clients = { + 'A01': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=210 + ), + 'B02': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=211 + ) + } + # 身份信息数据库 + self.identity_db = redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=212 + ) + + def get_face_embedding(self, img_path): + """获取人脸embedding""" + try: + embedding_obj = DeepFace.represent( + img_path=img_path, + detector_backend="retinaface", + align=True, + model_name="Facenet512" + ) + return embedding_obj[0]["embedding"] if embedding_obj else None + except Exception as e: + print(f"获取人脸embedding失败: {str(e)}") + return None + + def find_identity(self, embedding): + """在身份数据库中查找匹配的身份""" + try: + # 获取所有身份的embedding + all_identities = self.identity_db.keys("*") + best_match = None + best_similarity = -1 + + for identity_key in all_identities: + # 获取该身份的所有embedding + stored_data = json.loads(self.identity_db.get(identity_key)) + + # 如果存储的数据是列表(多个embedding) + if isinstance(stored_data, list): + # 对该身份的每个embedding进行比对 + for face_data in stored_data: + stored_vector = np.array(face_data["embedding"]) + + # 计算余弦相似度 + similarity = np.dot(embedding, stored_vector) / ( + np.linalg.norm(embedding) * np.linalg.norm(stored_vector) + ) + + if similarity > best_similarity: + best_similarity = similarity + best_match = identity_key.decode() + + # 如果相似度大于阈值,返回身份信息,否则返回unknown + if best_similarity > 0.72: # 可以调整阈值 + return best_match, best_similarity + return "unknown", best_similarity + + except Exception as e: + print(f"查找身份时出错: {str(e)}") + return "unknown", -1 + +class ImageMonitor: + def __init__(self, images_path): + self.images_path = images_path + self.system = FaceAnalysisSystem() + self.processed_images = 
set() + self.error_images = [] + self.error_image_cache = set() + + def _get_redis_key(self, image_path): + """生成Redis键值""" + try: + dir_name = os.path.basename(os.path.dirname(image_path)) + file_name = os.path.basename(image_path) + + # 从图片文件名中提取日期和时间 + # 假设文件名格式: A01_20250105_134104.jpg + match = re.search(r'(\w+)_(\d{8})_(\d{2})\d{4}\.(jpg|png)', file_name) + if match: + camera_id = match.group(1) + date = match.group(2) + hour = match.group(3) + + # 生成key: A01_20250105_1300 + redis_key = f"face_{camera_id}_{date}_{hour}00" + return redis_key + + print(f"文件名格式不匹配: {file_name}") + return None + + except Exception as e: + print(f"生成Redis key失败: {str(e)}") + return None + + def _is_processed(self, image_path): + """检查图片是否已处理""" + return image_path in self.processed_images + + def _is_error_cached(self, image_path): + """检查图片是否在错误缓存中""" + return image_path in self.error_image_cache + + def _add_to_error_cache(self, image_path): + """添加图片到错误缓存""" + self.error_image_cache.add(image_path) + + def _log_error(self, image_path, error_type, error_message): + """记录错误信息""" + if self._is_error_cached(image_path): + return + + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + error_info = { + "timestamp": current_time, + "image_path": image_path, + "error_type": error_type, + "error_message": error_message, + "file_size": os.path.getsize(image_path) if os.path.exists(image_path) else 0 + } + self.error_images.append(error_info) + self._add_to_error_cache(image_path) + + def _save_error_log(self): + """保存错误日志""" + if not self.error_images: + return + + try: + current_time = datetime.now().strftime("%Y%m%d_%H%M%S") + log_filename = f"image_errors_{current_time}.json" + + with open(log_filename, 'w', encoding='utf-8') as f: + json.dump(self.error_images, f, ensure_ascii=False, indent=2) + print(f"\n异常图片记录已保存到: {log_filename}") + + self.error_images = [] + except Exception as e: + print(f"保存错误日志失败: {str(e)}") + + def process_new_image(self, image_path): + """处理新图片""" 
+ try: + if self._is_error_cached(image_path): + return False + + file_name = os.path.basename(image_path) + if self._is_processed(image_path): + print(f"图片已处理过,跳过: {file_name}") + return True + + redis_key = self._get_redis_key(image_path) + if not redis_key: + self._log_error(image_path, "Redis Key Error", "无法生成Redis key") + return False + + if not os.path.exists(image_path): + self._log_error(image_path, "File Not Found", "图片文件不存在") + return False + + # 检查文件大小 + file_size = os.path.getsize(image_path) + if file_size == 0: + self._log_error(image_path, "Empty File", "图片文件大小为0") + return False + elif file_size < 10 * 1024: # 小于10KB + self._log_error(image_path, "Small File", f"图片文件大小异常({file_size/1024:.2f}KB)") + return False + + # 获取人脸embedding + embedding = self.system.get_face_embedding(image_path) + if embedding is None: + self._log_error(image_path, "Face Detection Error", "无法检测到人脸或提取特征") + return False + + # 查找身份 + identity, similarity = self.system.find_identity(embedding) + + # 从文件名提取时间戳 + timestamp_match = re.search(r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', file_name) + if timestamp_match: + year, month, day, hour, minute, second = timestamp_match.groups() + timestamp = f"{year}-{month}-{day} {hour}:{minute}:{second}" + else: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # 准备结果数据 + result = { + "face_analysis": { + "identity": identity, + "similarity": float(similarity) + }, + "timestamp": timestamp + } + + # 保存到Redis + dir_name = os.path.basename(os.path.dirname(image_path)) + if dir_name in self.system.redis_clients: + redis_client = self.system.redis_clients[dir_name] + + existing_data = redis_client.get(redis_key) + if existing_data: + hour_results = json.loads(existing_data) + hour_results[file_name] = result + else: + hour_results = {file_name: result} + + json_str = json.dumps(hour_results, ensure_ascii=False) + redis_client.set(redis_key, json_str) + print(f"成功保存到Redis,key: {redis_key}") + + 
self.processed_images.add(image_path) + return True + + except Exception as e: + self._log_error(image_path, "Processing Error", str(e)) + print(f"处理图片时发生错误 {image_path}: {str(e)}") + return False + finally: + gc.collect() + + def monitor_directories(self): + """监控目录变化""" + try: + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"开始监控目录: {self.images_path} [{current_time}]") + + while True: + try: + for camera_dir in os.listdir(self.images_path): + camera_path = os.path.join(self.images_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + for image_file in os.listdir(camera_path): + if not image_file.lower().endswith(('.jpg', '.jpeg', '.png')): + continue + + image_path = os.path.join(camera_path, image_file) + if not self._is_processed(image_path) and not self._is_error_cached(image_path): + print(f"处理图片: {image_path}") + if not self.process_new_image(image_path): + self._add_to_error_cache(image_path) + print(f"图片处理失败,已加入错误缓存: {image_path}") + continue + + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"[{current_time}] 等待新图片中...") + time.sleep(60) # 每分钟检查一次 + + except Exception as e: + print(f"监控过程出错: {str(e)}") + time.sleep(10) + + except KeyboardInterrupt: + print("\n检测到程序终止信号,正在保存错误日志...") + self._save_error_log() + print("程序已安全终止。") + except Exception as e: + print(f"\n程序异常终止: {str(e)}") + self._save_error_log() + raise + +def main(): + try: + images_path = "/home/zydi/VLM/images" # 设置images目录路径 + monitor = ImageMonitor(images_path) + monitor.monitor_directories() + + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_history/llama.py b/test_history/llama.py new file mode 100644 index 0000000..3484ca0 --- /dev/null +++ b/test_history/llama.py @@ -0,0 +1,291 @@ +import io +import os +import json +import base64 +import requests +import re +from PIL import Image +from datetime import datetime, timedelta +from decord 
import VideoReader, cpu + +OLLAMA_URL = "http://127.0.0.1:11434/api/generate" +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 16 + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file, ctx=cpu(0)) + # 修改采样逻辑,随机选择3-8帧 + num_frames = min(max(3, len(vr) // 30), 8) # 确保至少3帧,最多8帧 + frame_idx = uniform_sample(range(len(vr)), num_frames) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('采样帧数:', len(frames)) + return frames + + def process_video(self, video_data, object_name): + if not video_data: + raise ValueError(f"Empty video data for {object_name}") + print(f"处理视频: {object_name}, 数据大小: {len(video_data)} bytes") + + frames = self.encode_video(video_data) + all_responses = [] + + # 逐帧分析 + for i, frame in enumerate(frames): + print(f"Analyzing frame {i+1}/{len(frames)}...") + question = """Please provide a detailed analysis of this surveillance image, including the following aspects: + 1. Precise count of people in the scene + 2. Individual behavior analysis of each person + 3. Facial expression recognition and emotional state assessment + 4. Detailed description of overall scene and environment + 5. Interactions between people + 6. Detailed description of environmental conditions + 7. Items and furniture present in the environment + 8. Any suspicious or unusual activities + 9. Specific characteristics of people (estimated age range, gender, clothing) + 10. Movement patterns and directions of people + 11. Carried items or objects + 12. Group dynamics and gathering situations + 13. 
Analysis of video timestamp (if present) + + Please describe in a clear, organized format and highlight important findings.""" + payload = { + "model": "llama3.2-vision", # 使用llama2 13b模型 + "prompt": question, + "images": [self.image_to_base64(frame)] # 每次只发送一张图片 + } + + try: + response = requests.post(OLLAMA_URL, json=payload, stream=True) + if response.status_code == 200: + frame_answer = self.process_stream_response(response) + all_responses.append(frame_answer) + else: + raise Exception(f"Ollama API 错误: {response.status_code}") + except requests.RequestException as e: + print(f"请求 Ollama API 时出错: {str(e)}") + raise + + # 合并所有帧的分析结果 + combined_answer = "\n\n=== 视频总体分析 ===\n".join(all_responses) + extracted_info = self.extract_info(combined_answer) + + return { + "original_answer": combined_answer, + "extracted_info": extracted_info, + "num_frames": len(frames), + } + + def process_stream_response(self, response): + full_response = [] + for line in response.iter_lines(): + if line: + try: + json_response = json.loads(line) + if 'response' in json_response: + full_response.append(json_response['response']) + if json_response.get('done', False): + break + except json.JSONDecodeError: + print(f"无法解析 JSON 行: {line}") + return ''.join(full_response) + + @staticmethod + def image_to_base64(image): + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode() + + @staticmethod + def extract_time_from_filename(object_name): + filename = os.path.basename(object_name) + time_str = filename.split('_')[0] + '_' + filename.split('_')[1].split('.')[0] + + try: + start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S") + end_time = start_time + timedelta(seconds=10) + return start_time, end_time + except ValueError: + print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。") + return datetime.now(), datetime.now() + timedelta(seconds=10) + + @staticmethod + def extract_info(answer): + info = { + "environment": None, + "num_people": 
None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "features": [] + } + + environments = ["office", "indoor", "outdoor", "meeting room", "room", "classroom", "living room", "bedroom", "kitchen", "bathroom", "hallway", "corridor"] + for env in environments: + if env in answer.lower(): + info["environment"] = env + break + + people_patterns = [ + r'(\d+)\s*(person|people|individual|employee|user|child|adult|female|male)', + r'(one|two|three|four|five|six|seven|eight|nine|ten)\s*(person|people|individual|employee|user|child|adult|female|male)', + r'(a|few)\s*(person|people|employee|user|child|adult|female|male)', + r'several\s*(person|people|employee|user|child|adult|female|male)?', + r'(male|female)', + r'(adult|minor|teenager|elderly)\s*(person|group)', + r'(employee|worker|student|customer|audience|visitor|passenger)', + r'(crowd|public|people|mass)', + r'(men|women|adults|children)' + ] + for pattern in people_patterns: + match = re.search(pattern, answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['a', 'one',"an"]: + info["num_people"] = 1 + else: + num_word_to_digit = { + 'two': 2, 'three': 3, 'four': 4, 'five': 5, + 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + actions = ["sleeping", "sitting", "drinking", "eating", "standing", "falling", "dancing", "squatting", "squat", "turning", "fall", "falling down", "lying down", "turning around", "turn", "jumping", "jump", "lying", "sleep", "talking", "sleeping", "getting up", "reading", "writing", "studying", "phone", "eating", "moving things", "sightseeing", "walking", "strolling", "walk", "reading", "writing", "using phone", "computer", "studying", "working", "laptop", "eating", "drinking", "organizing"] + for action in actions: + if action in answer: + info["actions"].append(action) + emotions = ["happy", "angry", "sad", "surprised", 
"scared", "disgusted", "calm", "relaxed", "neutral", "focused", "thinking"] + objects = ["water bottle", "office supplies", "documents", "computer", "fan", "mouse", "keyboard", "tissue", "book", "pen", "bag", "box", "water cup", "cup", "mug", "glass", "folder", "backpack", "bookshelf", "file cabinet", "phone"] + furniture = ["chair", "table", "coffee table", "file cabinet", "bed", "sofa", "cabinet", "shelf", "camera", "cushion", "office chair", "TV", "whiteboard", "monitor", "storage rack", "file rack"] + features = ["wearing glasses", "not wearing glasses", "long hair", "short hair", "wearing hat", "not wearing hat", "wearing mask", "not wearing mask", "male", "female", "fat", "thin", "tall", "short", "man", "woman", "adult"] + + for obj in objects: + if obj in answer: + info["objects"].append(obj) + + for item in furniture: + if item in answer: + info["furniture"].append(item) + + for feature in features: + if feature in answer: + info["features"].append(feature) + + for emotion in emotions: + if emotion in answer: + info["emotions"].append(emotion) + + return info + +# 初始化 MediaAnalysisSystem +media_analysis_system = MediaAnalysisSystem() + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def process_video_folder(system, folder_path, output_path=None): + """处理文件夹中的所有视频文件并保存结果""" + # 支持的视频格式 + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + results = {} + + # 确保文件夹存在 + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + # 设置输出路径 + if output_path is None: + output_path = os.getcwd() # 如果未指定,使用当前目录 + elif not os.path.exists(output_path): + os.makedirs(output_path) # 如果输出目录不存在,创建它 + + # 获取所有视频文件 + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + # 生成输出文件名 + timestamp = 
datetime.now().strftime("%Y%m%d_%H%M%S") + folder_name = os.path.basename(os.path.normpath(folder_path)) + output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json") + + # 处理每个视频文件并实时保存结果 + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + with open(video_path, "rb") as f: + video_data = f.read() + result = system.process_video(video_data, video_file) + # 修改结果存储格式 + results[video_file] = { + "video_analysis": { + "llama3.2-vision": result + } + } + + # 实时保存当前结果到JSON文件 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"✓ 成功处理并保存: {video_file}") + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + results[video_file] = { + "video_analysis": { + "llama3.2-vision": {"error": str(e)} + } + } + # 即使处理失败也保存当前结果 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"\n所有分析结果已保存到: {output_file}") + return results + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def main(): + try: + system = MediaAnalysisSystem() + + # 添加文件夹路径输入处理 + folder_path = input("请输入视频文件夹路径: ").strip() + output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() + + # 如果用户没有输入输出路径,则使用None(将使用当前目录) + output_path = output_path if output_path else None + + # 处理文件夹中的视频 + results = process_video_folder(system, folder_path, output_path) + + # 显示处理统计 + success_count = sum(1 for r in results.values() if "error" not in r) + print(f"\n处理完成!成功: {success_count}/{len(results)}") + + except MediaAnalysisError as e: + print(f"\n错误: {str(e)}") + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_history/oll.py b/test_history/oll.py new file mode 100644 index 0000000..043a074 --- /dev/null +++ b/test_history/oll.py @@ 
-0,0 +1,305 @@ +import io +import os +import json +import base64 +import requests +import re +from PIL import Image +from datetime import datetime, timedelta +from decord import VideoReader, cpu + +OLLAMA_URL = "http://127.0.0.1:11434/api/generate" +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 16 + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file, ctx=cpu(0)) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + if len(frame_idx) > self.MAX_NUM_FRAMES: + frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('num frames:', len(frames)) + return frames + + def process_video(self, video_data, object_name): + if not video_data: + raise ValueError(f"Empty video data for {object_name}") + print(f"Processing video: {object_name}, data size: {len(video_data)} bytes") + frames = self.encode_video(video_data) + question = """你是一位视频描述专家,你擅长对视频进行详细的描述,请对这段监控视频进行详细分析,包括以下方面,并按照下面格式回答: + 1. 环境场景 + - 整体场景描述(室内/室外、光线条件等) + - 主要物品和家具列表 + - 环境特征(如光线、整洁度等) + + 2. 人员统计 + - 总人数:[数字]人 + - 性别分布:[男性数量]/[女性数量] + (若无法确定准确人数,请注明"无法确定人数") + + 3. 人员特征分析 + - 个人特征:性别、年龄段、着装、体态等 + - 携带物品:详细描述随身物品及用途 + - 表情/情绪状态 + + 4. 行为分析 + - 个人行为:移动方向、姿态、动作等 + - 互动情况:人员之间的交互描述(若多人) + - 活动区域:人员活动的主要位置 + + 5. 群体行为(若多人) + - 聚集形态 + - 移动趋势 + - 群体互动特点 + + 6. 
异常情况 + - 可疑行为描述 + - 异常活动标记 + - 需要注意的安全隐患 + + 请用清晰、有条理的格式描述,并突出重要发现。""" + + encoded_frames = [self.image_to_base64(frame) for frame in frames] + + payload = { + "model": "minicpm-v", + "prompt": question, + "images": encoded_frames + } + + try: + response = requests.post(OLLAMA_URL, json=payload, stream=True) + print(f"Ollama API 响应状态码: {response.status_code}") + print(f"Ollama API 响应头: {response.headers}") + + if response.status_code == 200: + answer = self.process_stream_response(response) + else: + raise Exception(f"Ollama API 错误: {response.status_code}") + except requests.RequestException as e: + print(f"请求 Ollama API 时出错: {str(e)}") + raise + + extracted_info = self.extract_info(answer) + + return { + "original_answer": answer, + "extracted_info": extracted_info, + "num_frames": len(frames), + } + + def process_stream_response(self, response): + full_response = [] + for line in response.iter_lines(): + if line: + try: + json_response = json.loads(line) + if 'response' in json_response: + full_response.append(json_response['response']) + if json_response.get('done', False): + break + except json.JSONDecodeError: + print(f"无法解析 JSON 行: {line}") + return ''.join(full_response) + + @staticmethod + def image_to_base64(image): + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode() + + @staticmethod + def extract_time_from_filename(object_name): + filename = os.path.basename(object_name) + time_str = filename.split('_')[0] + '_' + filename.split('_')[1].split('.')[0] + + try: + start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S") + end_time = start_time + timedelta(seconds=10) + return start_time, end_time + except ValueError: + print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。") + return datetime.now(), datetime.now() + timedelta(seconds=10) + + @staticmethod + def extract_info(answer): + info = { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": 
[], + "features": [] + } + + environments = ["办公室", "室内", "室外", "会议室", "房间", "教室", "客厅", "卧室", "厨房", "浴室", "走廊", "过道"] + for env in environments: + if env in answer.lower(): + info["environment"] = env + break + + people_patterns = [ + r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)', + r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?', + r'(男|女)(性|生|士)', + r'(成年|未成年|青少年|老年)\s*(人|群体)', + r'(员工|职工|工人|学生|顾客|观众|游客|乘客)', + r'(群众|民众|大众|公众)', + r'(男女|老少|老幼|大人|小孩)' + ] + for pattern in people_patterns: + match = re.search(pattern, answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['一个', '一']: + info["num_people"] = 1 + else: + num_word_to_digit = { + '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + actions = ["睡眠","坐", "喝","吃","站", "摔倒", "跳舞", "蹲","蹲下","转身", "摔", "倒", "倒下", "躺下", "转身", "转","跳跃", "跳", "躺", "睡", "说话","睡觉","起床","看书","写字","学习","玩手机","吃饭","搬东西","看风景","走路","散步","走","阅读","写作","使用手机","使用电脑","学习","工作","使用笔记本电脑","吃饭","喝水","整理"] + for action in actions: + if action in answer: + info["actions"].append(action) + emotions = ["高兴", "愤怒", "悲伤", "惊讶", "恐惧", "厌恶", "平静","放松","中性","专注","思考"] + objects = ["水瓶", "办公用品", "文件", "电脑","风扇","鼠标","键盘","纸巾","书","笔","袋子","盒子","水杯","杯子","马克杯","玻璃杯","文件夹","书包","书架","文件柜","手机"] + furniture = ["椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发","柜子","架子","摄像头","靠垫","办公椅","电视","白板","显示器","置物架","文件架"] + features = ["戴眼镜","不戴眼镜","长发","短发","长头发","短头发","戴帽子","不戴帽子","戴口罩","不戴口罩","男性","女性","胖","瘦","高","矮","男","女","成年人"] + + for obj in objects: + if obj in answer: + info["objects"].append(obj) + + for item in furniture: + if item in answer: + info["furniture"].append(item) + + for feature in features: + if feature in answer: + info["features"].append(feature) + + for emotion in emotions: + if emotion in 
answer: + info["emotions"].append(emotion) + + return info + +# 初始化 MediaAnalysisSystem +media_analysis_system = MediaAnalysisSystem() + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def process_video_folder(system, folder_path, output_path=None): + """处理文件夹中的所有视频文件并保存结果""" + # 支持的视频格式 + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + results = {} + + # 确保文件夹存在 + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + # 设置输出路径 + if output_path is None: + output_path = os.getcwd() # 如果未指定,使用当前目录 + elif not os.path.exists(output_path): + os.makedirs(output_path) # 如果输出目录不存在,创建它 + + # 获取所有视频文件 + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + # 生成输出文件名 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + folder_name = os.path.basename(os.path.normpath(folder_path)) + output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json") + + # 处理每个视频文件并实时保存结果 + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + with open(video_path, "rb") as f: + video_data = f.read() + result = system.process_video(video_data, video_file) + # 修改结果存储格式 + results[video_file] = { + "video_analysis": { + "minicpm": result + } + } + + # 实时保存当前结果到JSON文件 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"✓ 成功处理并保存: {video_file}") + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + results[video_file] = { + "video_analysis": { + "minicpm": {"error": str(e)} + } + } + # 即使处理失败也保存当前结果 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + 
+ print(f"\n所有分析结果已保存到: {output_file}") + return results + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def main(): + try: + system = MediaAnalysisSystem() + + # 添加文件夹路径输入处理 + folder_path = input("请输入视频文件夹路径: ").strip() + output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() + + # 如果用户没有输入输出路径,则使用None(将使用当前目录) + output_path = output_path if output_path else None + + # 处理文件夹中的视频 + results = process_video_folder(system, folder_path, output_path) + + # 显示处理统计 + success_count = sum(1 for r in results.values() if "error" not in r) + print(f"\n处理完成!成功: {success_count}/{len(results)}") + + except MediaAnalysisError as e: + print(f"\n错误: {str(e)}") + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_history/qwen copy.py b/test_history/qwen copy.py new file mode 100644 index 0000000..9b796be --- /dev/null +++ b/test_history/qwen copy.py @@ -0,0 +1,554 @@ +import os +import json +import torch +from datetime import datetime +from PIL import Image +import io +import re +from decord import VideoReader +from transformers import Qwen2VLForConditionalGeneration, AutoProcessor +from qwen_vl_utils import process_vision_info +import redis +import time +import gc + +# 配置 +QWEN_MODEL_PATH = "/obscura/models/qwen/Qwen2-VL-7B-Instruct" + +# 初始化 Qwen 模型 (使用 cuda:0) +print("正在初始化 Qwen 模型 (cuda:0)...") +model = Qwen2VLForConditionalGeneration.from_pretrained( + QWEN_MODEL_PATH, + torch_dtype="auto", + device_map="cuda:0" +) + +min_pixels = 128*28*28 +max_pixels = 256*28*28 +processor = AutoProcessor.from_pretrained( + QWEN_MODEL_PATH, + min_pixels=min_pixels, + max_pixels=max_pixels +) + +# 在文件开头添加加载配置的代码 +def load_config(): + """加载配置文件""" + try: + with open('info.json', 'r', encoding='utf-8') as f: + config = json.load(f) + return config + except Exception as e: + print(f"加载配置文件失败: {e}") + return {"actions": [], "environments": []} + +# 加载配置 +CONFIG = load_config() + +class 
MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 10 + self.device = "cuda:0" + self.qwen_model = model + self.qwen_processor = processor + # 使用加载的配置 + self.environments = CONFIG["environments"] + self.actions = CONFIG["actions"] + self.emotions = [ + "钦佩", "赞赏", "欣赏","关心", "高兴", "爱", "乐观", "感激", "释然", "骄傲", "愉悦", + "愤怒", "烦恼", "焦虑", "尴尬", "失望", "厌恶", "恐惧", "悲伤", "懊悔", "羞耻","发呆", + "困惑", "好奇", "欲望", "惊讶", "实事求是", "中性", "赞叹","平静","放松","专注","思考" + ] + self.objects = [ + "办公桌椅","文件柜","打印机","饮水机","装饰植物","书架","储物柜","水瓶", "办公用品", "文件", "电脑","风扇","鼠标","键盘","纸巾","书","笔","袋子","盒子","水杯","杯子","马克杯","玻璃杯","文件夹","书包","书架","手机" + ] + self.furniture = [ + "椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发","柜子","架子","摄像头","靠垫","办公椅","电视","白板","显示器","置物架","文件架" + ] + self.features = [ + "戴眼镜","不戴眼镜","长发","短发","长头发","短头发","戴帽子","不戴帽子","戴口罩","不戴口罩","男性","女性","胖","瘦","高","矮","男","女","成年人" + ] + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + if len(frame_idx) > self.MAX_NUM_FRAMES: + frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('num frames:', len(frames)) + return frames + + def process_with_qwen(self, media_data, object_name, media_type='image'): + """使用 Qwen 模型处理媒体""" + + if media_type == 'video': + frames = self.encode_video(media_data) + media_content = {"type": "video", "video": frames, "fps": 1.0} + else: + image = Image.open(io.BytesIO(media_data)) + media_content = {"type": "image", "image": image} + + messages = [ + { + "role": "user", + "content": [ + media_content, + {"type": "text", "text": self._get_analysis_prompt(media_type)} + ], + } + ] + + text = 
self.qwen_processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + image_inputs, video_inputs = process_vision_info(messages) + inputs = self.qwen_processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to(self.device) + generated_ids = self.qwen_model.generate(**inputs, max_new_tokens=2048) + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + answer = self.qwen_processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + )[0] + + return { + "model": "qwen", + "original_answer": answer, + "extracted_info": self.extract_info(answer) + } + + def _get_analysis_prompt(self, media_type): + """获取分析提示词""" + return f"""你是一位视频描述专家,你擅长对视频进行详细的描述,请对这段监控视频进行详细分析,包括以下方面,并按照下面格式回答: + 1. 环境场景 + - 整体场景描述(室内/室外、光线条件等) + - 主要物品和家具列表 + - 环境特征(如光线、整洁度等) + + 2. 人员统计 + - 总人数:[数字]人 + - 性别分布:[男性数量]/[女性数量] + (若无法确定准确人数,请注明"无法确定人数") + + 3. 人员特征分析 + - 个人特征:性别、年龄段、着装、体态等 + - 携带物品:详细描述随身物品及用途 + - 表情/情绪状态 + + 4. 行为分析 + - 个人行为:移动方向、姿态、动作等 + - 互动情况:人员之间的交互描述(若多人) + - 活动区域:人员活动的主要位置 + + 5. 群体行为(若多人) + - 聚集形态 + - 移动趋势 + - 群体互动特点 + + 6. 
异常情况 + - 可疑行为描述 + - 异常活动标记 + - 需要注意的安全隐患 + + 请用清晰、有条理的格式描述,并突出重要发现。""" + + def extract_info(self, answer): + """提取中文信息""" + info = { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "features": [] + } + + # 将回答按章节分割 + sections = {} + current_section = None + for line in answer.split('\n'): + if line.startswith('###'): + current_section = line.strip('# ').lower() + sections[current_section] = [] + elif current_section and line.strip(): + sections[current_section].append(line.strip()) + + # 从"行为分析"部分提取动作 + if '行为分析' in sections: + behavior_text = ' '.join(sections['行为分析']) + # 使用加载的动作列表 + for action in self.actions: + if action in behavior_text: + if action not in info["actions"]: # 避免重复 + info["actions"].append(action) + + # 从"环境场景"部分提取物品和家具 + if '环境场景' in sections: + scene_text = ' '.join(sections['环境场景']) + for obj in self.objects: # 假设已将objects移到类属性 + if obj in scene_text: + if obj not in info["objects"]: + info["objects"].append(obj) + + for item in self.furniture: # 假设已将furniture移到类属性 + if item in scene_text: + if item not in info["furniture"]: + info["furniture"].append(item) + + # 从"人员特征分析"部分提取特征和情绪 + if '人员特征分析' in sections: + feature_text = ' '.join(sections['人员特征分析']) + for feature in self.features: # 假设已将features移到类属性 + if feature in feature_text: + if feature not in info["features"]: + info["features"].append(feature) + + for emotion in self.emotions: # 假设已将emotions移到类属性 + if emotion in feature_text: + if emotion not in info["emotions"]: + info["emotions"].append(emotion) + + # 中文数字模式 + people_patterns = [ + r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)', + r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?', + r'(男|女)(性|生|士)', + r'(成年|未成年|青少年|老年)\s*(人|群体)', + r'(员工|职工|工人|学生|顾客|观众|游客|乘客)', + r'(群众|民众|大众|公众)', + r'(男女|老少|老幼|大人|小孩)' + ] + for pattern in people_patterns: + match = re.search(pattern, 
answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['一个', '一']: + info["num_people"] = 1 + else: + num_word_to_digit = { + '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + return info + + +def process_video_folder(system, folder_path): + """处理文件夹中的所有视频文件并保存到Redis""" + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + # 创建VideoMonitor实例用于Redis操作 + monitor = VideoMonitor(folder_path, system) + + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + # 使用VideoMonitor的process_new_video方法处理并保存到Redis + monitor.process_new_video(video_path) + print(f"✓ 成功处理并保存到Redis: {video_file}") + + # 清理内存 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + import gc + gc.collect() + + except Exception as e: + print(f"✗ 处理失败 {video_file}: {str(e)}") + + print(f"\n所有视频处理完成") + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +# 在 MediaAnalysisSystem 类后添加新的监听类 +class VideoMonitor: + def __init__(self, recordings_path, system): + self.recordings_path = recordings_path + self.system = system + self.redis_clients = { + 'A01': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=210 + ), + 'B02': redis.Redis( + host="222.186.10.253", + port=6379, + password="Obscura@2024", + db=211 + ) + } + # 新增:初始化时加载已处理的视频记录 + self.processed_videos = self._load_processed_videos() + + def 
_load_processed_videos(self): + """从Redis加载所有已处理的视频文件名""" + processed_videos = set() + try: + for camera_id, redis_client in self.redis_clients.items(): + # 获取所有小时级别的键 + for key in redis_client.keys('*'): + key_str = key.decode('utf-8') + # 只获取键中存储的文件名列表,而不是完整的处理结果 + data = redis_client.get(key) + if data: + hour_results = json.loads(data) + # 只添加文件名到集合中 + processed_videos.update(hour_results.keys()) + + print(f"已从Redis加载 {len(processed_videos)} 个已处理文件记录") + return processed_videos + + except Exception as e: + print(f"加载Redis处理记录时出错: {str(e)}") + return set() + + def _get_redis_key(self, video_path): + try: + # 从路径获取摄像头ID (目录名) + dir_name = os.path.basename(os.path.dirname(video_path)) + file_name = os.path.basename(video_path) # 例如:A01_20250105_134104.avi + + + # 从视频文件名中提取日期和时间 + match = re.search(r'(\w+)_(\d{8})_(\d{2})\d{4}\.avi', file_name) + if match: + camera_id = match.group(1) # A01 + date = match.group(2) # 20250105 + hour = match.group(3) # 13 (从134104中提取) + + + # 生成正确的key: A01_20250105_1300 + redis_key = f"{camera_id}_{date}_{hour}00" + return redis_key + + print(f"文件名格式不匹配: {file_name}") + return None + + except Exception as e: + print(f"生成Redis key失败: {str(e)}") + return None + + def _is_processed(self, video_path): + """检查视频是否已处理""" + file_name = os.path.basename(video_path) + return file_name in self.processed_videos + + def process_new_video(self, video_path): + try: + # 处理前清理 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + file_name = os.path.basename(video_path) + # 检查是否已处理 + if self._is_processed(video_path): + print(f"视频已处理过,跳过: {file_name}") + return + + # 获取camera_id和时间戳 + dir_name = os.path.basename(os.path.dirname(video_path)) + file_name = os.path.basename(video_path) + + # 使用_get_redis_key获取正确的key + redis_key = self._get_redis_key(video_path) + if not redis_key: + print(f"无法生成Redis key,跳过处理: {file_name}") + return + + # 添加视频文件检查 + if not os.path.exists(video_path): + print(f"警告:视频文件不存在,跳过处理: {video_path}") + 
return False + + if os.path.getsize(video_path) == 0: + print(f"警告:视频文件大小为0,跳过处理: {video_path}") + return False + + # 处理视频 + try: + with open(video_path, "rb") as f: + video_data = f.read() + try: + qwen_result = self.system.process_with_qwen(video_data, file_name, media_type='video') + except Exception as e: + print(f"处理视频内容失败,可能是损坏的视频文件: {file_name}") + print(f"错误详情: {str(e)}") + return False + + # 从文件名提取时间戳 + timestamp_match = re.search(r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', file_name) + if timestamp_match: + year, month, day, hour, minute, second = timestamp_match.groups() + # 构建正确的时间戳格式 (YYYY-MM-DD HH:MM:SS) + timestamp = f"{year}-{month}-{day} {hour}:{minute}:{second}" + else: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + result = { + "video_analysis": { + "qwen-7B": { + "original_answer": qwen_result["original_answer"], + "extracted_info": qwen_result["extracted_info"] + } + }, + "timestamp": timestamp # 使用从文件名提取的时间戳 + } + + # 保存到对应的Redis数据库 + if dir_name in self.redis_clients: + redis_client = self.redis_clients[dir_name] + + # 获取现有的小时数据(如果存在) + existing_data = redis_client.get(redis_key) + if existing_data: + hour_results = json.loads(existing_data) + hour_results[file_name] = result + else: + hour_results = {file_name: result} + + # 保存更新后的数据 + json_str = json.dumps(hour_results, ensure_ascii=False) + redis_client.set(redis_key, json_str) + print(f"成功保存到Redis,使用的key: {redis_key}") # 调试信息 + + # 处理完成后,更新内存中的记录 + self.processed_videos.add(file_name) + + except Exception as e: + print(f"读取视频文件失败: {str(e)}") + return False + + except Exception as e: + print(f"处理视频时发生错误 {video_path}: {str(e)}") + return False + finally: + # 确保内存清理总是执行 + if torch.cuda.is_available(): + try: + torch.cuda.empty_cache() + gc.collect() + except Exception as e: + print(f"清理GPU内存时发生错误: {str(e)}") + + return True + + def process_existing_videos(self): + """处理目录中现有的视频文件""" + videos_found = False + videos_processed = False # 新增标志,用于跟踪是否实际处理了视频 + + for camera_dir 
in os.listdir(self.recordings_path): + camera_path = os.path.join(self.recordings_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + # 获取所有.avi文件并按时间排序 + video_files = [] + for video_file in os.listdir(camera_path): + if video_file.endswith('.avi'): + video_path = os.path.join(camera_path, video_file) + video_files.append((video_path, os.path.getmtime(video_path))) + + if video_files: + videos_found = True + # 按修改时间排序 + video_files.sort(key=lambda x: x[1]) + + for video_path, _ in video_files: + if not self._is_processed(video_path): + print(f"处理现有视频: {video_path}") + self.process_new_video(video_path) + videos_processed = True # 标记已处理视频 + + # 只有当找到视频并且实际处理了视频时才返回True + return videos_found and videos_processed + + def monitor_directories(self): + """监控目录变化""" + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"开始监控目录: {self.recordings_path} [{current_time}]") + + while True: + try: + # 首先处理现有视频 + for camera_dir in os.listdir(self.recordings_path): + camera_path = os.path.join(self.recordings_path, camera_dir) + if not os.path.isdir(camera_path): + continue + + for video_file in os.listdir(camera_path): + if not video_file.endswith('.avi'): + continue + + video_path = os.path.join(camera_path, video_file) + if not self._is_processed(video_path): + print(f"处理视频: {video_path}") + if not self.process_new_video(video_path): + print(f"视频处理失败,继续处理下一个: {video_path}") + continue + + # 添加状态提示 + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f"[{current_time}] 等待新视频中...") + + # 休眠一段时间再检查 + time.sleep(120) + + except Exception as e: + print(f"监控过程出错: {str(e)}") + time.sleep(30) # 出错后等待30秒再继续 + +def main(): + try: + system = MediaAnalysisSystem() + recordings_path = "/home/zydi/VLM/recordings" # 设置recordings目录路径 + + # 创建并启动监控器 + monitor = VideoMonitor(recordings_path, system) + monitor.monitor_directories() + + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at 
end of file diff --git a/test_history/qwen.py b/test_history/qwen.py new file mode 100644 index 0000000..4cff78e --- /dev/null +++ b/test_history/qwen.py @@ -0,0 +1,325 @@ +import os +import json +import torch +from datetime import datetime +from PIL import Image +import io +import re +from decord import VideoReader +from transformers import Qwen2VLForConditionalGeneration, AutoProcessor +from qwen_vl_utils import process_vision_info + +# 配置 +QWEN_MODEL_PATH = "/obscura/models/qwen/Qwen2-VL-7B-Instruct" + +# 初始化 Qwen 模型 (使用 cuda:0) +print("正在初始化 Qwen 模型 (cuda:0)...") +model = Qwen2VLForConditionalGeneration.from_pretrained( + QWEN_MODEL_PATH, + torch_dtype="auto", + device_map="cuda:0" +) + +min_pixels = 128*28*28 +max_pixels = 256*28*28 +processor = AutoProcessor.from_pretrained( + QWEN_MODEL_PATH, + min_pixels=min_pixels, + max_pixels=max_pixels +) + +# 在文件开头添加加载配置的代码 +def load_config(): + """加载配置文件""" + try: + with open('info.json', 'r', encoding='utf-8') as f: + config = json.load(f) + return config + except Exception as e: + print(f"加载配置文件失败: {e}") + return {"actions": [], "environments": []} + +# 加载配置 +CONFIG = load_config() + +class MediaAnalysisSystem: + def __init__(self): + self.MAX_NUM_FRAMES = 10 + self.device = "cuda:0" + self.qwen_model = model + self.qwen_processor = processor + # 使用加载的配置 + self.environments = CONFIG["environments"] + self.actions = CONFIG["actions"] + + def encode_video(self, video_data): + def uniform_sample(l, n): + gap = len(l) / n + return [l[int(i * gap + gap / 2)] for i in range(n)] + + video_file = io.BytesIO(video_data) + vr = VideoReader(video_file) + sample_fps = round(vr.get_avg_fps() / 1) + frame_idx = list(range(0, len(vr), sample_fps)) + if len(frame_idx) > self.MAX_NUM_FRAMES: + frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('num frames:', len(frames)) + return frames + + def 
process_with_qwen(self, media_data, object_name, media_type='image'): + """使用 Qwen 模型处理媒体""" + if media_type == 'video': + frames = self.encode_video(media_data) + media_content = {"type": "video", "video": frames, "fps": 1.0} + else: + image = Image.open(io.BytesIO(media_data)) + media_content = {"type": "image", "image": image} + + messages = [ + { + "role": "user", + "content": [ + media_content, + {"type": "text", "text": self._get_analysis_prompt(media_type)} + ], + } + ] + + text = self.qwen_processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + image_inputs, video_inputs = process_vision_info(messages) + inputs = self.qwen_processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to(self.device) + generated_ids = self.qwen_model.generate(**inputs, max_new_tokens=2048) + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + answer = self.qwen_processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + )[0] + + return { + "model": "qwen", + "original_answer": answer, + "extracted_info": self.extract_info(answer) + } + + def _get_analysis_prompt(self, media_type): + """获取分析提示词""" + return f"""你是一位视频描述专家,你擅长对视频进行详细的描述,请对这段监控视频进行详细分析,包括以下方面,并按照下面格式回答: + 1. 环境场景 + - 整体场景描述(室内/室外、光线条件等) + - 主要物品和家具列表 + - 环境特征(如光线、整洁度等) + + 2. 人员统计 + - 总人数:[数字]人 + - 性别分布:[男性数量]/[女性数量] + (若无法确定准确人数,请注明"无法确定人数") + + 3. 人员特征分析 + - 个人特征:性别、年龄段、着装、体态等 + - 携带物品:详细描述随身物品及用途 + - 表情/情绪状态 + + 4. 行为分析 + - 个人行为:移动方向、姿态、动作等 + - 互动情况:人员之间的交互描述(若多人) + - 活动区域:人员活动的主要位置 + + 5. 群体行为(若多人) + - 聚集形态 + - 移动趋势 + - 群体互动特点 + + 6. 
异常情况 + - 可疑行为描述 + - 异常活动标记 + - 需要注意的安全隐患 + + 请用清晰、有条理的格式描述,并突出重要发现。""" + + def extract_info(self, answer): + """提取中文信息""" + info = { + "environment": None, + "num_people": None, + "actions": [], + "objects": [], + "furniture": [], + "emotions": [], + "features": [] + } + + # 使用加载的环境列表 + for env in self.environments: + if env in answer.lower(): + info["environment"] = env + break + + # 中文数字模式 + people_patterns = [ + r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)', + r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)', + r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?', + r'(男|女)(性|生|士)', + r'(成年|未成年|青少年|老年)\s*(人|群体)', + r'(员工|职工|工人|学生|顾客|观众|游客|乘客)', + r'(群众|民众|大众|公众)', + r'(男女|老少|老幼|大人|小孩)' + ] + for pattern in people_patterns: + match = re.search(pattern, answer) + if match: + if match.group(1).isdigit(): + info["num_people"] = int(match.group(1)) + elif match.group(1) in ['一个', '一']: + info["num_people"] = 1 + else: + num_word_to_digit = { + '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10 + } + info["num_people"] = num_word_to_digit.get(match.group(1), 0) + break + + # 使用加载的动作列表 + for action in self.actions: + if action in answer: + info["actions"].append(action) + + emotions = [ + "钦佩", "赞赏", "欣赏","关心", "高兴", "爱", "乐观", "感激", "释然", "骄傲", "愉悦", + "愤怒", "烦恼", "焦虑", "尴尬", "失望", "厌恶", "恐惧", "悲伤", "懊悔", "羞耻","发呆", + "困惑", "好奇", "欲望", "惊讶", "实事求是", "中性", "赞叹","平静","放松","专注","思考", + ] + objects = ["水瓶", "办公用品", "文件", "电脑","风扇","鼠标","键盘","纸巾","书","笔","袋子","盒子","水杯","杯子","马克杯","玻璃杯","文件夹","书包","书架","文件柜","手机"] + furniture = ["椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发","柜子","架子","摄像头","靠垫","办公椅","电视","白板","显示器","置物架","文件架"] + features = ["戴眼镜","不戴眼镜","长发","短发","长头发","短头发","戴帽子","不戴帽子","戴口罩","不戴口罩","男性","女性","胖","瘦","高","矮","男","女","成年人"] + + for obj in objects: + if obj in answer: + info["objects"].append(obj) + + for item in furniture: + if item in answer: + info["furniture"].append(item) + + for feature in features: + 
if feature in answer: + info["features"].append(feature) + + for emotion in emotions: + if emotion in answer: + info["emotions"].append(emotion) + + return info + + +def process_video_folder(system, folder_path, output_path=None): + """处理文件夹中的所有视频文件并保存结果""" + valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'} + results = {} + + if not os.path.exists(folder_path): + raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在") + + if output_path is None: + output_path = os.getcwd() + elif not os.path.exists(output_path): + os.makedirs(output_path) + + video_files = [ + f for f in os.listdir(folder_path) + if os.path.splitext(f)[1].lower() in valid_extensions + ] + + if not video_files: + raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件") + + print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n") + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + folder_name = os.path.basename(os.path.normpath(folder_path)) + output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json") + + for i, video_file in enumerate(video_files, 1): + video_path = os.path.join(folder_path, video_file) + print(f"正在处理 ({i}/{len(video_files)}): {video_file}") + + try: + with open(video_path, "rb") as f: + video_data = f.read() + results[video_file] = {"video_analysis": {}} + + # 只使用 Qwen 处理视频 + print(f"使用 Qwen 处理视频: {video_file}") + qwen_result = system.process_with_qwen(video_data, video_file, media_type='video') + results[video_file]["video_analysis"]["qwen-7B"] = { + "original_answer": qwen_result["original_answer"], + "extracted_info": qwen_result["extracted_info"] + } + + # 添加时间戳 + results[video_file]["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # 保存结果 + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"✓ 成功处理并保存: {video_file}") + + # 每个视频处理完后清理内存 + if torch.cuda.is_available(): + torch.cuda.empty_cache() + import gc + gc.collect() + + except Exception as e: + 
print(f"✗ 处理失败 {video_file}: {str(e)}") + results[video_file] = {"error": str(e)} + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"\n所有分析结果已保存到: {output_file}") + return results + +class MediaAnalysisError(Exception): + """自定义媒体分析异常类""" + pass + +def main(): + try: + system = MediaAnalysisSystem() + + # 添加文件夹路径输入处理 + folder_path = input("请输入视频文件夹路径: ").strip() + output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() + + # 如果用户没有输入输出路径,则使用None(将使用当前目录) + output_path = output_path if output_path else None + + # 处理文件夹中的视频 + results = process_video_folder(system, folder_path, output_path) + + # 显示处理统计 + success_count = sum(1 for r in results.values() if "error" not in r) + print(f"\n处理完成!成功: {success_count}/{len(results)}") + + except MediaAnalysisError as e: + print(f"\n错误: {str(e)}") + except Exception as e: + print(f"\n未预期的错误: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_history/report.md b/test_history/report.md new file mode 100644 index 0000000..66ee15c --- /dev/null +++ b/test_history/report.md @@ -0,0 +1,45 @@ + +# VLM模型测评总结 +## 测试模型:qwen-vl2-7B,qwen-vl2-2B,minicpm,llama3.2-vision,deepseek-vl2 + 其中,qwen-vl2-7B,qwen-vl2-2B,minicpm可直接输入视频,视频为10s短视频 + llama3.2-vision,deepseek-vl2输入图片,将10s短视频截取为3-8张图片 +## 测试数据包括: + 1. 室内右上角全景 + 2. 沙发正面 + 3. 右上角-吃饭 + 4. 左前方-吃饭 + 4. 左侧 +## 测试结果 +1. 在处于近景时(左前方、正前方),qwen-vl2-7B,qwen-vl2-2B,minicpm,llama3.2-vision,deepseek-vl2均能较好地识别出场景中的人数,并能较好地描述场景,包括吃饭、睡觉、喝水、玩手机等动作。对于人物的识别都差不多, +表情难以识别,穿着识别较准,性别和外貌一般。 + 1. minicpm会推测是什么食物,只有llama3.2-vision和minicpm识别出食物是玉米,但概率比较小,不是每次都能识别出来。 + 2. qwen-2B效果最差,无法识别出场景中的人数,也无法识别出场景中的人物 + 3. llama3.2-vision对场景描述很详细,只有llama3.2-vision识别到了电视 + +2. 在处于远景时(右上角),动作是吃饭,只有llama3.2-vision识别出吃东西和喝水的动作,但对人数识别不准确,其他模型都认为在工作 + +3. 在处于远景时(右上角),人物在室内活动,此时的行为包括 + 1. 搬纸箱/整理东西:只有qwen-7B准确识别行为和人数,以及人物外貌 + 2. 
站立喝水:"qwen-7B"和llama3.2-vision识别出喝水的动作,其余模型会把水杯识别成手机,"qwen-7B"对人数和外貌识别最准,llama3.2-vision对人数及外貌的识别很离谱, + 3. 坐着使用手机:"deepseek-vl2"有时不能,llama3.2-vision对人数及外貌的识别最差 + 4. 坐着看书:在第一个视频"minicpm"、"qwen-vl2-7B"、llama3.2-vision能肯定是一个人坐着看书。在另一个视频deepseek认为在逗猫,qwen-vl2-7B认为在弹吉他,llama3.2-vision对人数及外貌的识别最离谱 + 5. 蹲着看书:都认为是坐着看书,只有minicpm识别到"蹲"这个动作, + 6. 行走、站在窗前:"qwen-7B"和"deepseek-vl2"对人数和行为以及行动轨迹的识别最准,其他模型能识别到在行走和行动轨迹,但在人数判断上存在问题 + 7. 移动电视:qwen-vl2-7B准确识别这一行为,其他模型只能识别出弯腰检查东西 + + 针对远景来看,qwen-7B对行为的识别最准,但对于人数的判断最差,minicpm对人数及外貌的判断相对最准确 + +4. 近景左侧时,行为包括 + 1. 看书:都可以识别到 + 2. 使用手机:都可以,llama3.2-vision对人数及外貌的识别还是不行 + 3. 使用电脑:都可以识别到 + 4. 喝水:"qwen-7B"、minicpm、deepseek-vl2、llama3.2-vision都可以识别到喝水 + 5. 在白板上写字:只有minicpm识别到 + 6. 行走:都能识别,但llama3.2-vision对人数及外貌识别差距很大 + 7. 先使用手机,然后拿起书并开始阅读:都可以识别到这个动作 +## 总结 + 1. 在近景时,几个模型对动作、人物表情、人物外貌的识别都较准确,只有llama3.2-vision对人数以及人物识别最差 + 2. 在远景时,qwen-7B对行为的识别相对较准,minicpm对人数及外貌的判断相对最准确。 + 3. 无论是近景还是远景,llama3.2-vision对人数以及人物识别都较差,会出现很多人物,对人物的描述也千奇百怪,但对行为识别还可以 + 4. qwen-2B表现最差 + 5. deepseek-vl2一般 \ No newline at end of file diff --git a/web.html b/web.html new file mode 100644 index 0000000..3057b18 --- /dev/null +++ b/web.html @@ -0,0 +1,2409 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ + +
+ + +
+
+

今日识别行为

+
--
+
+
+

异常行为

+
--
+
+
+

人数

+
--
+
+
+ + +
+
+
+ + + +
+
+
+
+ +
+
+ +
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+
+ +
+ + +
+
+

每日行为分析

+
+
+ +
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

分析中...

+
+ +
+
+ access_time +

高峰时段分析

+
+

分析中...

+
+
+ +
+
+ warning +

异常行为分析

+
+

分析中...

+
+ +
+
+ analytics +

行为分析

+
+
分析中...
+
+ +
+
+ tips_and_updates +

建议

+
+

分析中...

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 2.html b/web/web copy 2.html new file mode 100644 index 0000000..a3115b6 --- /dev/null +++ b/web/web copy 2.html @@ -0,0 +1,1330 @@ + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ +
+ + +
+
+

今日识别行为

+
--
+
+
+

平均置信度

+
--%
+
+
+

异常行为

+
--
+
+
+ + +
+
+
+ + +
+ +
+
+
+ +
+
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+ +
+ +
+ + +
+
+

每日行为分析

+
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

今日共记录 2,547 个行为事件,较昨日增长 15%

+
+ arrow_upward + 活跃度上升 +
+
+ +
+
+ access_time +

高峰时段分析

+
+

活动高峰出现在 9:00-11:00 和 14:00-16:00

+
+
+ +
+
+ warning +

异常行为分析

+
+

检测到 23 起异常行为,主要集中在午夜时段

+
+ 建议: +

建议加强夜间监控,特别是在 23:00-凌晨 2:00 时段

+
+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 3.html b/web/web copy 3.html new file mode 100644 index 0000000..2caca93 --- /dev/null +++ b/web/web copy 3.html @@ -0,0 +1,1503 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ +
+ + +
+
+

今日识别行为

+
--
+
+
+

平均置信度

+
--%
+
+
+

异常行为

+
--
+
+
+ + +
+
+
+ + + +
+ +
+
+
+ +
+
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+ +
+ +
+ + +
+
+

每日行为分析

+
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

今日共记录 2,547 个行为事件,较昨日增长 15%

+
+ arrow_upward + 活跃度上升 +
+
+ +
+
+ access_time +

高峰时段分析

+
+

活动高峰出现在 9:00-11:00 和 14:00-16:00

+
+
+ +
+
+ warning +

异常行为分析

+
+

检测到 23 起异常行为,主要集中在午夜时段

+
+ 建议: +

建议加强夜间监控,特别是在 23:00-凌晨 2:00 时段

+
+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 4.html b/web/web copy 4.html new file mode 100644 index 0000000..d695831 --- /dev/null +++ b/web/web copy 4.html @@ -0,0 +1,1976 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ +
+ + +
+
+

今日识别行为

+
--
+
+
+

平均置信度

+
--%
+
+
+

异常行为

+
--
+
+
+ + +
+
+
+ + + +
+
+
+
+ +
+
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+
+ +
+ + +
+
+

每日行为分析

+
+
+ +
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

分析中...

+
+ +
+
+ access_time +

高峰时段分析

+
+

分析中...

+
+
+ +
+
+ warning +

异常行为分析

+
+

分析中...

+
+ +
+
+ analytics +

行为分析

+
+
分析中...
+
+ +
+
+ tips_and_updates +

建议

+
+

分析中...

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 5.html b/web/web copy 5.html new file mode 100644 index 0000000..8edc57b --- /dev/null +++ b/web/web copy 5.html @@ -0,0 +1,2122 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ + +
+ + +
+
+

今日识别行为

+
--
+
+
+

异常行为

+
--
+
+
+ + +
+
+
+ + + +
+
+
+
+ +
+
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+
+ +
+ + +
+
+

每日行为分析

+
+
+ +
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

分析中...

+
+ +
+
+ access_time +

高峰时段分析

+
+

分析中...

+
+
+ +
+
+ warning +

异常行为分析

+
+

分析中...

+
+ +
+
+ analytics +

行为分析

+
+
分析中...
+
+ +
+
+ tips_and_updates +

建议

+
+

分析中...

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 6.html b/web/web copy 6.html new file mode 100644 index 0000000..83497c6 --- /dev/null +++ b/web/web copy 6.html @@ -0,0 +1,2191 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ + +
+ + +
+
+

今日识别行为

+
--
+
+
+

异常行为

+
--
+
+
+

人数

+
--
+
+
+ + +
+
+
+ + + +
+
+
+
+ +
+
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+
+ +
+ + +
+
+

每日行为分析

+
+
+ +
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

分析中...

+
+ +
+
+ access_time +

高峰时段分析

+
+

分析中...

+
+
+ +
+
+ warning +

异常行为分析

+
+

分析中...

+
+ +
+
+ analytics +

行为分析

+
+
分析中...
+
+ +
+
+ tips_and_updates +

建议

+
+

分析中...

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 7.html b/web/web copy 7.html new file mode 100644 index 0000000..f8fd1e2 --- /dev/null +++ b/web/web copy 7.html @@ -0,0 +1,2348 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ + +
+ + +
+
+

今日识别行为

+
--
+
+
+

异常行为

+
--
+
+
+

人数

+
--
+
+
+ + +
+
+
+ + + +
+
+
+
+ +
+
+ +
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+
+ +
+ + +
+
+

每日行为分析

+
+
+ +
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

分析中...

+
+ +
+
+ access_time +

高峰时段分析

+
+

分析中...

+
+
+ +
+
+ warning +

异常行为分析

+
+

分析中...

+
+ +
+
+ analytics +

行为分析

+
+
分析中...
+
+ +
+
+ tips_and_updates +

建议

+
+

分析中...

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy 8.html b/web/web copy 8.html new file mode 100644 index 0000000..16cf9d0 --- /dev/null +++ b/web/web copy 8.html @@ -0,0 +1,2358 @@ + + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ + +
+ + +
+
+

今日识别行为

+
--
+
+
+

异常行为

+
--
+
+
+

人数

+
--
+
+
+ + +
+
+
+ + + +
+
+
+
+ +
+
+ +
+ +
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+
+ +
+ + +
+
+

每日行为分析

+
+
+ +
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

分析中...

+
+ +
+
+ access_time +

高峰时段分析

+
+

分析中...

+
+
+ +
+
+ warning +

异常行为分析

+
+

分析中...

+
+ +
+
+ analytics +

行为分析

+
+
分析中...
+
+ +
+
+ tips_and_updates +

建议

+
+

分析中...

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/web/web copy.html b/web/web copy.html new file mode 100644 index 0000000..7f156cc --- /dev/null +++ b/web/web copy.html @@ -0,0 +1,1148 @@ + + + + + + 智能视频分析系统 + + + + +
+ + + + +
+ +
+ +
+ +
+ +
+ + +
+
+

今日识别行为

+
--
+
+
+

平均置信度

+
--%
+
+
+

异常行为

+
--
+
+
+ + +
+
+

行为时间轴

+ +
+
+
+
00:00
+
01:00
+
02:00
+
03:00
+
04:00
+
05:00
+
06:00
+
07:00
+
08:00
+
09:00
+
10:00
+
11:00
+
12:00
+
13:00
+
14:00
+
15:00
+
16:00
+
17:00
+
18:00
+
19:00
+
20:00
+
21:00
+
22:00
+
23:00
+
+
+ +
+
+
+ + +
+
+
+

行为分布

+
+
+
+
+
+

时段分析

+
+
+
+
+ + +
+
+

最新事件

+ +
+ +
+ + +
+
+

每日行为分析

+
+ +
+
+ +
+
+ trending_up +

整体活动趋势

+
+

今日共记录 2,547 个行为事件,较昨日增长 15%

+
+ arrow_upward + 活跃度上升 +
+
+ +
+
+ access_time +

高峰时段分析

+
+

活动高峰出现在 9:00-11:00 和 14:00-16:00

+
+
+ +
+
+ warning +

异常行为分析

+
+

检测到 23 起异常行为,主要集中在午夜时段

+
+ 建议: +

建议加强夜间监控,特别是在 23:00-凌晨 2:00 时段

+
+
+
+
+
+ + + + + \ No newline at end of file