# Source file: zydi-web/test_history/deep copy.py
# Snapshot: 2025-01-12 03:01:51 +00:00 (328 lines, 13 KiB, Python)

import io
import os
import json
import base64
import requests
import re
from PIL import Image
from datetime import datetime, timedelta
from decord import VideoReader, cpu
# Chat-completions endpoint that analysis requests are posted to.
SILICONFLOW_URL = "https://api.siliconflow.cn/v1/chat/completions"

# Bearer token sent with every request.  Prefer supplying it through the
# SILICONFLOW_API_KEY environment variable; the literal below is kept only as
# a backward-compatible fallback.
# SECURITY NOTE(review): a credential committed to source control should be
# treated as leaked -- rotate it and remove the fallback once deployments set
# the environment variable.
API_KEY = os.environ.get(
    "SILICONFLOW_API_KEY",
    "sk-ytxabphvgxrjbvnqiwercjyrabvlukwddqsmvnqnvwuazamd",
)
class MediaAnalysisSystem:
    """Sample frames from a video and ask a vision-language model about them.

    The pipeline is: decode the video, pick a handful of frames, shrink and
    JPEG-compress them, send them together with a fixed prompt to the
    chat-completions endpoint, then pull a few keyword-based facts out of the
    model's free-text answer.
    """

    # Prompt sent with every request (runtime text, intentionally unchanged).
    _ANALYSIS_PROMPT = (
        "请将这些图片作为一个时间序列进行详细分析,包括以下方面:\n"
        "1. 场景中人数的精确统计\n"
        "2. 每个人的个人行为分析\n"
        "3. 面部表情识别和情绪状态评估\n"
        "4. 整体场景和环境的详细描述\n"
        "5. 人与人之间的互动情况\n"
        "6. 详细的环境条件描述\n"
        "7. 环境中出现的物品和家具\n"
        "8. 任何可疑或异常活动\n"
        "9. 人员的具体特征(估计年龄范围、性别、着装)\n"
        "10. 人员的移动模式和方向\n"
        "11. 携带的物品或物体\n"
        "12. 群体动态和聚集情况\n"
        "13. 视频中的时间戳分析(如果有)"
    )

    def __init__(self):
        # Bounds on how many frames are sent to the model for one video.
        self.MAX_NUM_FRAMES = 5
        self.MIN_NUM_FRAMES = 3

    def encode_video(self, video_data):
        """Decode *video_data* and return a small list of compressed frames.

        Args:
            video_data: Raw bytes of a video file.

        Returns:
            A list of PIL images, each at most 600x600 and JPEG-compressed.
            The list holds between ``MIN_NUM_FRAMES`` and ``MAX_NUM_FRAMES``
            frames, or fewer only when the video itself has fewer frames.

        Raises:
            ValueError: If the video contains no frames.
        """
        def uniform_sample(items, count):
            # Take the middle element of each of `count` equal-width slices.
            gap = len(items) / count
            return [items[int(i * gap + gap / 2)] for i in range(count)]

        vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
        total_frames = len(vr)
        if total_frames == 0:
            raise ValueError("Video contains no frames")

        # Candidate frames are one per `round(fps)` frames.  The step is
        # clamped to 1 so a very low or unreported frame rate cannot hand
        # range() a zero step.
        fps = vr.get_avg_fps()
        step = max(1, round(fps)) if fps > 0 else 1
        frame_idx = list(range(0, total_frames, step))

        if len(frame_idx) > self.MAX_NUM_FRAMES:
            frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES)
        elif len(frame_idx) < self.MIN_NUM_FRAMES:
            # Short clip: fall back to sampling across every decoded frame so
            # the minimum is honoured whenever the video is long enough.
            frame_idx = uniform_sample(
                range(total_frames),
                min(self.MIN_NUM_FRAMES, total_frames),
            )

        raw_frames = vr.get_batch(frame_idx).asnumpy()

        compressed_frames = []
        for pixels in raw_frames:
            frame = Image.fromarray(pixels.astype('uint8'))
            # thumbnail() resizes in place and keeps the aspect ratio.
            frame.thumbnail((600, 600), Image.Resampling.LANCZOS)
            buffered = io.BytesIO()
            frame.save(buffered, format="JPEG", quality=85)
            buffered.seek(0)
            compressed_frames.append(Image.open(buffered))

        print(f'处理后的帧数: {len(compressed_frames)}')
        return compressed_frames

    def process_video(self, video_data, object_name):
        """Analyse one video and return the model answer plus extracted facts.

        Args:
            video_data: Raw bytes of the video file.
            object_name: Name used in log and error messages.

        Returns:
            A dict with ``original_answer`` (model text), ``extracted_info``
            (see :meth:`extract_info`) and ``num_frames``.

        Raises:
            ValueError: If *video_data* is empty.
            Exception: Any failure from the API call or response parsing is
                logged and re-raised.
        """
        if not video_data:
            raise ValueError(f"Empty video data for {object_name}")
        print(f"Processing video: {object_name}, data size: {len(video_data)} bytes")

        frames = self.encode_video(video_data)

        # One user message: the prompt followed by every frame as a data URL.
        content = [{"type": "text", "text": self._ANALYSIS_PROMPT}]
        for frame in frames:
            # Encode as JPEG so the payload matches the declared MIME type.
            base64_image = self.image_to_base64(
                frame, image_format="JPEG", quality=85
            )
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}",
                    "detail": "auto",
                },
            })
        messages = [{"role": "user", "content": content}]

        try:
            response = self._make_api_request(messages)
            answer = response["choices"][0]["message"]["content"]
            return {
                "original_answer": answer,
                "extracted_info": self.extract_info(answer),
                "num_frames": len(frames),
            }
        except Exception as e:
            print(f"API请求失败: {str(e)}")
            raise

    def _make_api_request(self, messages):
        """POST *messages* to the chat-completions endpoint; return parsed JSON.

        Raises:
            RuntimeError: If the endpoint answers with a non-200 status.  The
                message includes the start of the response body to make the
                failure diagnosable.
        """
        payload = {
            "model": "deepseek-ai/deepseek-vl2",
            "messages": messages,
            "stream": False,
            "max_tokens": 1024,
            "temperature": 0.7,
            "top_p": 0.7,
            "top_k": 50,
            "frequency_penalty": 0.5,
            "n": 1,
            "response_format": {"type": "text"},
        }
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
        response = requests.post(
            SILICONFLOW_URL,
            json=payload,
            headers=headers,
            timeout=60,  # seconds
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"Siliconflow API 错误: {response.status_code} {response.text[:500]}"
            )
        return response.json()

    @staticmethod
    def image_to_base64(image, image_format="PNG", **save_options):
        """Return *image* serialised in *image_format* as a base64 string.

        Args:
            image: A PIL image.
            image_format: Format name passed to ``image.save`` (default PNG,
                as before).
            **save_options: Extra keyword arguments for ``image.save``, e.g.
                ``quality=85`` for JPEG.
        """
        if image_format.upper() in ("JPEG", "JPG") and image.mode not in ("RGB", "L"):
            # JPEG cannot store alpha or palette modes.
            image = image.convert("RGB")
        buffered = io.BytesIO()
        image.save(buffered, format=image_format, **save_options)
        return base64.b64encode(buffered.getvalue()).decode()

    @staticmethod
    def extract_time_from_filename(object_name):
        """Parse ``YYYYMMDD_HHMMSS`` from the file name into a time window.

        Returns:
            ``(start_time, end_time)`` where ``end_time`` is ten seconds after
            ``start_time``.  If the name cannot be parsed, the current time is
            used as the start and a message is printed.
        """
        filename = os.path.basename(object_name)
        parts = filename.split('_')
        try:
            time_str = parts[0] + '_' + parts[1].split('.')[0]
            start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S")
        except (IndexError, ValueError):
            # IndexError: no underscore in the name; ValueError: wrong format.
            print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。")
            start_time = datetime.now()
        return start_time, start_time + timedelta(seconds=10)

    @staticmethod
    def extract_info(answer):
        """Extract coarse, keyword-based facts from the model's answer text.

        Returns:
            A dict with ``environment`` (first matching keyword or ``None``),
            ``num_people`` (``None`` when no people pattern matches, ``0``
            when people are mentioned without a recognisable count) and the
            lists ``actions``, ``objects``, ``furniture``, ``emotions`` and
            ``features`` holding every keyword found in *answer*.
        """
        def matched(keywords):
            # dict.fromkeys removes duplicates while keeping list order; empty
            # keywords are skipped because "" is a substring of every string.
            return [kw for kw in dict.fromkeys(keywords) if kw and kw in answer]

        info = {
            "environment": None,
            "num_people": None,
            "actions": [],
            "objects": [],
            "furniture": [],
            "emotions": [],
            "features": [],
        }

        lowered = answer.lower()
        for env in ("办公室", "室内", "室外", "会议室"):
            if env in lowered:
                info["environment"] = env
                break

        # Tried in order; the first pattern that matches decides the count.
        people_patterns = [
            r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
            r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
            r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)',
            r'\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?',
            r'(男|女)(性|生|士)',
            r'(成年|未成年|青少年|老年)\s*(人|群体)',
            r'(员工|职工|工人|学生|顾客|观众|游客|乘客)',
            r'(群众|民众|大众|公众)',
            r'(男女|老少|老幼|大人|小孩)',
        ]
        # Chinese numerals accepted by the second and third patterns above.
        numeral_values = {
            '一个': 1, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
            '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
        }
        for pattern in people_patterns:
            match = re.search(pattern, answer)
            if not match:
                continue
            token = match.group(1)
            if token.isdigit():
                info["num_people"] = int(token)
            else:
                # People were mentioned but no usable count was captured -> 0.
                info["num_people"] = numeral_values.get(token, 0)
            break

        # Keyword vocabularies.  Blank placeholder entries that were present
        # in these lists have been removed; they matched every answer.
        actions = [
            "睡眠", "摔倒", "跳舞", "蹲下", "转身", "倒下", "躺下", "跳跃",
            "说话", "睡觉", "起床", "看书", "写字", "学习", "玩手机", "吃饭",
            "搬东西", "看风景", "走路", "散步", "阅读", "写作", "使用手机",
            "使用电脑", "工作", "使用笔记本电脑", "喝水", "整理",
        ]
        emotions = [
            "高兴", "愤怒", "悲伤", "惊讶", "恐惧", "厌恶", "平静", "放松",
            "中性", "专注", "思考",
        ]
        objects = [
            "水瓶", "办公用品", "文件", "电脑", "风扇", "鼠标", "键盘", "纸巾",
            "袋子", "盒子", "水杯", "杯子", "马克杯", "玻璃杯", "文件夹",
            "书包", "书架", "文件柜", "手机",
        ]
        furniture = [
            "椅子", "桌子", "咖啡桌", "文件柜", "沙发", "柜子", "架子",
            "摄像头", "靠垫", "办公椅", "电视", "白板", "显示器", "置物架",
            "文件架",
        ]
        # NOTE(review): negated features contain their positive form
        # ("不戴眼镜" contains "戴眼镜"), so both are reported together.
        features = [
            "戴眼镜", "不戴眼镜", "长发", "短发", "长头发", "短头发",
            "戴帽子", "不戴帽子", "戴口罩", "不戴口罩", "男性", "女性",
            "成年人",
        ]

        info["actions"] = matched(actions)
        info["objects"] = matched(objects)
        info["furniture"] = matched(furniture)
        info["features"] = matched(features)
        info["emotions"] = matched(emotions)
        return info
# Module-level MediaAnalysisSystem instance, created when the module is
# imported.  Note that main() builds its own instance instead of using this one.
media_analysis_system = MediaAnalysisSystem()
class MediaAnalysisError(Exception):
    """Raised when a media-analysis run cannot proceed.

    Used for conditions such as a missing input folder or a folder that
    contains no supported video files.
    """
def process_video_folder(system, folder_path, output_path=None):
    """Analyse every supported video in a folder and save the results as JSON.

    Videos are processed in sorted file-name order.  After each video (whether
    it succeeded or failed) the accumulated results are rewritten to a single
    ``analysis_results_<folder>_<timestamp>.json`` file, so partial progress
    survives an interruption.

    Args:
        system: Object exposing ``process_video(video_data, object_name)``.
        folder_path: Directory containing the videos.
        output_path: Directory for the JSON file; created if missing.
            Defaults to the current working directory.

    Returns:
        Dict mapping each video file name to
        ``{"video_analysis": {"deepseek-vl2": <result or {"error": msg}>}}``.

    Raises:
        MediaAnalysisError: If *folder_path* is not an existing directory or
            holds no file with a supported extension.
    """
    valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'}

    # isdir (not exists) so a path that points at a regular file is rejected
    # here instead of failing later inside os.listdir().
    if not os.path.isdir(folder_path):
        raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在")

    if output_path is None:
        output_path = os.getcwd()
    else:
        # exist_ok avoids a check-then-create race.
        os.makedirs(output_path, exist_ok=True)

    # Sorted for a reproducible order; directories that merely look like
    # videos (e.g. "clips.mp4/") are skipped.
    video_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.splitext(name)[1].lower() in valid_extensions
        and os.path.isfile(os.path.join(folder_path, name))
    )
    if not video_files:
        raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件")

    print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = os.path.basename(os.path.normpath(folder_path))
    output_file = os.path.join(
        output_path, f"analysis_results_{folder_name}_{timestamp}.json"
    )

    results = {}

    def record(name, analysis):
        # Store one video's outcome in the shared result layout.
        results[name] = {"video_analysis": {"deepseek-vl2": analysis}}

    def save():
        # Rewrite the whole results file with everything gathered so far.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

    for i, video_file in enumerate(video_files, 1):
        video_path = os.path.join(folder_path, video_file)
        print(f"正在处理 ({i}/{len(video_files)}): {video_file}")
        try:
            with open(video_path, "rb") as f:
                video_data = f.read()
            record(video_file, system.process_video(video_data, video_file))
            save()
            print(f"✓ 成功处理并保存: {video_file}")
        except Exception as e:
            # Best effort: one bad video must not abort the whole batch.
            print(f"✗ 处理失败 {video_file}: {str(e)}")
            record(video_file, {"error": str(e)})
            save()

    print(f"\n所有分析结果已保存到: {output_file}")
    return results
# NOTE(review): this is a second definition of MediaAnalysisError in the same
# module; it rebinds the name to a new, otherwise identical class.  It is kept
# so that the name stays defined at this point, but one of the two
# definitions should be removed.
class MediaAnalysisError(Exception):
    """Raised when a media-analysis run cannot proceed."""
def main():
    """Prompt for input/output folders, analyse the videos, print a summary.

    Expected failures (``MediaAnalysisError``) and unexpected ones are both
    reported on stdout rather than propagated.
    """
    try:
        system = MediaAnalysisSystem()

        folder_path = input("请输入视频文件夹路径: ").strip()
        # An empty answer means "use the current directory" (passed as None).
        output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() or None

        results = process_video_folder(system, folder_path, output_path)

        # The error marker lives in the innermost dict of each entry, not on
        # the outer wrapper, so look there when counting successes.
        success_count = sum(
            1
            for entry in results.values()
            if "error" not in entry.get("video_analysis", {}).get("deepseek-vl2", {})
        )
        print(f"\n处理完成!成功: {success_count}/{len(results)}")

    except MediaAnalysisError as e:
        print(f"\n错误: {str(e)}")
    except Exception as e:
        print(f"\n未预期的错误: {str(e)}")


if __name__ == "__main__":
    main()