# File-viewer metadata captured with this listing (not program code):
# 328 lines, 13 KiB, Python.
import base64
import io
import json
import os
import re
from datetime import datetime, timedelta

import requests
from decord import VideoReader, cpu
from PIL import Image
# Chat-completions endpoint that every analysis request is POSTed to.
SILICONFLOW_URL = "https://api.siliconflow.cn/v1/chat/completions"

# SECURITY: an API key is committed in this file. Supply the key through the
# SILICONFLOW_API_KEY environment variable instead; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
API_KEY = (
    os.environ.get("SILICONFLOW_API_KEY")
    or "sk-ytxabphvgxrjbvnqiwercjyrabvlukwddqsmvnqnvwuazamd"
)
class MediaAnalysisSystem:
    """Sample frames from a video and ask a vision-language model to describe them.

    Pipeline: decode the video bytes with decord, pick a small number of
    frames, shrink them, send them with a fixed Chinese prompt to the
    SiliconFlow chat-completions endpoint, then extract a few keyword-based
    facts from the free-text answer.
    """

    # Prompt sent ahead of the frames; runtime text, deliberately kept in Chinese.
    ANALYSIS_PROMPT = "\n".join([
        "请将这些图片作为一个时间序列进行详细分析,包括以下方面:",
        "1. 场景中人数的精确统计",
        "2. 每个人的个人行为分析",
        "3. 面部表情识别和情绪状态评估",
        "4. 整体场景和环境的详细描述",
        "5. 人与人之间的互动情况",
        "6. 详细的环境条件描述",
        "7. 环境中出现的物品和家具",
        "8. 任何可疑或异常活动",
        "9. 人员的具体特征(估计年龄范围、性别、着装)",
        "10. 人员的移动模式和方向",
        "11. 携带的物品或物体",
        "12. 群体动态和聚集情况",
        "13. 视频中的时间戳分析(如果有)",
    ])

    # Keyword tables used by extract_info (plain substring matching).
    ENVIRONMENTS = ("办公室", "室内", "室外", "会议室")
    ACTIONS = (
        "睡眠", "坐", "吃", "站", "摔倒", "跳舞", "蹲", "蹲下", "转身", "摔",
        "倒", "倒下", "躺下", "转", "跳跃", "跳", "躺", "睡", "说话", "睡觉",
        "起床", "看书", "写字", "学习", "玩手机", "吃饭", "搬东西", "看风景",
        "走路", "散步", "走", "阅读", "写作", "使用手机", "使用电脑", "工作",
        "使用笔记本电脑", "喝水", "整理",
    )
    EMOTIONS = (
        "高兴", "愤怒", "悲伤", "惊讶", "恐惧", "厌恶", "平静", "放松", "中性",
        "专注", "思考",
    )
    OBJECTS = (
        "水瓶", "办公用品", "文件", "电脑", "风扇", "鼠标", "键盘", "纸巾", "书",
        "笔", "袋子", "盒子", "水杯", "杯子", "马克杯", "玻璃杯", "文件夹", "书包",
        "书架", "文件柜", "手机",
    )
    FURNITURE = (
        "椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发", "柜子", "架子", "摄像头",
        "靠垫", "办公椅", "电视", "白板", "显示器", "置物架", "文件架",
    )
    FEATURES = (
        "戴眼镜", "不戴眼镜", "长发", "短发", "长头发", "短头发", "戴帽子",
        "不戴帽子", "戴口罩", "不戴口罩", "男性", "女性", "胖", "瘦", "高", "矮",
        "男", "女", "成年人",
    )

    # Tried in this order; the first pattern that matches anywhere in the
    # answer decides num_people.
    PEOPLE_PATTERNS = (
        r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
        r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
        r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)',
        r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?',
        r'(男|女)(性|生|士)',
        r'(成年|未成年|青少年|老年)\s*(人|群体)',
        r'(员工|职工|工人|学生|顾客|观众|游客|乘客)',
        r'(群众|民众|大众|公众)',
        r'(男女|老少|老幼|大人|小孩)',
    )
    CHINESE_NUMERALS = {
        '一个': 1, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
        '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
    }

    def __init__(self):
        # Upper bound on the number of frames sent in one request.
        self.MAX_NUM_FRAMES = 5
        # Lower bound the sampler tries to reach for very short clips.
        self.MIN_NUM_FRAMES = 3

    @staticmethod
    def _uniform_sample(items, n):
        """Return ``n`` items taken from the centres of ``n`` equal slices of ``items``."""
        gap = len(items) / n
        return [items[int(i * gap + gap / 2)] for i in range(n)]

    def _select_frame_indices(self, total_frames, avg_fps):
        """Choose which frame indices to decode.

        Starts from roughly one frame per second, tops the selection up from
        the full frame range when that yields fewer than ``MIN_NUM_FRAMES``
        (as far as the clip length allows), and thins it uniformly when it
        exceeds ``MAX_NUM_FRAMES``.

        Raises:
            ValueError: if ``total_frames`` is not positive.
        """
        if total_frames <= 0:
            raise ValueError("Video contains no decodable frames")

        try:
            step = max(1, round(avg_fps))
        except (ValueError, OverflowError):
            # round() rejects NaN/inf; fall back to considering every frame.
            step = 1
        # A step of 0 (fps below 0.5) would make range() raise, hence max(1, ...).
        candidates = list(range(0, total_frames, step))

        if len(candidates) < self.MIN_NUM_FRAMES:
            wanted = min(total_frames, self.MIN_NUM_FRAMES)
            if wanted > len(candidates):
                candidates = self._uniform_sample(range(total_frames), wanted)

        target = min(max(self.MIN_NUM_FRAMES, len(candidates)), self.MAX_NUM_FRAMES)
        if len(candidates) > target:
            candidates = self._uniform_sample(candidates, target)
        return candidates

    def encode_video(self, video_data):
        """Decode ``video_data`` (raw video bytes) into a list of shrunken PIL images.

        Each selected frame is resized in place to fit within 600x600 and
        round-tripped through JPEG (quality 85) before being returned.
        """
        vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
        frame_idx = self._select_frame_indices(len(vr), vr.get_avg_fps())
        decoded = vr.get_batch(frame_idx).asnumpy()

        compressed_frames = []
        for pixels in decoded:
            frame = Image.fromarray(pixels.astype('uint8'))
            # thumbnail() resizes in place and keeps the aspect ratio.
            frame.thumbnail((600, 600), Image.Resampling.LANCZOS)
            buffered = io.BytesIO()
            frame.save(buffered, format="JPEG", quality=85)
            compressed_frames.append(Image.open(buffered))

        print(f'处理后的帧数: {len(compressed_frames)}')
        return compressed_frames

    def process_video(self, video_data, object_name):
        """Analyse one video and return the model answer plus extracted facts.

        Args:
            video_data: Raw bytes of the video file.
            object_name: Name used in log and error messages.

        Returns:
            dict with ``original_answer`` (model text), ``extracted_info``
            (see ``extract_info``) and ``num_frames``.

        Raises:
            ValueError: if ``video_data`` is empty.
            Exception: anything raised by the API call or while reading the
                response is logged and re-raised.
        """
        if not video_data:
            raise ValueError(f"Empty video data for {object_name}")
        print(f"Processing video: {object_name}, data size: {len(video_data)} bytes")
        frames = self.encode_video(video_data)

        # One user message: the prompt followed by every frame.
        content = [{"type": "text", "text": self.ANALYSIS_PROMPT}]
        for frame in frames:
            # Encode as JPEG so the payload matches the declared MIME type.
            base64_image = self.image_to_base64(frame, image_format="JPEG")
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}",
                    "detail": "auto",
                },
            })
        messages = [{"role": "user", "content": content}]

        try:
            response = self._make_api_request(messages)
            answer = response["choices"][0]["message"]["content"]
            extracted_info = self.extract_info(answer)
            return {
                "original_answer": answer,
                "extracted_info": extracted_info,
                "num_frames": len(frames),
            }
        except Exception as e:
            print(f"API请求失败: {str(e)}")
            raise

    def _make_api_request(self, messages):
        """POST ``messages`` to the SiliconFlow endpoint and return the decoded JSON.

        Raises:
            RuntimeError: if the HTTP status is not 200; the message carries
                the status code and the start of the response body.
        """
        payload = {
            "model": "deepseek-ai/deepseek-vl2",
            "messages": messages,
            "stream": False,
            "max_tokens": 1024,
            "temperature": 0.7,
            "top_p": 0.7,
            "top_k": 50,
            "frequency_penalty": 0.5,
            "n": 1,
            "response_format": {"type": "text"},
        }
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }

        response = requests.post(
            SILICONFLOW_URL,
            json=payload,
            headers=headers,
            timeout=60,  # seconds
        )

        if response.status_code != 200:
            # Include a slice of the body: the status code alone rarely
            # explains why the request was rejected.
            raise RuntimeError(
                f"Siliconflow API 错误: {response.status_code} {response.text[:500]}"
            )
        return response.json()

    @staticmethod
    def image_to_base64(image, image_format="PNG"):
        """Return ``image`` encoded in ``image_format`` as a base64 string.

        Args:
            image: A PIL image.
            image_format: Target encoding; defaults to ``"PNG"``. For JPEG the
                image is converted to RGB when its mode is neither RGB nor L,
                and saved at quality 85.
        """
        fmt = image_format.upper()
        save_kwargs = {}
        if fmt in ("JPEG", "JPG"):
            fmt = "JPEG"
            if image.mode not in ("RGB", "L"):
                image = image.convert("RGB")
            save_kwargs["quality"] = 85
        buffered = io.BytesIO()
        image.save(buffered, format=fmt, **save_kwargs)
        return base64.b64encode(buffered.getvalue()).decode()

    @staticmethod
    def extract_time_from_filename(object_name):
        """Parse a ``YYYYMMDD_HHMMSS...`` file name into ``(start_time, end_time)``.

        ``end_time`` is always ``start_time`` plus 10 seconds. If the name
        does not have that shape, a message is printed and the current time
        is used as ``start_time``.
        """
        filename = os.path.basename(object_name)
        parts = filename.split('_')
        try:
            # parts[1] is missing when the name has no underscore.
            time_str = parts[0] + '_' + parts[1].split('.')[0]
            start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S")
        except (IndexError, ValueError):
            print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。")
            start_time = datetime.now()
        return start_time, start_time + timedelta(seconds=10)

    @staticmethod
    def _find_keywords(text, keywords):
        """Return the keywords contained in ``text``, in table order, without repeats."""
        return [word for word in dict.fromkeys(keywords) if word in text]

    @staticmethod
    def extract_info(answer):
        """Extract keyword-based facts from the model's free-text ``answer``.

        Returns:
            dict with ``environment`` (first matching environment word or
            None), ``num_people`` (int or None) and the lists ``actions``,
            ``objects``, ``furniture``, ``emotions`` and ``features``.

        Note:
            ``num_people`` is 0 when a people-related word matched but no
            usable count was attached to it (e.g. "几名员工"); it is None only
            when no pattern matched at all.
        """
        cls = MediaAnalysisSystem
        info = {
            "environment": None,
            "num_people": None,
            "actions": [],
            "objects": [],
            "furniture": [],
            "emotions": [],
            "features": [],
        }

        lowered = answer.lower()
        for env in cls.ENVIRONMENTS:
            if env in lowered:
                info["environment"] = env
                break

        for pattern in cls.PEOPLE_PATTERNS:
            match = re.search(pattern, answer)
            if not match:
                continue
            token = match.group(1)
            if token.isdigit():
                info["num_people"] = int(token)
            else:
                info["num_people"] = cls.CHINESE_NUMERALS.get(token, 0)
            break

        info["actions"] = cls._find_keywords(answer, cls.ACTIONS)
        info["objects"] = cls._find_keywords(answer, cls.OBJECTS)
        info["furniture"] = cls._find_keywords(answer, cls.FURNITURE)
        info["features"] = cls._find_keywords(answer, cls.FEATURES)
        info["emotions"] = cls._find_keywords(answer, cls.EMOTIONS)
        return info
# Module-level MediaAnalysisSystem instance, created when this module is imported.
media_analysis_system = MediaAnalysisSystem()
class MediaAnalysisError(Exception):
    """Custom exception for media-analysis failures."""
def process_video_folder(system, folder_path, output_path=None):
    """Analyse every video file in a folder and save the results as JSON.

    Args:
        system: Object exposing ``process_video(video_data, object_name)``.
        folder_path: Directory scanned (non-recursively) for files ending in
            .mp4, .avi, .mov or .mkv (case-insensitive).
        output_path: Directory for the results file; created if missing.
            Defaults to the current working directory.

    Returns:
        dict mapping each video file name to
        ``{"video_analysis": {"deepseek-vl2": <result>}}`` where ``<result>``
        is what ``system.process_video`` returned, or ``{"error": <message>}``
        if processing that file raised.

    Raises:
        MediaAnalysisError: if ``folder_path`` is not an existing directory
            or contains no supported video files.
    """
    valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    results = {}

    # isdir() also rejects a path that exists but is a regular file.
    if not os.path.isdir(folder_path):
        raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在")

    if output_path is None:
        output_path = os.getcwd()
    else:
        os.makedirs(output_path, exist_ok=True)

    # Sorted for a deterministic processing order; directories that happen
    # to carry a video extension are skipped.
    video_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.splitext(name)[1].lower() in valid_extensions
        and os.path.isfile(os.path.join(folder_path, name))
    )

    if not video_files:
        raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件")

    print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = os.path.basename(os.path.normpath(folder_path))
    output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json")

    def save_results():
        # Serialize before opening the file so a serialization failure does
        # not leave a truncated results file behind.
        text = json.dumps(results, ensure_ascii=False, indent=2)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(text)

    # Results are rewritten after every file so progress survives a crash.
    for i, video_file in enumerate(video_files, 1):
        video_path = os.path.join(folder_path, video_file)
        print(f"正在处理 ({i}/{len(video_files)}): {video_file}")

        try:
            with open(video_path, "rb") as f:
                video_data = f.read()
            result = system.process_video(video_data, video_file)
            results[video_file] = {
                "video_analysis": {
                    "deepseek-vl2": result
                }
            }
            save_results()
            print(f"✓ 成功处理并保存: {video_file}")
        except Exception as e:
            # Per-file boundary: record the failure and continue with the rest.
            print(f"✗ 处理失败 {video_file}: {str(e)}")
            results[video_file] = {
                "video_analysis": {
                    "deepseek-vl2": {"error": str(e)}
                }
            }
            save_results()

    print(f"\n所有分析结果已保存到: {output_file}")
    return results
# NOTE(review): a class with this exact name and body is already defined
# earlier in this file; this second definition rebinds the module-level name.
# One of the two definitions should be deleted.
class MediaAnalysisError(Exception):
    """Custom exception for media-analysis failures."""
def main():
    """Prompt for the input and output folders, run the batch, print a summary.

    MediaAnalysisError and any other exception are reported on stdout
    instead of propagating, since this is the script's top-level boundary.
    """

    def succeeded(entry):
        # Failures are stored as {"video_analysis": {<model>: {"error": ...}}},
        # so the "error" key has to be looked up inside the per-model results;
        # the outer wrapper dict never carries it.
        if "error" in entry:
            return False
        per_model = entry.get("video_analysis", {})
        return not any(
            isinstance(outcome, dict) and "error" in outcome
            for outcome in per_model.values()
        )

    try:
        system = MediaAnalysisSystem()

        folder_path = input("请输入视频文件夹路径: ").strip()
        # An empty answer means "use the current directory" (None downstream).
        output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() or None

        results = process_video_folder(system, folder_path, output_path)

        success_count = sum(1 for entry in results.values() if succeeded(entry))
        print(f"\n处理完成!成功: {success_count}/{len(results)}")

    except MediaAnalysisError as e:
        print(f"\n错误: {str(e)}")
    except Exception as e:
        print(f"\n未预期的错误: {str(e)}")
# Run the interactive batch only when this file is executed as a script.
if __name__ == "__main__":
    main()