Files
zydi-web/test_history/oll.py
T
2025-01-12 03:01:51 +00:00

305 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import io
import os
import json
import base64
import requests
import re
from PIL import Image
from datetime import datetime, timedelta
from decord import VideoReader, cpu
# Endpoint that video-analysis requests are POSTed to (a loopback address on
# port 11434, path /api/generate).
OLLAMA_URL = "http://127.0.0.1:11434/api/generate"
class MediaAnalysisSystem:
    """Describe surveillance video clips with the ``minicpm-v`` model.

    A clip is decoded into a bounded number of frames, the frames are sent
    together with a fixed Chinese prompt to the endpoint in ``OLLAMA_URL``,
    and the free-text answer is mined for a few structured facts
    (environment, head count, actions, objects, ...).
    """

    def __init__(self):
        # Upper bound on the number of frames sent to the model per video.
        self.MAX_NUM_FRAMES = 16

    def encode_video(self, video_data):
        """Decode raw video bytes into a list of sampled PIL images.

        Roughly one frame per second is taken; if that yields more than
        ``self.MAX_NUM_FRAMES`` frames, they are thinned out uniformly.

        Args:
            video_data: The encoded video file content as bytes.

        Returns:
            A list of ``PIL.Image`` frames.
        """

        def uniform_sample(items, n):
            # Pick the middle element of each of n equally sized slices.
            gap = len(items) / n
            return [items[int(i * gap + gap / 2)] for i in range(n)]

        video_file = io.BytesIO(video_data)
        vr = VideoReader(video_file, ctx=cpu(0))
        # Clamp to 1: a reported frame rate below 0.5 would round to 0 and
        # range() rejects a zero step.
        sample_fps = max(1, round(vr.get_avg_fps()))
        frame_idx = list(range(0, len(vr), sample_fps))
        if len(frame_idx) > self.MAX_NUM_FRAMES:
            frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES)
        frames = vr.get_batch(frame_idx).asnumpy()
        frames = [Image.fromarray(v.astype('uint8')) for v in frames]
        print('num frames:', len(frames))
        return frames

    def process_video(self, video_data, object_name):
        """Analyse one video and return the model answer plus extracted facts.

        Args:
            video_data: The encoded video file content as bytes.
            object_name: Name of the video, used only in messages.

        Returns:
            A dict with keys ``original_answer`` (model text),
            ``extracted_info`` (see ``extract_info``) and ``num_frames``.

        Raises:
            ValueError: If ``video_data`` is empty.
            RuntimeError: If the API answers with a non-200 status code.
            requests.RequestException: If the HTTP request itself fails.
        """
        if not video_data:
            raise ValueError(f"Empty video data for {object_name}")
        print(f"Processing video: {object_name}, data size: {len(video_data)} bytes")
        frames = self.encode_video(video_data)
        question = """你是一位视频描述专家,你擅长对视频进行详细的描述,请对这段监控视频进行详细分析,包括以下方面,并按照下面格式回答:
1. 环境场景
- 整体场景描述(室内/室外、光线条件等)
- 主要物品和家具列表
- 环境特征(如光线、整洁度等)
2. 人员统计
- 总人数:[数字]人
- 性别分布:[男性数量]/[女性数量]
(若无法确定准确人数,请注明"无法确定人数"
3. 人员特征分析
- 个人特征:性别、年龄段、着装、体态等
- 携带物品:详细描述随身物品及用途
- 表情/情绪状态
4. 行为分析
- 个人行为:移动方向、姿态、动作等
- 互动情况:人员之间的交互描述(若多人)
- 活动区域:人员活动的主要位置
5. 群体行为(若多人)
- 聚集形态
- 移动趋势
- 群体互动特点
6. 异常情况
- 可疑行为描述
- 异常活动标记
- 需要注意的安全隐患
请用清晰、有条理的格式描述,并突出重要发现。"""
        encoded_frames = [self.image_to_base64(frame) for frame in frames]
        payload = {
            "model": "minicpm-v",
            "prompt": question,
            "images": encoded_frames,
        }
        try:
            # The context manager releases the streamed connection even when
            # reading the body fails half-way.
            with requests.post(OLLAMA_URL, json=payload, stream=True) as response:
                print(f"Ollama API 响应状态码: {response.status_code}")
                print(f"Ollama API 响应头: {response.headers}")
                if response.status_code != 200:
                    raise RuntimeError(f"Ollama API 错误: {response.status_code}")
                answer = self.process_stream_response(response)
        except requests.RequestException as e:
            print(f"请求 Ollama API 时出错: {str(e)}")
            raise
        return {
            "original_answer": answer,
            "extracted_info": self.extract_info(answer),
            "num_frames": len(frames),
        }

    def process_stream_response(self, response):
        """Concatenate the ``response`` fields of a line-delimited JSON stream.

        Blank lines are skipped, lines that are not valid JSON are reported
        and ignored, and reading stops at the first object whose ``done``
        field is truthy.

        Args:
            response: An object exposing ``iter_lines()``.

        Returns:
            The concatenated text.
        """
        pieces = []
        for line in response.iter_lines():
            if not line:
                continue
            try:
                chunk = json.loads(line)
            except json.JSONDecodeError:
                print(f"无法解析 JSON 行: {line}")
                continue
            if 'response' in chunk:
                pieces.append(chunk['response'])
            if chunk.get('done', False):
                break
        return ''.join(pieces)

    @staticmethod
    def image_to_base64(image):
        """Return ``image`` encoded as PNG and then as a base64 string."""
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode()

    @staticmethod
    def extract_time_from_filename(object_name):
        """Derive a 10-second time window from a ``YYYYMMDD_HHMMSS`` file name.

        Args:
            object_name: Path or name of the video file.

        Returns:
            ``(start_time, end_time)`` with ``end_time`` exactly 10 seconds
            after ``start_time``. When the name cannot be parsed, the
            current local time is used as the start.
        """
        filename = os.path.basename(object_name)
        parts = filename.split('_')
        try:
            # IndexError covers names without any underscore.
            time_str = parts[0] + '_' + parts[1].split('.')[0]
            start_time = datetime.strptime(time_str, "%Y%m%d_%H%M%S")
        except (IndexError, ValueError):
            print(f"无法从文件名 '{filename}' 解析时间。使用默认时间。")
            start_time = datetime.now()
        return start_time, start_time + timedelta(seconds=10)

    @staticmethod
    def extract_info(answer):
        """Extract structured facts from the model's free-text answer.

        Args:
            answer: The text returned by the model.

        Returns:
            A dict with keys ``environment`` (first matching keyword or
            ``None``), ``num_people`` (int or ``None``), and the lists
            ``actions``, ``objects``, ``furniture``, ``emotions`` and
            ``features`` holding each keyword found, once, in list order.
        """
        info = {
            "environment": None,
            "num_people": None,
            "actions": [],
            "objects": [],
            "furniture": [],
            "emotions": [],
            "features": [],
        }

        environments = ["办公室", "室内", "室外", "会议室", "房间", "教室", "客厅", "卧室", "厨房", "浴室", "走廊", "过道"]
        lowered = answer.lower()
        for env in environments:
            if env in lowered:
                info["environment"] = env
                break

        people_patterns = [
            r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
            r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
            r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)',
            r'\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?',
            r'(男|女)(性|生|士)',
            r'(成年|未成年|青少年|老年)\s*(人|群体)',
            r'(员工|职工|工人|学生|顾客|观众|游客|乘客)',
            r'(群众|民众|大众|公众)',
            r'(男女|老少|老幼|大人|小孩)',
        ]
        # Keys follow the numerals accepted by the second pattern above.
        num_word_to_digit = {
            '一个': 1, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
            '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
        }
        for pattern in people_patterns:
            match = re.search(pattern, answer)
            if match:
                token = match.group(1)
                if token.isdigit():
                    info["num_people"] = int(token)
                else:
                    # 0 means people were mentioned but the matched text
                    # carries no recognisable number.
                    info["num_people"] = num_word_to_digit.get(token, 0)
                break

        # NOTE(review): the empty-string entries below look like keywords
        # whose characters were lost; restore them if the originals are known.
        actions = [
            "睡眠", "", "", "", "", "摔倒", "跳舞", "", "蹲下", "转身", "", "",
            "倒下", "躺下", "转身", "", "跳跃", "", "", "", "说话", "睡觉", "起床",
            "看书", "写字", "学习", "玩手机", "吃饭", "搬东西", "看风景", "走路",
            "散步", "", "阅读", "写作", "使用手机", "使用电脑", "学习", "工作",
            "使用笔记本电脑", "吃饭", "喝水", "整理",
        ]
        emotions = ["高兴", "愤怒", "悲伤", "惊讶", "恐惧", "厌恶", "平静", "放松", "中性", "专注", "思考"]
        objects = [
            "水瓶", "办公用品", "文件", "电脑", "风扇", "鼠标", "键盘", "纸巾", "", "",
            "袋子", "盒子", "水杯", "杯子", "马克杯", "玻璃杯", "文件夹", "书包",
            "书架", "文件柜", "手机",
        ]
        furniture = [
            "椅子", "桌子", "咖啡桌", "文件柜", "", "沙发", "柜子", "架子", "摄像头",
            "靠垫", "办公椅", "电视", "白板", "显示器", "置物架", "文件架",
        ]
        features = [
            "戴眼镜", "不戴眼镜", "长发", "短发", "长头发", "短头发", "戴帽子",
            "不戴帽子", "戴口罩", "不戴口罩", "男性", "女性", "", "", "", "", "", "",
            "成年人",
        ]

        def found(keywords):
            # Skip empty keywords (they are a substring of every answer) and
            # report a repeated keyword only once.
            hits = []
            for keyword in keywords:
                if keyword and keyword in answer and keyword not in hits:
                    hits.append(keyword)
            return hits

        info["actions"] = found(actions)
        info["objects"] = found(objects)
        info["furniture"] = found(furniture)
        info["features"] = found(features)
        info["emotions"] = found(emotions)
        return info
# Initialise a module-level MediaAnalysisSystem instance.
# NOTE(review): nothing in the visible code reads this instance (main() builds
# its own) — confirm whether other modules import it.
media_analysis_system = MediaAnalysisSystem()
class MediaAnalysisError(Exception):
    """Custom exception raised for media-analysis failures."""
def process_video_folder(system, folder_path, output_path=None):
    """Analyse every video file in a folder and save the results as JSON.

    The result file is rewritten after each video, so partial results
    survive an interruption. A video whose analysis raises is recorded with
    an ``error`` entry instead of aborting the run.

    Args:
        system: Object exposing ``process_video(video_data, object_name)``.
        folder_path: Directory containing the videos.
        output_path: Directory for the result file. Defaults to the current
            working directory; it is created if it does not exist.

    Returns:
        A dict mapping each video file name to
        ``{"video_analysis": {"minicpm": <result or {"error": message}>}}``.

    Raises:
        MediaAnalysisError: If ``folder_path`` is not an existing directory
            or contains no supported video file.
    """
    # Supported video formats.
    valid_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    results = {}

    # isdir (rather than exists) also rejects a path that names a regular
    # file, which os.listdir could not handle.
    if not os.path.isdir(folder_path):
        raise MediaAnalysisError(f"错误:文件夹 '{folder_path}' 不存在")

    if output_path is None:
        output_path = os.getcwd()
    else:
        os.makedirs(output_path, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    video_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.splitext(name)[1].lower() in valid_extensions
        and os.path.isfile(os.path.join(folder_path, name))
    )
    if not video_files:
        raise MediaAnalysisError(f"错误:在文件夹 '{folder_path}' 中未找到支持的视频文件")
    print(f"\n找到 {len(video_files)} 个视频文件,开始处理...\n")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = os.path.basename(os.path.normpath(folder_path))
    output_file = os.path.join(output_path, f"analysis_results_{folder_name}_{timestamp}.json")

    def save_results():
        # Rewrite the whole result file with everything collected so far.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

    for i, video_file in enumerate(video_files, 1):
        video_path = os.path.join(folder_path, video_file)
        print(f"正在处理 ({i}/{len(video_files)}): {video_file}")
        try:
            with open(video_path, "rb") as f:
                video_data = f.read()
            result = system.process_video(video_data, video_file)
            results[video_file] = {
                "video_analysis": {
                    "minicpm": result
                }
            }
            save_results()
            print(f"✓ 成功处理并保存: {video_file}")
        except Exception as e:
            print(f"✗ 处理失败 {video_file}: {str(e)}")
            results[video_file] = {
                "video_analysis": {
                    "minicpm": {"error": str(e)}
                }
            }
            # Persist the failure too, so the file always reflects progress.
            save_results()

    print(f"\n所有分析结果已保存到: {output_file}")
    return results
# NOTE(review): a class with the same name and body is also defined earlier in
# this file; this later definition is the one the name is bound to once the
# module has finished loading. Consider keeping only one of them.
class MediaAnalysisError(Exception):
    """Custom exception raised for media-analysis failures."""
def main():
    """Prompt for a video folder and an output folder, then analyse the videos.

    Prints a summary with the number of successfully analysed videos. All
    errors are reported on stdout rather than propagated.
    """
    try:
        system = MediaAnalysisSystem()
        folder_path = input("请输入视频文件夹路径: ").strip()
        # An empty answer means "use the current directory" (passed as None).
        output_path = input("请输入结果保存路径 (直接回车使用当前目录): ").strip() or None

        results = process_video_folder(system, folder_path, output_path)

        # The error marker is nested under video_analysis -> minicpm, so it
        # has to be looked up there rather than at the top level of an entry.
        success_count = sum(
            1 for entry in results.values()
            if "error" not in entry.get("video_analysis", {}).get("minicpm", {})
        )
        print(f"\n处理完成!成功: {success_count}/{len(results)}")
    except MediaAnalysisError as e:
        print(f"\n错误: {str(e)}")
    except Exception as e:
        print(f"\n未预期的错误: {str(e)}")
# Run the interactive entry point only when executed as a script.
if __name__ == "__main__":
    main()