Files
api/api_history/video_s3.py
T
2025-01-12 06:15:15 +00:00

256 lines
9.8 KiB
Python

import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
from decord import VideoReader, cpu
import json
import re
from pymongo import MongoClient
import io
from minio import Minio
import time
from bson import ObjectId
import concurrent.futures
import os
class MinioHandler:
    """Thin wrapper around a MinIO client for listing and fetching video objects."""

    # Extensions considered to be processable video files.
    _VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.flv')

    def __init__(self, endpoint, access_key, secret_key, secure=True):
        self.client = Minio(
            endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure
        )

    def list_objects(self, bucket_name, prefix):
        """Return all objects under *prefix* whose names look like video files."""
        objects = self.client.list_objects(bucket_name, prefix=prefix, recursive=True)
        return [obj for obj in objects if obj.object_name.lower().endswith(self._VIDEO_EXTS)]

    def get_video_data(self, bucket_name, object_name):
        """Download an object and return its raw bytes, or None on any error.

        Errors are deliberately swallowed (best-effort): callers treat None
        as "skip this video".
        """
        response = None
        try:
            response = self.client.get_object(bucket_name, object_name)
            return response.read()
        except Exception as e:
            print(f"Error retrieving video data for {object_name}: {str(e)}")
            return None
        finally:
            # Fix: the original leaked the underlying HTTP connection; the
            # MinIO SDK requires close() + release_conn() after consumption.
            if response is not None:
                response.close()
                response.release_conn()
class DatabaseHandler:
    """MongoDB access layer tracking which videos have already been analysed."""

    def __init__(self, mongo_uri, database_name, results_collection_name):
        self.client = MongoClient(mongo_uri)
        self.db = self.client[database_name]
        self.results_collection = self.db[results_collection_name]

    def get_unprocessed_videos(self, minio_handler, bucket_name='raw', prefix='videoupload/'):
        """Return metadata dicts for bucket objects whose etag has no stored result."""
        seen_etags = set(self.results_collection.distinct('etag'))
        pending = []
        for obj in minio_handler.list_objects(bucket_name, prefix):
            if obj.etag in seen_etags:
                continue
            pending.append({
                'bucket_name': bucket_name,
                'object_name': obj.object_name,
                'etag': obj.etag,
                'size': obj.size,
                'last_modified': obj.last_modified,
            })
        return pending

    def save_result(self, result):
        """Insert *result* unless a document with the same etag already exists.

        NOTE(review): find-then-insert is racy if several workers run in
        parallel — confirm a unique index exists on 'etag' in the deployment.
        """
        if self.results_collection.find_one({'etag': result['etag']}) is not None:
            print(f"Video with etag {result['etag']} has already been processed. Skipping.")
            return
        # Mongo ObjectId values are not JSON-friendly; store them as strings.
        if 'video_id' in result and isinstance(result['video_id'], ObjectId):
            result['video_id'] = str(result['video_id'])
        self.results_collection.insert_one(result)
class JSONEncoder(json.JSONEncoder):
    """json.JSONEncoder that serialises BSON ObjectId values as strings."""

    def default(self, o):
        # Stringify ObjectIds; defer everything else to the base encoder.
        return str(o) if isinstance(o, ObjectId) else super().default(o)
class VideoProcessor:
    """Runs the MiniCPM-V chat model over sampled video frames and mines
    structured facts out of the model's free-text (Chinese) answer."""

    def __init__(self, model_dir):
        # Model is loaded once onto the GPU in bfloat16; requires CUDA.
        self.model = AutoModel.from_pretrained(model_dir, trust_remote_code=True,
                                               attn_implementation='sdpa', torch_dtype=torch.bfloat16).eval().cuda()
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
        # Upper bound on the number of frames fed to the model per video.
        self.MAX_NUM_FRAMES = 12

    def encode_video(self, video_data):
        """Decode *video_data* (raw bytes) into at most MAX_NUM_FRAMES PIL
        images, sampled at roughly one frame per second."""
        def uniform_sample(l, n):
            # Pick n indices spread evenly across l (midpoint of each stride).
            gap = len(l) / n
            return [l[int(i * gap + gap / 2)] for i in range(n)]
        video_file = io.BytesIO(video_data)
        vr = VideoReader(video_file, ctx=cpu(0))
        # ~1 fps sampling. NOTE(review): a source fps below 0.5 would round
        # to 0 and make range() raise — confirm inputs are normal videos.
        sample_fps = round(vr.get_avg_fps() / 1)
        frame_idx = list(range(0, len(vr), sample_fps))
        if len(frame_idx) > self.MAX_NUM_FRAMES:
            frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES)
        frames = vr.get_batch(frame_idx).asnumpy()
        frames = [Image.fromarray(v.astype('uint8')) for v in frames]
        print('num frames:', len(frames))
        return frames

    def process_video(self, video_data, object_name):
        """Run the chat model over the video and return the raw answer plus
        the keyword-extracted summary.

        Raises ValueError when *video_data* is falsy (e.g. download failed).
        """
        if not video_data:
            raise ValueError(f"Empty video data for {object_name}")
        print(f"Processing video: {object_name}, data size: {len(video_data)} bytes")
        frames = self.encode_video(video_data)
        question = "Describe the video in as much detail as possible in Chinese, including the setting, clear number of people, and changes in behavior."
        msgs = [
            {'role': 'user', 'content': frames + [question]},
        ]
        params = {
            "use_image_id": False,
            "max_slice_nums": 1
        }
        answer = self.model.chat(
            image=None,
            msgs=msgs,
            tokenizer=self.tokenizer,
            **params
        )
        extracted_info = self.extract_info(answer)
        return {
            "original_answer": answer,
            "extracted_info": extracted_info,
            "num_frames": len(frames),
        }

    @staticmethod
    def extract_info(answer):
        """Keyword-scan the Chinese *answer* for environment, head-count,
        actions, interactions, objects and furniture mentions."""
        info = {
            "environment": None,
            "num_people": None,
            "actions": [],
            "interactions": [],
            "objects": [],
            "furniture": []
        }
        # First matching environment keyword wins.
        environments = ["办公室", "室内", "室外", "会议室", "办公"]
        for env in environments:
            if env in answer.lower():
                info["environment"] = env
                break
        people_patterns = [
            r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
            r'(一|二|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)',
            r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)',
            r'\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?',
            r'(男|女)(性|生|士)',
            r'(成年|未成年|青少年|老年)\s*(人|群体)',
            r'(员工|职工|工人|学生|顾客|观众|游客|乘客)',
            r'(群众|民众|大众|公众)',
            r'(男女|老少|老幼|大人|小孩)'
        ]
        for pattern in people_patterns:
            match = re.search(pattern, answer)
            if match:
                if match.group(1).isdigit():
                    info["num_people"] = int(match.group(1))
                elif match.group(1) in ['一个', '一']:
                    # Fix: the second entry was mojibake ('') which matched
                    # nothing useful; '一' is what the regex above captures.
                    info["num_people"] = 1
                else:
                    # Fix: the original keys were destroyed by mojibake
                    # (all '' — collapsing the dict to {'': 10}); keys are
                    # reconstructed from the regex alternation 二..十.
                    num_word_to_digit = {
                        '二': 2, '三': 3, '四': 4, '五': 5,
                        '六': 6, '七': 7, '八': 8, '九': 9, '十': 10
                    }
                    info["num_people"] = num_word_to_digit.get(match.group(1), 0)
                break
        # Fix: mojibake left '' entries in these keyword lists; '' is a
        # substring of every answer, so each '' was appended unconditionally,
        # polluting the results (转身 also appeared twice). The lost Chinese
        # words are unrecoverable here — TODO: restore from upstream history.
        actions = ["摔倒", "跳舞", "转身", "倒下", "躺下", "跳跃", "说话"]
        interactions = ["互动", "交流", "身体语言", "交谈", "讨论", "开会"]
        objects = ["水瓶", "办公用品", "文件", "电脑"]
        furniture = ["椅子", "桌子", "咖啡桌", "文件柜", "沙发"]
        for action in actions:
            if action in answer:
                info["actions"].append(action)
        for interaction in interactions:
            if interaction in answer:
                info["interactions"].append(interaction)
        for obj in objects:
            if obj in answer:
                info["objects"].append(obj)
        for item in furniture:
            if item in answer:
                info["furniture"].append(item)
        return info
class VideoAnalysisSystem:
    """Top-level orchestrator: polls MinIO for new videos, runs the model
    over each, and persists results to MongoDB."""

    def __init__(self, minio_endpoint, minio_access_key, minio_secret_key,
                 mongo_uri, db_name, model_dir, results_collection_name):
        self.minio_handler = MinioHandler(minio_endpoint, minio_access_key, minio_secret_key)
        self.db_handler = DatabaseHandler(mongo_uri, db_name, results_collection_name)
        self.video_processor = VideoProcessor(model_dir)

    def process_video(self, video_doc):
        """Fetch, analyse and store a single video.

        Any failure is logged with its traceback and swallowed so one bad
        video cannot stop the polling loop.
        """
        start_time = time.time()
        try:
            video_data = self.minio_handler.get_video_data(
                video_doc['bucket_name'], video_doc['object_name'])
            result = self.video_processor.process_video(video_data, video_doc['object_name'])
            # Carry identifying metadata along with the model output so the
            # result can be matched back to the source object.
            result['etag'] = video_doc['etag']
            result['bucket_name'] = video_doc['bucket_name']
            result['object_name'] = video_doc['object_name']
            self.db_handler.save_result(result)
            print(f"Processed video: {video_doc['object_name']}")
            print(f"Processing time: {time.time() - start_time:.2f} seconds")
        except Exception as e:
            print(f"Error processing video {video_doc['object_name']}: {str(e)}")
            print(f"Processing time (including error): {time.time() - start_time:.2f} seconds")
            import traceback
            traceback.print_exc()

    def run(self):
        """Poll forever: process every unprocessed video, then sleep briefly."""
        while True:
            unprocessed_videos = self.db_handler.get_unprocessed_videos(self.minio_handler)
            if not unprocessed_videos:
                # Fix: the message claimed a 5-second wait while the code
                # sleeps 1 second; the message now matches the interval.
                print("No new videos to process. Waiting for 1 second before checking again...")
                time.sleep(1)
                continue
            for video_doc in unprocessed_videos:
                self.process_video(video_doc)
            print("Finished processing current batch of videos. Waiting for new videos...")
            time.sleep(1)
if __name__ == "__main__":
minio_endpoint = "api.obscura.work"
minio_access_key = "MnHTAG2NOLyXXIZrwDLp"
minio_secret_key = "WVlmMgww0aRIU43pCJ1XCjubXQO6YsbHysxX2hBf"
mongo_uri = "mongodb://minio_mongo:BCd4npzKBnwmCRdh@222.186.136.78:27017/minio_mongo"
db_name = "minio_mongo"
results_collection_name = "videoupload_results"
model_dir = "MiniCPM-V-2_6"
system = VideoAnalysisSystem(minio_endpoint, minio_access_key, minio_secret_key,
mongo_uri, db_name, model_dir, results_collection_name)
system.run()