256 lines
9.8 KiB
Python
256 lines
9.8 KiB
Python
import torch
|
|
from PIL import Image
|
|
from transformers import AutoModel, AutoTokenizer
|
|
from decord import VideoReader, cpu
|
|
import json
|
|
import re
|
|
from pymongo import MongoClient
|
|
import io
|
|
from minio import Minio
|
|
import time
|
|
from bson import ObjectId
|
|
import concurrent.futures
|
|
import os
|
|
|
|
class MinioHandler:
    """Thin wrapper around a MinIO client for listing and downloading videos."""

    # Lower-case file suffixes that list_objects() treats as video files.
    VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.flv')

    def __init__(self, endpoint, access_key, secret_key, secure=True):
        """Create the underlying ``Minio`` client.

        Args:
            endpoint: Host (and optional port) of the MinIO/S3 service.
            access_key: Access key used to authenticate.
            secret_key: Secret key used to authenticate.
            secure: Passed through to ``Minio``; defaults to True.
        """
        self.client = Minio(
            endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,
        )

    def list_objects(self, bucket_name, prefix):
        """Return the objects under *prefix* that look like video files.

        The listing is recursive; an object is kept when its name, compared
        case-insensitively, ends with one of ``VIDEO_EXTENSIONS``.
        """
        objects = self.client.list_objects(bucket_name, prefix=prefix, recursive=True)
        return [
            obj for obj in objects
            if obj.object_name.lower().endswith(self.VIDEO_EXTENSIONS)
        ]

    def get_video_data(self, bucket_name, object_name):
        """Download one object and return its content as bytes.

        Returns:
            The object's bytes, or ``None`` when the download fails (the error
            is printed, not raised, so callers must check for ``None``).
        """
        response = None
        try:
            response = self.client.get_object(bucket_name, object_name)
            return response.read()
        except Exception as e:
            # Deliberate best-effort: report the failure and let the caller
            # decide what to do with a missing payload.
            print(f"Error retrieving video data for {object_name}: {str(e)}")
            return None
        finally:
            # The response holds a network connection; it must be closed and
            # released whether or not read() succeeded, otherwise every
            # download leaks a connection.
            if response is not None:
                response.close()
                response.release_conn()
|
|
|
|
class DatabaseHandler:
    """MongoDB-backed store for per-video analysis results."""

    def __init__(self, mongo_uri, database_name, results_collection_name):
        """Connect to MongoDB and keep handles to the database and results collection."""
        self.client = MongoClient(mongo_uri)
        self.db = self.client[database_name]
        self.results_collection = self.db[results_collection_name]

    def get_unprocessed_videos(self, minio_handler, bucket_name='raw', prefix='videoupload/'):
        """List videos in the bucket whose etag has no stored result yet.

        Returns:
            A list of dicts with the keys ``bucket_name``, ``object_name``,
            ``etag``, ``size`` and ``last_modified``, one per pending object.
        """
        candidates = minio_handler.list_objects(bucket_name, prefix)
        # Every etag already present in the results collection counts as done.
        seen_etags = set(self.results_collection.distinct('etag'))

        pending = []
        for entry in candidates:
            if entry.etag in seen_etags:
                continue
            pending.append({
                'bucket_name': bucket_name,
                'object_name': entry.object_name,
                'etag': entry.etag,
                'size': entry.size,
                'last_modified': entry.last_modified
            })
        return pending

    def save_result(self, result):
        """Insert *result* unless a document with the same etag already exists.

        An ``ObjectId`` stored under ``video_id`` is converted to ``str``
        (in place) before the insert.
        """
        if self.results_collection.find_one({'etag': result['etag']}):
            print(f"Video with etag {result['etag']} has already been processed. Skipping.")
            return

        if 'video_id' in result:
            video_id = result['video_id']
            if isinstance(video_id, ObjectId):
                result['video_id'] = str(video_id)

        self.results_collection.insert_one(result)
|
|
|
|
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises BSON ``ObjectId`` values as strings."""

    def default(self, o):
        """Return ``str(o)`` for an ``ObjectId``; defer to the base class otherwise."""
        if not isinstance(o, ObjectId):
            # The base implementation handles (or rejects) every other type.
            return super().default(o)
        return str(o)
|
|
|
|
class VideoProcessor:
    """Describe a video with a vision-language chat model and mine the answer for keywords."""

    # Keyword vocabularies scanned for (by substring) in extract_info().
    # Order matters for _ENVIRONMENTS: the first hit wins.
    _ENVIRONMENTS = ("办公室", "室内", "室外", "会议室", "办公")
    _ACTIONS = ("坐", "站", "摔倒", "跳舞", "转身", "摔", "倒", "倒下", "躺下",
                "跳跃", "跳", "躺", "睡", "说话")
    _INTERACTIONS = ("互动", "交流", "身体语言", "交谈", "讨论", "开会")
    _OBJECTS = ("水瓶", "办公用品", "文件", "电脑")
    _FURNITURE = ("椅子", "桌子", "咖啡桌", "文件柜", "床", "沙发")

    # Tried in order; the first pattern that matches decides num_people.
    # Group 1 is the count token when the pattern carries one.
    _PEOPLE_PATTERNS = (
        re.compile(r'(\d+)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)'),
        # "两" is the usual form of "two" before a measure word (两名, 两位, 两个人).
        re.compile(r'(一|二|两|三|四|五|六|七|八|九|十)\s*(人|个人|位|名|员工|用户|小朋友|成年人|女性|男性)'),
        re.compile(r'(一个|几个)\s*(人|个人|员工|用户|小朋友|成年人|女性|男性)'),
        re.compile(r'几\s*(名|位)\s*(人|员工|用户|小朋友|成年人|女性|男性)?'),
        re.compile(r'(男|女)(性|生|士)'),
        re.compile(r'(成年|未成年|青少年|老年)\s*(人|群体)'),
        re.compile(r'(员工|职工|工人|学生|顾客|观众|游客|乘客)'),
        re.compile(r'(群众|民众|大众|公众)'),
        re.compile(r'(男女|老少|老幼|大人|小孩)'),
    )
    _NUM_WORD_TO_DIGIT = {
        '一': 1, '一个': 1, '二': 2, '两': 2, '三': 3, '四': 4, '五': 5,
        '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
    }

    def __init__(self, model_dir):
        """Load the model and tokenizer from *model_dir* and move the model to CUDA.

        The model is loaded in bfloat16 with SDPA attention and put in eval mode.
        """
        self.model = AutoModel.from_pretrained(
            model_dir, trust_remote_code=True,
            attn_implementation='sdpa', torch_dtype=torch.bfloat16,
        ).eval().cuda()
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
        # Upper bound on the number of frames handed to the model per video.
        self.MAX_NUM_FRAMES = 12

    def encode_video(self, video_data):
        """Decode *video_data* (raw bytes) into a list of sampled PIL images.

        Frames are taken roughly once per second of video; when that yields
        more than ``MAX_NUM_FRAMES`` frames, they are thinned out uniformly.
        """
        def uniform_sample(seq, n):
            # Pick the element at the midpoint of each of n equal-width bins.
            gap = len(seq) / n
            return [seq[int(i * gap + gap / 2)] for i in range(n)]

        video_file = io.BytesIO(video_data)
        vr = VideoReader(video_file, ctx=cpu(0))
        # Step between sampled frames = rounded average fps. Clamp to 1 so a
        # very low frame rate (< 0.5 fps) cannot produce a zero range() step.
        sample_fps = max(1, round(vr.get_avg_fps()))
        frame_idx = list(range(0, len(vr), sample_fps))
        if len(frame_idx) > self.MAX_NUM_FRAMES:
            frame_idx = uniform_sample(frame_idx, self.MAX_NUM_FRAMES)
        frames = vr.get_batch(frame_idx).asnumpy()
        frames = [Image.fromarray(v.astype('uint8')) for v in frames]
        print('num frames:', len(frames))
        return frames

    def process_video(self, video_data, object_name):
        """Ask the model to describe the video and extract structured keywords.

        Args:
            video_data: Raw bytes of the video file.
            object_name: Name used only in log and error messages.

        Returns:
            A dict with ``original_answer`` (the model's text),
            ``extracted_info`` (see extract_info) and ``num_frames``.

        Raises:
            ValueError: If *video_data* is empty or ``None``.
        """
        if not video_data:
            raise ValueError(f"Empty video data for {object_name}")
        print(f"Processing video: {object_name}, data size: {len(video_data)} bytes")
        frames = self.encode_video(video_data)
        question = "Describe the video in as much detail as possible in Chinese, including the setting, clear number of people, and changes in behavior."
        msgs = [
            {'role': 'user', 'content': frames + [question]},
        ]

        params = {
            "use_image_id": False,
            "max_slice_nums": 1,
        }

        answer = self.model.chat(
            image=None,
            msgs=msgs,
            tokenizer=self.tokenizer,
            **params
        )

        extracted_info = self.extract_info(answer)

        return {
            "original_answer": answer,
            "extracted_info": extracted_info,
            "num_frames": len(frames),
        }

    @staticmethod
    def _keywords_in(text, vocabulary):
        """Return the vocabulary entries that occur in *text*, in vocabulary order."""
        return [word for word in vocabulary if word in text]

    @staticmethod
    def extract_info(answer):
        """Extract environment, people count and keyword lists from the model's answer.

        Returns:
            A dict with the keys ``environment`` (first matching environment
            keyword or ``None``), ``num_people`` (``None`` when no people
            pattern matches; ``0`` when people are mentioned without a count
            this method can read), and the lists ``actions``,
            ``interactions``, ``objects`` and ``furniture``.
        """
        cls = VideoProcessor
        info = {
            "environment": None,
            "num_people": None,
            "actions": [],
            "interactions": [],
            "objects": [],
            "furniture": [],
        }

        lowered = answer.lower()
        info["environment"] = next(
            (env for env in cls._ENVIRONMENTS if env in lowered), None
        )

        for pattern in cls._PEOPLE_PATTERNS:
            match = pattern.search(answer)
            if not match:
                continue
            token = match.group(1)
            if token.isdigit():
                info["num_people"] = int(token)
            else:
                # Tokens without a numeric meaning (e.g. "几个", "员工") map to 0.
                info["num_people"] = cls._NUM_WORD_TO_DIGIT.get(token, 0)
            break

        info["actions"] = cls._keywords_in(answer, cls._ACTIONS)
        info["interactions"] = cls._keywords_in(answer, cls._INTERACTIONS)
        info["objects"] = cls._keywords_in(answer, cls._OBJECTS)
        info["furniture"] = cls._keywords_in(answer, cls._FURNITURE)

        return info
|
|
|
|
class VideoAnalysisSystem:
    """Poll object storage for new videos, analyse each one and store the result."""

    def __init__(self, minio_endpoint, minio_access_key, minio_secret_key,
                 mongo_uri, db_name, model_dir, results_collection_name):
        """Build the storage handler, the database handler and the video processor."""
        self.minio_handler = MinioHandler(minio_endpoint, minio_access_key, minio_secret_key)
        self.db_handler = DatabaseHandler(mongo_uri, db_name, results_collection_name)
        self.video_processor = VideoProcessor(model_dir)

    def process_video(self, video_doc):
        """Download, analyse and persist a single video.

        Args:
            video_doc: Dict with at least ``bucket_name``, ``object_name``
                and ``etag``.

        Any exception is caught, printed with a traceback and swallowed so
        that one bad video does not stop the polling loop in run(). Nothing
        is saved for a failed video.
        """
        start_time = time.time()
        try:
            video_data = self.minio_handler.get_video_data(
                video_doc['bucket_name'], video_doc['object_name']
            )
            result = self.video_processor.process_video(video_data, video_doc['object_name'])

            # Carry the storage identity along so the result can be matched
            # back to its source object.
            result['etag'] = video_doc['etag']
            result['bucket_name'] = video_doc['bucket_name']
            result['object_name'] = video_doc['object_name']

            self.db_handler.save_result(result)

            processing_time = time.time() - start_time
            print(f"Processed video: {video_doc['object_name']}")
            print(f"Processing time: {processing_time:.2f} seconds")
        except Exception as e:
            import traceback

            processing_time = time.time() - start_time
            print(f"Error processing video {video_doc['object_name']}: {str(e)}")
            print(f"Processing time (including error): {processing_time:.2f} seconds")
            traceback.print_exc()

    def run(self, poll_interval=1):
        """Poll forever for unprocessed videos and process each batch in order.

        Args:
            poll_interval: Seconds to sleep between polls, both when nothing
                is pending and after a batch completes. Defaults to 1, the
                interval this loop has always slept for.
        """
        while True:
            unprocessed_videos = self.db_handler.get_unprocessed_videos(self.minio_handler)

            if not unprocessed_videos:
                # The message reports the interval actually slept; it used to
                # claim 5 seconds while sleeping 1.
                print(f"No new videos to process. Waiting {poll_interval} second(s) before checking again...")
                time.sleep(poll_interval)
                continue

            for video_doc in unprocessed_videos:
                self.process_video(video_doc)

            print("Finished processing current batch of videos. Waiting for new videos...")
            time.sleep(poll_interval)
|
|
|
|
def load_settings(environ=None):
    """Read the constructor arguments for VideoAnalysisSystem from the environment.

    Credentials must never live in source control, so the MinIO keys and the
    MongoDB URI (which embeds a password) are required environment variables.
    Non-secret settings fall back to the values this script used previously.

    Args:
        environ: Mapping to read from; defaults to ``os.environ``.

    Returns:
        A dict keyed by VideoAnalysisSystem's constructor parameter names.

    Raises:
        RuntimeError: If a required variable is missing or empty.
    """
    env = os.environ if environ is None else environ
    required = ("MINIO_ACCESS_KEY", "MINIO_SECRET_KEY", "MONGO_URI")
    missing = [name for name in required if not env.get(name)]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )
    return {
        "minio_endpoint": env.get("MINIO_ENDPOINT", "api.obscura.work"),
        "minio_access_key": env["MINIO_ACCESS_KEY"],
        "minio_secret_key": env["MINIO_SECRET_KEY"],
        "mongo_uri": env["MONGO_URI"],
        "db_name": env.get("MONGO_DB_NAME", "minio_mongo"),
        "model_dir": env.get("MODEL_DIR", "MiniCPM-V-2_6"),
        "results_collection_name": env.get("RESULTS_COLLECTION_NAME", "videoupload_results"),
    }


def main():
    """Build the analysis system from environment settings and start polling."""
    try:
        settings = load_settings()
    except RuntimeError as exc:
        # Exit with a readable message instead of a traceback.
        raise SystemExit(str(exc)) from exc
    system = VideoAnalysisSystem(**settings)
    system.run()


if __name__ == "__main__":
    # NOTE(review): the keys and database password that used to be hard-coded
    # here were committed in plain text and should be rotated.
    main()