""" convert anyword into llava like format """ import sys import json TYPE = "ego" TYPE = "webvid" TYPE = "videochat2" def convert_json(input_json, json_file_path): data = input_json["data_list"] print(f"processing data list: {len(data)}") output_data = [] for item in data: if "annotations" in item.keys(): new_item = { # "id": f"ego_video_{video_path}", "id": item["img_name"], # "video": f"split_videos/{video_path}", "image": f"images/{item['img_name']}", } new_item["conversations"] = [] ocr_text = [] all_lans = [] for i, QA_data in enumerate(item["annotations"]): ocr_text.append(QA_data["text"]) all_lans.append(QA_data["language"]) should_use = True if "laion" in json_file_path and len(all_lans) > 0: if any(lang != "Latin" for lang in all_lans): should_use = False # print(item) else: print(item) # if len(new_item["conversations"]) > 5: # # depart conversations into 2 parts # for i in range(0, len(new_item["conversations"]), 10): # data_dict_i = {} # data_dict_i["id"] = new_item["id"] + f"_{i//10}" # data_dict_i["video"] = new_item["video"] # data_dict_i["conversations"] = new_item["conversations"][i : i + 10] # if i != 0: # data_dict_i["conversations"][0][ # "value" # ] = f"