From d5919041fc0d2e5a53dfb012b3b526f6113cf0c8 Mon Sep 17 00:00:00 2001 From: Han Wang <109300432+Hon-Wong@users.noreply.github.com> Date: Fri, 25 Oct 2024 16:40:59 +0800 Subject: [PATCH] Update merge_result.py --- eval/merge_result.py | 110 ++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/eval/merge_result.py b/eval/merge_result.py index 5059ba1..fb85cd4 100644 --- a/eval/merge_result.py +++ b/eval/merge_result.py @@ -35,59 +35,61 @@ def b2str(b): return "[" + ",".join([str(int(pos)) for pos in b]) + "]" results = {} -for jsonfile in args.files_to_merge: - print(jsonfile) - f = open(jsonfile, "r").read() - # flat_outputs = json.loads(f.read()) - lines = [l + "}" for l in f.split("}\n")] - start_frame_idx = 0 - is_first_line = True - for line in lines: - try: - line = json.loads(line) - except: - if len(line) < 4: - continue - # print(line) - _id = line["id"] - seq_id, clip_id = _id.split("|") - seq_id = seq_id - if seq_id not in results: - is_first_line = True - start_frame_idx = 0 - else: - is_first_line = False - clip_id = int(clip_id) - image_size = line["image_size"] - predict = line["predict"] - gt = line["gt"] - pred_bb = parse_box_from_raw_text(predict) - anno_bb = parse_box_from_raw_text(gt) - if len(pred_bb) > len(anno_bb): - pred_bb = pred_bb[:len(anno_bb)] - elif len(pred_bb) < len(anno_bb): - pad_len = len(anno_bb) - len(pred_bb) - for i in range(pad_len): - pred_bb.append([0., 0., 0., 0.]) - clip_frame_count = len(pred_bb) if is_first_line else len(pred_bb) - 1 +jsonfile = args.files_to_merge +print(jsonfile) +f = open(jsonfile, "r").read() +# flat_outputs = json.loads(f.read()) +lines = [l + "}" for l in f.split("}\n")] +start_frame_idx = 0 +is_first_line = True +for line in lines: + try: + line = json.loads(line) + except: + if len(line) < 4: + continue + # print(line) + _id = line["id"] + seq_id, clip_id = _id.split("|") + seq_id = seq_id + if seq_id not in results: + is_first_line = True + start_frame_idx = 0 + else: + is_first_line = False + clip_id = int(clip_id) + image_size = line["image_size"] + predict = line["predict"] + gt = line["gt"] + pred_bb = parse_box_from_raw_text(predict) + anno_bb = parse_box_from_raw_text(gt) + if len(pred_bb) > len(anno_bb): + pred_bb = pred_bb[:len(anno_bb)] + elif len(pred_bb) < len(anno_bb): + pad_len = len(anno_bb) - len(pred_bb) + for i in range(pad_len): + pred_bb.append([0., 0., 0., 0.]) + clip_frame_count = len(pred_bb) if is_first_line else len(pred_bb) - 1 - pred_bb = pred_bb[-clip_frame_count:] - anno_bb = anno_bb[-clip_frame_count:] - - if seq_id not in results: - results[seq_id] = {} - results[seq_id]["predict"] = ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(pred_bb[i])}" for i in range(0, clip_frame_count)) - results[seq_id]["gt"] = ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(anno_bb[i])}" for i in range(0, clip_frame_count)) - results[seq_id]["image_size"] = image_size - results[seq_id]["source"] = "unknown" - results[seq_id]["vid"] = seq_id - results[seq_id]["id"] = seq_id - clip_count = 1 - else: - clip_count += 1 - if clip_count < 1000000000: # see if metrics go lower when processing longer frames, set to a large value to evaluate the whole video - results[seq_id]["predict"] += ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(pred_bb[i])}" for i in range(0, clip_frame_count)) - results[seq_id]["gt"] += ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(anno_bb[i])}" for i in range(0, clip_frame_count)) - start_frame_idx += clip_frame_count + pred_bb = pred_bb[-clip_frame_count:] + anno_bb = anno_bb[-clip_frame_count:] + + if seq_id not in results: + results[seq_id] = {} + results[seq_id]["predict"] = ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(pred_bb[i])}" for i in range(0, clip_frame_count)) + results[seq_id]["gt"] = ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(anno_bb[i])}" for i in range(0, clip_frame_count)) + results[seq_id]["image_size"] = image_size + results[seq_id]["source"] = "unknown" + results[seq_id]["vid"] = seq_id + results[seq_id]["id"] = seq_id + clip_count = 1 + else: + clip_count += 1 + if clip_count < 1000000000: # see if metrics go lower when processing longer frames, set to a large value to evaluate the whole video + results[seq_id]["predict"] += ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(pred_bb[i])}" for i in range(0, clip_frame_count)) + results[seq_id]["gt"] += ",".join(f"Frame {start_frame_idx + i + 1}: {b2str(anno_bb[i])}" for i in range(0, clip_frame_count)) + start_frame_idx += clip_frame_count -json.dump(list(results.values()), open(args.output_file, "w"), indent=4, ensure_ascii=False) +with open(args.output_file, "w") as f: + for line in list(results.values()): + f.write(json.dumps(line, ensure_ascii=False) + '\n')