-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvision_cli.py
151 lines (133 loc) · 8.47 KB
/
vision_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
# Copyright (C) 2025 Anthony Casagrande
# AGPL-3.0 license
import argparse
from ffmpeg_decode import FfmpegVisionProcessor
def parse_resolution_string(res_str: str) -> tuple[int, int]:
    """
    Parse a resolution string like '3840x2160' or '1920,1080' into (width, height).

    The separator may be 'x' (case-insensitive) or ','. Surrounding whitespace
    around the whole string or around either number is ignored.

    Args:
        res_str: The resolution text to parse.

    Returns:
        A ``(width, height)`` tuple of ints.

    Raises:
        ValueError: If no supported separator is present, if the string does not
            split into exactly two parts, or if either part is not an integer.
    """
    error_message = f"Invalid resolution format: '{res_str}'. Use e.g. 1920x1080"

    # Normalize once, *before* looking for the separator, so that an upper-case
    # 'X' (e.g. '1920X1080') is recognised as well.
    normalized = res_str.strip().lower()

    # Try splitting by 'x' first, then fall back to ',' if needed.
    for separator in ("x", ","):
        if separator in normalized:
            parts = normalized.split(separator)
            break
    else:
        raise ValueError(error_message)

    # Reject inputs such as '1x2x3' with a clear message instead of an
    # unpacking error.
    if len(parts) != 2:
        raise ValueError(error_message)

    try:
        return int(parts[0]), int(parts[1])
    except ValueError:
        raise ValueError(error_message) from None
def parse_bool(value: str) -> bool:
    """
    Map a textual boolean to a Python bool.

    The input is stripped and lower-cased before matching. 'true', '1' and
    'yes' map to True; 'false', '0' and 'no' map to False.

    Raises:
        ValueError: If the normalized text is not one of the accepted spellings.
    """
    token = value.strip().lower()
    if token in ('true', '1', 'yes'):
        return True
    if token in ('false', '0', 'no'):
        return False
    # The message reports the normalized token, not the raw input.
    raise ValueError(f"Invalid boolean value: '{token}'")
def build_parser() -> argparse.ArgumentParser:
    """
    Build the command-line argument parser for the RC Race Vision tool.

    Resolution-like options (``--out-size``, ``--yolo-size``, ``--preview-size``,
    ``--triangle-size``, ``--tracker-video-size``, ``--target-bbox-size``) are
    declared as plain strings; this parser does not convert them.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="RC Race Vision")

    # Required positional argument
    parser.add_argument(
        "video_path",
        type=str,
        help="Path to the input video file."
    )

    # Optional arguments
    parser.add_argument(
        "--start-time", type=float, default=0,
        help="Start time in seconds (default: 0)."
    )
    parser.add_argument(
        "--end-time", type=float, default=0,
        help="End time in seconds (default: 0 for no limit)."
    )
    parser.add_argument(
        "--vid-stride", type=int, default=1,
        help="Process every Nth frame (default: 1)."
    )
    parser.add_argument(
        "--out-size", type=str, default="1280x720",
        help="Output resolution as WIDTHxHEIGHT (default: 1280x720)."
    )
    parser.add_argument(
        "--yolo-size", type=str, default="1280x720",
        help="YOLO input resolution as WIDTHxHEIGHT (default: 1280x720)."
    )
    parser.add_argument(
        "--preview-size", type=str, default="1280x720",
        help="Preview window resolution as WIDTHxHEIGHT (default: 1280x720)."
    )
    # Tri-state option: omitted -> None, bare flag -> True (const),
    # explicit value -> parse_bool(value).
    # NOTE(review): None presumably means "autodetect" downstream — confirm
    # against FfmpegVisionProcessor.
    # NOTE: because the value is optional, a bare --use-cuda placed directly
    # before the positional video_path will consume that path as its value.
    parser.add_argument(
        "--use-cuda", type=parse_bool, nargs='?', const=True, default=None,
        help="Use GPU acceleration (True/False). If provided without a value, it is enabled; if omitted, autodetect."
    )
    parser.add_argument(
        "--iou", type=float, default=0.2,
        help="IOU threshold for object detection (default: 0.2)."
    )
    parser.add_argument(
        "--conf", type=float, default=0.01,
        help="Confidence threshold for object detection (default: 0.01)."
    )

    # Boolean flags
    parser.add_argument("--show-plot", action="store_true", help="If set, display the detection result plots.")
    parser.add_argument("--show-preview", action="store_true", help="If set, show a preview window with detections.")
    parser.add_argument("--save-video", action="store_true", help="If set, save output video with detections.")
    parser.add_argument("--save-lost-trackers", action="store_true", help="If set, save data of lost trackers.")

    # Drawing options
    parser.add_argument("--show-trace", action="store_true", help="If set, draw the path/trace of objects.")
    parser.add_argument("--trace-length", type=int, default=500, help="Maximum length of each object's trace (default: 500).")
    parser.add_argument("--trace-thickness", type=int, default=3, help="Line thickness for traces (default: 3).")
    parser.add_argument("--show-box", action="store_true", help="If set, draw bounding boxes around detections.")
    parser.add_argument("--show-label", action="store_true", help="If set, draw text labels above bounding boxes.")
    parser.add_argument("--show-triangle", action="store_true", help="If set, draw a triangle marker on each detected object.")
    parser.add_argument("--triangle-size", type=str, default="10x10", help="Triangle size as WIDTHxHEIGHT (default: 10x10).")
    parser.add_argument("--triangle-thickness", type=int, default=2, help="Line thickness for triangles (default: 2).")

    # Auto tracker and model options
    parser.add_argument("--save-auto-tracker", action="store_true", help="If set, save auto tracker data.")
    parser.add_argument("--auto-tracker-preview", action="store_true", help="If set, show a preview window of auto tracker steps.")
    parser.add_argument("--model-file", type=str, default="weights/train29s.pt", help="Path to the model weights file (default: weights/train29s.pt).")

    # Stabilization options
    parser.add_argument(
        "--stabilization-classes", nargs='*', default=["Exalt", "ORCA Orange", "ORCA Blue", "Fridge"],
        help="List of classes to stabilize (default: ['Exalt','ORCA Orange','ORCA Blue','Fridge'])."
    )
    parser.add_argument("--primary-stabilization-class", type=str, default="Exalt", help="Main class used for stabilization (default: 'Exalt').")
    # store_true means the flag is off unless given, so the default is False.
    parser.add_argument("--agnostic-nms", action="store_true", help="If set, use class-agnostic NMS (default: False).")
    parser.add_argument("--stabilize", action="store_true", help="If set, attempt to stabilize the video.")
    parser.add_argument("--trail-stabilize", action="store_true", help="If set, apply additional stabilization on object trails.")
    parser.add_argument("--gyro-adjust", action="store_true", help="If set, apply gyroscopic adjustments to the video (experimental).")

    # Per-tracker video options
    parser.add_argument("--tracker-output-dir", type=str, default="tracked_cars", help="Directory to save tracker videos (default: tracked_cars).")
    parser.add_argument("--min-tracker-seconds", type=float, default=1.0, help="Minimum tracker lifespan in seconds before saving (default: 1.0).")
    parser.add_argument("--save-tracker-videos", action="store_true", help="If set, save individual tracker videos.")
    parser.add_argument("--tracker-video-size", type=str, default="608x1080", help="Cropped tracker video size as WIDTHxHEIGHT (default: 608x1080).")
    parser.add_argument("--target-bbox-size", type=str, default="150x50", help="Approx. bounding box size of tracker target as WIDTHxHEIGHT (default: 150x50).")
    parser.add_argument("--tracker-lose-track", type=int, default=30, help="Number of frames allowed to lose track before giving up (default: 30).")
    parser.add_argument("--tracker-video-auto-scale", action="store_true", help="If set, automatically zoom tracker videos to keep a consistent bounding box size.")
    parser.add_argument("--tracker-scale-min", type=float, default=0.5, help="Minimum scale factor for auto-scaling (default: 0.5).")
    parser.add_argument("--tracker-scale-max", type=float, default=1.5, help="Maximum scale factor for auto-scaling (default: 1.5).")
    parser.add_argument("--tracker-scale-rate", type=float, default=1.10, help="Rate at which changes in bbox size affect the zoom (default: 1.10).")
    parser.add_argument("--tracker-scale-min-delta", type=float, default=0.2, help="Minimum delta in scale before applying changes (default: 0.2).")
    parser.add_argument("--tracker-scale-smooth-delta", type=float, default=0.01, help="Maximum per-frame scale change (default: 0.01).")
    parser.add_argument("--auto-tracker-timeout", type=float, default=5, help="Time in seconds to stay on the same tracker ID before stopping (default: 5).")
    parser.add_argument("--auto-tracker-concurrent-missing", type=int, default=3, help="Number of consecutive missing frames before switching track ID (default: 3).")

    # Output and tracking configuration
    parser.add_argument("--save-json", action="store_true", help="If set, also save results to a JSON file.")
    parser.add_argument("--no-track", action="store_true", default=False, help="Disable tracking (default: False)")
    parser.add_argument("--tracker-file", type=str, default="./trackers/botsort_cfg.yaml", help="Path to the tracker configuration file (default: ./trackers/botsort_cfg.yaml)")

    return parser
def main():
    """
    Command-line entry point.

    Parses the arguments, converts every resolution-like option from its
    'WIDTHxHEIGHT' string into a ``(width, height)`` tuple, then constructs
    ``FfmpegVisionProcessor`` from the parsed arguments and calls its
    ``process_video()`` method.

    A malformed resolution value is reported through ``parser.error`` (usage
    message and exit status 2) rather than as an uncaught ``ValueError``
    traceback.
    """
    parser = build_parser()
    args = parser.parse_args()

    # Convert resolution-like strings to tuples
    resolution_fields = (
        "out_size",
        "yolo_size",
        "preview_size",
        "triangle_size",
        "tracker_video_size",
        "target_bbox_size",
    )
    for field in resolution_fields:
        try:
            setattr(args, field, parse_resolution_string(getattr(args, field)))
        except ValueError as exc:
            # Map the attribute name back to its option spelling for the message.
            parser.error(f"argument --{field.replace('_', '-')}: {exc}")

    # Instantiate the processor using all parsed arguments; every attribute of
    # the namespace is forwarded as a keyword argument.
    processor = FfmpegVisionProcessor(**vars(args))
    processor.process_video()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()