From 0334a96a6b6c49c5e1a7833f30fb6e32da6c202c Mon Sep 17 00:00:00 2001 From: ahmetburakgozel Date: Tue, 9 Aug 2022 09:01:14 +0300 Subject: [PATCH 1/4] necessary changes were made. --- README.md | 88 +++--- configs/repopt/yolov6s_hs.py | 6 +- configs/repopt/yolov6s_opt.py | 6 +- configs/yolov6_tiny.py | 4 +- configs/yolov6_tiny_finetune.py | 4 +- configs/yolov6n.py | 4 +- configs/yolov6n_finetune.py | 4 +- configs/yolov6s.py | 4 +- configs/yolov6s_finetune.py | 4 +- data/coco.yaml | 8 +- data/dataset.yaml | 4 +- data/voc.yaml | 4 +- deploy/ONNX/README.md | 15 +- deploy/ONNX/eval_trt.py | 10 +- deploy/ONNX/export_onnx.py | 15 +- deploy/OpenVINO/README.md | 4 + deploy/OpenVINO/export_openvino.py | 3 +- docs/About_naming_yolov6.md | 7 +- docs/Test_speed.md | 3 +- docs/Train_custom_data.md | 19 +- docs/tutorial_repopt.md | 38 ++- docs/tutorial_voc.ipynb | 104 ++++++-- tools/eval.py | 6 +- tools/infer.py | 14 +- tools/partial_quantization/README.md | 21 +- tools/partial_quantization/eval.py | 1 + tools/partial_quantization/partial_quant.py | 4 +- tools/partial_quantization/ptq.py | 32 ++- .../sensitivity_analyse.py | 13 +- tools/partial_quantization/utils.py | 9 + .../tensorrt/post_training/Calibrator.py | 14 +- .../tensorrt/post_training/README.md | 11 +- .../post_training/onnx_to_tensorrt.py | 72 +++-- .../tensorrt/training_aware/QAT_quantizer.py | 1 + tools/train.py | 11 +- yolov6/core/engine.py | 57 ++-- yolov6/core/evaler.py | 31 ++- yolov6/core/inferer.py | 179 ++++++------- yolov6/data/data_augment.py | 4 +- yolov6/data/data_load.py | 30 +-- yolov6/data/datasets.py | 252 +++++++++--------- yolov6/data/vis_dataset.py | 15 +- yolov6/data/voc2yolo.py | 9 +- yolov6/layers/common.py | 108 +++++--- yolov6/layers/dbb_transforms.py | 9 +- yolov6/models/efficientrep.py | 11 +- yolov6/models/effidehead.py | 15 +- yolov6/models/end2end.py | 78 +++--- yolov6/models/loss.py | 65 ++--- yolov6/models/reppan.py | 9 +- yolov6/models/yolo.py | 1 + yolov6/utils/RepOptimizer.py | 33 ++- yolov6/utils/events.py | 1 + yolov6/utils/figure_iou.py | 1 + yolov6/utils/general.py | 1 + yolov6/utils/nms.py | 4 +- 56 files changed, 864 insertions(+), 616 deletions(-) diff --git a/README.md b/README.md index 44aa5011..f5356de2 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,26 @@ # YOLOv6 + ## Introduction -YOLOv6 is a single-stage object detection framework dedicated to industrial applications, with hardware-friendly efficient design and high performance. +YOLOv6 is a single-stage object detection framework dedicated to industrial applications, with hardware-friendly +efficient design and high performance. -YOLOv6-nano achieves 35.0 mAP on COCO val2017 dataset with 1242 FPS on T4 using TensorRT FP16 for bs32 inference, and YOLOv6-s achieves 43.1 mAP on COCO val2017 dataset with 520 FPS on T4 using TensorRT FP16 for bs32 inference. +YOLOv6-nano achieves 35.0 mAP on COCO val2017 dataset with 1242 FPS on T4 using TensorRT FP16 for bs32 inference, and +YOLOv6-s achieves 43.1 mAP on COCO val2017 dataset with 520 FPS on T4 using TensorRT FP16 for bs32 inference. YOLOv6 is composed of the following methods: - Hardware-friendly Design for Backbone and Neck - Efficient Decoupled Head with SIoU Loss - ## Coming soon - [ ] YOLOv6 m/l/x model. - [ ] Deployment for MNN/TNN/NCNN/CoreML... 
- [ ] Quantization tools - ## Quick Start ### Install @@ -58,8 +59,11 @@ python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 256 ``` - conf: select config file to specify network/optimizer/hyperparameters -- data: prepare [COCO](http://cocodataset.org) dataset, [YOLO format coco labels](https://github.com/meituan/YOLOv6/releases/download/0.1.0/coco2017labels.zip) and specify dataset paths in data.yaml +- data: prepare [COCO](http://cocodataset.org) + dataset, [YOLO format coco labels](https://github.com/meituan/YOLOv6/releases/download/0.1.0/coco2017labels.zip) and + specify dataset paths in data.yaml - make sure your dataset structure as follows: + ``` ├── coco │ ├── annotations @@ -75,7 +79,6 @@ python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 256 │ ├── README.txt ``` - ### Evaluation Reproduce mAP on COCO val2017 dataset @@ -86,14 +89,18 @@ python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6s.pt --tas ``` ### Resume + If your training process is corrupted, you can resume training by + ``` # single GPU traning. python tools/train.py --resume # multi GPU training. python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --resume ``` + Your can also specify a checkpoint path to `--resume` parameter by + ``` # remember replace /path/to/your/checkpoint/path to the checkpoint path which you want to resume training. --resume /path/to/your/checkpoint/path @@ -102,37 +109,58 @@ Your can also specify a checkpoint path to `--resume` parameter by ### Deployment -* [ONNX](./deploy/ONNX) -* [OpenVINO](./deploy/OpenVINO) -* [Partial Quantization](./tools/partial_quantization) +* [ONNX](./deploy/ONNX) +* [OpenVINO](./deploy/OpenVINO) +* [Partial Quantization](./tools/partial_quantization) ### Tutorials -* [Train custom data](./docs/Train_custom_data.md) -* [Test speed](./docs/Test_speed.md) -* [Tutorial of RepOpt for YOLOv6](./docs/tutorial_repopt.md) +* [Train custom data](./docs/Train_custom_data.md) +* [Test speed](./docs/Test_speed.md) +* [Tutorial of RepOpt for YOLOv6](./docs/tutorial_repopt.md) ## Benchmark - | Model | Size | mAPval
0.5:0.95 | SpeedV100<br/>fp16 b32<br/>(ms) | SpeedV100<br/>fp32 b32<br/>(ms) | SpeedT4<br/>trt fp16 b1<br/>(fps) | SpeedT4<br/>trt fp16 b32<br/>(fps) | Params<br/>(M) | Flops<br/>(G) |
| :-------------- | ----------- | :----------------------- | :------------------------------------ | :------------------------------------ | ---------------------------------------- | ----------------------------------------- | --------------- | -------------- |
-| [**YOLOv6-n**](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6n.pt) | 416<br/>640 | 30.8<br/>35.0 | 0.3<br/>0.5 | 0.4<br/>0.7 | 1100<br/>788 | 2716<br/>1242 | 4.3<br/>4.3 | 4.7<br/>
11.1 | -| [**YOLOv6-tiny**](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6t.pt) | 640 | 41.3 | 0.9 | 1.5 | 425 | 602 | 15.0 | 36.7 | -| [**YOLOv6-s**](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.pt) | 640 | 43.1 | 1.0 | 1.7 | 373 | 520 | 17.2 | 44.2 | - - -- Comparisons of the mAP and speed of different object detectors are tested on [COCO val2017](https://cocodataset.org/#download) dataset. +| [** +YOLOv6-n**](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6n.pt) | 416
640 | 30.8<br/>35.0 | 0.3<br/>0.5 | 0.4<br/>0.7 | 1100<br/>788 | 2716<br/>1242 | 4.3<br/>4.3 | 4.7<br/>
11.1 | +| [** +YOLOv6-tiny**](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6t.pt) | 640 | 41.3 | 0.9 | 1.5 | 425 | 602 | 15.0 | 36.7 | +| [** +YOLOv6-s**](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.pt) | 640 | 43.1 | 1.0 | 1.7 | 373 | 520 | 17.2 | 44.2 | + +- Comparisons of the mAP and speed of different object detectors are tested + on [COCO val2017](https://cocodataset.org/#download) dataset. - Refer to [Test speed](./docs/Test_speed.md) tutorial to reproduce the speed results of YOLOv6. - Params and Flops of YOLOv6 are estimated on deployed model. -- Speed results of other methods are tested in our environment using official codebase and model if not found from the corresponding official release. - - ## Third-party resources - * YOLOv6 NCNN Android app demo: [ncnn-android-yolov6](https://github.com/FeiGeChuanShu/ncnn-android-yolov6) from [FeiGeChuanShu](https://github.com/FeiGeChuanShu) - * YOLOv6 ONNXRuntime/MNN/TNN C++: [YOLOv6-ORT](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolov6.cpp), [YOLOv6-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolov6.cpp) and [YOLOv6-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolov6.cpp) from [DefTruth](https://github.com/DefTruth) - * YOLOv6 TensorRT Python: [yolov6-tensorrt-python](https://github.com/Linaom1214/tensorrt-python/blob/main/yolov6/trt.py) from [Linaom1214](https://github.com/Linaom1214) - * YOLOv6 TensorRT Windows C++: [yolort](https://github.com/zhiqwang/yolov5-rt-stack/tree/main/deployment/tensorrt-yolov6) from [Wei Zeng](https://github.com/Wulingtian) - * YOLOv6 Quantization and Auto Compression Example [YOLOv6-ACT](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/auto_compression/pytorch_yolov6) from [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) - * [YOLOv6 web demo](https://huggingface.co/spaces/nateraw/yolov6) on [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/nateraw/yolov6) - * Tutorial: [How to train YOLOv6 on a custom dataset](https://blog.roboflow.com/how-to-train-yolov6-on-a-custom-dataset/) Open In Colab - * Demo of YOLOv6 inference on Google Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mahdilamb/YOLOv6/blob/main/inference.ipynb) +- Speed results of other methods are tested in our environment using official codebase and model if not found from the + corresponding official release. 
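For reference, the parameter counts in the table can be sanity-checked directly on a released checkpoint. The snippet below is only a rough sketch, assuming the checkpoint is a dict that stores the deploy-mode module under a `model` key (run it from the repo root so the pickled model classes resolve); FLOPs are usually measured with a separate profiler such as `thop`.

```python
# Minimal sketch: count the parameters of a downloaded YOLOv6 checkpoint, e.g. yolov6s.pt.
import torch

ckpt = torch.load('yolov6s.pt', map_location='cpu')
model = ckpt['model'] if isinstance(ckpt, dict) and 'model' in ckpt else ckpt
num_params = sum(p.numel() for p in model.parameters())
print(f'Params: {num_params / 1e6:.1f} M')
```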
+ +## Third-party resources + +* YOLOv6 NCNN Android app demo: [ncnn-android-yolov6](https://github.com/FeiGeChuanShu/ncnn-android-yolov6) + from [FeiGeChuanShu](https://github.com/FeiGeChuanShu) +* YOLOv6 ONNXRuntime/MNN/TNN + C++: [YOLOv6-ORT](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolov6.cpp) + , [YOLOv6-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolov6.cpp) + and [YOLOv6-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolov6.cpp) + from [DefTruth](https://github.com/DefTruth) +* YOLOv6 TensorRT + Python: [yolov6-tensorrt-python](https://github.com/Linaom1214/tensorrt-python/blob/main/yolov6/trt.py) + from [Linaom1214](https://github.com/Linaom1214) +* YOLOv6 TensorRT Windows + C++: [yolort](https://github.com/zhiqwang/yolov5-rt-stack/tree/main/deployment/tensorrt-yolov6) + from [Wei Zeng](https://github.com/Wulingtian) +* YOLOv6 Quantization and Auto Compression + Example [YOLOv6-ACT](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/auto_compression/pytorch_yolov6) + from [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) +* [YOLOv6 web demo](https://huggingface.co/spaces/nateraw/yolov6) on [Huggingface Spaces](https://huggingface.co/spaces) + with [Gradio](https://github.com/gradio-app/gradio) + . [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/nateraw/yolov6) +* + +Tutorial: [How to train YOLOv6 on a custom dataset](https://blog.roboflow.com/how-to-train-yolov6-on-a-custom-dataset/) Open In Colab + +* Demo of YOLOv6 inference on Google + Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mahdilamb/YOLOv6/blob/main/inference.ipynb) diff --git a/configs/repopt/yolov6s_hs.py b/configs/repopt/yolov6s_hs.py index fef6b2f0..06b5715b 100644 --- a/configs/repopt/yolov6s_hs.py +++ b/configs/repopt/yolov6s_hs.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], @@ -51,4 +51,4 @@ ) # Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"] -training_mode='hyper_search' +training_mode = 'hyper_search' diff --git a/configs/repopt/yolov6s_opt.py b/configs/repopt/yolov6s_opt.py index 571f7e3a..ecfd2ef2 100644 --- a/configs/repopt/yolov6s_opt.py +++ b/configs/repopt/yolov6s_opt.py @@ -9,12 +9,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], @@ -52,4 +52,4 @@ ) # Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"] -training_mode='repopt' +training_mode = 'repopt' diff --git a/configs/yolov6_tiny.py b/configs/yolov6_tiny.py index 7fb2eee7..08cab337 100644 --- a/configs/yolov6_tiny.py +++ b/configs/yolov6_tiny.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], diff --git 
a/configs/yolov6_tiny_finetune.py b/configs/yolov6_tiny_finetune.py index 1c0e03e8..2743d2d7 100644 --- a/configs/yolov6_tiny_finetune.py +++ b/configs/yolov6_tiny_finetune.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], diff --git a/configs/yolov6n.py b/configs/yolov6n.py index b191364f..25a0598b 100644 --- a/configs/yolov6n.py +++ b/configs/yolov6n.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], diff --git a/configs/yolov6n_finetune.py b/configs/yolov6n_finetune.py index 2f7b4dd0..81ff630a 100644 --- a/configs/yolov6n_finetune.py +++ b/configs/yolov6n_finetune.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], diff --git a/configs/yolov6s.py b/configs/yolov6s.py index 7b3b4b43..d0b427c1 100644 --- a/configs/yolov6s.py +++ b/configs/yolov6s.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], diff --git a/configs/yolov6s_finetune.py b/configs/yolov6s_finetune.py index 4d0f33c9..bb361410 100644 --- a/configs/yolov6s_finetune.py +++ b/configs/yolov6s_finetune.py @@ -8,12 +8,12 @@ type='EfficientRep', num_repeats=[1, 6, 12, 18, 6], out_channels=[64, 128, 256, 512, 1024], - ), + ), neck=dict( type='RepPAN', num_repeats=[12, 12, 12, 12], out_channels=[256, 128, 128, 256, 256, 512], - ), + ), head=dict( type='EffiDeHead', in_channels=[128, 256, 512], diff --git a/data/coco.yaml b/data/coco.yaml index 28faa6d5..fffad598 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -1,7 +1,7 @@ # COCO 2017 dataset http://cocodataset.org -train: ../coco/images/train2017 # 118287 images -val: ../coco/images/val2017 # 5000 images -test: ../coco/images/test2017 +train: C:/Users/ahmtb/YOLOv6coco/images/train2017 # 118287 images +val: C:/Users/ahmtb/YOLOv6coco/images/train2017 # 5000 images +test: C:/Users/ahmtb/YOLOv6coco/images/train2017 anno_path: ../coco/annotations/instances_val2017.json # number of classes nc: 80 @@ -17,4 +17,4 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', ' 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush' ] + 'hair drier', 'toothbrush' ] \ No newline at end of file diff --git a/data/dataset.yaml b/data/dataset.yaml index 6e026921..561a299e 100644 --- a/data/dataset.yaml +++ b/data/dataset.yaml @@ -7,5 +7,5 @@ test: ../custom_dataset/images/test # test images (optional) is_coco: False # Classes nc: 20 # number 
of classes -names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', - 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names +names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] # class names diff --git a/data/voc.yaml b/data/voc.yaml index d6aa6a62..590a3513 100644 --- a/data/voc.yaml +++ b/data/voc.yaml @@ -7,5 +7,5 @@ test: VOCdevkit/voc_07_12/images/val # test images (optional) is_coco: False # Classes nc: 20 # number of classes -names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', - 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names +names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] # class names diff --git a/deploy/ONNX/README.md b/deploy/ONNX/README.md index d1f7c22c..f1d4fb67 100644 --- a/deploy/ONNX/README.md +++ b/deploy/ONNX/README.md @@ -1,11 +1,13 @@ # Export ONNX Model ## Check requirements + ```shell pip install onnx>=1.10.0 ``` ## Export script + ```shell python ./deploy/ONNX/export_onnx.py \ --weights yolov6s.pt \ @@ -13,8 +15,6 @@ python ./deploy/ONNX/export_onnx.py \ --batch 1 ``` - - #### Description of all arguments - `--weights` : The path of yolov6 model weights. @@ -45,6 +45,7 @@ Now YOLOv6 supports end to end detect for onnxruntime and TensorRT ! If you want to deploy in TensorRT, make sure you have installed TensorRT >= 8.0.0 ! ### onnxruntime backend + #### Usage ```bash @@ -77,7 +78,8 @@ python ./deploy/ONNX/export_onnx.py \ --trt-version 7 ``` -You will get an onnx with **[BatchedNMSDynamic_TRT](https://github.com/triple-Mu/TensorRT/tree/main/plugin/batchedNMSPlugin)** plugin . +You will get an onnx +with **[BatchedNMSDynamic_TRT](https://github.com/triple-Mu/TensorRT/tree/main/plugin/batchedNMSPlugin)** plugin . ### TensorRT backend (TensorRT version>= 8.0.0) @@ -92,7 +94,8 @@ python ./deploy/ONNX/export_onnx.py \ --trt-version 8 ``` -You will get an onnx with **[EfficientNMS_TRT](https://github.com/NVIDIA/TensorRT/tree/main/plugin/efficientNMSPlugin)** plugin . +You will get an onnx with **[EfficientNMS_TRT](https://github.com/NVIDIA/TensorRT/tree/main/plugin/efficientNMSPlugin)** +plugin . ### Outputs Description @@ -108,8 +111,9 @@ The onnx outputs are as shown : ```det_classes``` means the category of every topk(100) objects . +You can export TensorRT engine +use [trtexec](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#trtexec-ovr) tools. -You can export TensorRT engine use [trtexec](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#trtexec-ovr) tools. #### Usage For both TensorRT-7 and TensorRT-8 `trtexec` tool is avaiable. 
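Before the trtexec usage below, the exported end-to-end model can also be smoke-tested directly with onnxruntime. The sketch is unofficial: the input name `images`, the 640x640 shape and the 0-1 normalization are assumed from the export arguments above, and a plain resize stands in for the repo's letterbox preprocessing.

```python
# Sketch only: run an end2end YOLOv6 ONNX model with onnxruntime and read its four outputs.
import cv2
import numpy as np
import onnxruntime as ort

img = cv2.imread('data/images/image1.jpg')
x = cv2.resize(img, (640, 640))[:, :, ::-1].transpose(2, 0, 1)   # BGR->RGB, HWC->CHW
x = np.ascontiguousarray(x, dtype=np.float32)[None] / 255.0      # normalize, add batch dim

session = ort.InferenceSession('yolov6s.onnx', providers=['CPUExecutionProvider'])
num_dets, det_boxes, det_scores, det_classes = session.run(None, {'images': x})
for i in range(int(num_dets.flatten()[0])):
    # boxes are xyxy in the 640x640 input space; rescale to the original image if needed
    print(int(det_classes[0][i]), float(det_scores[0][i]), det_boxes[0][i])
```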
@@ -124,6 +128,7 @@ trtexec --onnx=yolov6s.onnx \ ## Evaluate TensorRT model's performance When we get the TensorRT model, we can evalute its performance by: + ``` python deploy/ONNX/eval_trt.py --weights yolov6s.engine --batch-size=1 --data data/coco.yaml ``` diff --git a/deploy/ONNX/eval_trt.py b/deploy/ONNX/eval_trt.py index fe1c297c..74951194 100644 --- a/deploy/ONNX/eval_trt.py +++ b/deploy/ONNX/eval_trt.py @@ -38,14 +38,14 @@ def run(data, task='val', device='', save_dir='', - name = '' + name='' ): """ TensorRT models's evaluation process. """ - # task - assert task== 'val', f'task type can only be val, however you set it to {task}' + # task + assert task == 'val', f'task type can only be val, however you set it to {task}' save_dir = str(increment_name(osp.join(save_dir, name))) os.makedirs(save_dir, exist_ok=True) @@ -57,9 +57,9 @@ def run(data, # init val = Evaler(data, batch_size, img_size, None, \ - None, device, False, save_dir) + None, device, False, save_dir) - dataloader,pred_result = val.eval_trt(weights) + dataloader, pred_result = val.eval_trt(weights) eval_result = val.eval_model(pred_result, dummy_model, dataloader, task) return eval_result diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py index 11cecf24..6955c76a 100644 --- a/deploy/ONNX/export_onnx.py +++ b/deploy/ONNX/export_onnx.py @@ -19,7 +19,6 @@ from yolov6.utils.checkpoint import load_checkpoint from io import BytesIO - if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path') @@ -31,7 +30,8 @@ parser.add_argument('--end2end', action='store_true', help='export end2end onnx') parser.add_argument('--trt-version', type=int, default=8, help='tensorrt version') parser.add_argument('--with-preprocess', action='store_true', help='export bgr2rgb and normalize') - parser.add_argument('--max-wh', type=int, default=None, help='None for tensorrt nms, int value for onnx-runtime nms') + parser.add_argument('--max-wh', type=int, default=None, + help='None for tensorrt nms, int value for onnx-runtime nms') parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every images') parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS') parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS') @@ -66,8 +66,10 @@ m.inplace = args.inplace if args.end2end: from yolov6.models.end2end import End2End - model = End2End(model, max_obj=args.topk_all, iou_thres=args.iou_thres,score_thres=args.conf_thres, - max_wh=args.max_wh, device=device, trt_version=args.trt_version, with_preprocess=args.with_preprocess) + + model = End2End(model, max_obj=args.topk_all, iou_thres=args.iou_thres, score_thres=args.conf_thres, + max_wh=args.max_wh, device=device, trt_version=args.trt_version, + with_preprocess=args.with_preprocess) y = model(img) # dry run @@ -81,7 +83,7 @@ do_constant_folding=True, input_names=['images'], output_names=['num_dets', 'det_boxes', 'det_scores', 'det_classes'] - if args.end2end and args.max_wh is None else ['outputs'],) + if args.end2end and args.max_wh is None else ['outputs'], ) f.seek(0) # Checks onnx_model = onnx.load(f) # load onnx model @@ -96,6 +98,7 @@ if args.simplify: try: import onnxsim + LOGGER.info('\nStarting to simplify ONNX...') onnx_model, check = onnxsim.simplify(onnx_model) assert check, 'assert check failed' @@ -111,4 +114,4 @@ if args.end2end: if args.max_wh is None: LOGGER.info('\nYou can export tensorrt 
engine use trtexec tools.\nCommand is:') - LOGGER.info(f'trtexec --onnx={export_file} --saveEngine={export_file.replace(".onnx",".engine")}') + LOGGER.info(f'trtexec --onnx={export_file} --saveEngine={export_file.replace(".onnx", ".engine")}') diff --git a/deploy/OpenVINO/README.md b/deploy/OpenVINO/README.md index 365ced75..8a2d7e34 100644 --- a/deploy/OpenVINO/README.md +++ b/deploy/OpenVINO/README.md @@ -1,23 +1,27 @@ ## Export OpenVINO Model ### Check requirements + ```shell pip install --upgrade pip pip install openvino-dev ``` ### Export script + ```shell python deploy/OpenVINO/export_openvino.py --weights yolov6s.pt --img 640 --batch 1 ``` ### Download + * [YOLOv6-nano](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6n_openvino.tar.gz) * [YOLOv6-tiny](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6n_openvino.tar.gz) * [YOLOv6-s](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6n_openvino.tar.gz) ### Speed test + ```shell benchmark_app -m yolov6s_openvino/yolov6s.xml -i data/images/image1.jpg -d CPU -niter 100 -progress diff --git a/deploy/OpenVINO/export_openvino.py b/deploy/OpenVINO/export_openvino.py index 7b59ae0f..7191354d 100644 --- a/deploy/OpenVINO/export_openvino.py +++ b/deploy/OpenVINO/export_openvino.py @@ -19,7 +19,6 @@ from yolov6.utils.events import LOGGER from yolov6.utils.checkpoint import load_checkpoint - if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path') @@ -68,7 +67,7 @@ do_constant_folding=True, input_names=['image_arrays'], output_names=['outputs'], - ) + ) # Checks onnx_model = onnx.load(export_file) # load onnx model diff --git a/docs/About_naming_yolov6.md b/docs/About_naming_yolov6.md index 84b3196b..e158faea 100644 --- a/docs/About_naming_yolov6.md +++ b/docs/About_naming_yolov6.md @@ -1,7 +1,12 @@ # About the naming of YOLOv6 ### WHY named YOLOv6 ? -The full name is actually MT-YOLOv6, which is called YOLOv6 for brevity. Our work is majorly inspired by the original idea of the one-stage YOLO detection algorithm and the implementation has leveraged various techniques and tricks of former relevant work . Therefore, we named the project YOLOv6 to pay tribute to the work of YOLO series. Furthermore, we have indeed adopted some novel method and made solid engineering improvements to dedicate the algorithm to industrial applications. + +The full name is actually MT-YOLOv6, which is called YOLOv6 for brevity. Our work is majorly inspired by the original +idea of the one-stage YOLO detection algorithm and the implementation has leveraged various techniques and tricks of +former relevant work . Therefore, we named the project YOLOv6 to pay tribute to the work of YOLO series. Furthermore, we +have indeed adopted some novel method and made solid engineering improvements to dedicate the algorithm to industrial +applications. As for the project, we'll continue to improve and maintain it, contributing more values for industrial applications. P.S. We are contacting the authors of YOLO series about the naming of YOLOv6. diff --git a/docs/Test_speed.md b/docs/Test_speed.md index 8b1adff5..0c7b9b36 100644 --- a/docs/Test_speed.md +++ b/docs/Test_speed.md @@ -1,6 +1,7 @@ # Test speed -This guidence explains how to reproduce speed results of YOLOv6. For fair comparison, the speed results do not contain the time cost of data pre-processing and NMS post-processing. +This guidence explains how to reproduce speed results of YOLOv6. 
For fair comparison, the speed results do not contain +the time cost of data pre-processing and NMS post-processing. ## 0. Prepare model diff --git a/docs/Train_custom_data.md b/docs/Train_custom_data.md index 35c1ee44..cd312336 100644 --- a/docs/Train_custom_data.md +++ b/docs/Train_custom_data.md @@ -5,13 +5,15 @@ This guidence explains how to train your own custom data with YOLOv6 (take fine- ## 0. Before you start Clone this repo and follow README.md to install requirements in a Python3.8 environment. + ```shell $ git clone https://github.com/meituan/YOLOv6.git ``` ## 1. Prepare your own dataset -**Step 1**: Prepare your own dataset with images. For labeling images, you can use tools like [Labelme](https://github.com/wkentaro/labelme). +**Step 1**: Prepare your own dataset with images. For labeling images, you can use tools +like [Labelme](https://github.com/wkentaro/labelme). **Step 2**: Generate label files in YOLO format. @@ -23,10 +25,10 @@ One image corresponds to one label file, and the label format example is present 1 0.575 0.319531 0.4 0.551562 ``` - - Each row represents one object. - Class id starts from `0`. -- Boundingbox coordinates must be in normalized `xywh` format (from 0 - 1). If your boxes are in pixels, divide `center_x` and `bbox_width` by image width, and `center_y` and `bbox_height` by image height. +- Boundingbox coordinates must be in normalized `xywh` format (from 0 - 1). If your boxes are in pixels, + divide `center_x` and `bbox_width` by image width, and `center_y` and `bbox_height` by image height. **Step 3**: Organize directories. @@ -75,7 +77,8 @@ names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', ' ## 2. Create a config file -We use a config file to specify the network structure and training setting, including optimizer and data augmentation hyperparameters. +We use a config file to specify the network structure and training setting, including optimizer and data augmentation +hyperparameters. If you create a new config file, please put it under the `configs` directory. Or just use the provided config file in `$YOLOV6_HOME/configs/*_finetune.py`. @@ -103,8 +106,6 @@ data_aug = dict( ) ``` - - ## 3. Train Single GPU @@ -119,24 +120,18 @@ Multi GPUs (DDP mode recommended) python -m torch.distributed.launch --nproc_per_node 4 tools/train.py --batch 256 --conf configs/yolov6s_finetune.py --data data/data.yaml --device 0,1,2,3 ``` - - ## 4. Evaluation ```shell python tools/eval.py --data data/data.yaml --weights output_dir/name/weights/best_ckpt.pt --device 0 ``` - - ## 5. Inference ```shell python tools/infer.py --weights output_dir/name/weights/best_ckpt.pt --source img.jpg --device 0 ``` - - ## 6. Deployment Export as [ONNX](https://github.com/meituan/YOLOv6/tree/main/deploy/ONNX) Format diff --git a/docs/tutorial_repopt.md b/docs/tutorial_repopt.md index 607fb42c..57716365 100644 --- a/docs/tutorial_repopt.md +++ b/docs/tutorial_repopt.md @@ -1,28 +1,50 @@ # RepOpt version implementation of YOLOv6 + ## Introduction -This is a RepOpt-version implementation of YOLOv6 according to RepOptimizer: https://arxiv.org/pdf/2205.15242.pdf @DingXiaoH \ + +This is a RepOpt-version implementation of YOLOv6 according to RepOptimizer: https://arxiv.org/pdf/2205.15242.pdf +@DingXiaoH \ It shows some advantages: -1. With only minor changes. it is compatible with the original repvgg version, and it is easy to reproduce the precision comparable with original version. + +1. With only minor changes. 
it is compatible with the original repvgg version, and it is easy to reproduce the precision + comparable with original version. 2. No more train/deploy transform. The target network is consistent when training and deploying. 3. A slight training acceleration of about 8%. -4. Last and the most important, It is quantization friendly. Compared to the original version, the mAP decrease of PTQ can be greatly improved. Furthermore, the architecture of RepOptimizer is friendly to wrap quant-models for QAT. +4. Last and the most important, It is quantization friendly. Compared to the original version, the mAP decrease of PTQ + can be greatly improved. Furthermore, the architecture of RepOptimizer is friendly to wrap quant-models for QAT. ## Training + The training of V6-RepOpt can be divided into two stages, hyperparameter search and target network training. -1. hyperparameter search. This stage is used to get a suitable 'scale' for RepOptimizer, and the result checkpoint can be passed to stage2. Remember to add `training_mode='hyper_search'` in your config. + +1. hyperparameter search. This stage is used to get a suitable 'scale' for RepOptimizer, and the result checkpoint can + be passed to stage2. Remember to add `training_mode='hyper_search'` in your config. + ``` python tools/train.py --batch 32 --conf configs/repopt/yolov6s_hs.py --data data/coco.yaml --device 0 ``` - Or you can directly use the [pretrained scale](https://github.com/xingyueye/YOLOv6/releases/download/0.1.0/yolov6s_scale.pt) we provided and omit this stage. -2. Training. Add the flag of `training_mode='repopt'` and pretraind model `scales='./assets/yolov6s_scale.pt',` in your config +Or you can directly use +the [pretrained scale](https://github.com/xingyueye/YOLOv6/releases/download/0.1.0/yolov6s_scale.pt) we provided and +omit this stage. + +2. Training. Add the flag of `training_mode='repopt'` and pretraind model `scales='./assets/yolov6s_scale.pt',` in your + config + ``` python tools/train.py --batch 32 --conf configs/repopt/yolov6s_opt.py --data data/coco.yaml --device 0 ``` + ## Evaluation -Reproduce mAP on COCO val2017 dataset, you can directly test our [pretrained model](https://github.com/xingyueye/YOLOv6/releases/download/0.1.0/yolov6s_opt.pt). + +Reproduce mAP on COCO val2017 dataset, you can directly test +our [pretrained model](https://github.com/xingyueye/YOLOv6/releases/download/0.1.0/yolov6s_opt.pt). + ``` python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6s_opt.pt --task val ``` + ## Benchmark -We train a yolov6s-repopt with 300epochs, the fp32 mAP is 42.4, while the mAP of PTQ is 40.5. More results is coming soon... + +We train a yolov6s-repopt with 300epochs, the fp32 mAP is 42.4, while the mAP of PTQ is 40.5. More results is coming +soon... 
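To make those two flags concrete, the stage-2 config differs from the hyper-search one in roughly the lines sketched below. This follows the tutorial text above only; `./assets/yolov6s_scale.pt` is just the example path, and the authoritative file is `configs/repopt/yolov6s_opt.py`.

```python
# Sketch of the stage-2 (RepOpt) additions to the training config.
# 'scales' points RepOptimizer at the checkpoint produced by the hyper-search stage
# (depending on the repo version this may sit inside the model dict instead of the top level).
scales = './assets/yolov6s_scale.pt'

# Choose Rep-block by the training mode, choices=["repvgg", "hyper-search", "repopt"]
training_mode = 'repopt'
```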
diff --git a/docs/tutorial_voc.ipynb b/docs/tutorial_voc.ipynb index 8dda21b4..9596bca3 100644 --- a/docs/tutorial_voc.ipynb +++ b/docs/tutorial_voc.ipynb @@ -2,21 +2,33 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Training YOLOv6 on VOC dataset" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Step 1: Prepare VOC dataset" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "| dataset | url | size | images |\n", "| :----: | :----: |:----: | :----: |\n", @@ -27,7 +39,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Download VOC dataset and unzip them, the directory shows like:\n", "```\n", @@ -49,14 +65,22 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Step 2: Convert VOC dataset to YOLO-format." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The VOC dataset use xml format annotations as below. (refer to [VOC2007 guidelines](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/guidelines.html))\n", "```\n", @@ -97,7 +121,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Run the following command to convert voc dataset to yolo format:\n", "\n", @@ -106,7 +134,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "We follow the `07+12` training setting, which means using VOC2007 and VOC2012's train+val(16551) as training set, VOC2007's test(4952) as validation set and testing set.\n", "\n", @@ -130,14 +162,22 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "#### Visualize yolo format dataset (Optional)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "To check if your dataset is correct, run the following command:\n", "\n", @@ -146,14 +186,22 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Step 3: Create dataset config file." 
] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Create `data/voc.yaml` like:\n", "\n", @@ -174,14 +222,22 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Step 4: Training.\n" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Use the following command to start training:\n", "- Multi GPUs (DDP mode recommended)\n", @@ -195,7 +251,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "#### Tensorboard\n", "We can use tensorboard to visualize the train_batch/validation predictions and loss/mAP curve, run:\n", @@ -209,7 +269,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "#### Evaluation\n", "When training finished, it automatically do evaulation on the testset, the output metrics are:\n", @@ -238,7 +302,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### 5.Inference\n", "\n", @@ -276,4 +344,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/tools/eval.py b/tools/eval.py index 2f193043..0b4ce3f9 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -46,7 +46,7 @@ def run(data, model=None, dataloader=None, save_dir='', - name = '' + name='' ): """ Run the evaluation process @@ -57,7 +57,7 @@ def run(data, """ - # task + # task Evaler.check_task(task) if task == 'train': save_dir = save_dir @@ -73,7 +73,7 @@ def run(data, # init val = Evaler(data, batch_size, img_size, conf_thres, \ - iou_thres, device, half, save_dir) + iou_thres, device, half, save_dir) model = val.init_model(model, weights, task) dataloader = val.init_data(dataloader, task) diff --git a/tools/infer.py b/tools/infer.py index 8b6c46da..91d2730e 100644 --- a/tools/infer.py +++ b/tools/infer.py @@ -18,7 +18,8 @@ def get_args_parser(add_help=True): parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Inference.', add_help=add_help) parser.add_argument('--weights', type=str, default='weights/yolov6s.pt', help='model path(s) for inference.') - parser.add_argument('--source', type=str, default='data/images', help='the source path, e.g. image-file/dir.') + parser.add_argument('--source', type=str, default='data/images', + help='the source path, e.g. image-file/dir. or video-file or camera for webcam.') parser.add_argument('--yaml', type=str, default='data/coco.yaml', help='data yaml file.') parser.add_argument('--img-size', type=int, default=640, help='the image-size(h,w) in inference size.') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold for inference.') @@ -27,8 +28,9 @@ def get_args_parser(add_help=True): parser.add_argument('--device', default='0', help='device to run our model i.e. 0 or 0,1,2,3 or cpu.') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt.') parser.add_argument('--save-img', action='store_false', help='save visuallized inference results.') - parser.add_argument('--view-img', action='store_true', help='show inference results') - parser.add_argument('--classes', nargs='+', type=int, help='filter by classes, e.g. 
--classes 0, or --classes 0 2 3.') + parser.add_argument('--view-img', action='store_false', help='show inference results') + parser.add_argument('--classes', nargs='+', type=int, + help='filter by classes, e.g. --classes 0, or --classes 0 2 3.') parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS.') parser.add_argument('--project', default='runs/inference', help='save inference results to project/name.') parser.add_argument('--name', default='exp', help='save inference results to project/name.') @@ -40,6 +42,7 @@ def get_args_parser(add_help=True): LOGGER.info(args) return args + @torch.no_grad() def run(weights=osp.join(ROOT, 'yolov6s.pt'), source=osp.join(ROOT, 'data/images'), @@ -63,7 +66,7 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'), """ Inference process, supporting inference on one image file or directory which containing images. Args: weights: The path of model.pt, e.g. yolov6s.pt - source: Source path, supporting image files or dirs containing images. + source: Source path, supporting image files or dirs containing images or video files or camera for webcam. yaml: Data yaml file, . img_size: Inference image-size, e.g. 640 conf_thres: Confidence threshold in inference, e.g. 0.25 @@ -94,7 +97,8 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'), # Inference inferer = Inferer(source, weights, device, yaml, img_size, half) - inferer.infer(conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf, view_img) + inferer.infer(conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, + hide_conf, view_img) if save_txt or save_img: LOGGER.info(f"Results saved to {save_dir}") diff --git a/tools/partial_quantization/README.md b/tools/partial_quantization/README.md index 3a15a39d..85dea3f6 100644 --- a/tools/partial_quantization/README.md +++ b/tools/partial_quantization/README.md @@ -1,16 +1,24 @@ # Partial Quantization -The performance of YOLOv6s heavily degrades from 42.4% to 35.6% after traditional PTQ, which is unacceptable. To resolve this issue, we propose **partial quantization**. First we analyze the quantization sensitivity of all layers, and then we let the most sensitive layers to have full precision as a compromise. -With partial quantization, we finally reach 42.1%, only 0.3% loss in accuracy, while the throughput of the partially quantized model is about 1.56 times that of the FP16 model at a batch size of 32. This method achieves a nice tradeoff between accuracy and throughput. +The performance of YOLOv6s heavily degrades from 42.4% to 35.6% after traditional PTQ, which is unacceptable. To resolve +this issue, we propose **partial quantization**. First we analyze the quantization sensitivity of all layers, and then +we let the most sensitive layers to have full precision as a compromise. + +With partial quantization, we finally reach 42.1%, only 0.3% loss in accuracy, while the throughput of the partially +quantized model is about 1.56 times that of the FP16 model at a batch size of 32. This method achieves a nice tradeoff +between accuracy and throughput. ## Prerequirements + ```python pip install --extra-index-url=https://pypi.ngc.nvidia.com --trusted-host pypi.ngc.nvidia.com nvidia-pyindex pip install --extra-index-url=https://pypi.ngc.nvidia.com --trusted-host pypi.ngc.nvidia.com pytorch_quantization ``` + ## Sensitivity analysis -Please use the following command to perform sensitivity analysis. 
Since we randomly sample 128 images from train dataset each time, the sensitivity files will be slightly different. +Please use the following command to perform sensitivity analysis. Since we randomly sample 128 images from train dataset +each time, the sensitivity files will be slightly different. ```python python3 sensitivity_analyse.py --weights yolov6s_reopt.pt \ @@ -40,7 +48,10 @@ trtexec --workspace=1024 --percentile=99 --streams=1 --int8 --fp16 --avgRuns=10 ``` ## Performance + | Model | Size | Precision |mAPval
0.5:0.95 | SpeedT4<br/>trt b1<br/>(fps) | SpeedT4<br/>trt b32<br/>
(fps) |
| :-------------- | ----------- | ----------- |:----------------------- | ---------------------------------------- | -----------------------------------|
-| [**YOLOv6-s-partial**]<br/>[bs1](https://github.com/lippman1125/YOLOv6/releases/download/0.1.0/yolov6s_reopt_partial_bs1.sim.onnx)<br/>[bs32](https://github.com/lippman1125/YOLOv6/releases/download/0.1.0/yolov6s_reopt_partial_bs32.sim.onnx)<br/>
| 640 | INT8 |42.1 | 503 | 811 | -| [**YOLOv6-s**] | 640 | FP16 |42.4 | 373 | 520 | +| [** +YOLOv6-s-partial**]
[bs1](https://github.com/lippman1125/YOLOv6/releases/download/0.1.0/yolov6s_reopt_partial_bs1.sim.onnx)<br/>[bs32](https://github.com/lippman1125/YOLOv6/releases/download/0.1.0/yolov6s_reopt_partial_bs32.sim.onnx)<br/>
| 640 | INT8 |42.1 | 503 | 811 | +| [** +YOLOv6-s**] | 640 | FP16 |42.4 | 373 | 520 | diff --git a/tools/partial_quantization/eval.py b/tools/partial_quantization/eval.py index 1266b75c..27d5515a 100644 --- a/tools/partial_quantization/eval.py +++ b/tools/partial_quantization/eval.py @@ -2,6 +2,7 @@ import torch from yolov6.core.evaler import Evaler + class EvalerWrapper(object): def __init__(self, eval_cfg): task = eval_cfg['task'] diff --git a/tools/partial_quantization/partial_quant.py b/tools/partial_quantization/partial_quant.py index 16b7689e..bf63d0d1 100644 --- a/tools/partial_quantization/partial_quant.py +++ b/tools/partial_quantization/partial_quant.py @@ -79,7 +79,7 @@ quant_sensitivity = quant_sensitivity_load(args.sensitivity_file) quant_sensitivity.sort(key=lambda tup: tup[2], reverse=True) boundary = args.quant_boundary - quantable_ops = [qops[0] for qops in quant_sensitivity[:boundary+1]] + quantable_ops = [qops[0] for qops in quant_sensitivity[:boundary + 1]] # only quantize ops in quantable_ops list partial_quant(model_ptq, quantable_ops=quantable_ops) # concat amax fusion @@ -105,7 +105,7 @@ input_names=['image_arrays'], output_names=['outputs'], dynamic_axes=dynamic_axes - ) + ) else: img = torch.zeros(args.export_batch_size, 3, *args.img_size).to(device) export_file = args.weights.replace('.pt', '_partial_bs{}.onnx'.format(args.export_batch_size)) # filename diff --git a/tools/partial_quantization/ptq.py b/tools/partial_quantization/ptq.py index d897e982..614bc9e6 100644 --- a/tools/partial_quantization/ptq.py +++ b/tools/partial_quantization/ptq.py @@ -9,6 +9,7 @@ from tools.partial_quantization.utils import set_module, module_quant_disable + def collect_stats(model, data_loader, batch_number, device='cuda'): """Feed data to the network and collect statistic""" @@ -23,7 +24,7 @@ def collect_stats(model, data_loader, batch_number, device='cuda'): for i, data_tuple in enumerate(data_loader): image = data_tuple[0] - image = image.float()/255.0 + image = image.float() / 255.0 model(image.to(device)) if i + 1 >= batch_number: break @@ -37,6 +38,7 @@ def collect_stats(model, data_loader, batch_number, device='cuda'): else: module.enable() + def compute_amax(model, **kwargs): # Load calib result for name, module in model.named_modules(): @@ -48,6 +50,7 @@ def compute_amax(model, **kwargs): module.load_calib_amax(**kwargs) print(F"{name:40}: {module}") + def quantable_op_check(k, quantable_ops): if quantable_ops is None: return True @@ -57,8 +60,8 @@ def quantable_op_check(k, quantable_ops): else: return False -def quant_model_init(model, device): +def quant_model_init(model, device): model_ptq = copy.deepcopy(model) model_ptq.eval() model_ptq.to(device) @@ -87,8 +90,8 @@ def quant_model_init(model, device): kernel_size, stride, padding, - quant_desc_input = conv2d_input_default_desc, - quant_desc_weight = conv2d_weight_default_desc) + quant_desc_input=conv2d_input_default_desc, + quant_desc_weight=conv2d_weight_default_desc) quant_conv.weight.data.copy_(m.weight.detach()) if m.bias is not None: quant_conv.bias.data.copy_(m.bias.detach()) @@ -107,12 +110,12 @@ def quant_model_init(model, device): stride = m.stride padding = m.padding quant_convtrans = quant_nn.QuantConvTranspose2d(in_channels, - out_channels, - kernel_size, - stride, - padding, - quant_desc_input = convtrans2d_input_default_desc, - quant_desc_weight = convtrans2d_weight_default_desc) + out_channels, + kernel_size, + stride, + padding, + quant_desc_input=convtrans2d_input_default_desc, + 
quant_desc_weight=convtrans2d_weight_default_desc) quant_convtrans.weight.data.copy_(m.weight.detach()) if m.bias is not None: quant_convtrans.bias.data.copy_(m.bias.detach()) @@ -135,7 +138,7 @@ def quant_model_init(model, device): padding, dilation, ceil_mode, - quant_desc_input = conv2d_input_default_desc) + quant_desc_input=conv2d_input_default_desc) set_module(model_ptq, k, quant_maxpool2d) else: # module can not be quantized, continue @@ -143,6 +146,7 @@ def quant_model_init(model, device): return model_ptq.to(device) + def do_ptq(model, train_loader, batch_number, device): model_ptq = quant_model_init(model, device) # It is a bit slow since we collect histograms on CPU @@ -151,11 +155,13 @@ def do_ptq(model, train_loader, batch_number, device): compute_amax(model_ptq, method='entropy') return model_ptq + def load_ptq(model, calib_path, device): model_ptq = quant_model_init(model, device) model_ptq.load_state_dict(torch.load(calib_path)['model'].state_dict()) return model_ptq + def partial_quant(model_ptq, quantable_ops=None): # ops not in quantable_ops will reserve full-precision. for k, m in model_ptq.named_modules(): @@ -163,6 +169,6 @@ def partial_quant(model_ptq, quantable_ops=None): continue # enable full-precision if isinstance(m, quant_nn.QuantConv2d) or \ - isinstance(m, quant_nn.QuantConvTranspose2d) or \ - isinstance(m, quant_nn.QuantMaxPool2d): + isinstance(m, quant_nn.QuantConvTranspose2d) or \ + isinstance(m, quant_nn.QuantMaxPool2d): module_quant_disable(model_ptq, k) diff --git a/tools/partial_quantization/sensitivity_analyse.py b/tools/partial_quantization/sensitivity_analyse.py index 6e2cb1f1..47bfe514 100644 --- a/tools/partial_quantization/sensitivity_analyse.py +++ b/tools/partial_quantization/sensitivity_analyse.py @@ -32,8 +32,8 @@ def quant_sensitivity_analyse(model_ptq, evaler): quant_sensitivity = list() for k, m in model_ptq.named_modules(): if isinstance(m, quant_nn.QuantConv2d) or \ - isinstance(m, quant_nn.QuantConvTranspose2d) or \ - isinstance(m, quant_nn.MaxPool2d): + isinstance(m, quant_nn.QuantConvTranspose2d) or \ + isinstance(m, quant_nn.MaxPool2d): module_quant_enable(model_ptq, k) else: # module can not be quantized, continue @@ -42,14 +42,15 @@ def quant_sensitivity_analyse(model_ptq, evaler): eval_result = evaler.eval(model_ptq) print(eval_result) print("Quantize Layer {}, result mAP0.5 = {:0.4f}, mAP0.5:0.95 = {:0.4f}".format(k, - eval_result[0], - eval_result[1])) + eval_result[0], + eval_result[1])) quant_sensitivity.append((k, eval_result[0], eval_result[1])) # disable this module sensitivity, anlayse next module module_quant_disable(model_ptq, k) return quant_sensitivity + def get_yolov6_config(key): # hard code config_dict = {'yolov6s_reopt.pt': '../../configs/repopt/yolov6s_opt.py'} @@ -110,7 +111,7 @@ def get_yolov6_config(key): # Step1: do post training quantization if args.calib_weights is None: - model_ptq= do_ptq(model, train_loader, args.batch_number, device) + model_ptq = do_ptq(model, train_loader, args.batch_number, device) torch.save({'model': model_ptq}, args.weights.replace('.pt', '_calib.pt')) else: model_ptq = load_ptq(model, args.calib_weights, device) @@ -120,7 +121,7 @@ def get_yolov6_config(key): if args.sensitivity_file is None: quant_sensitivity = quant_sensitivity_analyse(model_ptq, yolov6_evaler) qfile = "{}_quant_sensitivity_{}_calib.txt".format(os.path.basename(args.weights).split('.')[0], - args.batch_size * args.batch_number) + args.batch_size * args.batch_number) quant_sensitivity.sort(key=lambda tup: tup[2], 
reverse=True) quant_sensitivity_save(quant_sensitivity, qfile) else: diff --git a/tools/partial_quantization/utils.py b/tools/partial_quantization/utils.py index a2ebe1af..2e56d372 100644 --- a/tools/partial_quantization/utils.py +++ b/tools/partial_quantization/utils.py @@ -1,6 +1,7 @@ import os from pytorch_quantization import nn as quant_nn + def set_module(model, submodule_key, module): tokens = submodule_key.split('.') sub_tokens = tokens[:-1] @@ -9,6 +10,7 @@ def set_module(model, submodule_key, module): cur_mod = getattr(cur_mod, s) setattr(cur_mod, tokens[-1], module) + def get_module(model, submodule_key): sub_tokens = submodule_key.split('.') cur_mod = model @@ -16,6 +18,7 @@ def get_module(model, submodule_key): cur_mod = getattr(cur_mod, s) return cur_mod + def module_quant_disable(model, k): cur_module = get_module(model, k) if hasattr(cur_module, '_input_quantizer'): @@ -23,6 +26,7 @@ def module_quant_disable(model, k): if hasattr(cur_module, '_weight_quantizer'): cur_module._weight_quantizer.disable() + def module_quant_enable(model, k): cur_module = get_module(model, k) if hasattr(cur_module, '_input_quantizer'): @@ -30,16 +34,19 @@ def module_quant_enable(model, k): if hasattr(cur_module, '_weight_quantizer'): cur_module._weight_quantizer.enable() + def model_quant_disable(model): for name, module in model.named_modules(): if isinstance(module, quant_nn.TensorQuantizer): module.disable() + def model_quant_enable(model): for name, module in model.named_modules(): if isinstance(module, quant_nn.TensorQuantizer): module.enable() + def concat_quant_amax_fuse(ops_list): if len(ops_list) <= 1: return @@ -64,6 +71,7 @@ def concat_quant_amax_fuse(ops_list): elif hasattr(op, '_input_quantizer'): op._input_quantizer._amax.fill_(amax) + def quant_sensitivity_load(file): assert os.path.exists(file), print("File {} does not exist".format(file)) quant_sensitivity = list() @@ -75,6 +83,7 @@ def quant_sensitivity_load(file): return quant_sensitivity + def quant_sensitivity_save(quant_sensitivity, file): with open(file, 'w') as qfile: for item in quant_sensitivity: diff --git a/tools/quantization/tensorrt/post_training/Calibrator.py b/tools/quantization/tensorrt/post_training/Calibrator.py index 8e9a3692..6e8958b2 100755 --- a/tools/quantization/tensorrt/post_training/Calibrator.py +++ b/tools/quantization/tensorrt/post_training/Calibrator.py @@ -35,6 +35,7 @@ datefmt="%Y-%m-%d %H:%M:%S") logger = logging.getLogger(__name__) + def preprocess_yolov6(image, channels=3, height=224, width=224): """Pre-processing for YOLOv6-based Object Detection Models @@ -76,6 +77,7 @@ def preprocess_yolov6(image, channels=3, height=224, width=224): return img_data + def get_int8_calibrator(calib_cache, calib_data, max_calib_size, calib_batch_size): # Use calibration cache if it exists if os.path.exists(calib_cache): @@ -84,7 +86,8 @@ def get_int8_calibrator(calib_cache, calib_data, max_calib_size, calib_batch_siz # Use calibration files from validation dataset if no cache exists else: if not calib_data: - raise ValueError("ERROR: Int8 mode requested, but no calibration data provided. Please provide --calibration-data /path/to/calibration/files") + raise ValueError( + "ERROR: Int8 mode requested, but no calibration data provided. 
Please provide --calibration-data /path/to/calibration/files") calib_files = get_calibration_files(calib_data, max_calib_size) @@ -92,8 +95,8 @@ def get_int8_calibrator(calib_cache, calib_data, max_calib_size, calib_batch_siz preprocess_func = preprocess_yolov6 int8_calibrator = ImageCalibrator(calibration_files=calib_files, - batch_size=calib_batch_size, - cache_file=calib_cache) + batch_size=calib_batch_size, + cache_file=calib_cache) return int8_calibrator @@ -124,7 +127,8 @@ def get_calibration_files(calibration_data, max_calibration_size=None, allowed_e if max_calibration_size: if len(calibration_files) > max_calibration_size: - logger.warning("Capping number of calibration images to max_calibration_size: {:}".format(max_calibration_size)) + logger.warning( + "Capping number of calibration images to max_calibration_size: {:}".format(max_calibration_size)) random.seed(42) # Set seed for reproducibility calibration_files = random.sample(calibration_files, max_calibration_size) @@ -179,7 +183,7 @@ def load_batches(self): else: image = Image.open(self.files[index + offset]) self.batch[offset] = self.preprocess_func(image, *self.input_shape) - logger.info("Calibration images pre-processed: {:}/{:}".format(index+self.batch_size, len(self.files))) + logger.info("Calibration images pre-processed: {:}/{:}".format(index + self.batch_size, len(self.files))) yield self.batch def get_batch_size(self): diff --git a/tools/quantization/tensorrt/post_training/README.md b/tools/quantization/tensorrt/post_training/README.md index a1fa66e4..7c3b7d7e 100644 --- a/tools/quantization/tensorrt/post_training/README.md +++ b/tools/quantization/tensorrt/post_training/README.md @@ -1,7 +1,9 @@ # ONNX -> TensorRT INT8 + These scripts were last tested using the [NGC TensorRT Container Version 20.06-py3](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt). -You can see the corresponding framework versions for this container [here](https://docs.nvidia.com/deeplearning/sdk/tensorrt-container-release-notes/rel_20.06.html#rel_20.06). +You can see the corresponding framework versions for this +container [here](https://docs.nvidia.com/deeplearning/sdk/tensorrt-container-release-notes/rel_20.06.html#rel_20.06). ## Quickstart @@ -9,7 +11,8 @@ You can see the corresponding framework versions for this container [here](https > INT8 Calibration on **dynamic-shape** models is now supported, however this example has not been updated to reflect that yet. For more details on INT8 Calibration for **dynamic-shape** models, please -see the [documentation](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#int8-calib-dynamic-shapes). +see +the [documentation](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#int8-calib-dynamic-shapes). ### 1. Convert ONNX model to TensorRT INT8 @@ -31,7 +34,9 @@ or want to create a new one. ## INT8 Calibration See [ImagenetCalibrator.py](ImagenetCalibrator.py) for a reference implementation -of TensorRT's [IInt8EntropyCalibrator2](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/infer/Int8/EntropyCalibrator2.html). +of +TensorRT's [IInt8EntropyCalibrator2](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/infer/Int8/EntropyCalibrator2.html) +. This class can be tweaked to work for other kinds of models, inputs, etc. 
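Before the full conversion script below, it may help to see where the calibrator plugs into an engine build. This is only a sketch, assuming the TensorRT 7/8 Python API and the `get_int8_calibrator` helper from `Calibrator.py` above; `onnx_to_tensorrt.py` remains the supported path.

```python
# Sketch: build an INT8 engine from an ONNX file using the calibrator defined in Calibrator.py.
import tensorrt as trt
from Calibrator import get_int8_calibrator

TRT_LOGGER = trt.Logger()
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)
with open('yolov6s.onnx', 'rb') as f:
    parser.parse(f.read())

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30
config.set_flag(trt.BuilderFlag.FP16)
config.set_flag(trt.BuilderFlag.INT8)
config.int8_calibrator = get_int8_calibrator('calibration.cache', '/path/to/calibration/files',
                                             max_calib_size=2048, calib_batch_size=128)
engine = builder.build_engine(network, config)
```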
diff --git a/tools/quantization/tensorrt/post_training/onnx_to_tensorrt.py b/tools/quantization/tensorrt/post_training/onnx_to_tensorrt.py index 4ba1779f..1a7382b3 100755 --- a/tools/quantization/tensorrt/post_training/onnx_to_tensorrt.py +++ b/tools/quantization/tensorrt/post_training/onnx_to_tensorrt.py @@ -27,7 +27,8 @@ import argparse import tensorrt as trt -#sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages') + +# sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages') TRT_LOGGER = trt.Logger() logging.basicConfig(level=logging.DEBUG, @@ -48,7 +49,7 @@ def add_profiles(config, inputs, opt_profiles): def mark_outputs(network): # Mark last layer's outputs if not already marked # NOTE: This may not be correct in all cases - last_layer = network.get_layer(network.num_layers-1) + last_layer = network.get_layer(network.num_layers - 1) if not last_layer.num_outputs: logger.error("Last layer contains no outputs.") return @@ -76,8 +77,8 @@ def check_network(network): def get_batch_sizes(max_batch_size): # Returns powers of 2, up to and including max_batch_size max_exponent = math.log2(max_batch_size) - for i in range(int(max_exponent)+1): - batch_size = 2**i + for i in range(int(max_exponent) + 1): + batch_size = 2 ** i yield batch_size if max_batch_size != batch_size: @@ -85,7 +86,7 @@ def get_batch_sizes(max_batch_size): # TODO: This only covers dynamic shape for batch size, not dynamic shape for other dimensions -def create_optimization_profiles(builder, inputs, batch_sizes=[1,8,16,32,64]): +def create_optimization_profiles(builder, inputs, batch_sizes=[1, 8, 16, 32, 64]): # Check if all inputs are fixed explicit batch to create a single profile and avoid duplicates if all([inp.shape[0] > -1 for inp in inputs]): profile = builder.create_optimization_profile() @@ -110,14 +111,19 @@ def create_optimization_profiles(builder, inputs, batch_sizes=[1,8,16,32,64]): return list(profiles.values()) + def main(): parser = argparse.ArgumentParser(description="Creates a TensorRT engine from the provided ONNX file.\n") parser.add_argument("--onnx", required=True, help="The ONNX model file to convert to TensorRT") - parser.add_argument("-o", "--output", type=str, default="model.engine", help="The path at which to write the engine") + parser.add_argument("-o", "--output", type=str, default="model.engine", + help="The path at which to write the engine") parser.add_argument("-b", "--max-batch-size", type=int, help="The max batch size for the TensorRT engine input") - parser.add_argument("-v", "--verbosity", action="count", help="Verbosity for logging. (None) for ERROR, (-v) for INFO/WARNING/ERROR, (-vv) for VERBOSE.") - parser.add_argument("--explicit-batch", action='store_true', help="Set trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH.") - parser.add_argument("--explicit-precision", action='store_true', help="Set trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION.") + parser.add_argument("-v", "--verbosity", action="count", + help="Verbosity for logging. 
(None) for ERROR, (-v) for INFO/WARNING/ERROR, (-vv) for VERBOSE.") + parser.add_argument("--explicit-batch", action='store_true', + help="Set trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH.") + parser.add_argument("--explicit-precision", action='store_true', + help="Set trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION.") parser.add_argument("--gpu-fallback", action='store_true', help="Set trt.BuilderFlag.GPU_FALLBACK.") parser.add_argument("--refittable", action='store_true', help="Set trt.BuilderFlag.REFIT.") parser.add_argument("--debug", action='store_true', help="Set trt.BuilderFlag.DEBUG.") @@ -125,11 +131,18 @@ def main(): parser.add_argument("--fp16", action="store_true", help="Attempt to use FP16 kernels when possible.") parser.add_argument("--int8", action="store_true", help="Attempt to use INT8 kernels when possible. This should generally be used in addition to the --fp16 flag. \ ONLY SUPPORTS RESNET-LIKE MODELS SUCH AS RESNET50/VGG16/INCEPTION/etc.") - parser.add_argument("--calibration-cache", help="(INT8 ONLY) The path to read/write from calibration cache.", default="calibration.cache") - parser.add_argument("--calibration-data", help="(INT8 ONLY) The directory containing {*.jpg, *.jpeg, *.png} files to use for calibration. (ex: Imagenet Validation Set)", default=None) - parser.add_argument("--calibration-batch-size", help="(INT8 ONLY) The batch size to use during calibration.", type=int, default=128) - parser.add_argument("--max-calibration-size", help="(INT8 ONLY) The max number of data to calibrate on from --calibration-data.", type=int, default=2048) - parser.add_argument("-s", "--simple", action="store_true", help="Use SimpleCalibrator with random data instead of ImagenetCalibrator for INT8 calibration.") + parser.add_argument("--calibration-cache", help="(INT8 ONLY) The path to read/write from calibration cache.", + default="calibration.cache") + parser.add_argument("--calibration-data", + help="(INT8 ONLY) The directory containing {*.jpg, *.jpeg, *.png} files to use for calibration. 
(ex: Imagenet Validation Set)", + default=None) + parser.add_argument("--calibration-batch-size", help="(INT8 ONLY) The batch size to use during calibration.", + type=int, default=128) + parser.add_argument("--max-calibration-size", + help="(INT8 ONLY) The max number of data to calibrate on from --calibration-data.", type=int, + default=2048) + parser.add_argument("-s", "--simple", action="store_true", + help="Use SimpleCalibrator with random data instead of ImagenetCalibrator for INT8 calibration.") args, _ = parser.parse_known_args() print(args) @@ -153,21 +166,21 @@ def main(): network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION) builder_flag_map = { - 'gpu_fallback': trt.BuilderFlag.GPU_FALLBACK, - 'refittable': trt.BuilderFlag.REFIT, - 'debug': trt.BuilderFlag.DEBUG, - 'strict_types': trt.BuilderFlag.STRICT_TYPES, - 'fp16': trt.BuilderFlag.FP16, - 'int8': trt.BuilderFlag.INT8, + 'gpu_fallback': trt.BuilderFlag.GPU_FALLBACK, + 'refittable': trt.BuilderFlag.REFIT, + 'debug': trt.BuilderFlag.DEBUG, + 'strict_types': trt.BuilderFlag.STRICT_TYPES, + 'fp16': trt.BuilderFlag.FP16, + 'int8': trt.BuilderFlag.INT8, } # Building engine with trt.Builder(TRT_LOGGER) as builder, \ - builder.create_network(network_flags) as network, \ - builder.create_builder_config() as config, \ - trt.OnnxParser(network, TRT_LOGGER) as parser: + builder.create_network(network_flags) as network, \ + builder.create_builder_config() as config, \ + trt.OnnxParser(network, TRT_LOGGER) as parser: - config.max_workspace_size = 2**30 # 1GiB + config.max_workspace_size = 2 ** 30 # 1GiB # Set Builder Config Flags for flag in builder_flag_map: @@ -205,16 +218,17 @@ def main(): logger.warning("INT8 not supported on this platform.") if args.int8: - from Calibrator import ImageCalibrator, get_int8_calibrator # local module - config.int8_calibrator = get_int8_calibrator(args.calibration_cache, - args.calibration_data, - args.max_calibration_size, - args.calibration_batch_size) + from Calibrator import ImageCalibrator, get_int8_calibrator # local module + config.int8_calibrator = get_int8_calibrator(args.calibration_cache, + args.calibration_data, + args.max_calibration_size, + args.calibration_batch_size) logger.info("Building Engine...") with builder.build_engine(network, config) as engine, open(args.output, "wb") as f: logger.info("Serializing engine to file: {:}".format(args.output)) f.write(engine.serialize()) + if __name__ == "__main__": main() diff --git a/tools/quantization/tensorrt/training_aware/QAT_quantizer.py b/tools/quantization/tensorrt/training_aware/QAT_quantizer.py index 356330fa..99f70bf2 100644 --- a/tools/quantization/tensorrt/training_aware/QAT_quantizer.py +++ b/tools/quantization/tensorrt/training_aware/QAT_quantizer.py @@ -10,6 +10,7 @@ from pytorch_quantization import nn as quant_nn from pytorch_quantization import quant_modules + # Call this function before defining the model def tensorrt_official_qat(): # Quantization Aware Training is based on Straight Through Estimator (STE) derivative approximation. 
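The hunk above only adds a blank line before `tensorrt_official_qat`, but the comment it sits under ("Call this function before defining the model") is the key contract: the quantized modules must be swapped in before any layers are instantiated. A minimal sketch of that ordering with pytorch_quantization, using a throwaway Conv2d instead of a real YOLOv6 model, would be:

```
import torch
from pytorch_quantization import nn as quant_nn
from pytorch_quantization import quant_modules

# Swap torch.nn layers for their fake-quantized counterparts *before* building the model;
# tensorrt_official_qat() in this repo is expected to be called at the same point.
quant_modules.initialize()

layer = torch.nn.Conv2d(3, 16, kernel_size=3)   # now constructs a QuantConv2d
print(isinstance(layer, quant_nn.QuantConv2d))  # True: fake-quant nodes wrap inputs and weights
```

Training then proceeds with the normal loop; the inserted TensorQuantizer nodes rely on the straight-through estimator mentioned in the comment, so gradients pass through the fake-quant ops unchanged.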
diff --git a/tools/train.py b/tools/train.py index e5353865..e14237c1 100644 --- a/tools/train.py +++ b/tools/train.py @@ -42,7 +42,8 @@ def get_args_parser(add_help=True): parser.add_argument('--gpu_count', type=int, default=0) parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume the most recent training') - parser.add_argument('--write_trainbatch_tb', action='store_true', help='write train_batch image to tensorboard once an epoch, may slightly slower train speed if open') + parser.add_argument('--write_trainbatch_tb', action='store_true', + help='write train_batch image to tensorboard once an epoch, may slightly slower train speed if open') return parser @@ -61,7 +62,7 @@ def check_and_init(args): with open(resume_opt_file_path) as f: args = argparse.Namespace(**yaml.safe_load(f)) # load args value from args.yaml else: - LOGGER.warning(f'We can not find the path of {Path(checkpoint_path).parent.parent / "args.yaml"},'\ + LOGGER.warning(f'We can not find the path of {Path(checkpoint_path).parent.parent / "args.yaml"},' \ f' we will save exp log to {Path(checkpoint_path).parent.parent}') LOGGER.warning(f'In this case, make sure to provide configuration, such as data, batch size.') args.save_dir = str(Path(checkpoint_path).parent.parent) @@ -77,7 +78,7 @@ def check_and_init(args): # check device device = select_device(args.device) # set random seed - set_random_seed(1+args.rank, deterministic=(args.rank == -1)) + set_random_seed(1 + args.rank, deterministic=(args.rank == -1)) # save args if master_process: save_yaml(vars(args), osp.join(args.save_dir, 'args.yaml')) @@ -93,12 +94,12 @@ def main(args): # reload envs because args was chagned in check_and_init(args) args.rank, args.local_rank, args.world_size = get_envs() LOGGER.info(f'training args are: {args}\n') - if args.local_rank != -1: # if DDP mode + if args.local_rank != -1: # if DDP mode torch.cuda.set_device(args.local_rank) device = torch.device('cuda', args.local_rank) LOGGER.info('Initializing process group... 
') dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo", \ - init_method=args.dist_url, rank=args.local_rank, world_size=args.world_size) + init_method=args.dist_url, rank=args.local_rank, world_size=args.world_size) # Start trainer = Trainer(args, cfg, device) diff --git a/yolov6/core/engine.py b/yolov6/core/engine.py index 67e0d621..0dcccc27 100644 --- a/yolov6/core/engine.py +++ b/yolov6/core/engine.py @@ -58,7 +58,7 @@ def __init__(self, args, cfg, device): # tensorboard self.tblogger = SummaryWriter(self.save_dir) if self.main_process else None self.start_epoch = 0 - #resume + # resume if hasattr(self, "ckpt"): resume_state_dict = self.ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 model.load_state_dict(resume_state_dict, strict=True) # load @@ -135,28 +135,28 @@ def eval_and_save(self): eval_interval = self.args.eval_interval if remaining_epochs > self.args.heavy_eval_range else 1 is_val_epoch = (not self.args.eval_final_only or (remaining_epochs == 1)) and (self.epoch % eval_interval == 0) if self.main_process: - self.ema.update_attr(self.model, include=['nc', 'names', 'stride']) # update attributes for ema model + self.ema.update_attr(self.model, include=['nc', 'names', 'stride']) # update attributes for ema model if is_val_epoch: self.eval_model() self.ap = self.evaluate_results[0] * 0.1 + self.evaluate_results[1] * 0.9 self.best_ap = max(self.ap, self.best_ap) # save ckpt ckpt = { - 'model': deepcopy(de_parallel(self.model)).half(), - 'ema': deepcopy(self.ema.ema).half(), - 'updates': self.ema.updates, - 'optimizer': self.optimizer.state_dict(), - 'epoch': self.epoch, - } + 'model': deepcopy(de_parallel(self.model)).half(), + 'ema': deepcopy(self.ema.ema).half(), + 'updates': self.ema.updates, + 'optimizer': self.optimizer.state_dict(), + 'epoch': self.epoch, + } save_ckpt_dir = osp.join(self.save_dir, 'weights') save_checkpoint(ckpt, (is_val_epoch) and (self.ap == self.best_ap), save_ckpt_dir, model_name='last_ckpt') del ckpt # log for learning rate - lr = [x['lr'] for x in self.optimizer.param_groups] + lr = [x['lr'] for x in self.optimizer.param_groups] self.evaluate_results = list(self.evaluate_results) + lr - + # log for tensorboard write_tblog(self.tblogger, self.epoch, self.evaluate_results, self.mean_loss) @@ -219,7 +219,8 @@ def plot_train_batch(self, images, targets, max_size=1920, max_subplots=16): if labels: label = f'{cls}' cv2.rectangle(mosaic, (box[0], box[1]), (box[2], box[3]), color, thickness=1) - cv2.putText(mosaic, label, (box[0], box[1] - 5), cv2.FONT_HERSHEY_COMPLEX, 0.5, color, thickness=1) + cv2.putText(mosaic, label, (box[0], box[1] - 5), cv2.FONT_HERSHEY_COMPLEX, 0.5, color, + thickness=1) self.vis_train_batch = mosaic.copy() @@ -227,7 +228,7 @@ def plot_val_pred(self, vis_outputs, vis_paths, vis_conf=0.3, vis_max_box_num=5) # plot validation predictions self.vis_imgs_list = [] for (vis_output, vis_path) in zip(vis_outputs, vis_paths): - vis_output_array = vis_output.cpu().numpy() # xyxy + vis_output_array = vis_output.cpu().numpy() # xyxy ori_img = cv2.imread(vis_path) for bbox_idx, vis_bbox in enumerate(vis_output_array): @@ -241,18 +242,20 @@ def plot_val_pred(self, vis_outputs, vis_paths, vis_conf=0.3, vis_max_box_num=5) # draw top n bbox if box_score < vis_conf or bbox_idx > vis_max_box_num: break - cv2.rectangle(ori_img, (x_tl, y_tl), (x_br, y_br), tuple([int(x) for x in self.color[cls_id]]), thickness=1) - cv2.putText(ori_img, f"{self.data_dict['names'][cls_id]}: {box_score:.2f}", (x_tl, y_tl - 10), 
cv2.FONT_HERSHEY_COMPLEX, 0.5, tuple([int(x) for x in self.color[cls_id]]), thickness=1) + cv2.rectangle(ori_img, (x_tl, y_tl), (x_br, y_br), tuple([int(x) for x in self.color[cls_id]]), + thickness=1) + cv2.putText(ori_img, f"{self.data_dict['names'][cls_id]}: {box_score:.2f}", (x_tl, y_tl - 10), + cv2.FONT_HERSHEY_COMPLEX, 0.5, tuple([int(x) for x in self.color[cls_id]]), thickness=1) self.vis_imgs_list.append(torch.from_numpy(ori_img[:, :, ::-1].copy())) def eval_model(self): results, vis_outputs, vis_paths = eval.run(self.data_dict, - batch_size=self.batch_size // self.world_size * 2, - img_size=self.img_size, - model=self.ema.ema, - dataloader=self.val_loader, - save_dir=self.save_dir, - task='train') + batch_size=self.batch_size // self.world_size * 2, + img_size=self.img_size, + model=self.ema.ema, + dataloader=self.val_loader, + save_dir=self.save_dir, + task='train') LOGGER.info(f"Epoch: {self.epoch} | mAP@0.5: {results[0]} | mAP@0.50:0.95: {results[1]}") self.evaluate_results = results[:2] @@ -269,7 +272,7 @@ def train_before_loop(self): self.scaler = amp.GradScaler(enabled=self.device != 'cpu') self.best_ap, self.ap = 0.0, 0.0 - self.evaluate_results = (0, 0) # AP50, AP50_95 + self.evaluate_results = (0, 0) # AP50, AP50_95 self.compute_loss = ComputeLoss(iou_type=self.cfg.model.head.iou_type) def prepare_for_steps(self): @@ -284,14 +287,15 @@ def prepare_for_steps(self): LOGGER.info(('\n' + '%10s' * 5) % ('Epoch', 'iou_loss', 'l1_loss', 'obj_loss', 'cls_loss')) self.pbar = enumerate(self.train_loader) if self.main_process: - self.pbar = tqdm(self.pbar, total=self.max_stepnum, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') + self.pbar = tqdm(self.pbar, total=self.max_stepnum, ncols=NCOLS, + bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # Print loss after each steps def print_details(self): if self.main_process: self.mean_loss = (self.mean_loss * self.step + self.loss_items) / (self.step + 1) self.pbar.set_description(('%10s' + '%10.4g' * 4) % (f'{self.epoch}/{self.max_epoch - 1}', \ - *(self.mean_loss))) + *(self.mean_loss))) # Empty cache if training finished def train_after_loop(self): @@ -309,9 +313,11 @@ def update_optimizer(self): self.accumulate = max(1, np.interp(curr_step, [0, self.warmup_stepnum], [1, 64 / self.batch_size]).round()) for k, param in enumerate(self.optimizer.param_groups): warmup_bias_lr = self.cfg.solver.warmup_bias_lr if k == 2 else 0.0 - param['lr'] = np.interp(curr_step, [0, self.warmup_stepnum], [warmup_bias_lr, param['initial_lr'] * self.lf(self.epoch)]) + param['lr'] = np.interp(curr_step, [0, self.warmup_stepnum], + [warmup_bias_lr, param['initial_lr'] * self.lf(self.epoch)]) if 'momentum' in param: - param['momentum'] = np.interp(curr_step, [0, self.warmup_stepnum], [self.cfg.solver.warmup_momentum, self.cfg.solver.momentum]) + param['momentum'] = np.interp(curr_step, [0, self.warmup_stepnum], + [self.cfg.solver.warmup_momentum, self.cfg.solver.momentum]) if curr_step - self.last_opt_step >= self.accumulate: self.scaler.step(self.optimizer) self.scaler.update() @@ -370,7 +376,6 @@ def load_scale_from_pretrained_models(cfg, device): scales = extract_scales(ckpt) return scales - @staticmethod def parallel_model(args, model, device): # If DP mode diff --git a/yolov6/core/evaler.py b/yolov6/core/evaler.py index 683fefb3..b147ec60 100644 --- a/yolov6/core/evaler.py +++ b/yolov6/core/evaler.py @@ -66,7 +66,8 @@ def init_data(self, dataloader, task): if task != 'train': pad = 0.0 if task == 'speed' else 0.5 dataloader = 
create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'], - self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, rect=True, + self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, + rect=True, data_dict=self.data, task=task)[0] return dataloader @@ -136,7 +137,7 @@ def eval_model(self, pred_results, model, dataloader, task): cocoEval = COCOeval(anno, pred, 'bbox') if self.is_coco: imgIds = [int(os.path.basename(x).split(".")[0]) - for x in dataloader.dataset.img_paths] + for x in dataloader.dataset.img_paths] cocoEval.params.imgIds = imgIds cocoEval.evaluate() cocoEval.accumulate() @@ -154,7 +155,7 @@ def eval_speed(self, task): if task != 'train': n_samples = self.speed_result[0].item() pre_time, inf_time, nms_time = 1000 * self.speed_result[1:].cpu().numpy() / n_samples - for n, v in zip(["pre-process", "inference", "NMS"],[pre_time, inf_time, nms_time]): + for n, v in zip(["pre-process", "inference", "NMS"], [pre_time, inf_time, nms_time]): LOGGER.info("Average {} time: {:.2f} ms".format(n, v)) def box_convert(self, x): @@ -255,19 +256,20 @@ def reload_dataset(data): @staticmethod def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) - # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] return x def eval_trt(self, engine, stride=32): self.stride = stride + def init_engine(engine): import tensorrt as trt - from collections import namedtuple,OrderedDict + from collections import namedtuple, OrderedDict Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) logger = trt.Logger(trt.Logger.ERROR) trt.init_libnvinfer_plugins(logger, namespace="") @@ -289,7 +291,8 @@ def init_data(dataloader, task): self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000)) pad = 0.0 if task == 'speed' else 0.5 dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'], - self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, rect=False, + self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, + rect=False, data_dict=self.data, task=task)[0] return dataloader @@ -301,7 +304,7 @@ def convert_to_coco_format_trt(nums, boxes, scores, classes, paths, shapes, ids) continue path, shape = Path(paths[i]), shapes[i][0] gain = shapes[i][1][0][0] - pad = torch.tensor(shapes[i][1][1]*2).to(self.device) + pad = torch.tensor(shapes[i][1][1] * 2).to(self.device) detbox = detbox[:n, :] detbox -= pad detbox /= gain @@ -309,7 +312,7 @@ def convert_to_coco_format_trt(nums, boxes, scores, classes, paths, shapes, ids) detbox[:, 1].clamp_(0, shape[0]) detbox[:, 2].clamp_(0, shape[1]) detbox[:, 3].clamp_(0, shape[0]) - detbox[:,2:] = detbox[:,2:] - detbox[:,:2] + detbox[:, 2:] = detbox[:, 2:] - detbox[:, :2] detscore = detscore[:n] detcls = 
detcls[:n] @@ -335,7 +338,7 @@ def convert_to_coco_format_trt(nums, boxes, scores, classes, paths, shapes, ids) for _ in range(10): binding_addrs['images'] = int(tmp.data_ptr()) context.execute_v2(list(binding_addrs.values())) - dataloader = init_data(None,'val') + dataloader = init_data(None, 'val') self.speed_result = torch.zeros(4, device=self.device) pred_results = [] pbar = tqdm(dataloader, desc="Inferencing model in validation dataset.", ncols=NCOLS) @@ -344,7 +347,7 @@ def convert_to_coco_format_trt(nums, boxes, scores, classes, paths, shapes, ids) if nb_img != self.batch_size: # pad to tensorrt model setted batch size zeros = torch.zeros(self.batch_size - nb_img, 3, *imgs.shape[2:]) - imgs = torch.cat([imgs, zeros],0) + imgs = torch.cat([imgs, zeros], 0) t1 = time_sync() imgs = imgs.to(self.device, non_blocking=True) # preprocess diff --git a/yolov6/core/inferer.py b/yolov6/core/inferer.py index e8f1d253..d9874e2d 100644 --- a/yolov6/core/inferer.py +++ b/yolov6/core/inferer.py @@ -1,27 +1,30 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -import os -import cv2 -import time +from yolov6.utils.torch_utils import get_model_info import math -import torch -import numpy as np +import os import os.path as osp - -from tqdm import tqdm +import warnings from pathlib import Path -from PIL import ImageFont -from collections import deque -from yolov6.utils.events import LOGGER, load_yaml -from yolov6.layers.common import DetectBackend +import cv2 +import numpy as np +import torch +from PIL import ImageFont +from tqdm import tqdm from yolov6.data.data_augment import letterbox -from yolov6.data.datasets import LoadData +from yolov6.layers.common import DetectBackend +from yolov6.utils.events import LOGGER, load_yaml from yolov6.utils.nms import non_max_suppression -from yolov6.utils.torch_utils import get_model_info + +warnings.filterwarnings("ignore") + class Inferer: def __init__(self, source, weights, device, yaml, img_size, half): + import glob + + from yolov6.data.datasets import LoadData self.__dict__.update(locals()) @@ -29,11 +32,12 @@ def __init__(self, source, weights, device, yaml, img_size, half): self.device = device self.img_size = img_size cuda = self.device != 'cpu' and torch.cuda.is_available() - self.device = torch.device(f'cuda:{device}' if cuda else 'cpu') + self.device = torch.device('cuda:0' if cuda else 'cpu') self.model = DetectBackend(weights, device=self.device) self.stride = self.model.stride self.class_names = load_yaml(yaml)['names'] - self.img_size = self.check_img_size(self.img_size, s=self.stride) # check image size + self.img_size = self.check_img_size( + self.img_size, s=self.stride) # check image size # Half precision if half & (self.device.type != 'cpu'): @@ -43,13 +47,15 @@ def __init__(self, source, weights, device, yaml, img_size, half): half = False if self.device.type != 'cpu': - self.model(torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))) # warmup + self.model(torch.zeros(1, 3, *self.img_size).to(self.device).type_as( + next(self.model.model.parameters()))) # warmup # Load data + print(f'Loading data from {source}') self.files = LoadData(source) # Switch model to deploy status - self.model_switch(self.model.model, self.img_size) + self.model_switch(self.model, self.img_size) def model_switch(self, model, img_size): ''' Model switch to deploy status ''' @@ -60,25 +66,32 @@ def model_switch(self, model, img_size): LOGGER.info("Switch model to deploy modality.") - def infer(self, conf_thres, iou_thres, 
classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf, view_img=True): + def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, + hide_conf, view_img): ''' Model Inference and results visualization ''' vid_path, vid_writer, windows = None, None, [] - fps_calculator = CalcFPS() for img_src, img_path, vid_cap in tqdm(self.files): - img, img_src = self.precess_image(img_src, self.img_size, self.stride, self.half) + img, img_src = self.precess_image( + img_src, self.img_size, self.stride, self.half) img = img.to(self.device) if len(img.shape) == 3: img = img[None] # expand for batch dim - t1 = time.time() pred_results = self.model(img) - det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0] - t2 = time.time() - - save_path = osp.join(save_dir, osp.basename(img_path)) # im.jpg - txt_path = osp.join(save_dir, 'labels', osp.splitext(osp.basename(img_path))[0]) - - gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] # normalization gain whwh + det = non_max_suppression( + pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0] + if self.source != 'camera': + save_path = osp.join( + save_dir, osp.basename(img_path)) # im.jpg + txt_path = osp.join(save_dir, 'labels', + osp.basename(img_path).split('.')[0]) + elif self.source == 'camera': + save_path = osp.join( + save_dir, 'camera.mp4') # im.jpg + txt_path = osp.join(save_dir, 'labels', + 'camera.mp4'.split('.')[0]) # im.jpg + # normalization gain whwh + gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] img_ori = img_src.copy() # check image and font @@ -86,44 +99,39 @@ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, self.font_check() if len(det): - det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round() + det[:, :4] = self.rescale( + img.shape[2:], det[:, :4], img_src.shape).round() for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file - xywh = (self.box_convert(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + xywh = (self.box_convert(torch.tensor(xyxy).view( + 1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img: class_num = int(cls) # integer class - label = None if hide_labels else (self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}') + label = None if hide_labels else ( + self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}') - self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True)) + self.plot_box_and_label(img_ori, max(round(sum( + img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True)) img_src = np.asarray(img_ori) - # FPS counter - fps_calculator.update(1.0 / (t2 - t1)) - avg_fps = fps_calculator.accumulate() - - if self.files.type == 'video': - self.draw_text( - img_src, - f"FPS: {avg_fps:0.1f}", - pos=(20, 20), - font_scale=1.0, - text_color=(204, 85, 17), - text_color_bg=(255, 255, 255), - font_thickness=2, - ) - if view_img: if img_path not in windows: windows.append(img_path) - cv2.namedWindow(str(img_path), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) - cv2.resizeWindow(str(img_path), img_src.shape[1], img_src.shape[0]) + # allow window resize (Linux) 
+ cv2.namedWindow( + str(img_path), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) + cv2.resizeWindow( + str(img_path), img_src.shape[1], img_src.shape[0]) cv2.imshow(str(img_path), img_src) - cv2.waitKey(1) # 1 millisecond + key = cv2.waitKey(1) # 1 millisecond + if key == ord('q'): + cv2.destroyAllWindows() + return # Save results (image with detections) if save_img: @@ -140,8 +148,10 @@ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, img_ori.shape[1], img_ori.shape[0] - save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos - vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + # force *.mp4 suffix on results videos + save_path = str(Path(save_path).with_suffix('.mp4')) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer.write(img_src) @staticmethod @@ -159,8 +169,10 @@ def precess_image(img_src, img_size, stride, half): @staticmethod def rescale(ori_shape, boxes, target_shape): '''Rescale the output to the original image shape''' - ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1]) - padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2 + ratio = min(ori_shape[0] / target_shape[0], + ori_shape[1] / target_shape[1]) + padding = (ori_shape[1] - target_shape[1] * ratio) / \ + 2, (ori_shape[0] - target_shape[0] * ratio) / 2 boxes[:, [0, 2]] -= padding[0] boxes[:, [1, 3]] -= padding[1] @@ -178,50 +190,20 @@ def check_img_size(self, img_size, s=32, floor=0): if isinstance(img_size, int): # integer i.e. img_size=640 new_size = max(self.make_divisible(img_size, int(s)), floor) elif isinstance(img_size, list): # list i.e. img_size=[640, 480] - new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size] + new_size = [max(self.make_divisible(x, int(s)), floor) + for x in img_size] else: raise Exception(f"Unsupported type of img_size: {type(img_size)}") if new_size != img_size: - print(f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}') - return new_size if isinstance(img_size,list) else [new_size]*2 + print( + f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}') + return new_size if isinstance(img_size, list) else [new_size] * 2 def make_divisible(self, x, divisor): # Upward revision the value x to make it evenly divisible by the divisor. 
return math.ceil(x / divisor) * divisor - @staticmethod - def draw_text( - img, - text, - font=cv2.FONT_HERSHEY_SIMPLEX, - pos=(0, 0), - font_scale=1, - font_thickness=2, - text_color=(0, 255, 0), - text_color_bg=(0, 0, 0), - ): - - offset = (5, 5) - x, y = pos - text_size, _ = cv2.getTextSize(text, font, font_scale, font_thickness) - text_w, text_h = text_size - rec_start = tuple(x - y for x, y in zip(pos, offset)) - rec_end = tuple(x + y for x, y in zip((x + text_w, y + text_h), offset)) - cv2.rectangle(img, rec_start, rec_end, text_color_bg, -1) - cv2.putText( - img, - text, - (x, int(y + text_h + font_scale - 1)), - font, - font_scale, - text_color, - font_thickness, - cv2.LINE_AA, - ) - - return text_size - @staticmethod def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): # Add one xyxy box to image with label @@ -229,7 +211,8 @@ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_colo cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA) if label: tf = max(lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height + # text width, height + w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] outside = p1[1] - h - 3 >= 0 # label fits outside box p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled @@ -262,20 +245,8 @@ def generate_colors(i, bgr=False): palette = [] for iter in hex: h = '#' + iter - palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))) + palette.append(tuple(int(h[1 + i:1 + i + 2], 16) + for i in (0, 2, 4))) num = len(palette) color = palette[int(i) % num] return (color[2], color[1], color[0]) if bgr else color - -class CalcFPS: - def __init__(self, nsamples: int = 50): - self.framerate = deque(maxlen=nsamples) - - def update(self, duration: float): - self.framerate.append(duration) - - def accumulate(self): - if len(self.framerate) > 1: - return np.average(self.framerate) - else: - return 0.0 diff --git a/yolov6/data/data_augment.py b/yolov6/data/data_augment.py index 0bef2d87..79fc5361 100644 --- a/yolov6/data/data_augment.py +++ b/yolov6/data/data_augment.py @@ -73,7 +73,6 @@ def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10, new_shape=(640, 640)): - n = len(labels) height, width = new_shape @@ -138,12 +137,11 @@ def get_transform_matrix(img_shape, new_shape, degrees, scale, shear, translate) def mosaic_augmentation(img_size, imgs, hs, ws, labels, hyp): - assert len(imgs) == 4, "Mosaic augmentation of current version only supports 4 images." 
labels4 = [] s = img_size - yc, xc = (int(random.uniform(s//2, 3*s//2)) for _ in range(2)) # mosaic center x, y + yc, xc = (int(random.uniform(s // 2, 3 * s // 2)) for _ in range(2)) # mosaic center x, y for i in range(len(imgs)): # Load image img, h, w = imgs[i], hs[i], ws[i] diff --git a/yolov6/data/data_load.py b/yolov6/data/data_load.py index 8cf36072..322b7814 100644 --- a/yolov6/data/data_load.py +++ b/yolov6/data/data_load.py @@ -12,21 +12,21 @@ def create_dataloader( - path, - img_size, - batch_size, - stride, - hyp=None, - augment=False, - check_images=False, - check_labels=False, - pad=0.0, - rect=False, - rank=-1, - workers=8, - shuffle=False, - data_dict=None, - task="Train", + path, + img_size, + batch_size, + stride, + hyp=None, + augment=False, + check_images=False, + check_labels=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + shuffle=False, + data_dict=None, + task="Train", ): """Create general dataloader. diff --git a/yolov6/data/datasets.py b/yolov6/data/datasets.py index c0f1f0f7..ac41a438 100644 --- a/yolov6/data/datasets.py +++ b/yolov6/data/datasets.py @@ -18,6 +18,7 @@ from PIL import ExifTags, Image, ImageOps from torch.utils.data import Dataset from tqdm import tqdm +from pathlib import Path from .data_augment import ( augment_hsv, @@ -29,7 +30,8 @@ from yolov6.utils.events import LOGGER # Parameters -IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"] +IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", + "tif", "tiff", "dng", "webp", "mpo"] VID_FORMATS = ["mp4", "mov", "avi", "mkv"] # Get orientation exif tag for k, v in ExifTags.TAGS.items(): @@ -41,27 +43,27 @@ class TrainValDataset(Dataset): # YOLOv6 train_loader/val_loader, loads images and labels for training and validation def __init__( - self, - img_dir, - img_size=640, - batch_size=16, - augment=False, - hyp=None, - rect=False, - check_images=False, - check_labels=False, - stride=32, - pad=0.0, - rank=-1, - data_dict=None, - task="train", + self, + img_dir, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + check_images=False, + check_labels=False, + stride=32, + pad=0.0, + rank=-1, + class_names=None, + task="train", ): - assert task.lower() in ("train", "val", "speed"), f"Not supported task: {task}" + assert task.lower() in ( + "train", "val", "speed"), f"Not supported task: {task}" t1 = time.time() self.__dict__.update(locals()) self.main_process = self.rank in (-1, 0) self.task = self.task.capitalize() - self.class_names = data_dict["names"] self.img_paths, self.labels = self.get_imgs_labels(self.img_dir) if self.rect: shapes = [self.img_info[p]["shape"] for p in self.img_paths] @@ -107,8 +109,10 @@ def __getitem__(self, index): if self.rect else self.img_size ) # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + img, ratio, pad = letterbox( + img, shape, auto=False, scaleup=self.augment) + # for COCO mAP rescaling + shapes = (h0, w0), ((h / h0, w / w0), pad) labels = self.labels[index].copy() if labels.size: @@ -117,16 +121,16 @@ def __getitem__(self, index): # new boxes boxes = np.copy(labels[:, 1:]) boxes[:, 0] = ( - w * (labels[:, 1] - labels[:, 3] / 2) + pad[0] + w * (labels[:, 1] - labels[:, 3] / 2) + pad[0] ) # top left x boxes[:, 1] = ( - h * (labels[:, 2] - labels[:, 4] / 2) + pad[1] + h * (labels[:, 2] - labels[:, 4] / 2) + pad[1] ) # top left y boxes[:, 2] = ( - w * (labels[:, 1] + labels[:, 3] / 2) + pad[0] + w 
* (labels[:, 1] + labels[:, 3] / 2) + pad[0] ) # bottom right x boxes[:, 3] = ( - h * (labels[:, 2] + labels[:, 4] / 2) + pad[1] + h * (labels[:, 2] + labels[:, 4] / 2) + pad[1] ) # bottom right y labels[:, 1:] = boxes @@ -204,28 +208,18 @@ def get_imgs_labels(self, img_dir): valid_img_record = osp.join( osp.dirname(img_dir), "." + osp.basename(img_dir) + ".json" ) + img_info = {} NUM_THREADS = min(8, os.cpu_count()) - - img_paths = glob.glob(osp.join(img_dir, "*"), recursive=True) - img_paths = sorted( - p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS - ) - assert img_paths, f"No images found in {img_dir}." - - img_hash = self.get_hash(img_paths) - if osp.exists(valid_img_record): - with open(valid_img_record, "r") as f: - cache_info = json.load(f) - if "image_hash" in cache_info and cache_info["image_hash"] == img_hash: - img_info = cache_info["information"] - else: - self.check_images = True - else: - self.check_images = True - # check images - if self.check_images and self.main_process: - img_info = {} + if ( + self.check_images or not osp.exists(valid_img_record) + ) and self.main_process: + img_paths = glob.glob(osp.join(img_dir, "*"), recursive=True) + img_paths = sorted( + p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS + ) + assert img_paths, f"No images found in {img_dir}." + nc, msgs = 0, [] # number corrupt, messages LOGGER.info( f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): " @@ -246,48 +240,51 @@ def get_imgs_labels(self, img_dir): if msgs: LOGGER.info("\n".join(msgs)) - cache_info = {"information": img_info, "image_hash": img_hash} # save valid image paths. with open(valid_img_record, "w") as f: - json.dump(cache_info, f) + json.dump(img_info, f) # check and load anns label_dir = osp.join( osp.dirname(osp.dirname(img_dir)), "labels", osp.basename(img_dir) ) - assert osp.exists(label_dir), f"{label_dir} is an invalid directory path!" - + assert osp.exists( + label_dir), f"{label_dir} is an invalid directory path!" + if not img_info: + with open(valid_img_record, "r") as f: + img_info = json.load(f) + assert ( + img_info + ), "No information in record files, please add option --check_images." img_paths = list(img_info.keys()) - label_paths = sorted( - osp.join(label_dir, osp.splitext(osp.basename(p))[0] + ".txt") + label_paths = [ + osp.join(label_dir, osp.basename(p).split(".")[0] + ".txt") for p in img_paths - ) - assert label_paths, f"No labels found in {label_dir}." 
- label_hash = self.get_hash(label_paths) - if "label_hash" not in cache_info or cache_info["label_hash"] != label_hash: - self.check_labels = True - - if self.check_labels: - cache_info["label_hash"] = label_hash + ] + if ( + self.check_labels or "labels" not in img_info[img_paths[0]] + ): # key 'labels' not saved in img_info nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number corrupt, messages LOGGER.info( f"{self.task}: Checking formats of labels with {NUM_THREADS} process(es): " ) with Pool(NUM_THREADS) as pool: pbar = pool.imap( - TrainValDataset.check_label_files, zip(img_paths, label_paths) + TrainValDataset.check_label_files, zip( + img_paths, label_paths) ) - pbar = tqdm(pbar, total=len(label_paths)) if self.main_process else pbar + pbar = tqdm(pbar, total=len(label_paths) + ) if self.main_process else pbar for ( - img_path, - labels_per_file, - nc_per_file, - nm_per_file, - nf_per_file, - ne_per_file, - msg, + img_path, + labels_per_file, + nc_per_file, + nm_per_file, + nf_per_file, + ne_per_file, + msg, ) in pbar: - if nc_per_file == 0: + if img_path: img_info[img_path]["labels"] = labels_per_file else: img_info.pop(img_path) @@ -302,28 +299,28 @@ def get_imgs_labels(self, img_dir): if self.main_process: pbar.close() with open(valid_img_record, "w") as f: - json.dump(cache_info, f) + json.dump(img_info, f) if msgs: LOGGER.info("\n".join(msgs)) if nf == 0: LOGGER.warning( - f"WARNING: No labels found in {osp.dirname(img_paths[0])}. " + f"WARNING: No labels found in {osp.dirname(self.img_paths[0])}. " ) - - + else: + with open(valid_img_record) as f: + img_info = json.load(f) if self.task.lower() == "val": - if self.data_dict.get("is_coco", False): # use original json file when evaluating on coco dataset. - assert osp.exists(self.data_dict["anno_path"]), "Eval on coco dataset must provide valid path of the annotation file in config file: data/coco.yaml" - else: - assert ( - self.class_names - ), "Class names is required when converting labels to coco format for evaluating." - save_dir = osp.join(osp.dirname(osp.dirname(img_dir)), "annotations") - if not osp.exists(save_dir): - os.mkdir(save_dir) - save_path = osp.join( - save_dir, "instances_" + osp.basename(img_dir) + ".json" - ) + assert ( + self.class_names + ), "Class names is required when converting labels to coco format for evaluating." 
+ save_dir = osp.join(osp.dirname( + osp.dirname(img_dir)), "annotations") + if not osp.exists(save_dir): + os.mkdir(save_dir) + save_path = osp.join( + save_dir, "instances_" + osp.basename(img_dir) + ".json" + ) + if not osp.exists(save_path): TrainValDataset.generate_coco_format_labels( img_info, self.class_names, save_path ) @@ -361,7 +358,8 @@ def get_mosaic(self, index): hs.append(h) ws.append(w) labels.append(labels_per_img) - img, labels = mosaic_augmentation(self.img_size, imgs, hs, ws, labels, self.hyp) + img, labels = mosaic_augmentation( + self.img_size, imgs, hs, ws, labels, self.hyp) return img, labels def general_augment(self, img, labels): @@ -413,10 +411,10 @@ def sort_files_shapes(self): elif mini > 1: shapes[i] = [1, 1 / mini] self.batch_shapes = ( - np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype( - np.int - ) - * self.stride + np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype( + np.int + ) + * self.stride ) @staticmethod @@ -427,21 +425,16 @@ def check_image(im_file): im = Image.open(im_file) im.verify() # PIL verify shape = im.size # (width, height) - try: - im_exif = im._getexif() - if im_exif and ORIENTATION in im_exif: - rotation = im_exif[ORIENTATION] - if rotation in (6, 8): - shape = (shape[1], shape[0]) - except: - im_exif = None + im_exif = im._getexif() if im_exif and ORIENTATION in im_exif: rotation = im_exif[ORIENTATION] if rotation in (6, 8): shape = (shape[1], shape[0]) - assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" - assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" + assert (shape[0] > 9) & ( + shape[1] > 9), f"image size {shape} <10 pixels" + assert im.format.lower( + ) in IMG_FORMATS, f"invalid image format {im.format}" if im.format.lower() in ("jpg", "jpeg"): with open(im_file, "rb") as f: f.seek(-2, 2) @@ -459,7 +452,8 @@ def check_image(im_file): @staticmethod def check_label_files(args): img_path, lb_path = args - nm, nf, ne, nc, msg = 0, 0, 0, 0, "" # number (missing, found, empty, message + # number (missing, found, empty, message + nm, nf, ne, nc, msg = 0, 0, 0, 0, "" try: if osp.exists(lb_path): nf = 1 # label found @@ -473,10 +467,10 @@ def check_label_files(args): len(l) == 5 for l in labels ), f"{lb_path}: wrong label format." 
assert ( - labels >= 0 + labels >= 0 ).all(), f"{lb_path}: Label values error: all values in label file must > 0" assert ( - labels[:, 1:] <= 1 + labels[:, 1:] <= 1 ).all(), f"{lb_path}: Label values error: all coordinates must be normalized" _, indices = np.unique(labels, axis=0, return_index=True) @@ -495,7 +489,7 @@ def check_label_files(args): except Exception as e: nc = 1 msg = f"WARNING: {lb_path}: ignoring invalid labels: {e}" - return img_path, None, nc, nm, nf, ne, msg + return None, None, nc, nm, nf, ne, msg @staticmethod def generate_coco_format_labels(img_info, class_names, save_path): @@ -559,29 +553,44 @@ def get_hash(paths): h = hashlib.md5("".join(paths).encode()) return h.hexdigest() + class LoadData: def __init__(self, path): - p = str(Path(path).resolve()) # os-agnostic absolute path - if os.path.isdir(p): - files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir - elif os.path.isfile(p): - files = [p] # files - else: - raise FileNotFoundError(f'Invalid path {p}') - - imgp = [i for i in files if i.split('.')[-1] in IMG_FORMATS] - vidp = [v for v in files if v.split('.')[-1] in VID_FORMATS] - self.files = imgp + vidp - self.nf = len(self.files) - self.type = 'image' - if any(vidp): - self.add_video(vidp[0]) # new video - else: - self.cap = None + if path != 'camera': + p = str(Path(path).resolve()) # os-agnostic absolute path + if os.path.isdir(p): + files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir + elif os.path.isfile(p): + files = [p] # files + else: + raise FileNotFoundError(f'Invalid path {p}') + imgp = [i for i in files if i.split('.')[-1] in IMG_FORMATS] + vidp = [v for v in files if v.split('.')[-1] in VID_FORMATS] + self.files = imgp + vidp + self.nf = len(self.files) + self.type = 'image' + if any(vidp): + self.add_video(vidp[0]) # new video + else: + self.cap = None + elif path == 'camera': + imgp = [] + vidp = [cv2.VideoCapture(0)] + self.files = imgp + vidp + self.nf = len(self.files) + self.type = 'image' + if any(vidp): + self.add_video(0) # new video + else: + self.cap = None @staticmethod def checkext(path): - file_type = 'image' if path.split('.')[-1].lower() in IMG_FORMATS else 'video' + try: + file_type = 'image' if path.split( + '.')[-1].lower() in IMG_FORMATS else 'video' + except AttributeError: + file_type = 'video' return file_type def __iter__(self): @@ -618,3 +627,6 @@ def add_video(self, path): def __len__(self): return self.nf # number of files + + def stop(self): + self.videoCapture.release() diff --git a/yolov6/data/vis_dataset.py b/yolov6/data/vis_dataset.py index 67e1bb97..8fd6b68c 100644 --- a/yolov6/data/vis_dataset.py +++ b/yolov6/data/vis_dataset.py @@ -31,12 +31,15 @@ def main(args): with open(label_path, 'r') as f: for bbox in f: - cls, x_c, y_c, w, h = [float(v) if i > 0 else int(v) for i, v in enumerate(bbox.split('\n')[0].split(' '))] + cls, x_c, y_c, w, h = [float(v) if i > 0 else int(v) for i, v in + enumerate(bbox.split('\n')[0].split(' '))] x_tl = int((x_c - w / 2) * width) y_tl = int((y_c - h / 2) * height) - cv2.rectangle(img_data, (x_tl, y_tl), (x_tl + int(w * width), y_tl + int(h * height)), tuple([int(x) for x in color[cls]]), thickness) - cv2.putText(img_data, label_map[cls], (x_tl, y_tl - 10), cv2.FONT_HERSHEY_COMPLEX, 1, tuple([int(x) for x in color[cls]]), thickness) + cv2.rectangle(img_data, (x_tl, y_tl), (x_tl + int(w * width), y_tl + int(h * height)), + tuple([int(x) for x in color[cls]]), thickness) + cv2.putText(img_data, label_map[cls], (x_tl, y_tl - 10), cv2.FONT_HERSHEY_COMPLEX, 1, + tuple([int(x) for x 
in color[cls]]), thickness) cv2.imshow('image', img_data) cv2.waitKey(0) @@ -49,8 +52,10 @@ def main(args): parser = argparse.ArgumentParser() parser.add_argument('--img_dir', default='VOCdevkit/voc_07_12/images') parser.add_argument('--label_dir', default='VOCdevkit/voc_07_12/labels') - parser.add_argument('--class_names', default=['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', - 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']) + parser.add_argument('--class_names', + default=['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', + 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']) args = parser.parse_args() print(args) diff --git a/yolov6/data/voc2yolo.py b/yolov6/data/voc2yolo.py index 9019e1fc..27c40529 100644 --- a/yolov6/data/voc2yolo.py +++ b/yolov6/data/voc2yolo.py @@ -9,7 +9,8 @@ # VOC2007 test: 438MB, 4953 images # VOC2012 trainval: 1.95GB, 17126 images -VOC_NAMES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', +VOC_NAMES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', + 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] @@ -18,6 +19,7 @@ def convert_box(size, box): dw, dh = 1. / size[0], 1. / size[1] x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] return x * dw, y * dh, w * dw, h * dh + in_file = open(os.path.join(path, f'VOC{year}/Annotations/{image_id}.xml')) out_file = open(lb_path, 'w') tree = ET.parse(in_file) @@ -49,7 +51,7 @@ def gen_voc07_12(voc_path): if not os.path.exists(dataset_root): os.makedirs(dataset_root) - dataset_settings = {'train': ['train2007', 'val2007', 'train2012', 'val2012'], 'val':['test2007']} + dataset_settings = {'train': ['train2007', 'val2007', 'train2012', 'val2012'], 'val': ['test2007']} for item in ['images', 'labels']: for data_type, data_list in dataset_settings.items(): for data_name in data_list: @@ -82,14 +84,13 @@ def main(args): lb_path = os.path.join(lbs_path, f'{id}.txt') # new label path convert_label(voc_path, lb_path, year, id) # convert labels to YOLO format if os.path.exists(f): - shutil.move(f, imgs_path) # move image + shutil.move(f, imgs_path) # move image except Exception as e: print(f'[Warning]: {e} {year}{image_set} convert fail!') gen_voc07_12(voc_path) - if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--voc_path', default='VOCdevkit') diff --git a/yolov6/layers/common.py b/yolov6/layers/common.py index ad4a02b1..4d405d92 100644 --- a/yolov6/layers/common.py +++ b/yolov6/layers/common.py @@ -15,6 +15,7 @@ class SiLU(nn.Module): '''Activation of SiLU''' + @staticmethod def forward(x): return x * torch.sigmoid(x) @@ -22,6 +23,7 @@ def forward(x): class Conv(nn.Module): '''Normal Conv with SiLU activation''' + def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False): super().__init__() padding = kernel_size // 2 @@ -46,6 +48,7 @@ def forward_fuse(self, x): class SimConv(nn.Module): '''Normal Conv with ReLU activation''' + def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False): super().__init__() padding = kernel_size // 2 @@ -70,6 +73,7 @@ def forward_fuse(self, x): class SimSPPF(nn.Module): '''Simplified SPPF with ReLU 
activation''' + def __init__(self, in_channels, out_channels, kernel_size=5): super().__init__() c_ = in_channels // 2 # hidden channels @@ -88,6 +92,7 @@ def forward(self, x): class Transpose(nn.Module): '''Normal Transpose, default for upsampling''' + def __init__(self, in_channels, out_channels, kernel_size=2, stride=2): super().__init__() self.upsample_transpose = torch.nn.ConvTranspose2d( @@ -115,7 +120,8 @@ def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): '''Basic cell for rep-style block, including conv and bn''' result = nn.Sequential() result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, - kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, + bias=False)) result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) return result @@ -124,6 +130,7 @@ class RepVGGBlock(nn.Module): '''RepVGGBlock is a basic rep-style block, including training and deploy status This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py ''' + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False): super(RepVGGBlock, self).__init__() @@ -160,13 +167,18 @@ def __init__(self, in_channels, out_channels, kernel_size=3, self.se = nn.Identity() if deploy: - self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, - padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) + self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=True, + padding_mode=padding_mode) else: - self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None - self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups) - self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups) + self.rbr_identity = nn.BatchNorm2d( + num_features=in_channels) if out_channels == in_channels and stride == 1 else None + self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, padding=padding, groups=groups) + self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, + padding=padding_11, groups=groups) def forward(self, inputs): '''Forward process''' @@ -224,9 +236,11 @@ def switch_to_deploy(self): if hasattr(self, 'rbr_reparam'): return kernel, bias = self.get_equivalent_kernel_bias() - self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, out_channels=self.rbr_dense.conv.out_channels, + self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, + out_channels=self.rbr_dense.conv.out_channels, kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride, - padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, bias=True) + padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, + groups=self.rbr_dense.conv.groups, bias=True) self.rbr_reparam.weight.data = kernel self.rbr_reparam.bias.data = 
bias for para in self.parameters(): @@ -244,10 +258,11 @@ class RealVGGBlock(nn.Module): def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', use_se=False, - ): + ): super(RealVGGBlock, self).__init__() self.relu = nn.ReLU() - self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, padding=padding, bias=False) self.bn = nn.BatchNorm2d(out_channels) if use_se: @@ -259,6 +274,7 @@ def forward(self, inputs): out = self.relu(self.se(self.bn(self.conv(inputs)))) return out + class ScaleLayer(torch.nn.Module): def __init__(self, num_features, use_bias=True, scale_init=1.0): @@ -278,6 +294,7 @@ def forward(self, inputs): else: return inputs * self.weight.view(1, self.num_features, 1, 1) + self.bias.view(1, self.num_features, 1, 1) + # A CSLA block is a LinearAddBlock with is_csla=True class LinearAddBlock(nn.Module): @@ -286,14 +303,16 @@ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1 super(LinearAddBlock, self).__init__() self.in_channels = in_channels self.relu = nn.ReLU() - self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, padding=padding, bias=False) self.scale_conv = ScaleLayer(num_features=out_channels, use_bias=False, scale_init=conv_scale_init) - self.conv_1x1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=0, bias=False) + self.conv_1x1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, + padding=0, bias=False) self.scale_1x1 = ScaleLayer(num_features=out_channels, use_bias=False, scale_init=conv_scale_init) if in_channels == out_channels and stride == 1: self.scale_identity = ScaleLayer(num_features=out_channels, use_bias=False, scale_init=1.0) self.bn = nn.BatchNorm2d(out_channels) - if is_csla: # Make them constant + if is_csla: # Make them constant self.scale_1x1.requires_grad_(False) self.scale_conv.requires_grad_(False) if use_se: @@ -308,6 +327,7 @@ def forward(self, inputs): out = self.relu(self.se(self.bn(out))) return out + def conv_bn_v2(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros'): conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, @@ -323,7 +343,8 @@ def conv_bn_v2(in_channels, out_channels, kernel_size, stride=1, padding=0, dila class IdentityBasedConv1x1(nn.Conv2d): def __init__(self, channels, groups=1): - super(IdentityBasedConv1x1, self).__init__(in_channels=channels, out_channels=channels, kernel_size=1, stride=1, padding=0, groups=groups, bias=False) + super(IdentityBasedConv1x1, self).__init__(in_channels=channels, out_channels=channels, kernel_size=1, stride=1, + padding=0, groups=groups, bias=False) assert channels % groups == 0 input_dim = channels // groups @@ -358,7 +379,8 @@ def forward(self, input): output = self.bn(input) if self.pad_pixels > 0: if self.bn.affine: - pad_values = self.bn.bias.detach() - self.bn.running_mean * self.bn.weight.detach() / torch.sqrt(self.bn.running_var + self.bn.eps) + pad_values = self.bn.bias.detach() - 
self.bn.running_mean * self.bn.weight.detach() / torch.sqrt( + self.bn.running_var + self.bn.eps) else: pad_values = - self.bn.running_mean / torch.sqrt(self.bn.running_var + self.bn.eps) output = F.pad(output, [self.pad_pixels] * 4) @@ -394,10 +416,12 @@ class DBBBlock(nn.Module): ''' RepBlock is a stage block with rep-style basic block ''' + def __init__(self, in_channels, out_channels, n=1): super().__init__() self.conv1 = DiverseBranchBlock(in_channels, out_channels) - self.block = nn.Sequential(*(DiverseBranchBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None + self.block = nn.Sequential( + *(DiverseBranchBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None def forward(self, x): x = self.conv1(x) @@ -426,12 +450,14 @@ def __init__(self, in_channels, out_channels, kernel_size=3, assert padding == kernel_size // 2 if deploy: - self.dbb_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, - padding=padding, dilation=dilation, groups=groups, bias=True) + self.dbb_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=True) else: - self.dbb_origin = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups) + self.dbb_origin = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, padding=padding, dilation=dilation, groups=groups) self.dbb_avg = nn.Sequential() if groups < out_channels: @@ -440,25 +466,30 @@ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, groups=groups, bias=False)) self.dbb_avg.add_module('bn', BNAndPadLayer(pad_pixels=padding, num_features=out_channels)) self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=0)) - self.dbb_1x1 = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, - padding=0, groups=groups) + self.dbb_1x1 = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=1, + stride=stride, + padding=0, groups=groups) else: self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding)) self.dbb_avg.add_module('avgbn', nn.BatchNorm2d(out_channels)) if internal_channels_1x1_3x3 is None: - internal_channels_1x1_3x3 = in_channels if groups < out_channels else 2 * in_channels # For mobilenet, it is better to have 2X internal channels + internal_channels_1x1_3x3 = in_channels if groups < out_channels else 2 * in_channels # For mobilenet, it is better to have 2X internal channels self.dbb_1x1_kxk = nn.Sequential() if internal_channels_1x1_3x3 == in_channels: self.dbb_1x1_kxk.add_module('idconv1', IdentityBasedConv1x1(channels=in_channels, groups=groups)) else: - self.dbb_1x1_kxk.add_module('conv1', nn.Conv2d(in_channels=in_channels, out_channels=internal_channels_1x1_3x3, - kernel_size=1, stride=1, padding=0, groups=groups, bias=False)) - self.dbb_1x1_kxk.add_module('bn1', BNAndPadLayer(pad_pixels=padding, num_features=internal_channels_1x1_3x3, affine=True)) - self.dbb_1x1_kxk.add_module('conv2', nn.Conv2d(in_channels=internal_channels_1x1_3x3, out_channels=out_channels, - kernel_size=kernel_size, stride=stride, padding=0, groups=groups, bias=False)) + self.dbb_1x1_kxk.add_module('conv1', + nn.Conv2d(in_channels=in_channels, 
out_channels=internal_channels_1x1_3x3, + kernel_size=1, stride=1, padding=0, groups=groups, bias=False)) + self.dbb_1x1_kxk.add_module('bn1', BNAndPadLayer(pad_pixels=padding, num_features=internal_channels_1x1_3x3, + affine=True)) + self.dbb_1x1_kxk.add_module('conv2', + nn.Conv2d(in_channels=internal_channels_1x1_3x3, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=0, groups=groups, + bias=False)) self.dbb_1x1_kxk.add_module('bn2', nn.BatchNorm2d(out_channels)) # The experiments reported in the paper used the default initialization of bn.weight (all as 1). But changing the initialization may be useful in some cases. @@ -481,25 +512,31 @@ def get_equivalent_kernel_bias(self): k_1x1_kxk_first = self.dbb_1x1_kxk.conv1.weight k_1x1_kxk_first, b_1x1_kxk_first = transI_fusebn(k_1x1_kxk_first, self.dbb_1x1_kxk.bn1) k_1x1_kxk_second, b_1x1_kxk_second = transI_fusebn(self.dbb_1x1_kxk.conv2.weight, self.dbb_1x1_kxk.bn2) - k_1x1_kxk_merged, b_1x1_kxk_merged = transIII_1x1_kxk(k_1x1_kxk_first, b_1x1_kxk_first, k_1x1_kxk_second, b_1x1_kxk_second, groups=self.groups) + k_1x1_kxk_merged, b_1x1_kxk_merged = transIII_1x1_kxk(k_1x1_kxk_first, b_1x1_kxk_first, k_1x1_kxk_second, + b_1x1_kxk_second, groups=self.groups) k_avg = transV_avg(self.out_channels, self.kernel_size, self.groups) - k_1x1_avg_second, b_1x1_avg_second = transI_fusebn(k_avg.to(self.dbb_avg.avgbn.weight.device), self.dbb_avg.avgbn) + k_1x1_avg_second, b_1x1_avg_second = transI_fusebn(k_avg.to(self.dbb_avg.avgbn.weight.device), + self.dbb_avg.avgbn) if hasattr(self.dbb_avg, 'conv'): k_1x1_avg_first, b_1x1_avg_first = transI_fusebn(self.dbb_avg.conv.weight, self.dbb_avg.bn) - k_1x1_avg_merged, b_1x1_avg_merged = transIII_1x1_kxk(k_1x1_avg_first, b_1x1_avg_first, k_1x1_avg_second, b_1x1_avg_second, groups=self.groups) + k_1x1_avg_merged, b_1x1_avg_merged = transIII_1x1_kxk(k_1x1_avg_first, b_1x1_avg_first, k_1x1_avg_second, + b_1x1_avg_second, groups=self.groups) else: k_1x1_avg_merged, b_1x1_avg_merged = k_1x1_avg_second, b_1x1_avg_second - return transII_addbranch((k_origin, k_1x1, k_1x1_kxk_merged, k_1x1_avg_merged), (b_origin, b_1x1, b_1x1_kxk_merged, b_1x1_avg_merged)) + return transII_addbranch((k_origin, k_1x1, k_1x1_kxk_merged, k_1x1_avg_merged), + (b_origin, b_1x1, b_1x1_kxk_merged, b_1x1_avg_merged)) def switch_to_deploy(self): if hasattr(self, 'dbb_reparam'): return kernel, bias = self.get_equivalent_kernel_bias() - self.dbb_reparam = nn.Conv2d(in_channels=self.dbb_origin.conv.in_channels, out_channels=self.dbb_origin.conv.out_channels, + self.dbb_reparam = nn.Conv2d(in_channels=self.dbb_origin.conv.in_channels, + out_channels=self.dbb_origin.conv.out_channels, kernel_size=self.dbb_origin.conv.kernel_size, stride=self.dbb_origin.conv.stride, - padding=self.dbb_origin.conv.padding, dilation=self.dbb_origin.conv.dilation, groups=self.dbb_origin.conv.groups, bias=True) + padding=self.dbb_origin.conv.padding, dilation=self.dbb_origin.conv.dilation, + groups=self.dbb_origin.conv.groups, bias=True) self.dbb_reparam.weight.data = kernel self.dbb_reparam.bias.data = bias for para in self.parameters(): @@ -540,9 +577,9 @@ def single_init(self): class DetectBackend(nn.Module): def __init__(self, weights='yolov6s.pt', device=None, dnn=True): - super().__init__() - assert isinstance(weights, str) and Path(weights).suffix == '.pt', f'{Path(weights).suffix} format is not supported.' + assert isinstance(weights, str) and Path( + weights).suffix == '.pt', f'{Path(weights).suffix} format is not supported.' 
from yolov6.utils.checkpoint import load_checkpoint model = load_checkpoint(weights, map_location=device) stride = int(model.stride.max()) @@ -559,6 +596,7 @@ class RepBlock(nn.Module): ''' RepBlock is a stage block with rep-style basic block ''' + def __init__(self, in_channels, out_channels, n=1, block=RepVGGBlock): super().__init__() self.conv1 = block(in_channels, out_channels) diff --git a/yolov6/layers/dbb_transforms.py b/yolov6/layers/dbb_transforms.py index cd93d0e2..781fb535 100644 --- a/yolov6/layers/dbb_transforms.py +++ b/yolov6/layers/dbb_transforms.py @@ -15,7 +15,7 @@ def transII_addbranch(kernels, biases): def transIII_1x1_kxk(k1, b1, k2, b2, groups): if groups == 1: - k = F.conv2d(k2, k1.permute(1, 0, 2, 3)) # + k = F.conv2d(k2, k1.permute(1, 0, 2, 3)) # b_hat = (k2 * b1.reshape(1, -1, 1, 1)).sum((1, 2, 3)) else: k_slices = [] @@ -24,10 +24,11 @@ def transIII_1x1_kxk(k1, b1, k2, b2, groups): k1_group_width = k1.size(0) // groups k2_group_width = k2.size(0) // groups for g in range(groups): - k1_T_slice = k1_T[:, g*k1_group_width:(g+1)*k1_group_width, :, :] - k2_slice = k2[g*k2_group_width:(g+1)*k2_group_width, :, :, :] + k1_T_slice = k1_T[:, g * k1_group_width:(g + 1) * k1_group_width, :, :] + k2_slice = k2[g * k2_group_width:(g + 1) * k2_group_width, :, :, :] k_slices.append(F.conv2d(k2_slice, k1_T_slice)) - b_slices.append((k2_slice * b1[g * k1_group_width:(g+1) * k1_group_width].reshape(1, -1, 1, 1)).sum((1, 2, 3))) + b_slices.append( + (k2_slice * b1[g * k1_group_width:(g + 1) * k1_group_width].reshape(1, -1, 1, 1)).sum((1, 2, 3))) k, b_hat = transIV_depthconcat(k_slices, b_slices) return k, b_hat + b2 diff --git a/yolov6/models/efficientrep.py b/yolov6/models/efficientrep.py index 835e698e..f3420945 100644 --- a/yolov6/models/efficientrep.py +++ b/yolov6/models/efficientrep.py @@ -9,11 +9,11 @@ class EfficientRep(nn.Module): ''' def __init__( - self, - in_channels=3, - channels_list=None, - num_repeats=None, - block=RepVGGBlock + self, + in_channels=3, + channels_list=None, + num_repeats=None, + block=RepVGGBlock ): super().__init__() @@ -93,7 +93,6 @@ def __init__( ) def forward(self, x): - outputs = [] x = self.stem(x) x = self.ERBlock_2(x) diff --git a/yolov6/models/effidehead.py b/yolov6/models/effidehead.py index 4664f937..2b463f72 100644 --- a/yolov6/models/effidehead.py +++ b/yolov6/models/effidehead.py @@ -9,6 +9,7 @@ class Detect(nn.Module): With hardware-aware degisn, the decoupled head is optimized with hybridchannels methods. 
''' + def __init__(self, num_classes=80, anchors=1, num_layers=3, inplace=True, head_layers=None): # detection layer super().__init__() assert head_layers is not None @@ -36,13 +37,13 @@ def __init__(self, num_classes=80, anchors=1, num_layers=3, inplace=True, head_l # Efficient decoupled head layers for i in range(num_layers): - idx = i*6 + idx = i * 6 self.stems.append(head_layers[idx]) - self.cls_convs.append(head_layers[idx+1]) - self.reg_convs.append(head_layers[idx+2]) - self.cls_preds.append(head_layers[idx+3]) - self.reg_preds.append(head_layers[idx+4]) - self.obj_preds.append(head_layers[idx+5]) + self.cls_convs.append(head_layers[idx + 1]) + self.reg_convs.append(head_layers[idx + 2]) + self.cls_preds.append(head_layers[idx + 3]) + self.reg_preds.append(head_layers[idx + 4]) + self.obj_preds.append(head_layers[idx + 5]) def initialize_biases(self): for conv in self.cls_preds: @@ -79,7 +80,7 @@ def forward(self, x): self.grid[i] = torch.stack((xv, yv), 2).view(1, self.na, ny, nx, 2).float() if self.inplace: y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = torch.exp(y[..., 2:4]) * self.stride[i] # wh + y[..., 2:4] = torch.exp(y[..., 2:4]) * self.stride[i] # wh else: xy = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy wh = torch.exp(y[..., 2:4]) * self.stride[i] # wh diff --git a/yolov6/models/end2end.py b/yolov6/models/end2end.py index e3cf188e..5c19dca1 100644 --- a/yolov6/models/end2end.py +++ b/yolov6/models/end2end.py @@ -5,6 +5,7 @@ class ORT_NMS(torch.autograd.Function): '''ONNX-Runtime NMS operation''' + @staticmethod def forward(ctx, boxes, @@ -29,18 +30,19 @@ def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_ class TRT8_NMS(torch.autograd.Function): '''TensorRT NMS operation''' + @staticmethod def forward( - ctx, - boxes, - scores, - background_class=-1, - box_coding=1, - iou_threshold=0.45, - max_output_boxes=100, - plugin_version="1", - score_activation=0, - score_threshold=0.25, + ctx, + boxes, + scores, + background_class=-1, + box_coding=1, + iou_threshold=0.45, + max_output_boxes=100, + plugin_version="1", + score_activation=0, + score_threshold=0.25, ): batch_size, num_boxes, num_classes = scores.shape num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32) @@ -77,23 +79,24 @@ def symbolic(g, class TRT7_NMS(torch.autograd.Function): '''TensorRT NMS operation''' + @staticmethod def forward( - ctx, - boxes, - scores, - plugin_version="1", - shareLocation=1, - backgroundLabelId=-1, - numClasses=80, - topK=1000, - keepTopK=100, - scoreThreshold=0.25, - iouThreshold=0.45, - isNormalized=0, - clipBoxes=0, - scoreBits=16, - caffeSemantics=1, + ctx, + boxes, + scores, + plugin_version="1", + shareLocation=1, + backgroundLabelId=-1, + numClasses=80, + topK=1000, + keepTopK=100, + scoreThreshold=0.25, + iouThreshold=0.45, + isNormalized=0, + clipBoxes=0, + scoreBits=16, + caffeSemantics=1, ): batch_size, num_boxes, numClasses = scores.shape num_det = torch.randint(0, keepTopK, (batch_size, 1), dtype=torch.int32) @@ -119,7 +122,7 @@ def symbolic(g, scoreBits=16, caffeSemantics=1, ): - out = g.op("TRT::BatchedNMSDynamic_TRT", # BatchedNMS_TRT BatchedNMSDynamic_TRT + out = g.op("TRT::BatchedNMSDynamic_TRT", # BatchedNMS_TRT BatchedNMSDynamic_TRT boxes, scores, shareLocation_i=shareLocation, @@ -141,6 +144,7 @@ def symbolic(g, class ONNX_ORT(nn.Module): '''onnx module with ONNX-Runtime NMS operation.''' + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, 
device=None): super().__init__() self.device = device if device else torch.device("cpu") @@ -173,7 +177,8 @@ def forward(self, x): class ONNX_TRT7(nn.Module): '''onnx module with TensorRT NMS operation.''' - def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None): + + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None): super().__init__() assert max_wh is None self.device = device if device else torch.device('cpu') @@ -219,7 +224,8 @@ def forward(self, x): class ONNX_TRT8(nn.Module): '''onnx module with TensorRT NMS operation.''' - def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None): + + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None): super().__init__() assert max_wh is None self.device = device if device else torch.device('cpu') @@ -237,28 +243,30 @@ def forward(self, x): score = x[:, :, 5:] score *= conf num_det, det_boxes, det_scores, det_classes = TRT8_NMS.apply(box, score, self.background_class, self.box_coding, - self.iou_threshold, self.max_obj, - self.plugin_version, self.score_activation, - self.score_threshold) + self.iou_threshold, self.max_obj, + self.plugin_version, self.score_activation, + self.score_threshold) return num_det, det_boxes, det_scores, det_classes class End2End(nn.Module): '''export onnx or tensorrt model with NMS operation.''' - def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, trt_version=8, with_preprocess=False): + + def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, trt_version=8, + with_preprocess=False): super().__init__() device = device if device else torch.device('cpu') self.with_preprocess = with_preprocess self.model = model.to(device) - TRT = ONNX_TRT8 if trt_version >= 8 else ONNX_TRT7 + TRT = ONNX_TRT8 if trt_version >= 8 else ONNX_TRT7 self.patch_model = TRT if max_wh is None else ONNX_ORT self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device) self.end2end.eval() def forward(self, x): if self.with_preprocess: - x = x[:,[2,1,0],...] - x = x * (1/255) + x = x[:, [2, 1, 0], ...] + x = x * (1 / 255) x = self.model(x) x = self.end2end(x) return x diff --git a/yolov6/models/loss.py b/yolov6/models/loss.py index b86e4830..1a81016b 100644 --- a/yolov6/models/loss.py +++ b/yolov6/models/loss.py @@ -16,6 +16,7 @@ class ComputeLoss: '''Loss computation func. This func contains SimOTA and siou loss. 
''' + def __init__(self, reg_weight=5.0, iou_weight=3.0, @@ -44,14 +45,14 @@ def __init__(self, self.iou_loss = IOUloss(iou_type=iou_type, reduction="none") def __call__( - self, - outputs, - targets + self, + outputs, + targets ): dtype = outputs[0].type() device = targets.device loss_cls, loss_obj, loss_iou, loss_l1 = torch.zeros(1, device=device), torch.zeros(1, device=device), \ - torch.zeros(1, device=device), torch.zeros(1, device=device) + torch.zeros(1, device=device), torch.zeros(1, device=device) num_classes = outputs[0].shape[-1] - 5 outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides = self.get_outputs_and_grids( @@ -70,7 +71,9 @@ def __call__( targets_list[int(item[0])].append(item[1:]) max_len = max((len(l) for l in targets_list)) - targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device) + targets = torch.from_numpy( + np.array(list(map(lambda l: l + [[-1, 0, 0, 0, 0]] * (max_len - len(l)), targets_list)))[:, 1:, :]).to( + targets.device) num_targets_list = (targets.sum(dim=2) > 0).sum(dim=1) # number of objects num_fg, num_gts = 0, 0 @@ -190,7 +193,7 @@ def __call__( loss_iou += (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks].T, reg_targets)).sum() / num_fg loss_l1 += (self.l1_loss(bbox_preds_org.view(-1, 4)[fg_masks], l1_targets)).sum() / num_fg - loss_obj += (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets*1.0)).sum() / num_fg + loss_obj += (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets * 1.0)).sum() / num_fg loss_cls += (self.bcewithlog_loss(cls_preds.view(-1, num_classes)[fg_masks], cls_targets)).sum() / num_fg total_losses = self.reg_weight * loss_iou + loss_l1 + loss_obj + loss_cls @@ -233,7 +236,7 @@ def get_outputs_and_grids(self, outputs, strides, dtype, device): outputs_origin.append(output_origin) xy_shifts = torch.cat(xy_shifts, 1) # [1, n_anchors_all, 2] - expanded_strides = torch.cat(expanded_strides, 1) # [1, n_anchors_all, 1] + expanded_strides = torch.cat(expanded_strides, 1) # [1, n_anchors_all, 1] outputs_origin = torch.cat(outputs_origin, 1) outputs = torch.cat(outputs_new, 1) @@ -251,18 +254,18 @@ def get_l1_target(self, l1_target, gt, stride, xy_shifts, eps=1e-8): @torch.no_grad() def get_assignments( - self, - batch_idx, - num_gt, - total_num_anchors, - gt_bboxes_per_image, - gt_classes, - bboxes_preds_per_image, - cls_preds_per_image, - obj_preds_per_image, - expanded_strides, - xy_shifts, - num_classes + self, + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + bboxes_preds_per_image, + cls_preds_per_image, + obj_preds_per_image, + expanded_strides, + xy_shifts, + num_classes ): fg_mask, is_in_boxes_and_center = self.get_in_boxes_info( @@ -291,8 +294,8 @@ def get_assignments( with torch.cuda.amp.autocast(enabled=False): cls_preds_ = ( - cls_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1) - * obj_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1) + cls_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1) + * obj_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1) ) pair_wise_cls_loss = F.binary_cross_entropy( cls_preds_.sqrt_(), gt_cls_per_image, reduction="none" @@ -300,9 +303,9 @@ def get_assignments( del cls_preds_, obj_preds_ cost = ( - self.cls_weight * pair_wise_cls_loss - + self.iou_weight * pair_wise_ious_loss - + 100000.0 * (~is_in_boxes_and_center) + self.cls_weight * pair_wise_cls_loss + + self.iou_weight * pair_wise_ious_loss + + 100000.0 * 
(~is_in_boxes_and_center) ) ( @@ -323,12 +326,12 @@ def get_assignments( ) def get_in_boxes_info( - self, - gt_bboxes_per_image, - expanded_strides, - xy_shifts, - total_num_anchors, - num_gt, + self, + gt_bboxes_per_image, + expanded_strides, + xy_shifts, + total_num_anchors, + num_gt, ): expanded_strides_per_image = expanded_strides[0] xy_shifts_per_image = xy_shifts[0] * expanded_strides_per_image @@ -374,7 +377,7 @@ def get_in_boxes_info( is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all is_in_boxes_and_center = ( - is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor] + is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor] ) return is_in_boxes_anchor, is_in_boxes_and_center diff --git a/yolov6/models/reppan.py b/yolov6/models/reppan.py index 320c9d1c..24856910 100644 --- a/yolov6/models/reppan.py +++ b/yolov6/models/reppan.py @@ -10,10 +10,10 @@ class RepPANNeck(nn.Module): """ def __init__( - self, - channels_list=None, - num_repeats=None, - block=RepVGGBlock, + self, + channels_list=None, + num_repeats=None, + block=RepVGGBlock, ): super().__init__() @@ -87,7 +87,6 @@ def __init__( ) def forward(self, input): - (x2, x1, x0) = input fpn_out0 = self.reduce_layer0(x0) diff --git a/yolov6/models/yolo.py b/yolov6/models/yolo.py index 9ebdf0e4..3d30f2e5 100644 --- a/yolov6/models/yolo.py +++ b/yolov6/models/yolo.py @@ -14,6 +14,7 @@ class Model(nn.Module): The default parts are EfficientRep Backbone, Rep-PAN and Efficient Decoupled Head. ''' + def __init__(self, config, channels=3, num_classes=None, anchors=None): # model, input channels, number of classes super().__init__() # Build network diff --git a/yolov6/utils/RepOptimizer.py b/yolov6/utils/RepOptimizer.py index a61f44d9..a26e76c7 100644 --- a/yolov6/utils/RepOptimizer.py +++ b/yolov6/utils/RepOptimizer.py @@ -6,13 +6,15 @@ from torch.optim.sgd import SGD from yolov6.utils.events import LOGGER + def extract_blocks_into_list(model, blocks): - for module in model.children(): + for module in model.children(): if isinstance(module, LinearAddBlock) or isinstance(module, RealVGGBlock): blocks.append(module) else: extract_blocks_into_list(module, blocks) + def extract_scales(model): blocks = [] extract_blocks_into_list(model['model'], blocks) @@ -26,6 +28,7 @@ def extract_scales(model): print('extract scales: ', scales[-1][-2].mean(), scales[-1][-1].mean()) return scales + def check_keywords_in_name(name, keywords=()): isin = False for keyword in keywords: @@ -33,6 +36,7 @@ def check_keywords_in_name(name, keywords=()): isin = True return isin + def set_weight_decay(model, skip_list=(), skip_keywords=(), echo=False): has_decay = [] no_decay = [] @@ -45,7 +49,7 @@ def set_weight_decay(model, skip_list=(), skip_keywords=(), echo=False): if echo: print(f"{name} USE weight decay") elif len(param.shape) == 1 or name.endswith(".bias") or (name in skip_list) or \ - check_keywords_in_name(name, skip_keywords): + check_keywords_in_name(name, skip_keywords): no_decay.append(param) if echo: print(f"{name} has no weight decay") @@ -57,6 +61,7 @@ def set_weight_decay(model, skip_list=(), skip_keywords=(), echo=False): return [{'params': has_decay}, {'params': no_decay, 'weight_decay': 0.}] + def get_optimizer_param(args, cfg, model): """ Build optimizer from cfg file.""" accumulate = max(1, round(64 / args.batch_size)) @@ -74,6 +79,7 @@ def get_optimizer_param(args, cfg, model): {'params': g_w, 'weight_decay': cfg.solver.weight_decay}, {'params': g_b}] + class RepVGGOptimizer(SGD): # scales is a list, 
scales[i] is a triple (scale_identity.weight, scale_1x1.weight, scale_conv.weight) or a two-tuple (scale_1x1.weight, scale_conv.weight) (if the block has no scale_identity) def __init__(self, model, scales, @@ -82,7 +88,8 @@ def __init__(self, model, scales, reinit=True, use_identity_scales_for_reinit=True, cpu_mode=False): - defaults = dict(lr=cfg.solver.lr0, momentum=cfg.solver.momentum, dampening=dampening, weight_decay=weight_decay, nesterov=nesterov) + defaults = dict(lr=cfg.solver.lr0, momentum=cfg.solver.momentum, dampening=dampening, weight_decay=weight_decay, + nesterov=nesterov) if nesterov and (cfg.solver.momentum <= 0 or dampening != 0): raise ValueError("Nesterov momentum requires a momentum and zero dampening") # parameters = set_weight_decay(model) @@ -102,7 +109,8 @@ def __init__(self, model, scales, if gamma_init == 1.0: LOGGER.info('Checked. This is training from scratch.') else: - LOGGER.warning('========================== Warning! Is this really training from scratch ? =================') + LOGGER.warning( + '========================== Warning! Is this really training from scratch ? =================') LOGGER.info('##################### Re-initialize #############') self.reinitialize(scales, convs, use_identity_scales_for_reinit) @@ -119,25 +127,30 @@ def reinitialize(self, scales_by_idx, conv3x3_by_idx, use_identity_scales): else: assert len(scales) == 3 assert in_channels == out_channels - identity = torch.from_numpy(np.eye(out_channels, dtype=np.float32).reshape(out_channels, out_channels, 1, 1)).to(conv3x3.weight.device) - conv3x3.weight.data = conv3x3.weight * scales[2].view(-1, 1, 1, 1) + F.pad(kernel_1x1.weight, [1, 1, 1, 1]) * scales[1].view(-1, 1, 1, 1) - if use_identity_scales: # You may initialize the imaginary CSLA block with the trained identity_scale values. Makes almost no difference. + identity = torch.from_numpy( + np.eye(out_channels, dtype=np.float32).reshape(out_channels, out_channels, 1, 1)).to( + conv3x3.weight.device) + conv3x3.weight.data = conv3x3.weight * scales[2].view(-1, 1, 1, 1) + F.pad(kernel_1x1.weight, + [1, 1, 1, 1]) * scales[ + 1].view(-1, 1, 1, 1) + if use_identity_scales: # You may initialize the imaginary CSLA block with the trained identity_scale values. Makes almost no difference. 
identity_scale_weight = scales[0] conv3x3.weight.data += F.pad(identity * identity_scale_weight.view(-1, 1, 1, 1), [1, 1, 1, 1]) else: conv3x3.weight.data += F.pad(identity, [1, 1, 1, 1]) - def generate_gradient_masks(self, scales_by_idx, conv3x3_by_idx, cpu_mode=False): self.grad_mask_map = {} for scales, conv3x3 in zip(scales_by_idx, conv3x3_by_idx): para = conv3x3.weight if len(scales) == 2: mask = torch.ones_like(para, device=scales[0].device) * (scales[1] ** 2).view(-1, 1, 1, 1) - mask[:, :, 1:2, 1:2] += torch.ones(para.shape[0], para.shape[1], 1, 1, device=scales[0].device) * (scales[0] ** 2).view(-1, 1, 1, 1) + mask[:, :, 1:2, 1:2] += torch.ones(para.shape[0], para.shape[1], 1, 1, device=scales[0].device) * ( + scales[0] ** 2).view(-1, 1, 1, 1) else: mask = torch.ones_like(para, device=scales[0].device) * (scales[2] ** 2).view(-1, 1, 1, 1) - mask[:, :, 1:2, 1:2] += torch.ones(para.shape[0], para.shape[1], 1, 1, device=scales[0].device) * (scales[1] ** 2).view(-1, 1, 1, 1) + mask[:, :, 1:2, 1:2] += torch.ones(para.shape[0], para.shape[1], 1, 1, device=scales[0].device) * ( + scales[1] ** 2).view(-1, 1, 1, 1) ids = np.arange(para.shape[1]) assert para.shape[1] == para.shape[0] mask[ids, ids, 1:2, 1:2] += 1.0 diff --git a/yolov6/utils/events.py b/yolov6/utils/events.py index 39fcb18a..ed0adc40 100644 --- a/yolov6/utils/events.py +++ b/yolov6/utils/events.py @@ -44,6 +44,7 @@ def write_tblog(tblogger, epoch, results, losses): tblogger.add_scalar("x/lr1", results[3], epoch + 1) tblogger.add_scalar("x/lr2", results[4], epoch + 1) + def write_tbimg(tblogger, imgs, step, type='train'): """Display train_batch and validation predictions to tensorboard.""" if type == 'train': diff --git a/yolov6/utils/figure_iou.py b/yolov6/utils/figure_iou.py index 13b69d77..c8ca952e 100644 --- a/yolov6/utils/figure_iou.py +++ b/yolov6/utils/figure_iou.py @@ -7,6 +7,7 @@ class IOUloss: """ Calculate IoU loss. """ + def __init__(self, box_format='xywh', iou_type='ciou', reduction='none', eps=1e-7): """ Setting of the class. Args: diff --git a/yolov6/utils/general.py b/yolov6/utils/general.py index 10a59058..643c9d66 100644 --- a/yolov6/utils/general.py +++ b/yolov6/utils/general.py @@ -4,6 +4,7 @@ import glob from pathlib import Path + def increment_name(path): "increase save directory's id" path = Path(path) diff --git a/yolov6/utils/nms.py b/yolov6/utils/nms.py index 9c61b7cc..ee878fff 100644 --- a/yolov6/utils/nms.py +++ b/yolov6/utils/nms.py @@ -10,7 +10,6 @@ import torch import torchvision - # Settings torch.set_printoptions(linewidth=320, precision=5, profile='long') np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 @@ -28,7 +27,8 @@ def xywh2xyxy(x): return y -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300): +def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, + max_det=300): """Runs Non-Maximum Suppression (NMS) on inference results. 
This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775 Args: From c74091f35783ae07d1f345336cf025956a8e7ad1 Mon Sep 17 00:00:00 2001 From: ahmetburakgozel Date: Tue, 9 Aug 2022 16:29:25 +0300 Subject: [PATCH 2/4] python tools/infer.py --weights yolov6s.pt --class 56 --source camera --- yolov6/core/inferer.py | 15 +++++++++++---- yolov6/data/datasets.py | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/yolov6/core/inferer.py b/yolov6/core/inferer.py index d9874e2d..60ac7e63 100644 --- a/yolov6/core/inferer.py +++ b/yolov6/core/inferer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -from yolov6.utils.torch_utils import get_model_info + import math import os import os.path as osp @@ -11,9 +11,11 @@ import numpy as np import torch from PIL import ImageFont +from tensorboard.compat import tf from tqdm import tqdm from yolov6.data.data_augment import letterbox from yolov6.layers.common import DetectBackend +from yolov6.utils import nms from yolov6.utils.events import LOGGER, load_yaml from yolov6.utils.nms import non_max_suppression @@ -111,11 +113,16 @@ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, if save_img: class_num = int(cls) # integer class + counter = str(len(det)) label = None if hide_labels else ( - self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}') + self.class_names[ + class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}') + + self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), + xyxy, label, color=self.generate_colors(class_num, True)) - self.plot_box_and_label(img_ori, max(round(sum( - img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True)) + cv2.putText(img_ori, counter, (25, 25), 0, max(round(sum(img_ori.shape) / 2 * 0.003), 2) / 3, + (56, 108, 32), 2, cv2.LINE_AA) img_src = np.asarray(img_ori) diff --git a/yolov6/data/datasets.py b/yolov6/data/datasets.py index ac41a438..4d77adfd 100644 --- a/yolov6/data/datasets.py +++ b/yolov6/data/datasets.py @@ -555,6 +555,8 @@ def get_hash(paths): class LoadData: + count: int + def __init__(self, path): if path != 'camera': p = str(Path(path).resolve()) # os-agnostic absolute path From c15c622378b6a11d06491fdeac1fe1e27a496777 Mon Sep 17 00:00:00 2001 From: ahmetburakgozel Date: Tue, 9 Aug 2022 16:36:53 +0300 Subject: [PATCH 3/4] Now it can count objects with the command "python tools/infer.py --weights yolov6s.pt --class 56 --source camera". 
--- yolov6/core/inferer.py | 1 - yolov6/data/datasets.py | 1 - 2 files changed, 2 deletions(-) diff --git a/yolov6/core/inferer.py b/yolov6/core/inferer.py index 60ac7e63..f0b062db 100644 --- a/yolov6/core/inferer.py +++ b/yolov6/core/inferer.py @@ -2,7 +2,6 @@ # -*- coding:utf-8 -*- import math -import os import os.path as osp import warnings from pathlib import Path diff --git a/yolov6/data/datasets.py b/yolov6/data/datasets.py index 4d77adfd..3522c82b 100644 --- a/yolov6/data/datasets.py +++ b/yolov6/data/datasets.py @@ -9,7 +9,6 @@ import json import time import hashlib -from pathlib import Path from multiprocessing.pool import Pool import cv2 From 5a243ff5d4f27e24bf20671ca6d7839d2b1b7ec1 Mon Sep 17 00:00:00 2001 From: ahmetburakgozel Date: Fri, 12 Aug 2022 15:52:38 +0300 Subject: [PATCH 4/4] Resolved conflicts --- yolov6/core/inferer.py | 2 ++ yolov6/data/datasets.py | 1 + 2 files changed, 3 insertions(+) diff --git a/yolov6/core/inferer.py b/yolov6/core/inferer.py index f0b062db..803ecc7b 100644 --- a/yolov6/core/inferer.py +++ b/yolov6/core/inferer.py @@ -6,6 +6,8 @@ import warnings from pathlib import Path +import os + import cv2 import numpy as np import torch diff --git a/yolov6/data/datasets.py b/yolov6/data/datasets.py index 3522c82b..4d77adfd 100644 --- a/yolov6/data/datasets.py +++ b/yolov6/data/datasets.py @@ -9,6 +9,7 @@ import json import time import hashlib +from pathlib import Path from multiprocessing.pool import Pool import cv2
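
Note on the object-counting change introduced in patches 2/4 through 4/4: the feature amounts to taking the detections kept after NMS for a frame and overlaying their count on the output image with `cv2.putText`, alongside the usual box-and-label drawing in `Inferer.infer()`. The sketch below is a minimal, standalone illustration of that overlay step only; the `detections` array, the blank `frame`, and the output filename are placeholders invented for this example and are not part of the patch, while the text position, colour, and font-scale expression mirror the call the patch adds.

```python
import cv2
import numpy as np

# Hypothetical post-NMS detections for one frame, one row per kept box:
# (x1, y1, x2, y2, confidence, class). In the patch this role is played by `det`.
detections = np.array([
    [100, 120, 220, 300, 0.91, 56],
    [300,  80, 410, 260, 0.83, 56],
], dtype=np.float32)

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for img_ori

# Overlay the number of detected objects in the top-left corner,
# using the same origin, colour, and font-scale formula as the patched code.
counter = str(len(detections))
font_scale = max(round(sum(frame.shape) / 2 * 0.003), 2) / 3
cv2.putText(frame, counter, (25, 25), 0, font_scale, (56, 108, 32), 2, cv2.LINE_AA)

cv2.imwrite('counted_frame.jpg', frame)  # placeholder output path for the sketch
```

In the patched `Inferer.infer()` the same overlay is refreshed for every processed frame, which is how the command quoted in the patch subject, `python tools/infer.py --weights yolov6s.pt --class 56 --source camera`, is described as counting objects of the selected class in the camera stream.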