diff --git a/README.md b/README.md
index 8361688..4d242f1 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,63 @@ python tools/model_converters/extract_backbone_weights.py work_dirs/openmixup/pr
 PORT=29500 bash tools/dist_train_ft_8gpu.sh configs/openmixup/finetune/imagenet/r50_rsb_a3_ft_sz160_4xb512_cos_fp16_ep100.py ${PATH_TO_CHECKPOINT}
 ```
+## Results and Models
+
+We summarize the pre-training and fine-tuning results of A2MIM and baselines on ImageNet-1K. Entries are fine-tuning top-1 accuracy (%); the training target of each method is given in parentheses (class labels for supervised training, RGB pixels for masked image modeling).
+
+| Backbone | # Params. (M) | Supervised (Label) | SimMIM (RGB) | A2MIM (RGB) |
+|---|:---:|:---:|:---:|:---:|
+| ViT-S | 48.8 | 79.9 | 81.7 | 82.1 |
+| ViT-B | 86.7 | 81.8 | 83.8 | 84.2 |
+| ViT-L | 304.6 | 82.6 | 85.6 | 86.1 |
+| ResNet-50 | 25.6 | 79.8 | 79.9 | 80.4 |
+| ResNet-101 | 44.5 | 81.3 | 81.3 | 81.9 |
+| ResNet-152 | 60.2 | 81.8 | 81.9 | 82.5 |
+| ResNet-200 | 64.7 | 82.1 | 82.2 | 83.0 |
+| ConvNeXt-S | 50.2 | 83.1 | 83.2 | 83.7 |
+| ConvNeXt-B | 88.6 | 83.5 | 83.6 | 84.1 |
+
+Config files, models, logs, and reconstruction visualizations are provided below. These files can also be downloaded from [a2mim-in1k-weights](https://github.com/Westlake-AI/openmixup/releases/tag/a2mim-in1k-weights) or **Baidu Cloud**: [A2MIM (3q5i)](https://pan.baidu.com/s/1aj3Lbj_wvyV_1BRzFhPcwQ?pwd=3q5i).
+
+<details>
+<summary>ViT-S/B/L on ImageNet-1K.</summary>
+
+| Method | Backbone | Epoch | Fine-tuning Top-1 | Pre-training | Fine-tuning | Results |
+|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+| SimMIM | ViT-Small | 800 | 81.7 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/vit_small_sz224_8xb256_step_fp16_ep800.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_vis.zip) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/vit_small_p16_swin_ft_simmim_sz224_8xb128_cos_ep100.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_ft.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_ft.log.json) |
+| A2MIM | ViT-Small | 800 | 82.1 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/vit_small_l0_sz224_8xb256_step_ep800.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_vis.zip) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/vit_small_p16_swin_ft_simmim_sz224_8xb128_cos_ep200.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_ft.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_small_sz224_8xb256_step_fp16_ep800_ft.log.json) |
+| SimMIM | ViT-Base | 800 | 83.8 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/vit_base_sz224_8xb128_accu2_step_fp16_ep800.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_base_sz224_8xb128_accu2_step_fp16_ep800_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_base_sz224_8xb128_accu2_step_fp16_ep800_vis.zip) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/vit_base_p16_swin_ft_simmim_sz224_4xb128_accu2_cos_ep100.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_vit_base_l0_res_fft01_sz224_4xb128_accu4_step_fp16_ep800_ft.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_vit_base_l0_res_fft01_sz224_4xb128_accu4_step_fp16_ep800_ft.log.json) |
+| A2MIM | ViT-Base | 800 | 84.3 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/vit_base_l0_sz224_8xb128_accu2_step_ep800.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/full_a2mim_vit_base_l0_res_fft01_sz224_4xb128_accu4_step_fp16_ep800.pth) \| [vis](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/visualization_a2mim_vit_base_l0_res_fft01_sz224_4xb128_accu4_step_fp16_ep800.zip) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/vit_base_p16_swin_ft_simmim_sz224_4xb128_accu2_cos_ep100.py) | [ckpt](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/a2mim_vit_base_l0_res_fft01_sz224_4xb128_accu4_step_fp16_ep800_ft.pth) \| [log](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/a2mim_vit_base_l0_res_fft01_sz224_4xb128_accu4_step_fp16_ep800_ft.log.json) |
+| SimMIM | ViT-Large | 800 | 85.6 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/vit_large_sz224_8xb128_accu2_step_fp16_ep800.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_large_sz224_8xb128_accu2_step_fp16_ep800_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_large_sz224_8xb128_accu2_step_fp16_ep800_vis.zip) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/vit_large_p16_swin_ft_simmim_sz224_8xb64_accu2_cos_ep100.py) | [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_vit_large_sz224_8xb128_accu2_step_fp16_ep800_ft.log.json) |
+| A2MIM | ViT-Large | 800 | 86.1 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/vit_large_l0_sz224_8xb128_accu2_step_ep800.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_vit_large_l0_sz224_8xb128_accu2_step_fp16_ep800_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_vit_large_l0_sz224_8xb128_accu2_step_fp16_ep800_vis.zip) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/vit_large_p16_swin_ft_simmim_sz224_8xb64_accu2_cos_ep150.py) | [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_vit_large_l0_sz224_8xb128_accu2_step_fp16_ep800_ft.log.json) |
+</details>
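+To fine-tune from any of the `*_full.pth` checkpoints above, first extract the backbone weights with `tools/model_converters/extract_backbone_weights.py`, as shown at the top of this README. A rough Python equivalent is sketched below; the `state_dict` nesting and `backbone.` key prefix follow the usual openmixup checkpoint layout, so verify them against the file you actually download:
+
+```python
+import torch
+
+# Placeholder file names: substitute any pre-training checkpoint from the tables.
+ckpt = torch.load('simmim_vit_small_sz224_8xb256_step_fp16_ep800_full.pth', map_location='cpu')
+state = ckpt.get('state_dict', ckpt)  # full checkpoints usually nest weights under 'state_dict'
+backbone = {k[len('backbone.'):]: v for k, v in state.items() if k.startswith('backbone.')}
+torch.save({'state_dict': backbone}, 'vit_small_backbone.pth')  # ready for the fine-tuning configs
+```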
+
+<summary>ResNet-50/101/152/200 on ImageNet-1K.</summary>
+
+| Method | Backbone | Epoch | Fine-tuning (A2) Top-1 | Pre-training | Fine-tuning | Results |
+|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+| SimMIM | ResNet-50 | 300 | 79.9 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/r50_sz224_8xb256_fp16_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r50_sz224_8xb256_cos_fp16_ep300_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r50_sz224_8xb256_cos_fp16_ep300_vis.zip) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | - |
+| A2MIM | ResNet-50 | 100 | 78.8 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/r50_l3_sz224_init_8xb256_cos_ep100.py) \| [ckpt](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/full_a2mim_r50_l3_sz224_init_8xb256_cos_ep100.pth) \| [vis](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/visualization_a2mim_r50_l3_sz224_init_8xb256_cos_ep100.zip) | [RSB A3](https://github.com/Westlake-AI/openmixup/blob/main/configs/benchmarks/classification/imagenet/r50_rsb_a3_ft_sz160_4xb512_cos_fp16_ep100.py) | [ckpt](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/a2mim_r50_l3_sz224_init_8xb256_cos_ep100_ft_rsb_a3.pth) \| [log](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/a2mim_r50_l3_sz224_init_8xb256_cos_ep100_ft_rsb_a3.log.json) |
+| A2MIM | ResNet-50 | 300 | 80.4 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/r50_l3_sz224_init_8xb256_cos_ep300.py) \| [ckpt](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/full_a2mim_r50_l3_sz224_init_8xb256_cos_ep300.pth) \| [vis](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/visualization_a2mim_r50_l3_sz224_init_8xb256_cos_ep300.zip) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/a2mim_r50_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.pth) \| [log](https://github.com/Westlake-AI/openmixup/releases/download/a2mim-in1k-weights/a2mim_r50_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.log.json) |
+| SimMIM | ResNet-101 | 300 | 81.3 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/r101_sz224_8xb256_fp16_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r101_sz224_8xb256_cos_fp16_ep300_full.pth) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt (A3)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r101_sz224_8xb256_ep300_ft_rsb_a3.pth) \| [log (A3)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r101_sz224_8xb256_ep300_ft_rsb_a3.log.json) |
+| A2MIM | ResNet-101 | 300 | 81.9 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/r101_l3_sz224_init_8xb256_cos_ep300.py) \| [ckpt (300ep)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r101_l3_sz224_init_8xb256_cos_ep300_full.pth) \| [ckpt (800ep)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r101_l3_sz224_init_8xb256_cos_ep800_full.pth) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt (A2)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r101_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.pth) \| [log (A2)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r101_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.log.json) |
+| SimMIM | ResNet-152 | 300 | 81.9 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/r152_sz224_8xb256_fp16_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r152_sz224_8xb256_cos_fp16_ep300_full.pth) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [log (A3)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r152_sz224_8xb256_ep300_ft_rsb_a3.log.json) |
+| A2MIM | ResNet-152 | 300 | 82.5 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/r152_l3_sz224_init_8xb256_cos_ep300.py) \| [ckpt (300ep)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r152_l3_sz224_init_8xb256_cos_ep300_full.pth) \| [ckpt (800ep)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r152_l3_sz224_init_8xb256_cos_ep800_full.pth) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt (A2)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r152_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.pth) \| [log (A2)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r152_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.log.json) |
+| SimMIM | ResNet-200 | 300 | 82.2 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/r200_sz224_8xb256_fp16_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r200_sz224_8xb256_cos_fp16_ep300_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r200_sz224_8xb256_cos_fp16_ep300_vis.zip) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r200_sz224_8xb256_cos_fp16_ep300_ft_rsb_a2.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_r200_sz224_8xb256_cos_fp16_ep300_ft_rsb_a2.log.json) |
+| A2MIM | ResNet-200 | 300 | 83.0 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/r200_l3_sz224_init_8xb256_cos_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r200_l3_sz224_init_8xb256_cos_ep300_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r200_l3_sz224_init_8xb256_cos_ep300_vis.zip) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r200_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_r200_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.log.json) |
+</details>
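+Since `MIMResNet` with `style='pytorch'` keeps standard ResNet parameter names, an extracted ResNet backbone can usually be reused outside openmixup as well. A hedged sketch (the file name is a placeholder, and the key match should be checked via the returned lists):
+
+```python
+import torch
+from torchvision.models import resnet50
+
+state = torch.load('a2mim_r50_backbone.pth', map_location='cpu')  # extracted as shown above
+state = state.get('state_dict', state)
+model = resnet50()
+missing, unexpected = model.load_state_dict(state, strict=False)
+print(missing, unexpected)  # ideally only the classifier ('fc.*') is missing
+```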
+
+<summary>ConvNeXt-S/B on ImageNet-1K.</summary>
+
+| Method | Backbone | Epoch | Fine-tuning (A2) Top-1 | Pre-training | Fine-tuning | Results |
+|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+| SimMIM | ConvNeXt-S | 300 | 83.2 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/convnext_small_sz224_8xb256_fp16_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_convnext_small_sz224_8xb256_cos_fp16_ep300_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_convnext_small_sz224_8xb256_cos_fp16_ep300_vis.zip) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/convnext_s_spark_ft_sz224_8xb256_cos_fp16_ep300.py) | - |
+| A2MIM | ConvNeXt-S | 300 | 83.7 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/convnext_s_l3_sz224_init_8xb256_cos_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_small_l3_sz224_init_8xb256_cos_ep300_full.pth) \| [vis](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_small_l3_sz224_init_8xb256_cos_ep300_vis.zip) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/convnext_s_spark_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_small_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_small_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.log.json) |
+| SimMIM | ConvNeXt-B | 300 | 83.6 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/simmim/imagenet/convnext_base_sz224_8xb256_fp16_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_convnext_base_sz224_8xb256_cos_fp16_ep300_full.pth) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/convnext_b_spark_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_convnext_base_sz224_8xb256_cos_fp16_ep300_ft.pth) \| [log](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/simmim_convnext_base_sz224_8xb256_cos_fp16_ep300_ft.log.json) |
+| A2MIM | ConvNeXt-B | 300 | 84.1 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/convnext_b_l3_sz224_init_8xb256_cos_ep300.py) \| [ckpt](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_base_l3_sz224_init_8xb256_cos_fp16_ep300_full.pth) | [RSB A2](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/convnext_b_spark_ft_sz224_8xb256_cos_fp16_ep300.py) | [ckpt (A2)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_base_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.pth) \| [ckpt (A3)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_base_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a3.pth) \| [log (A2)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_base_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a2.log.json) \| [log (A3)](https://github.com/Westlake-AI/A2MIM/releases/download/a2mim-in1k-weights/a2mim_convnext_base_l3_sz224_init_8xb256_cos_ep300_ft_rsb_a3.log.json) |
+</details>
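+All configs in this release set the learning rate with the linear scaling rule `lr = base_lr * total_batch_size / 512`, which is why the optimizer fields read like `3e-4 * 2048 / 512`. A one-line helper to sanity-check the quoted values:
+
+```python
+def scaled_lr(base_lr, imgs_per_gpu, gpus=8, accumulate=1):
+    """Linear lr scaling used throughout the configs (base lr is per 512 samples)."""
+    return base_lr * imgs_per_gpu * gpus * accumulate / 512
+
+print(scaled_lr(2e-4, 256))        # 0.0008 -- SimMIM ResNet/ConvNeXt at bs2048
+print(scaled_lr(3e-4, 256))        # 0.0012 -- A2MIM ResNet/ConvNeXt at bs2048
+print(scaled_lr(1e-4, 128, 8, 2))  # 0.0004 -- SimMIM/A2MIM ViT-B/L at bs2048
+```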
+ ## License This project is released under the [Apache 2.0 license](LICENSE). diff --git a/configs/openmixup/pretrain/_base_/models/simmim/convnext_b.py b/configs/openmixup/pretrain/_base_/models/simmim/convnext_b.py new file mode 100644 index 0000000..d2e4bd2 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/convnext_b.py @@ -0,0 +1,15 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='MIMConvNeXt', + arch="base", + out_indices=(3,), # x-1: stage-x + act_cfg=dict(type='GELU'), + drop_path_rate=0.0, + gap_before_final_norm=False, + replace=False, # use residual mask token + mask_layer=0, mask_token='learnable', + ), + neck=dict(type='SimMIMNeck', in_channels=1024, encoder_stride=32), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/_base_/models/simmim/convnext_s.py b/configs/openmixup/pretrain/_base_/models/simmim/convnext_s.py new file mode 100644 index 0000000..2cc9f19 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/convnext_s.py @@ -0,0 +1,15 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='MIMConvNeXt', + arch="small", + out_indices=(3,), # x-1: stage-x + act_cfg=dict(type='GELU'), + drop_path_rate=0.0, + gap_before_final_norm=False, + replace=False, # use residual mask token + mask_layer=0, mask_token='learnable', + ), + neck=dict(type='SimMIMNeck', in_channels=768, encoder_stride=32), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/_base_/models/simmim/convnext_t.py b/configs/openmixup/pretrain/_base_/models/simmim/convnext_t.py new file mode 100644 index 0000000..d26ad2f --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/convnext_t.py @@ -0,0 +1,15 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='MIMConvNeXt', + arch="tiny", + out_indices=(3,), # x-1: stage-x + act_cfg=dict(type='GELU'), + drop_path_rate=0.0, + gap_before_final_norm=False, + replace=False, # use residual mask token + mask_layer=0, mask_token='learnable', + ), + neck=dict(type='SimMIMNeck', in_channels=768, encoder_stride=32), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/_base_/models/simmim/r101.py b/configs/openmixup/pretrain/_base_/models/simmim/r101.py new file mode 100644 index 0000000..942dd30 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/r101.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='MIMResNet', + depth=101, + mask_layer=0, mask_token='learnable', + num_stages=4, + out_indices=(3,), # no conv-1, x-1: stage-x + norm_cfg=dict(type='SyncBN'), + style='pytorch'), + neck=dict(type='SimMIMNeck', in_channels=2048, encoder_stride=32), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/_base_/models/simmim/r152.py b/configs/openmixup/pretrain/_base_/models/simmim/r152.py new file mode 100644 index 0000000..ee38a60 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/r152.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='MIMResNet', + depth=152, + mask_layer=0, mask_token='learnable', + num_stages=4, + out_indices=(3,), # no conv-1, x-1: stage-x + norm_cfg=dict(type='SyncBN'), + style='pytorch'), + neck=dict(type='SimMIMNeck', in_channels=2048, encoder_stride=32), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git 
a/configs/openmixup/pretrain/_base_/models/simmim/r200.py b/configs/openmixup/pretrain/_base_/models/simmim/r200.py new file mode 100644 index 0000000..b2aab74 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/r200.py @@ -0,0 +1,13 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='MIMResNet', + depth=200, + mask_layer=0, mask_token='learnable', + num_stages=4, + out_indices=(3,), # no conv-1, x-1: stage-x + norm_cfg=dict(type='SyncBN'), + style='pytorch'), + neck=dict(type='SimMIMNeck', in_channels=2048, encoder_stride=32), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/_base_/models/simmim/vit_large.py b/configs/openmixup/pretrain/_base_/models/simmim/vit_large.py new file mode 100644 index 0000000..96b3379 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/vit_large.py @@ -0,0 +1,14 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='SimMIMViT', + arch='large', + replace=True, + mask_layer=0, mask_token='learnable', + img_size=224, + drop_rate=0., drop_path_rate=0.1, + use_window=True, init_values=0.1, # SimMIM: use init_value and relative pos encoding + ), + neck=dict(type='SimMIMNeck', in_channels=1024, encoder_stride=16), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/_base_/models/simmim/vit_small.py b/configs/openmixup/pretrain/_base_/models/simmim/vit_small.py new file mode 100644 index 0000000..2b03e82 --- /dev/null +++ b/configs/openmixup/pretrain/_base_/models/simmim/vit_small.py @@ -0,0 +1,14 @@ +# model settings +model = dict( + type='SimMIM', + backbone=dict( + type='SimMIMViT', + arch='small', + replace=True, + mask_layer=0, mask_token='learnable', + img_size=224, + drop_rate=0., drop_path_rate=0.1, + use_window=True, init_values=0.1, # SimMIM: use init_value and relative pos encoding + ), + neck=dict(type='SimMIMNeck', in_channels=768, encoder_stride=16), + head=dict(type='SimMIMHead', encoder_in_channels=3)) diff --git a/configs/openmixup/pretrain/a2mim/imagenet/convnext_b_l3_sz224_init_8xb256_cos_ep300.py b/configs/openmixup/pretrain/a2mim/imagenet/convnext_b_l3_sz224_init_8xb256_cos_ep300.py new file mode 100644 index 0000000..90f4c70 --- /dev/null +++ b/configs/openmixup/pretrain/a2mim/imagenet/convnext_b_l3_sz224_init_8xb256_cos_ep300.py @@ -0,0 +1,69 @@ +_base_ = [ + '../../_base_/models/a2mim/convnext_b.py', + '../../_base_/datasets/imagenet/a2mim_rgb_m_sz224_rrc08_bs64.py', + '../../_base_/default_runtime.py', +] + +# model settings +model = dict( + backbone=dict( + mask_layer=3, mask_token="learnable", + mask_init=1e-6, # init residual gamma + ), + head=dict( + fft_weight=0., fft_focal=False, + ), +) + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=10, + train=dict( + feature_mode=None, feature_args=dict(), + mask_pipeline=[ + dict(type='BlockwiseMaskGenerator', + input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=32, # stage 3 + mask_color='mean', mask_only=False), + ], +)) + +# interval for accumulate gradient +update_interval = 1 # bs256 x 8gpus = bs2048 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=626 * 25, # plot every 25 ep + iter_per_epoch=626), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=3e-4 * 2048 / 512, # 3e-4 * 4 for bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 
'gamma': dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0., lr_mult=1e-1,), + 'mask_gamma': dict(weight_decay=0., lr_mult=1e-1,), + }) + +# fp16 +use_fp16 = True +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict(update_interval=update_interval) + +# lr scheduler +lr_config = dict( + policy='StepFixCosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=1e-6, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/openmixup/pretrain/a2mim/imagenet/convnext_t_l3_sz224_init_8xb256_cos_ep300.py b/configs/openmixup/pretrain/a2mim/imagenet/convnext_t_l3_sz224_init_8xb256_cos_ep300.py index 6276af4..6a35994 100644 --- a/configs/openmixup/pretrain/a2mim/imagenet/convnext_t_l3_sz224_init_8xb256_cos_ep300.py +++ b/configs/openmixup/pretrain/a2mim/imagenet/convnext_t_l3_sz224_init_8xb256_cos_ep300.py @@ -22,7 +22,7 @@ feature_mode=None, feature_args=dict(), mask_pipeline=[ dict(type='BlockwiseMaskGenerator', - input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=32, # stage 3 in MogaNet + input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=32, # stage 3 mask_color='mean', mask_only=False), ], )) @@ -33,14 +33,14 @@ # additional hooks custom_hooks = [ dict(type='SAVEHook', - save_interval=626 * 10, # plot every 10 ep + save_interval=626 * 25, # plot every 25 ep iter_per_epoch=626), ] # optimizer optimizer = dict( type='AdamW', - lr=1e-4 * 2048 / 512, # 4e-3 for bs2048 + lr=3e-4 * 2048 / 512, # 3e-4 * 4 for bs2048 betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, paramwise_options={ '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), diff --git a/configs/openmixup/pretrain/a2mim/imagenet/moganet_s_l3_sz224_init_8xb256_cos_ep300.py b/configs/openmixup/pretrain/a2mim/imagenet/moganet_s_l3_sz224_init_8xb256_cos_ep300.py index 3fa5a3e..326fa90 100644 --- a/configs/openmixup/pretrain/a2mim/imagenet/moganet_s_l3_sz224_init_8xb256_cos_ep300.py +++ b/configs/openmixup/pretrain/a2mim/imagenet/moganet_s_l3_sz224_init_8xb256_cos_ep300.py @@ -17,13 +17,12 @@ # dataset data = dict( - # imgs_per_gpu=256, workers_per_gpu=10, - imgs_per_gpu=64, workers_per_gpu=4, + imgs_per_gpu=256, workers_per_gpu=10, train=dict( feature_mode=None, feature_args=dict(), mask_pipeline=[ dict(type='BlockwiseMaskGenerator', - input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=32, # stage 3 in MogaNet + input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=32, # stage 3 mask_color='mean', mask_only=False), ], )) @@ -34,26 +33,25 @@ # additional hooks custom_hooks = [ dict(type='SAVEHook', - save_interval=626 * 10, # plot every 10 ep + save_interval=626 * 25, # plot every 25 ep iter_per_epoch=626), ] # optimizer optimizer = dict( type='AdamW', - lr=1e-4 * 2048 / 512, # 4e-3 for bs2048 + lr=3e-4 * 2048 / 512, # 3e-4 * 4 for bs2048 betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, paramwise_options={ '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), 'bias': dict(weight_decay=0.), - 'layer_scale': dict(weight_decay=0.), - 'scale': dict(weight_decay=0.), + 'gamma': dict(weight_decay=0.), 'mask_token': dict(weight_decay=0., lr_mult=1e-1,), 'mask_gamma': dict(weight_decay=0., lr_mult=1e-1,), }) # fp16 -use_fp16 = False +use_fp16 = True fp16 = dict(type='mmcv', loss_scale='dynamic') # optimizer args optimizer_config = dict(update_interval=update_interval) diff --git 
a/configs/openmixup/pretrain/a2mim/imagenet/r200_l3_sz224_init_8xb256_cos_ep300.py b/configs/openmixup/pretrain/a2mim/imagenet/r200_l3_sz224_init_8xb256_cos_ep300.py new file mode 100644 index 0000000..d2833c3 --- /dev/null +++ b/configs/openmixup/pretrain/a2mim/imagenet/r200_l3_sz224_init_8xb256_cos_ep300.py @@ -0,0 +1,63 @@ +_base_ = [ + '../../_base_/models/a2mim/r200.py', + '../../_base_/datasets/imagenet/a2mim_rgb_m_sz224_rrc08_bs64.py', + '../../_base_/default_runtime.py', +] + +# model settings +model = dict( + backbone=dict( + mask_layer=3, mask_token="learnable", + mask_init=1e-6, # init residual gamma +)) + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=10, + train=dict( + feature_mode=None, feature_args=dict(), + mask_pipeline=[ + dict(type='BlockwiseMaskGenerator', + input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=16, # stage 3 + mask_color='mean', mask_only=False), + ], +)) + +# interval for accumulate gradient +update_interval = 1 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=626 * 10, # plot every 10 ep + iter_per_epoch=626), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=3e-4 * 2048 / 512, # 1.2e-3 for bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0., lr_mult=1e-1,), + }) + +# fp16 +use_fp16 = False +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict(update_interval=update_interval) + +# lr scheduler +lr_config = dict( + policy='StepFixCosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=1e-6, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/openmixup/pretrain/a2mim/imagenet/r50_l3_sz224_init_8xb256_cos_ep300.py b/configs/openmixup/pretrain/a2mim/imagenet/r50_l3_sz224_init_8xb256_cos_ep300.py index 366a8fb..9e28f9f 100644 --- a/configs/openmixup/pretrain/a2mim/imagenet/r50_l3_sz224_init_8xb256_cos_ep300.py +++ b/configs/openmixup/pretrain/a2mim/imagenet/r50_l3_sz224_init_8xb256_cos_ep300.py @@ -29,7 +29,7 @@ # additional hooks custom_hooks = [ dict(type='SAVEHook', - save_interval=626 * 10, # plot every 10 ep + save_interval=626 * 25, # plot every 25 ep iter_per_epoch=626), ] diff --git a/configs/openmixup/pretrain/a2mim/imagenet/vit_base_l0_sz224_8xb128_accu2_step_ep800.py b/configs/openmixup/pretrain/a2mim/imagenet/vit_base_l0_sz224_8xb128_accu2_step_ep800.py index 9562e3a..be7dd1b 100644 --- a/configs/openmixup/pretrain/a2mim/imagenet/vit_base_l0_sz224_8xb128_accu2_step_ep800.py +++ b/configs/openmixup/pretrain/a2mim/imagenet/vit_base_l0_sz224_8xb128_accu2_step_ep800.py @@ -12,7 +12,7 @@ # dataset data = dict( - imgs_per_gpu=128, workers_per_gpu=10, + imgs_per_gpu=128, workers_per_gpu=12, train=dict( feature_mode=None, feature_args=dict(), mask_pipeline=[ @@ -28,7 +28,7 @@ # additional hooks custom_hooks = [ dict(type='SAVEHook', - save_interval=1252 * 10, # plot every 10 ep + save_interval=1252 * 25, # plot every 25 ep iter_per_epoch=1252), ] diff --git a/configs/openmixup/pretrain/a2mim/imagenet/vit_large_l0_sz224_8xb128_accu2_step_ep800.py b/configs/openmixup/pretrain/a2mim/imagenet/vit_large_l0_sz224_8xb128_accu2_step_ep800.py index 7b8c32d..81dfeaa 100644 --- a/configs/openmixup/pretrain/a2mim/imagenet/vit_large_l0_sz224_8xb128_accu2_step_ep800.py +++ 
b/configs/openmixup/pretrain/a2mim/imagenet/vit_large_l0_sz224_8xb128_accu2_step_ep800.py @@ -12,7 +12,7 @@ # dataset data = dict( - imgs_per_gpu=128, workers_per_gpu=10, + imgs_per_gpu=128, workers_per_gpu=12, train=dict( feature_mode=None, feature_args=dict(), mask_pipeline=[ @@ -28,7 +28,7 @@ # additional hooks custom_hooks = [ dict(type='SAVEHook', - save_interval=1252 * 10, # plot every 10 ep + save_interval=1252 * 25, # plot every 25 ep iter_per_epoch=1252), ] diff --git a/configs/openmixup/pretrain/a2mim/imagenet/vit_small_l0_sz224_8xb256_step_ep800.py b/configs/openmixup/pretrain/a2mim/imagenet/vit_small_l0_sz224_8xb256_step_ep800.py new file mode 100644 index 0000000..3ad402e --- /dev/null +++ b/configs/openmixup/pretrain/a2mim/imagenet/vit_small_l0_sz224_8xb256_step_ep800.py @@ -0,0 +1,68 @@ +_base_ = [ + '../../_base_/models/a2mim/vit_small.py', + '../../_base_/datasets/imagenet/a2mim_rgb_m_sz224_bs64.py', + '../../_base_/default_runtime.py', +] + +# model settings +model = dict( + backbone=dict( + mask_layer=0, mask_token='learnable') +) + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=12, + train=dict( + feature_mode=None, feature_args=dict(), + mask_pipeline=[ + dict(type='BlockwiseMaskGenerator', + input_size=224, mask_patch_size=32, model_patch_size=16, mask_ratio=0.6, + mask_color='mean', mask_only=False), + ], +)) + +# interval for accumulate gradient +update_interval = 1 # total: 8 x bs256 x 1 accumulates = bs2048 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=626 * 25, # plot every 25 ep + iter_per_epoch=626), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=1e-4 * 2048 / 512, # 4e-4 for bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), + 'norm': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0.), + 'pos_embed': dict(weight_decay=0.), + 'cls_token': dict(weight_decay=0.), + 'gamma': dict(weight_decay=0.), + }) + +# fp16 +use_fp16 = False +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict( + update_interval=update_interval, + grad_clip=dict(max_norm=5.0), +) + +# lr scheduler +lr_config = dict( + policy='step', step=[700,], gamma=0.1, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=5e-7 * 2048 / 512, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=800) diff --git a/configs/openmixup/pretrain/simmim/imagenet/convnext_base_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/convnext_base_sz224_8xb256_fp16_ep300.py new file mode 100644 index 0000000..b3d8eb9 --- /dev/null +++ b/configs/openmixup/pretrain/simmim/imagenet/convnext_base_sz224_8xb256_fp16_ep300.py @@ -0,0 +1,50 @@ +_base_ = [ + '../../_base_/models/simmim/convnext_b.py', + '../../_base_/datasets/imagenet/simmim_sz224_bs64.py', + '../../_base_/default_runtime.py', +] + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=12, +) + +# interval for accumulate gradient +update_interval = 2 # bs256 x 4gpus x 2 = bs2048 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=1252 * 25, # plot every 25 ep + iter_per_epoch=1252), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=2e-4 * 2048 / 512, # bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'gamma': 
dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0.), + }) + +# fp16 +use_fp16 = True +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict(update_interval=update_interval) + +# lr scheduler +lr_config = dict( + policy='StepFixCosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=1e-6, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/openmixup/pretrain/simmim/imagenet/convnext_small_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/convnext_small_sz224_8xb256_fp16_ep300.py new file mode 100644 index 0000000..46f89ff --- /dev/null +++ b/configs/openmixup/pretrain/simmim/imagenet/convnext_small_sz224_8xb256_fp16_ep300.py @@ -0,0 +1,50 @@ +_base_ = [ + '../../_base_/models/simmim/convnext_s.py', + '../../_base_/datasets/imagenet/simmim_sz224_bs64.py', + '../../_base_/default_runtime.py', +] + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=12, +) + +# interval for accumulate gradient +update_interval = 1 # bs256 x 8gpus = bs2048 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=626 * 25, # plot every 25 ep + iter_per_epoch=626), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=2e-4 * 2048 / 512, # bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'gamma': dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0.), + }) + +# fp16 +use_fp16 = True +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict(update_interval=update_interval) + +# lr scheduler +lr_config = dict( + policy='StepFixCosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=1e-6, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/openmixup/pretrain/simmim/imagenet/convnext_tiny_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/convnext_tiny_sz224_8xb256_fp16_ep300.py new file mode 100644 index 0000000..346c526 --- /dev/null +++ b/configs/openmixup/pretrain/simmim/imagenet/convnext_tiny_sz224_8xb256_fp16_ep300.py @@ -0,0 +1,50 @@ +_base_ = [ + '../../_base_/models/simmim/convnext_t.py', + '../../_base_/datasets/imagenet/simmim_sz224_bs64.py', + '../../_base_/default_runtime.py', +] + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=12, +) + +# interval for accumulate gradient +update_interval = 1 # bs256 x 8gpus = bs2048 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=626 * 25, # plot every 25 ep + iter_per_epoch=626), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=2e-4 * 2048 / 512, # bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'gamma': dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0.), + }) + +# fp16 +use_fp16 = True +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict(update_interval=update_interval) + +# lr scheduler +lr_config = dict( + policy='StepFixCosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=1e-6, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) 
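Note on the `SimMIMNeck(in_channels=C, encoder_stride=s)` entries in the model configs: following the SimMIM design, such a neck amounts to a 1x1 convolution to `3 * s**2` channels followed by a PixelShuffle upsample, mapping stride-`s` encoder features back to a full-resolution RGB reconstruction. A minimal self-contained sketch (illustrative, not the repository's exact module):

```python
import torch
import torch.nn as nn

class TinySimMIMNeck(nn.Module):
    """Sketch of a SimMIM-style decoder: 1x1 conv + PixelShuffle.

    Maps (B, C, H/s, W/s) encoder features to a (B, 3, H, W) reconstruction,
    mirroring the role of SimMIMNeck(in_channels=C, encoder_stride=s).
    """

    def __init__(self, in_channels=1024, encoder_stride=32):
        super().__init__()
        self.decode = nn.Sequential(
            nn.Conv2d(in_channels, 3 * encoder_stride ** 2, kernel_size=1),
            nn.PixelShuffle(encoder_stride),
        )

    def forward(self, x):
        return self.decode(x)

feat = torch.randn(2, 1024, 7, 7)    # e.g. ConvNeXt-B stage-3 features at 224 / 32
print(TinySimMIMNeck()(feat).shape)  # torch.Size([2, 3, 224, 224])
```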
diff --git a/configs/openmixup/pretrain/simmim/imagenet/r101_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/r101_sz224_8xb256_fp16_ep300.py
new file mode 100644
index 0000000..762e534
--- /dev/null
+++ b/configs/openmixup/pretrain/simmim/imagenet/r101_sz224_8xb256_fp16_ep300.py
@@ -0,0 +1,49 @@
+_base_ = [
+    '../../_base_/models/simmim/r101.py',
+    '../../_base_/datasets/imagenet/simmim_sz224_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# dataset
+data = dict(
+    imgs_per_gpu=256, workers_per_gpu=12,
+)
+
+# interval for accumulate gradient
+update_interval = 1 # bs256 x 8gpus = bs2048
+
+# additional hooks
+custom_hooks = [
+    dict(type='SAVEHook',
+        save_interval=626 * 25, # plot every 25 ep
+        iter_per_epoch=626),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=2e-4 * 2048 / 512, # bs2048
+    betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8,
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'mask_token': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='StepFixCosineAnnealing',
+    by_epoch=False, min_lr=1e-5,
+    warmup='linear',
+    warmup_iters=10, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)
diff --git a/configs/openmixup/pretrain/simmim/imagenet/r152_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/r152_sz224_8xb256_fp16_ep300.py
new file mode 100644
index 0000000..bfeb1e6
--- /dev/null
+++ b/configs/openmixup/pretrain/simmim/imagenet/r152_sz224_8xb256_fp16_ep300.py
@@ -0,0 +1,49 @@
+_base_ = [
+    '../../_base_/models/simmim/r152.py',
+    '../../_base_/datasets/imagenet/simmim_sz224_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# dataset
+data = dict(
+    imgs_per_gpu=256, workers_per_gpu=12,
+)
+
+# interval for accumulate gradient
+update_interval = 1 # bs256 x 8gpus = bs2048
+
+# additional hooks
+custom_hooks = [
+    dict(type='SAVEHook',
+        save_interval=626 * 25, # plot every 25 ep
+        iter_per_epoch=626),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=2e-4 * 2048 / 512, # bs2048
+    betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8,
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'mask_token': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='StepFixCosineAnnealing',
+    by_epoch=False, min_lr=1e-5,
+    warmup='linear',
+    warmup_iters=10, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)
diff --git a/configs/openmixup/pretrain/simmim/imagenet/r200_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/r200_sz224_8xb256_fp16_ep300.py
new file mode 100644
index 0000000..beaef4a
--- /dev/null
+++ b/configs/openmixup/pretrain/simmim/imagenet/r200_sz224_8xb256_fp16_ep300.py
@@ -0,0 +1,49 @@
+_base_ = [
+    '../../_base_/models/simmim/r200.py',
+    '../../_base_/datasets/imagenet/simmim_sz224_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# dataset
+data = dict(
+    imgs_per_gpu=256, workers_per_gpu=12,
+)
+
+# interval for accumulate gradient
+update_interval = 1 # bs256 x 8gpus = bs2048
+
+# additional hooks
+custom_hooks = [
+    dict(type='SAVEHook',
+        save_interval=626 * 25, # plot every 25 ep
+        iter_per_epoch=626),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=2e-4 * 2048 / 512, # bs2048
+    betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8,
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'mask_token': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='StepFixCosineAnnealing',
+    by_epoch=False, min_lr=1e-5,
+    warmup='linear',
+    warmup_iters=10, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)
diff --git a/configs/openmixup/pretrain/simmim/imagenet/r50_sz224_8xb256_fp16_ep300.py b/configs/openmixup/pretrain/simmim/imagenet/r50_sz224_8xb256_fp16_ep300.py
new file mode 100644
index 0000000..eee58a4
--- /dev/null
+++ b/configs/openmixup/pretrain/simmim/imagenet/r50_sz224_8xb256_fp16_ep300.py
@@ -0,0 +1,49 @@
+_base_ = [
+    '../../_base_/models/simmim/r50.py',
+    '../../_base_/datasets/imagenet/simmim_sz224_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# dataset
+data = dict(
+    imgs_per_gpu=256, workers_per_gpu=12,
+)
+
+# interval for accumulate gradient
+update_interval = 1 # bs256 x 8gpus = bs2048
+
+# additional hooks
+custom_hooks = [
+    dict(type='SAVEHook',
+        save_interval=626 * 25, # plot every 25 ep
+        iter_per_epoch=626),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=2e-4 * 2048 / 512, # bs2048
+    betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8,
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'mask_token': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='StepFixCosineAnnealing',
+    by_epoch=False, min_lr=1e-5,
+    warmup='linear',
+    warmup_iters=10, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)
diff --git a/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep100.py b/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep100.py
index de37dac..0a9ef55 100644
--- a/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep100.py
+++ b/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep100.py
@@ -13,7 +13,7 @@
 
 # additional hooks
 custom_hooks = [
     dict(type='SAVEHook',
-        save_interval=1252 * 10, # plot every 10 ep
+        save_interval=1252 * 25, # plot every 25 ep
         iter_per_epoch=1252),
 ]
diff --git a/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep800.py b/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep800.py
index a77b7e2..b078cc8 100644
--- a/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep800.py
+++ b/configs/openmixup/pretrain/simmim/imagenet/swin_base_sz192_8xb128_accu2_cos_ep800.py
@@ -13,7 +13,7 @@
 
 # additional hooks
 custom_hooks = [
     dict(type='SAVEHook',
-        save_interval=1252 * 10, # plot every 10 ep
+        save_interval=1252 * 25, # plot every 25 ep
         iter_per_epoch=1252),
 ]
diff --git a/configs/openmixup/pretrain/simmim/imagenet/vit_base_rgb_m_sz224_8xb128_accu2_step_fp16_ep800.py b/configs/openmixup/pretrain/simmim/imagenet/vit_base_rgb_m_sz224_8xb128_accu2_step_fp16_ep800.py
index 467cede..b33791e 100644
--- a/configs/openmixup/pretrain/simmim/imagenet/vit_base_rgb_m_sz224_8xb128_accu2_step_fp16_ep800.py
+++ b/configs/openmixup/pretrain/simmim/imagenet/vit_base_rgb_m_sz224_8xb128_accu2_step_fp16_ep800.py
@@ -22,7 +22,7 @@
 
 # additional hooks
 custom_hooks = [
     dict(type='SAVEHook',
-        save_interval=1252 * 10, # plot every 10 ep
+        save_interval=1252 * 25, # plot every 25 ep
         iter_per_epoch=1252),
 ]
diff --git a/configs/openmixup/pretrain/simmim/imagenet/vit_base_sz224_8xb128_accu2_step_fp16_ep800.py b/configs/openmixup/pretrain/simmim/imagenet/vit_base_sz224_8xb128_accu2_step_fp16_ep800.py
index 5f7cbe8..47e8634 100644
--- a/configs/openmixup/pretrain/simmim/imagenet/vit_base_sz224_8xb128_accu2_step_fp16_ep800.py
+++ b/configs/openmixup/pretrain/simmim/imagenet/vit_base_sz224_8xb128_accu2_step_fp16_ep800.py
@@ -13,7 +13,7 @@
 
 # additional hooks
 custom_hooks = [
     dict(type='SAVEHook',
-        save_interval=1252 * 10, # plot every 10 ep
+        save_interval=1252 * 25, # plot every 25 ep
         iter_per_epoch=1252),
 ]
diff --git a/configs/openmixup/pretrain/simmim/imagenet/vit_large_sz224_8xb128_accu2_step_fp16_ep800.py b/configs/openmixup/pretrain/simmim/imagenet/vit_large_sz224_8xb128_accu2_step_fp16_ep800.py
new file mode 100644
index 0000000..84426b1
--- /dev/null
+++ b/configs/openmixup/pretrain/simmim/imagenet/vit_large_sz224_8xb128_accu2_step_fp16_ep800.py
@@ -0,0 +1,52 @@
+_base_ = [
+    '../../_base_/models/simmim/vit_large.py',
+    '../../_base_/datasets/imagenet/simmim_sz224_p16_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# data
+data = dict(imgs_per_gpu=128, workers_per_gpu=12)
+
+# interval for accumulate gradient
+update_interval = 2 # total: 8 x bs128 x 2 accumulates = bs2048
+
+# additional hooks
+custom_hooks = [
+    dict(type='SAVEHook',
+        save_interval=1252 * 25, # plot every 25 ep
+        iter_per_epoch=1252),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=1e-4 * 2048 / 512, # 4e-4 for bs2048
+    betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8,
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'norm': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'mask_token': dict(weight_decay=0.),
+        'pos_embed': dict(weight_decay=0.),
+        'cls_token': dict(weight_decay=0.),
+        'gamma': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(
+    update_interval=update_interval, grad_clip=dict(max_norm=5.0),
+)
+
+# lr scheduler
+lr_config = dict(
+    policy='step', step=[700,], gamma=0.1,
+    warmup='linear',
+    warmup_iters=10, warmup_by_epoch=True,
+    warmup_ratio=5e-7 * 2048 / 512,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=800)
diff --git a/configs/openmixup/pretrain/simmim/imagenet/vit_small_sz224_8xb256_step_fp16_ep800.py b/configs/openmixup/pretrain/simmim/imagenet/vit_small_sz224_8xb256_step_fp16_ep800.py
new file mode 100644
index 0000000..f109cc0
--- /dev/null
+++ b/configs/openmixup/pretrain/simmim/imagenet/vit_small_sz224_8xb256_step_fp16_ep800.py
@@ -0,0 +1,52 @@
+_base_ = [
+    '../../_base_/models/simmim/vit_small.py',
+    '../../_base_/datasets/imagenet/simmim_sz224_p16_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# data
+data = dict(imgs_per_gpu=256, workers_per_gpu=12)
+
+# interval for accumulate gradient
+update_interval = 1 # total: 8 x bs256 x 1 accumulates = bs2048
+
+# additional hooks
+custom_hooks = [
+    dict(type='SAVEHook',
+        save_interval=626 * 25,
+        iter_per_epoch=626),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=1e-4 * 2048 / 512, # 4e-4 for bs2048
+    betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8,
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'norm': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'mask_token': dict(weight_decay=0.),
+        'pos_embed': dict(weight_decay=0.),
+        'cls_token': dict(weight_decay=0.),
+        'gamma': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(
+    update_interval=update_interval, grad_clip=dict(max_norm=5.0),
+)
+
+# lr scheduler
+lr_config = dict(
+    policy='step', step=[700,], gamma=0.1,
+    warmup='linear',
+    warmup_iters=10, warmup_by_epoch=True,
+    warmup_ratio=5e-7 * 2048 / 512,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=800)
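For reference, the `BlockwiseMaskGenerator` entries used by the pre-training datasets (e.g. `input_size=224, mask_patch_size=32, model_patch_size=16, mask_ratio=0.6`) draw the random mask on a coarse 32-pixel grid and then repeat it down to the model's patch grid, so whole 32x32 blocks are masked. A NumPy sketch of that behavior (illustrative, not the repository's exact class):

```python
import numpy as np

def blockwise_mask(input_size=224, mask_patch_size=32, model_patch_size=16,
                   mask_ratio=0.6, rng=None):
    """Return a (input_size/model_patch_size)^2 binary mask with ~mask_ratio ones."""
    rng = rng or np.random.default_rng()
    rand_size = input_size // mask_patch_size    # 7: coarse masking grid
    scale = mask_patch_size // model_patch_size  # 2: coarse cell -> model patches
    num_mask = int(np.ceil(rand_size ** 2 * mask_ratio))
    mask = np.zeros(rand_size ** 2, dtype=int)
    mask[rng.permutation(rand_size ** 2)[:num_mask]] = 1
    mask = mask.reshape(rand_size, rand_size)
    return mask.repeat(scale, axis=0).repeat(scale, axis=1)

m = blockwise_mask()
print(m.shape, round(m.mean(), 2))  # (14, 14) ~0.61
```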