-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update mixup benchmarks & awesome lists
- Loading branch information
Showing
51 changed files
with
932 additions
and
288 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
54 changes: 54 additions & 0 deletions
54
configs/classification/_base_/datasets/inaturalist2018/basic_sz224_4xbs64.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# dataset settings | ||
data_source_cfg = dict(type='ImageNet') | ||
# iNat dataset | ||
data_train_list = 'data/meta/iNaturalist2018/train_labeled_full.txt' | ||
data_train_root = 'data/iNaturalist2018/train' | ||
data_test_list = 'data/meta/iNaturalist2018/val_labeled.txt' | ||
data_test_root = 'data/iNaturalist2018/val/' | ||
|
||
dataset_type = 'ClassificationDataset' | ||
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | ||
train_pipeline = [ | ||
dict(type='RandomResizedCrop', size=224, interpolation=3), # bicubic | ||
dict(type='RandomHorizontalFlip'), | ||
] | ||
test_pipeline = [ | ||
dict(type='Resize', size=256, interpolation=3), # 0.85 | ||
dict(type='CenterCrop', size=224), | ||
dict(type='ToTensor'), | ||
dict(type='Normalize', **img_norm_cfg), | ||
] | ||
# prefetch | ||
prefetch = True | ||
if not prefetch: | ||
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) | ||
|
||
data = dict( | ||
imgs_per_gpu=64, # V100: 64 x 4gpus = bs256 | ||
workers_per_gpu=8, # set according to the total number of CPU cores; usually 4 workers per 32~128 imgs | ||
train=dict( | ||
type=dataset_type, | ||
data_source=dict( | ||
list_file=data_train_list, root=data_train_root, | ||
**data_source_cfg), | ||
pipeline=train_pipeline, | ||
prefetch=prefetch, | ||
), | ||
val=dict( | ||
type=dataset_type, | ||
data_source=dict( | ||
list_file=data_test_list, root=data_test_root, **data_source_cfg), | ||
pipeline=test_pipeline, | ||
prefetch=False, | ||
)) | ||
|
||
# validation hook | ||
evaluation = dict( | ||
initial=False, | ||
interval=1, | ||
imgs_per_gpu=128, | ||
workers_per_gpu=4, | ||
eval_param=dict(topk=(1, 5))) | ||
|
||
# checkpoint | ||
checkpoint_config = dict(interval=1, max_keep_ckpts=1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Mixup Classification Benchmark on iNaturalist-2017 | ||
|
||
> [The iNaturalist Challenge 2017 Dataset](https://arxiv.org/abs/1707.06642) | ||
## Abstract | ||
|
||
Existing image classification datasets used in computer vision tend to have an even number of images for each object category. In contrast, the natural world is heavily imbalanced, as some species are more abundant and easier to photograph than others. To encourage further progress in challenging real world conditions we present the iNaturalist Challenge 2017 dataset - an image classification benchmark consisting of 675,000 images with over 5,000 different species of plants and animals. It features many visually similar species, captured in a wide variety of situations, from all over the world. Images were collected with different camera types, have varying image quality, have been verified by multiple citizen scientists, and feature a large class imbalance. We discuss the collection of the dataset and present baseline results for state-of-the-art computer vision classification models. Results show that current non-ensemble based methods achieve only 64% top one classification accuracy, illustrating the difficulty of the dataset. Finally, we report results from a competition that was held with the data. | ||
|
||
<div align=center> | ||
<img src="https://user-images.githubusercontent.com/44519745/185645586-a5317b04-daf4-429c-a0fa-c9dd87dac45b.png" width="100%"/> | ||
</div> | ||
|
||
## Results and models | ||
|
||
We provide a collection of [weights and logs](https://github.com/Westlake-AI/openmixup/releases/tag/mixup-inat2017-weights) for mixup classification benchmark on iNaturalist-2017. You can download all results from **Baidu Cloud**: [iNaturalist-2017 (1e7w)](https://pan.baidu.com/s/1GsoXVpIBXPjyFKsCdnmp9Q). | ||
|
||
* All compared methods adopt ResNet-18/50 and ResNeXt-101 (32x4d) architectures and are trained for 100 epochs using the PyTorch training recipe. The training and testing image size is 224 with a CenterCrop ratio of 0.85. We search $\alpha$ in $Beta(\alpha, \alpha)$ for all compared methods. | ||
* The **median** of top-1 accuracy in the last 5 training epochs is reported for ResNet variants. | ||
* Visualizations of mixed samples from [AutoMix](https://arxiv.org/abs/2103.13027) and [SAMix](https://arxiv.org/abs/2111.15454) are provided in zip files. | ||
|
||
### iNaturalist-2017 | ||
|
||
| Backbones | ResNet-18 top-1 | ResNet-50 top-1 | ResNeXt-101 top-1 | | ||
|-------------------------------------------------------------|:---------------:|:---------------:|:-----------------:| | ||
| Vanilla | 51.79 | 60.23 | 63.70 | | ||
| MixUp [[ICLR'2018](https://arxiv.org/abs/1710.09412)] | 51.40 | 61.22 | 66.27 | | ||
| CutMix [[ICCV'2019](https://arxiv.org/abs/1905.04899)] | 51.24 | 62.34 | 67.59 | | ||
| ManifoldMix [[ICML'2019](https://arxiv.org/abs/1806.05236)] | 51.83 | 61.47 | 66.08 | | ||
| SaliencyMix [[ICLR'2021](https://arxiv.org/abs/2006.01791)] | 51.29 | 62.51 | 67.20 | | ||
| FMix [[Arxiv'2020](https://arxiv.org/abs/2002.12047)] | 52.01 | 61.90 | 66.64 | ||
| PuzzleMix [[ICML'2020](https://arxiv.org/abs/2009.06962)] | - | 62.66 | 67.72 | | ||
| ResizeMix [[Arxiv'2020](https://arxiv.org/abs/2012.11101)] | 51.21 | 62.29 | 66.82 | ||
| AutoMix [[ECCV'2022](https://arxiv.org/abs/2103.13027)] | 52.84 | 63.08 | 68.03 | | ||
| SAMix [[Arxiv'2021](https://arxiv.org/abs/2111.15454)] | 53.42 | 63.32 | 68.26 | | ||
|
||
We summarize mixup benchmarks in [Model Zoo](https://github.com/Westlake-AI/openmixup/tree/main/docs/en/model_zoos/Model_Zoo_sup.md). | ||
|
||
|
||
## Citation | ||
|
||
Please refer to the original papers of iNaturalist-2017 and AutoMix for details. | ||
|
||
```bibtex | ||
@article{Horn2017TheIC, | ||
title={The iNaturalist Challenge 2017 Dataset}, | ||
author={Grant Van Horn and Oisin Mac Aodha and Yang Song and Alexander Shepard and Hartwig Adam and Pietro Perona and Serge J. Belongie}, | ||
journal={ArXiv}, | ||
year={2017}, | ||
volume={abs/1707.06642} | ||
} | ||
``` | ||
```bibtex | ||
@misc{eccv2022automix, | ||
title={AutoMix: Unveiling the Power of Mixup for Stronger Classifiers}, | ||
author={Zicheng Liu and Siyuan Li and Di Wu and Zhiyuan Chen and Lirong Wu and Jianzhu Guo and Stan Z. Li}, | ||
year={2021}, | ||
eprint={2103.13027}, | ||
archivePrefix={arXiv}, | ||
primaryClass={cs.CV} | ||
} | ||
``` |
48 changes: 48 additions & 0 deletions
48
...on/inaturalist2017/automix/basic/r18_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0_4xb64_ep100.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
_base_ = "r50_l2_a2_near_lam_cat_mb_mlr1e_4_bb_mlr0_4xb64_ep100.py" | ||
|
||
# model settings | ||
model = dict( | ||
type='AutoMixup', | ||
pretrained=None, | ||
alpha=2.0, | ||
momentum=0.999, # 0.999 to 0.99999 | ||
mask_layer=2, | ||
mask_loss=0.1, # using mask loss | ||
mask_adjust=0, | ||
lam_margin=0.08, # degenerate to mixup when lam or 1-lam <= 0.08 | ||
mask_up_override=None, # If not none, override upsampling when train MixBlock | ||
debug=False, # show attention and content map | ||
backbone=dict( | ||
type='ResNet', | ||
depth=18, | ||
num_stages=4, | ||
out_indices=(2,3), # stage-3 for MixBlock, x-1: stage-x | ||
style='pytorch'), | ||
mix_block = dict( # AutoMix | ||
type='PixelMixBlock', | ||
in_channels=256, reduction=2, use_scale=True, | ||
unsampling_mode=['nearest',], # str or list, train & test MixBlock | ||
lam_concat=True, lam_concat_v=False, # AutoMix: lam cat q,k,v | ||
lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none | ||
value_neck_cfg=None, # SAMix: non-linear value | ||
x_qk_concat=False, x_v_concat=False, # SAMix x concat: none | ||
# att_norm_cfg=dict(type='BN'), # norm after q,k (designed for fp16, also gives better performance in fp32) | ||
mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 | ||
frozen=False), | ||
head_one=dict( | ||
type='ClsHead', # default CE | ||
loss=dict(type='CrossEntropyLoss', use_soft=False, use_sigmoid=False, loss_weight=1.0), | ||
with_avg_pool=True, multi_label=False, in_channels=512, num_classes=5089), | ||
head_mix=dict( # backbone & mixblock | ||
type='ClsMixupHead', # mixup, default CE | ||
loss=dict(type='CrossEntropyLoss', use_soft=False, use_sigmoid=False, loss_weight=1.0), | ||
with_avg_pool=True, multi_label=False, in_channels=512, num_classes=5089), | ||
head_weights=dict( | ||
head_mix_q=1, head_one_q=1, head_mix_k=1, head_one_k=1), | ||
) | ||
|
||
# additional scheduler | ||
addtional_scheduler = dict( | ||
policy='CosineAnnealing', min_lr=1e-3, | ||
paramwise_options=['mix_block'], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
..._near_lam_cat_mb_mlr1e_4_bb_mlr0_4xb64.py → ...lam_cat_mb_mlr1e_4_bb_mlr0_4xb64_ep100.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.