diff --git a/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep100.py b/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py similarity index 100% rename from configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep100.py rename to configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py diff --git a/configs/classification/_base_/datasets/imagenet/deit3_sz160_8xbs128.py b/configs/classification/_base_/datasets/imagenet/deit3_sz160_8xbs128.py index f626123b..1ed70905 100644 --- a/configs/classification/_base_/datasets/imagenet/deit3_sz160_8xbs128.py +++ b/configs/classification/_base_/datasets/imagenet/deit3_sz160_8xbs128.py @@ -11,13 +11,13 @@ train_pipeline = [ dict(type='RandomResizedCrop', size=160, interpolation=3), # bicubic dict(type='RandomHorizontalFlip'), - dict(type='RandomAppliedTrans', # 3-Augment in DeiT III + dict(type='RandomChoiceTrans', # 3-Augment in DeiT III transforms=[ dict(type='RandomGrayscale', p=1.), dict(type='Solarization', p=1.), dict(type='GaussianBlur', sigma_min=0.1, sigma_max=2.0, p=1.), ], - p=1.0), + ), dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3), ] diff --git a/configs/classification/_base_/datasets/imagenet/deit3_sz192_8xbs128.py b/configs/classification/_base_/datasets/imagenet/deit3_sz192_8xbs128.py index f2f7cb03..49d68c55 100644 --- a/configs/classification/_base_/datasets/imagenet/deit3_sz192_8xbs128.py +++ b/configs/classification/_base_/datasets/imagenet/deit3_sz192_8xbs128.py @@ -11,13 +11,13 @@ train_pipeline = [ dict(type='RandomResizedCrop', size=192, interpolation=3), # bicubic dict(type='RandomHorizontalFlip'), - dict(type='RandomAppliedTrans', # 3-Augment in DeiT III + dict(type='RandomChoiceTrans', # 3-Augment in DeiT III transforms=[ dict(type='RandomGrayscale', p=1.), dict(type='Solarization', p=1.), dict(type='GaussianBlur', sigma_min=0.1, sigma_max=2.0, p=1.), ], - 
p=1.0), + ), dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3), ] diff --git a/configs/classification/_base_/datasets/imagenet/deit3_sz224_8xbs128.py b/configs/classification/_base_/datasets/imagenet/deit3_sz224_8xbs128.py index a8fa0d25..9cb2bc45 100644 --- a/configs/classification/_base_/datasets/imagenet/deit3_sz224_8xbs128.py +++ b/configs/classification/_base_/datasets/imagenet/deit3_sz224_8xbs128.py @@ -11,13 +11,13 @@ train_pipeline = [ dict(type='RandomResizedCrop', size=224, interpolation=3), # bicubic dict(type='RandomHorizontalFlip'), - dict(type='RandomAppliedTrans', # 3-Augment in DeiT III + dict(type='RandomChoiceTrans', # 3-Augment in DeiT III transforms=[ dict(type='RandomGrayscale', p=1.), dict(type='Solarization', p=1.), dict(type='GaussianBlur', sigma_min=0.1, sigma_max=2.0, p=1.), ], - p=1.0), + ), dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3), ] diff --git a/configs/classification/_base_/datasets/imagenet/moga_sz224_8xbs128.py b/configs/classification/_base_/datasets/imagenet/moga_sz224_8xbs128.py new file mode 100644 index 00000000..0763fa5b --- /dev/null +++ b/configs/classification/_base_/datasets/imagenet/moga_sz224_8xbs128.py @@ -0,0 +1,88 @@ +# Refers to `_RAND_INCREASING_TRANSFORMS` in pytorch-image-models +rand_increasing_policies = [ + dict(type='AutoContrast'), + dict(type='Equalize'), + dict(type='Invert'), + dict(type='Rotate', magnitude_key='angle', magnitude_range=(0, 30)), + dict(type='Posterize', magnitude_key='bits', magnitude_range=(4, 0)), + dict(type='Solarize', magnitude_key='thr', magnitude_range=(256, 0)), + dict(type='SolarizeAdd', magnitude_key='magnitude', magnitude_range=(0, 110)), + dict(type='ColorTransform', magnitude_key='magnitude', magnitude_range=(0, 0.9)), + dict(type='Contrast', magnitude_key='magnitude', magnitude_range=(0, 0.9)), + dict(type='Brightness', magnitude_key='magnitude', magnitude_range=(0, 0.9)), + dict(type='Sharpness', magnitude_key='magnitude', 
magnitude_range=(0, 0.9)), + dict(type='Shear', + magnitude_key='magnitude', magnitude_range=(0, 0.3), direction='horizontal'), + dict(type='Shear', + magnitude_key='magnitude', magnitude_range=(0, 0.3), direction='vertical'), + dict(type='Translate', + magnitude_key='magnitude', magnitude_range=(0, 0.45), direction='horizontal'), + dict(type='Translate', + magnitude_key='magnitude', magnitude_range=(0, 0.45), direction='vertical'), +] + +# dataset settings +data_source_cfg = dict(type='ImageNet') +# ImageNet dataset +data_train_list = 'data/meta/ImageNet/train_labeled_full.txt' +data_train_root = 'data/ImageNet/train' +data_test_list = 'data/meta/ImageNet/val_labeled.txt' +data_test_root = 'data/ImageNet/val/' + +dataset_type = 'ClassificationDataset' +img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +train_pipeline = [ + dict(type='RandomResizedCrop', size=224, interpolation=3), # bicubic + dict(type='RandomHorizontalFlip'), + dict(type='RandAugment', + policies=rand_increasing_policies, + num_policies=2, total_level=10, + magnitude_level=9, magnitude_std=0.5, + hparams=dict( + pad_val=[104, 116, 124], interpolation='bicubic')), + dict( + type='RandomErasing_numpy', # before ToTensor and Normalize + erase_prob=0.25, + mode='rand', min_area_ratio=0.02, max_area_ratio=1 / 3, + fill_color=[104, 116, 124], fill_std=[58, 57, 57]), # RGB +] +test_pipeline = [ + dict(type='Resize', size=248, interpolation=3), # 0.90 + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +# prefetch +prefetch = True +if not prefetch: + train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) + +data = dict( + imgs_per_gpu=128, + workers_per_gpu=8, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline, + prefetch=prefetch, + ), + val=dict( + type=dataset_type, + data_source=dict( + 
list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline, + prefetch=False, + )) + +# validation hook +evaluation = dict( + initial=False, + interval=1, + imgs_per_gpu=128, + workers_per_gpu=4, + eval_param=dict(topk=(1, 5))) + +# checkpoint +checkpoint_config = dict(interval=1, max_keep_ckpts=1) diff --git a/configs/classification/imagenet/automix/README.md b/configs/classification/imagenet/automix/README.md index 6e881552..a208f9df 100644 --- a/configs/classification/imagenet/automix/README.md +++ b/configs/classification/imagenet/automix/README.md @@ -14,17 +14,21 @@ Data mixing augmentation have proved to be effective in improving the generaliza ### ImageNet-1k -| Model | Mixup | resolution | Params(M) | Epochs | Top-1 (%) | Config | Download | -| :-----------: | :-----: | :--------: | :-------: | :----: | :-------: | :-----------------------------------------------------------------: | :-------------------------------------------------------------------: | -| ResNet-18 | AutoMix | 224x224 | 11.17 | 100 | 70.50 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r18_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-18 | AutoMix | 224x224 | 11.17 | 300 | 72.05 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r18_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-34 | AutoMix | 224x224 | 21.28 | 100 | 74.52 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r34_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-34 | AutoMix | 224x224 | 21.28 | 300 | 76.10 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r34_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-50 | AutoMix | 224x224 | 23.52 | 100 | 77.91 | 
[config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r50_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-50 | AutoMix | 224x224 | 23.52 | 300 | 79.25 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r50_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-101 | AutoMix | 224x224 | 42.51 | 100 | 79.87 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r101_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNet-101 | AutoMix | 224x224 | 42.51 | 300 | 80.98 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/r101_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | -| ResNeXt-101 | AutoMix | 224x224 | 44.18 | 100 | 80.89 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/automix/basic/rx101_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| Model | Mixup | resolution | Params(M) | Epochs | Top-1 (%) | Config | Download | +|:-----------:|:-------:|:----------:|:---------:|:------:|:---------:|:---------------------------------------------------------------------------------:|:-----------:| +| ResNet-18 | AutoMix | 224x224 | 11.17 | 100 | 70.50 | [config](./basic/r18_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNet-18 | AutoMix | 224x224 | 11.17 | 300 | 72.05 | [config](./basic/r18_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNet-34 | AutoMix | 224x224 | 21.28 | 100 | 74.52 | [config](./basic/r34_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNet-34 | AutoMix | 224x224 | 21.28 | 300 | 76.10 | [config](./basic/r34_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNet-50 | AutoMix | 224x224 | 23.52 | 100 | 77.91 | [config](./basic/r50_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | 
model / log | +| ResNet-50 | AutoMix | 224x224 | 23.52 | 300 | 79.25 | [config](./basic/r50_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNet-101 | AutoMix | 224x224 | 42.51 | 100 | 79.87 | [config](./basic/r101_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNet-101 | AutoMix | 224x224 | 42.51 | 300 | 80.98 | [config](./basic/r101_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| ResNeXt-101 | AutoMix | 224x224 | 44.18 | 100 | 80.89 | [config](./basic/rx101_l2_a2_near_lam_cat_mb_mlr1e_3_bb_mlr0.py) | model / log | +| DeiT-S | AutoMix | 224x224 | 22.05 | 300 | 80.78 | [config](./deit/deit_s_l6_a2_near_lam_cat_switch0_8_8x128_ep300.py) | model / log | +| PVT-T | AutoMix | 224x224 | 13.2 | 300 | 76.37 | [config](./pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_attn_ln_8x128_fp16_ep300.py) | model / log | +| Swin-T | AutoMix | 224x224 | 28.29 | 300 | 81.80 | [config](./swin/swin_t_l2_a2_near_lam_cat_switch0_8_8x128_ep300.py) | model / log | +| ConvNeXt-T | AutoMix | 224x224 | 28.59 | 300 | 82.28 | [config](./convnext/convnext_t_l2_a2_near_lam_cat_switch0_8_8x128_accu4_ep300.py) | model / log | We will update configs and models for AutoMix soon. Please refer to [Model Zoo](https://github.com/Westlake-AI/openmixup/tree/main/docs/en/model_zoos/Model_Zoo_sup.md) for image classification results. 
diff --git a/configs/classification/imagenet/automix/convnext/convnext_t_l2_a2_near_lam_cat_switch0_8_8x128_accu4_ep300.py b/configs/classification/imagenet/automix/convnext/convnext_t_l2_a2_near_lam_cat_switch0_8_8x128_accu4_ep300.py new file mode 100644 index 00000000..492cddf0 --- /dev/null +++ b/configs/classification/imagenet/automix/convnext/convnext_t_l2_a2_near_lam_cat_switch0_8_8x128_accu4_ep300.py @@ -0,0 +1,122 @@ +_base_ = [ + '../../../_base_/datasets/imagenet/swin_sz224_8xbs128.py', + '../../../_base_/default_runtime.py', +] + +# model settings +model = dict( + type='AutoMixup', + pretrained=None, + alpha=2.0, + momentum=0.999, + mask_layer=2, # downsampling to 1/16 + mask_loss=0.1, # using loss + mask_adjust=0, # none for large datasets + lam_margin=0.08, + switch_off=0.8, # switch off mixblock (fixed) + mask_up_override=None, + debug=True, + backbone=dict( + type='ConvNeXt', + arch='tiny', + out_indices=(2,3,), # x-1: stage-x + act_cfg=dict(type='GELU'), + drop_path_rate=0.1, + gap_before_final_norm=True, + ), + mix_block = dict( # AutoMix + type='PixelMixBlock', + in_channels=768, reduction=2, use_scale=True, + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix + lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v + lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none + value_neck_cfg=None, # SAMix: non-linear value + x_qk_concat=False, x_v_concat=False, # SAMix x concat: none + att_norm_cfg=None, # Not use attention_norm for better performance + mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 + frozen=False), + head_one=dict( + type='ClsMixupHead', # mixup CE + label smooth + loss=dict(type='LabelSmoothLoss', + label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0), + with_avg_pool=False, + in_channels=768, 
num_classes=1000), + head_mix=dict( + type='ClsMixupHead', # mixup CE + label smooth + loss=dict(type='LabelSmoothLoss', + label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0), + with_avg_pool=False, + in_channels=768, num_classes=1000), + head_weights=dict( + decent_weight=[], accent_weight=[], + head_mix_q=1, head_one_q=1, head_mix_k=1, head_one_k=1), + init_cfg=[ + dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.), + dict(type='Constant', layer='LayerNorm', val=1., bias=0.) + ], +) + +# dataset +data = dict(imgs_per_gpu=128, workers_per_gpu=10) + +# interval for accumulate gradient +update_interval = 4 # total: 8 x bs128 x 4 accumulates = bs4096 + +custom_hooks = [ + dict(type='SAVEHook', + save_interval=1252 * 20, # 20 ep + iter_per_epoch=1252, + ), + dict(type='CustomCosineAnnealingHook', # 0.1 to 0 + attr_name="mask_loss", attr_base=0.1, min_attr=0., by_epoch=False, # by iter + update_interval=update_interval, + ), + dict(type='CosineScheduleHook', + end_momentum=0.99996, # 0.999 to 0.99996 + adjust_scope=[0.25, 1.0], + warming_up="constant", + update_interval=update_interval, + interval=1) +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=4e-3, # lr = 5e-4 * (256 * 4) * 4 accumulate / 512 = 4e-3 / bs4096 + weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999), + paramwise_options={ + 'norm': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'absolute_pos_embed': dict(weight_decay=0.), + 'relative_position_bias_table': dict(weight_decay=0.), + 'mix_block': dict(lr=4e-3), + }) +# Sets `find_unused_parameters`: randomly switch off mixblock +find_unused_parameters = True + +# fp16 +use_fp16 = False +fp16 = dict(type='mmcv', loss_scale='dynamic') +optimizer_config = dict( + grad_clip=None, update_interval=update_interval) + +# lr scheduler: cosine annealing (by iter) with linear warmup +lr_config = dict( + policy='CosineAnnealing', + by_epoch=False, min_lr=1e-6, + warmup='linear', + warmup_iters=20, warmup_by_epoch=True, # warmup 20 epochs. 
+ warmup_ratio=1e-5, +) + +# additional scheduler +addtional_scheduler = dict( + policy='CosineAnnealing', + by_epoch=False, min_lr=1e-4, + paramwise_options=['mix_block'], + warmup_iters=20, warmup_by_epoch=True, # warmup 20 epochs + warmup_ratio=1e-5, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/classification/imagenet/automix/deit/deit_s_l6_a2_near_lam_cat_switch0_8_8x128_ep300.py b/configs/classification/imagenet/automix/deit/deit_s_l6_a2_near_lam_cat_switch0_8_8x128_ep300.py index 77de3bbd..e15ddab3 100644 --- a/configs/classification/imagenet/automix/deit/deit_s_l6_a2_near_lam_cat_switch0_8_8x128_ep300.py +++ b/configs/classification/imagenet/automix/deit/deit_s_l6_a2_near_lam_cat_switch0_8_8x128_ep300.py @@ -21,15 +21,15 @@ arch='deit-small', img_size=224, patch_size=16, drop_path_rate=0.1, - out_indices=(5, 11), # DeiT-S: 12 layers + out_indices=(5, 11), # DeiT-S: 12 layers, use 6-layer for MixBlock ), - mix_block = dict( # SAMix + mix_block = dict( # AutoMix type='PixelMixBlock', in_channels=384, reduction=2, use_scale=True, - unsampling_mode=['nearest',], # str or list, train & test MixBlock + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none value_neck_cfg=None, # SAMix: non-linear value x_qk_concat=False, x_v_concat=False, # SAMix x concat: none - att_norm_cfg=None, # AutoMix: attention norm for fp16 + att_norm_cfg=None, # Not use attention_norm for better performance mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 diff --git 
a/configs/classification/imagenet/automix/deit/deit_s_l6_a2_near_lam_cat_switch0_8_att_ln_8x128_fp16_ep300.py +++ b/configs/classification/imagenet/automix/deit/deit_s_l6_a2_near_lam_cat_switch0_8_att_ln_8x128_fp16_ep300.py @@ -21,17 +21,17 @@ arch='deit-small', img_size=224, patch_size=16, drop_path_rate=0.1, - out_indices=(5, 11), # DeiT-S: 12 layers + out_indices=(5, 11), # DeiT-S: 12 layers, use 6-layer for MixBlock ), - mix_block = dict( # SAMix + mix_block = dict( # AutoMix type='PixelMixBlock', in_channels=384, reduction=2, use_scale=True, - unsampling_mode=['nearest',], # str or list, train & test MixBlock + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none value_neck_cfg=None, # SAMix: non-linear value x_qk_concat=False, x_v_concat=False, # SAMix x concat: none - att_norm_cfg=dict(type='LN2d', eps=1e-6), # AutoMix: attention norm for fp16 + att_norm_cfg=dict(type='LN2d', eps=1e-6), # AutoMix: attention norm for fp16 (fast training) mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 frozen=False), head_one=dict( diff --git a/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_8x128_fp16_ep300.py b/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_8x128_fp16_ep300.py index a883eb8e..de759466 100644 --- a/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_8x128_fp16_ep300.py +++ b/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_8x128_fp16_ep300.py @@ -24,15 +24,15 @@ drop_path_rate=0.1, out_indices=(2,3,), ), - mix_block = dict( # SAMix + mix_block = dict( # AutoMix type='PixelMixBlock', in_channels=320, reduction=2, use_scale=True, - unsampling_mode=['nearest',], # str or list, train & test MixBlock + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' 
for AutoMix lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none value_neck_cfg=None, # SAMix: non-linear value x_qk_concat=False, x_v_concat=False, # SAMix x concat: none - att_norm_cfg=None, # AutoMix: attention norm for fp16 + att_norm_cfg=None, # Not use attention_norm for better performance mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 mask_mode="none_v_", frozen=False), diff --git a/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_attn_ln_8x128_fp16_ep300.py b/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_attn_ln_8x128_fp16_ep300.py index 3b3e55c4..65f621db 100644 --- a/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_attn_ln_8x128_fp16_ep300.py +++ b/configs/classification/imagenet/automix/pvt/pvt_t_l2_a2_near_lam_cat_swch0_8_attn_ln_8x128_fp16_ep300.py @@ -24,15 +24,15 @@ drop_path_rate=0.1, out_indices=(2,3,), ), - mix_block = dict( # SAMix + mix_block = dict( # AutoMix type='PixelMixBlock', in_channels=320, reduction=2, use_scale=True, - unsampling_mode=['nearest',], # str or list, train & test MixBlock + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none value_neck_cfg=None, # SAMix: non-linear value x_qk_concat=False, x_v_concat=False, # SAMix x concat: none - att_norm_cfg=dict(type='LN2d', eps=1e-6), # AutoMix: attention norm for fp16 + att_norm_cfg=dict(type='LN2d', eps=1e-6), # AutoMix: attention norm for fp16 (fast training) mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 mask_mode="none_v_", frozen=False), diff --git a/configs/classification/imagenet/automix/swin/swin_t_l2_a2_near_lam_cat_switch0_8_8x128_ep300.py 
b/configs/classification/imagenet/automix/swin/swin_t_l2_a2_near_lam_cat_switch0_8_8x128_ep300.py new file mode 100644 index 00000000..57999fa9 --- /dev/null +++ b/configs/classification/imagenet/automix/swin/swin_t_l2_a2_near_lam_cat_switch0_8_8x128_ep300.py @@ -0,0 +1,120 @@ +_base_ = [ + '../../../_base_/datasets/imagenet/swin_sz224_8xbs128.py', + '../../../_base_/default_runtime.py', +] + +# model settings +model = dict( + type='AutoMixup', + pretrained=None, + alpha=2.0, + momentum=0.999, + mask_layer=2, # dowmsampling to 1/16 + mask_loss=0.1, # using loss + mask_adjust=0, # none for large datasets + lam_margin=0.08, + switch_off=0.8, # switch off mixblock (fixed) + mask_up_override=None, + debug=True, + backbone=dict( + type='SwinTransformer', + arch='tiny', + img_size=224, drop_path_rate=0.2, + out_indices=(2,3), # use stage-2 of 7x7x768 + ), + mix_block = dict( # AutoMix + type='PixelMixBlock', + in_channels=768, reduction=2, use_scale=True, + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix + lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v + lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none + value_neck_cfg=None, # SAMix: non-linear value + x_qk_concat=False, x_v_concat=False, # SAMix x concat: none + att_norm_cfg=None, # Not use attention_norm for better performance + mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 + frozen=False), + head_one=dict( + type='ClsMixupHead', # mixup CE + label smooth + loss=dict(type='LabelSmoothLoss', + label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0), + with_avg_pool=True, + in_channels=768, num_classes=1000), + head_mix=dict( + type='ClsMixupHead', # mixup CE + label smooth + loss=dict(type='LabelSmoothLoss', + label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0), + with_avg_pool=True, + in_channels=768, num_classes=1000), + head_weights=dict( + decent_weight=[], accent_weight=[], + 
head_mix_q=1, head_one_q=1, head_mix_k=1, head_one_k=1), + init_cfg=[ + dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), + dict(type='Constant', layer='LayerNorm', val=1., bias=0.) + ], +) + +# dataset +data = dict(imgs_per_gpu=128, workers_per_gpu=10) + +# interval for accumulate gradient +update_interval = 1 # total: 8 x bs128 x 1 accumulates = bs1024 + +custom_hooks = [ + dict(type='SAVEHook', + save_interval=1252 * 20, # 20 ep + iter_per_epoch=1252, + ), + dict(type='CustomCosineAnnealingHook', # 0.1 to 0 + attr_name="mask_loss", attr_base=0.1, min_attr=0., by_epoch=False, # by iter + update_interval=update_interval, + ), + dict(type='CosineScheduleHook', + end_momentum=0.99999, # 0.999 to 0.99999 + adjust_scope=[0.25, 1.0], + warming_up="constant", + update_interval=update_interval, + interval=1) +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=1e-3, # lr = 5e-4 * (256 * 4) * 1 accumulate / 512 = 1e-3 / bs1024 + weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999), + paramwise_options={ + 'norm': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'absolute_pos_embed': dict(weight_decay=0.), + 'relative_position_bias_table': dict(weight_decay=0.), + 'mix_block': dict(lr=1e-3), + }) +# Sets `find_unused_parameters`: randomly switch off mixblock +find_unused_parameters = True + +# fp16 +use_fp16 = False +fp16 = dict(type='mmcv', loss_scale='dynamic') +optimizer_config = dict( + grad_clip=dict(max_norm=5.0), update_interval=update_interval) + +# lr scheduler: Swim for DeiT +lr_config = dict( + policy='CosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=20, warmup_by_epoch=True, # warmup 20 epochs. 
+ warmup_ratio=1e-5, +) + +# additional scheduler +addtional_scheduler = dict( + policy='CosineAnnealing', + by_epoch=False, min_lr=1e-4, # 0.1 x lr + paramwise_options=['mix_block'], + warmup_iters=20, warmup_by_epoch=True, # warmup 20 epochs + warmup_ratio=1e-5, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/classification/imagenet/automix/swin/swin_t_l2_a2_near_lam_cat_switch0_8_att_ln_8x128_fp16_ep300.py b/configs/classification/imagenet/automix/swin/swin_t_l2_a2_near_lam_cat_switch0_8_att_ln_8x128_fp16_ep300.py new file mode 100644 index 00000000..07f17313 --- /dev/null +++ b/configs/classification/imagenet/automix/swin/swin_t_l2_a2_near_lam_cat_switch0_8_att_ln_8x128_fp16_ep300.py @@ -0,0 +1,120 @@ +_base_ = [ + '../../../_base_/datasets/imagenet/swin_sz224_8xbs128.py', + '../../../_base_/default_runtime.py', +] + +# model settings +model = dict( + type='AutoMixup', + pretrained=None, + alpha=2.0, + momentum=0.999, + mask_layer=2, # dowmsampling to 1/16 + mask_loss=0.1, # using loss + mask_adjust=0, # none for large datasets + lam_margin=0.08, + switch_off=0.8, # switch off mixblock (fixed) + mask_up_override=None, + debug=True, + backbone=dict( + type='SwinTransformer', + arch='tiny', + img_size=224, drop_path_rate=0.2, + out_indices=(2,3), # use stage-2 of 7x7x768 + ), + mix_block = dict( # AutoMix + type='PixelMixBlock', + in_channels=768, reduction=2, use_scale=True, + unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix + lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v + lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none + value_neck_cfg=None, # SAMix: non-linear value + x_qk_concat=False, x_v_concat=False, # SAMix x concat: none + att_norm_cfg=dict(type='LN2d', eps=1e-6), # AutoMix: attention norm for fp16 (fast training) + mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1 + frozen=False), + head_one=dict( + 
type='ClsMixupHead', # mixup CE + label smooth + loss=dict(type='LabelSmoothLoss', + label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0), + with_avg_pool=True, + in_channels=768, num_classes=1000), + head_mix=dict( + type='ClsMixupHead', # mixup CE + label smooth + loss=dict(type='LabelSmoothLoss', + label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0), + with_avg_pool=True, + in_channels=768, num_classes=1000), + head_weights=dict( + decent_weight=[], accent_weight=[], + head_mix_q=1, head_one_q=1, head_mix_k=1, head_one_k=1), + init_cfg=[ + dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), + dict(type='Constant', layer='LayerNorm', val=1., bias=0.) + ], +) + +# dataset +data = dict(imgs_per_gpu=128, workers_per_gpu=10) + +# interval for accumulate gradient +update_interval = 1 # total: 8 x bs128 x 1 accumulates = bs1024 + +custom_hooks = [ + dict(type='SAVEHook', + save_interval=1252 * 20, # 20 ep + iter_per_epoch=1252, + ), + dict(type='CustomCosineAnnealingHook', # 0.1 to 0 + attr_name="mask_loss", attr_base=0.1, min_attr=0., by_epoch=False, # by iter + update_interval=update_interval, + ), + dict(type='CosineScheduleHook', + end_momentum=0.99999, # 0.999 to 0.99999 + adjust_scope=[0.25, 1.0], + warming_up="constant", + update_interval=update_interval, + interval=1) +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=1e-3, # lr = 5e-4 * (256 * 4) * 1 accumulate / 512 = 1e-3 / bs1024 + weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999), + paramwise_options={ + 'norm': dict(weight_decay=0.), + 'bias': dict(weight_decay=0.), + 'absolute_pos_embed': dict(weight_decay=0.), + 'relative_position_bias_table': dict(weight_decay=0.), + 'mix_block': dict(lr=1e-3), + }) +# Sets `find_unused_parameters`: randomly switch off mixblock +find_unused_parameters = True + +# fp16 +use_fp16 = True +fp16 = dict(type='mmcv', loss_scale='dynamic') +optimizer_config = dict( + grad_clip=dict(max_norm=5.0), 
update_interval=update_interval) + +# lr scheduler: Swim for DeiT +lr_config = dict( + policy='CosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=20, warmup_by_epoch=True, # warmup 20 epochs. + warmup_ratio=1e-5, +) + +# additional scheduler +addtional_scheduler = dict( + policy='CosineAnnealing', + by_epoch=False, min_lr=1e-4, # 0.1 x lr + paramwise_options=['mix_block'], + warmup_iters=20, warmup_by_epoch=True, # warmup 20 epochs + warmup_ratio=1e-5, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/selfsup/a2mim/README.md b/configs/selfsup/a2mim/README.md index 89f16b92..c69b29f1 100644 --- a/configs/selfsup/a2mim/README.md +++ b/configs/selfsup/a2mim/README.md @@ -28,9 +28,10 @@ The classification benchmarks includes 1 downstream task datasets, **ImageNet**. * For ResNet-50, the top-1 classification accuracy of **RSB A3** and **RSB A2** are obtained from end-to-end fine-tuning 100 and 300 epochs on ImageNet. 
-| Backbone | Pre-train epoch | Fine-tuning Top-1 | Pre-train Config | Fine-tuning Config | Download | -| :-------: | :-------------: | :---------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ResNet-50 | 100 | 78.76 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/selfsup/a2mim/imagenet/r50_l3_sz224_8xb256_cos_ep100.py) | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | model \| log | +| Backbone | Pre-train epoch | Fine-tuning Top-1 | Pre-train Config | Fine-tuning Config | Download | +|:---------:|:---------------:|:-----------------:|:-----------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------:|:------------:| +| ResNet-50 | 100 | 78.76 | [config](./imagenet/r50_l3_sz224_8xb256_cos_ep100.py) | [config](https://github.com/Westlake-AI/openmixup/blob/main/configs/benchmarks/classification/imagenet/r50_rsb_a3_ft_sz160_4xb512_cos_fp16_ep100.py) | model \| log | +| ResNet-50 | 300 | 78.90 | [config](./imagenet/r50_l3_sz224_8xb256_cos_ep300.py) | [config](https://github.com/Westlake-AI/openmixup/blob/main/configs/benchmarks/classification/imagenet/r50_rsb_a2_ft_sz224_8xb256_cos_fp16_ep300.py) | model \| log | #### ImageNet-100 Fine-tuning Evaluation diff --git 
a/configs/selfsup/a2mim/imagenet/r50_l3_sz224_8xb256_cos_ep300.py b/configs/selfsup/a2mim/imagenet/r50_l3_sz224_8xb256_cos_ep300.py new file mode 100644 index 00000000..cf95aabe --- /dev/null +++ b/configs/selfsup/a2mim/imagenet/r50_l3_sz224_8xb256_cos_ep300.py @@ -0,0 +1,62 @@ +_base_ = [ + '../../_base_/models/a2mim/r50.py', + '../../_base_/datasets/imagenet/a2mim_rgb_m_sz224_rrc08_bs64.py', + '../../_base_/default_runtime.py', +] + +# model settings +model = dict( + backbone=dict( + mask_layer=3, mask_token="learnable", +)) + +# dataset +data = dict( + imgs_per_gpu=256, workers_per_gpu=10, + train=dict( + feature_mode=None, feature_args=dict(), + mask_pipeline=[ + dict(type='BlockwiseMaskGenerator', + input_size=224, mask_patch_size=32, mask_ratio=0.6, model_patch_size=16, # stage 3 + mask_color='mean', mask_only=False), + ], +)) + +# interval for accumulate gradient +update_interval = 1 + +# additional hooks +custom_hooks = [ + dict(type='SAVEHook', + save_interval=626 * 10, # plot every 10 ep + iter_per_epoch=626), +] + +# optimizer +optimizer = dict( + type='AdamW', + lr=3e-4 * 2048 / 512, # 1.2e-3 for bs2048 + betas=(0.9, 0.999), weight_decay=0.05, eps=1e-8, + paramwise_options={ + r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.), # raw string: `\d` is a regex escape, not a str escape + 'bias': dict(weight_decay=0.), + 'mask_token': dict(weight_decay=0., lr_mult=1e-1,), + }) + +# fp16 +use_fp16 = False +fp16 = dict(type='mmcv', loss_scale='dynamic') +# optimizer args +optimizer_config = dict(update_interval=update_interval) + +# lr scheduler +lr_config = dict( + policy='StepFixCosineAnnealing', + by_epoch=False, min_lr=1e-5, + warmup='linear', + warmup_iters=10, warmup_by_epoch=True, + warmup_ratio=1e-6, +) + +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/docs/en/awesome_mixups/Mixup_SL.md b/docs/en/awesome_mixups/Mixup_SL.md index ac482303..eb260679 100644 --- a/docs/en/awesome_mixups/Mixup_SL.md +++ b/docs/en/awesome_mixups/Mixup_SL.md @@ -118,16 +118,22 @@ We 
are working on a survey of mixup methods. The list of awesome mixup methods i - Mixup Without Hesitation. [[Pattern Recognition'2022](https://arxiv.org/abs/2101.04342)] [[code](https://github.com/yuhao318/mwh)] * **CAMixup**: Yeming Wen, Ghassen Jerfel, Rafael Muller, Michael W. Dusenberry, Jasper Snoek, Balaji Lakshminarayanan, Dustin Tran. - Combining Ensembles and Data Augmentation can Harm your Calibration. [[ICLR'2021](https://arxiv.org/abs/2010.09875)] [[code](https://github.com/google/edward2/tree/main/experimental/marginalization_mixup)] +* **TokenLabeling**: Zihang Jiang, Qibin Hou, Li Yuan, Daquan Zhou, Yujun Shi, Xiaojie Jin, Anran Wang, Jiashi Feng. + - All Tokens Matter: Token Labeling for Training Better Vision Transformers. [[NIPS'2021](https://arxiv.org/abs/2104.10858)] [[code](https://github.com/zihangJiang/TokenLabeling)] * **Saliency Grafting**: Joonhyung Park, June Yong Yang, Jinwoo Shin, Sung Ju Hwang, Eunho Yang. - Saliency Grafting: Innocuous Attribution-Guided Mixup with Calibrated Label Mixing. [[AAAI'2022](https://arxiv.org/abs/2112.08796)] * **TransMix**: Jie-Neng Chen, Shuyang Sun, Ju He, Philip Torr, Alan Yuille, Song Bai. - TransMix: Attend to Mix for Vision Transformers. [[CVPR'2022](https://arxiv.org/abs/2111.09833)] [[code](https://github.com/Beckschen/TransMix)] -* **GenLabel**: Yeming Wen, Ghassen Jerfel, Rafael Muller, Michael W. Dusenberry, Jasper Snoek, Balaji Lakshminarayanan, Dustin Tran. +* **GenLabel**: Jy-yong Sohn, Liang Shang, Hongxu Chen, Jaekyun Moon, Dimitris Papailiopoulos, Kangwook Lee. - GenLabel: Mixup Relabeling using Generative Models. [[ArXiv'2022](https://arxiv.org/abs/2201.02354)] * **DecoupleMix**: Zicheng Liu, Siyuan Li, Ge Wang, Cheng Tan, Lirong Wu, Stan Z. Li. - Decoupled Mixup for Data-efficient Learning. [[Arxiv'2022](https://arxiv.org/abs/2203.10761)] [[code](https://github.com/Westlake-AI/openmixup)] * **TokenMix**: Jihao Liu, Boxiao Liu, Hang Zhou, Hongsheng Li, Yu Liu. 
- TokenMix: Rethinking Image Mixing for Data Augmentation in Vision Transformers. [[ECCV'2022](https://arxiv.org/abs/2207.08409)] [[code](https://github.com/Sense-X/TokenMix)] +* **TokenMixup**: Hyeong Kyu Choi, Joonmyung Choi, Hyunwoo J. Kim. + - TokenMixup: Efficient Attention-guided Token-level Data Augmentation for Transformers. [[NIPS'2022](https://arxiv.org/abs/2210.07562)] [[code](https://github.com/mlvlab/TokenMixup)] +* **TL-Align**: Han Xiao, Wenzhao Zheng, Zheng Zhu, Jie Zhou, Jiwen Lu. + - Token-Label Alignment for Vision Transformers. [[arXiv'2022](https://arxiv.org/abs/2210.06455)] [[code](https://github.com/Euphoria16/TL-Align)] ## Analysis of Mixup diff --git a/docs/en/changelog.md b/docs/en/changelog.md index ebba8448..ed796fc0 100644 --- a/docs/en/changelog.md +++ b/docs/en/changelog.md @@ -2,7 +2,7 @@ ### v0.2.6 (41/09/2022) -Bump version to V0.2.6 with new features as [#20](https://github.com/Westlake-AI/openmixup/issues/20). Update new features and documents of `OpenMixup` v0.2.6 (issue [#24](https://github.com/Westlake-AI/openmixup/issues/24) and issue [#25](https://github.com/Westlake-AI/openmixup/issues/25)). +Bump version to V0.2.6 with new features as [#20](https://github.com/Westlake-AI/openmixup/issues/20). Update new features and documents of `OpenMixup` v0.2.6 as issue [#24](https://github.com/Westlake-AI/openmixup/issues/24), fix relevant issue [#25](https://github.com/Westlake-AI/openmixup/issues/25), issue [#26](https://github.com/Westlake-AI/openmixup/issues/26), and issue [#27](https://github.com/Westlake-AI/openmixup/issues/27). 
#### New Features diff --git a/openmixup/__init__.py b/openmixup/__init__.py index 02c2f777..aa4eea11 100644 --- a/openmixup/__init__.py +++ b/openmixup/__init__.py @@ -47,7 +47,7 @@ def digit_version(version_str: str, length: int = 4): mmcv_minimum_version = '1.4.2' -mmcv_maximum_version = '1.6.0' +mmcv_maximum_version = '1.7.0' mmcv_version = digit_version(mmcv.__version__) diff --git a/openmixup/datasets/data_sources/image_list.py b/openmixup/datasets/data_sources/image_list.py index cf91b7e0..1ea0c653 100644 --- a/openmixup/datasets/data_sources/image_list.py +++ b/openmixup/datasets/data_sources/image_list.py @@ -1,4 +1,3 @@ -import cv2 import os import mmcv import numpy as np @@ -19,7 +18,9 @@ class ImageList(object): root (str): Path to the dataset. list_file (str): Path to the txt list file. splitor (str): Splitor between file names and the class id. - backend (str): Backend of the loader in {'pillow', 'cv2'}. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='pillow')``. return_label (bool): Whether to return the class id. 
""" @@ -29,7 +30,7 @@ def __init__(self, root, list_file, splitor=" ", - backend='pillow', + file_client_args=dict(backend='pillow'), return_label=True): with open(list_file, 'r') as fp: lines = fp.readlines() @@ -45,10 +46,16 @@ def __init__(self, self.labels = None self.fns = [l.strip() for l in lines] self.fns = [os.path.join(root, fn) for fn in self.fns] - self.backend = backend - assert backend in ['cv2', 'pillow'] - if self.backend == 'cv2': - self.file_client = mmcv.FileClient(backend='disk') + + self.file_client_args = file_client_args + self.backend = file_client_args.get('backend', 'pillow') + assert self.backend in \ + ['pillow', 'disk', 'ceph', 'memcached', 'lmdb', 'petrel', 'http'], \ + "Find unsupport file_client_backend={}".format(self.backend) + if self.backend != 'pillow': + self.file_client = mmcv.FileClient(**self.file_client_args) + else: + self.file_client = None def get_length(self): return len(self.fns) @@ -60,10 +67,13 @@ def get_sample(self, idx): else: img_bytes = self.file_client.get(self.fns[idx]) img = mmcv.imfrombytes(img_bytes, flag='color') - if img is None: - img = cv2.cvtColor( - np.array(Image.open(self.fns[idx])), cv2.COLOR_RGB2BGR) img = Image.fromarray(img.astype(np.uint8)) + if img is None: # fix bug of loading by cv2 + if self.backend == 'cv2': + img = Image.open(self.fns[idx]) + img = img.convert('RGB') + else: + raise ValueError("Fail to load img={}".format(self.fns[idx])) if self.has_labels and self.return_label: target = self.labels[idx] diff --git a/openmixup/datasets/data_sources/imagenet.py b/openmixup/datasets/data_sources/imagenet.py index bca37d12..f217c9ce 100644 --- a/openmixup/datasets/data_sources/imagenet.py +++ b/openmixup/datasets/data_sources/imagenet.py @@ -1013,6 +1013,11 @@ class ImageNet(ImageList): 'toilet tissue, toilet paper, bathroom tissue' ] - def __init__(self, root, list_file, splitor=" ", backend='pillow', return_label=True): + def __init__(self, + root, + list_file, + splitor=" ", + 
file_client_args=dict(backend='pillow'), + return_label=True): super(ImageNet, self).__init__( - root, list_file, splitor, backend, return_label) + root, list_file, splitor, file_client_args, return_label) diff --git a/openmixup/datasets/pipelines/transforms.py b/openmixup/datasets/pipelines/transforms.py index 3ced998c..608adda7 100644 --- a/openmixup/datasets/pipelines/transforms.py +++ b/openmixup/datasets/pipelines/transforms.py @@ -46,6 +46,30 @@ def __repr__(self): return repr_str +@PIPELINES.register_module +class RandomChoiceTrans(object): + """Apply single transformation randomly picked from a list. + + Args: + transforms (list[dict]): List of transformations in dictionaries. + p (Sequence[float], optional): Sampling weight of each transform, one per entry of `transforms`; uniform choice when None. + """ + + def __init__(self, transforms, p=None): + if p is not None: + assert isinstance(p, Sequence), "Argument p should be a sequence" + assert len(p) == len(transforms) + self.trans = [build_from_cfg(t, PIPELINES) for t in transforms] + self.p = p + + def __call__(self, *args): + t = random.choices(self.trans, weights=self.p)[0] + return t(*args) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" + + @PIPELINES.register_module() class CenterCropForEfficientNet(object): r"""Center crop the image. @@ -1833,3 +1857,22 @@ def __call__(self, img): def __repr__(self): repr_str = self.__class__.__name__ return repr_str + + +@PIPELINES.register_module +class ToHalf(object): + """Cast a torch.Tensor or numpy array to half precision (fp16).""" + + def __init__(self): + pass + + def __call__(self, img): + if isinstance(img, torch.Tensor): + img = img.to(torch.half) + else: + img = img.astype(np.float16) + return img + + def __repr__(self): + repr_str = self.__class__.__name__ + return repr_str