configs | bug fixes, config yml files added
mustafa1728 committed Jun 1, 2021
1 parent ab10546 commit f249c96
Showing 8 changed files with 138 additions and 103 deletions.
15 changes: 14 additions & 1 deletion README.md
@@ -1,10 +1,23 @@
# EPIC-KITCHENS-100 UDA Challenge Source Code
This repository contains the code used to produce the baseline results (TA3N) for the EPIC-KITCHENS-100 UDA Challenge.

# Lightning Implementation

A notebook is provided to demo the code and verify that it runs correctly:
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1OXHG5cAS-mNRjjKzjt73ljZYg5bBMQv7#offline=true&sandboxMode=false)

Instructions to run:
~~~
python3 main_lightning.py --cfg configs/train_epic_RGB.yml
python3 test_models_lightning.py --cfg configs/test_epic_RGB.yml
~~~
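
The `--cfg` flag points at one of the YAML files under `configs/`; these files override the yacs defaults defined in `config.py` (shown further down). A minimal sketch of how a yacs-based entry point typically consumes the flag (the `get_cfg_defaults` helper and the merge/freeze calls are illustrative assumptions, not necessarily the exact code in `main_lightning.py`):

~~~python
# Illustrative sketch only: assumes the yacs config tree `_C` from config.py.
import argparse
from yacs.config import CfgNode

def get_cfg_defaults() -> CfgNode:
    """Return a fresh copy of the default config defined in config.py."""
    from config import _C
    return _C.clone()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cfg", required=True, help="path to a YAML file under configs/")
    args = parser.parse_args()

    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.cfg)  # YAML keys override the defaults
    cfg.freeze()
    print(cfg.DATASET.MODALITY, cfg.TRAINER.BATCH_SIZE)
~~~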


# UDA Challenge Original Code

Some modifications have been made to the original TA3N code base to produce results on EPIC-KITCHENS-100, including:
1. Multiple classification heads that produce separate predictions for verbs and nouns (a minimal sketch follows this list).
2. A modified dataloader that loads the EPIC-KITCHENS-100 pre-extracted features.
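Item 1 above (separate verb and noun heads) can be pictured as one shared video-level feature feeding two classifiers: `DATASET.NUM_CLASSES = "97,300"` in `config.py` corresponds to 97 verb classes and 300 noun classes. A minimal sketch, assuming a 1024-dimensional feature (the `fc_dim` default visible in `models_lightning.py`); this is not the repository's actual `VideoModel` code:

~~~python
import torch
import torch.nn as nn

class TwoHeadClassifier(nn.Module):
    """Sketch: shared video feature -> separate verb and noun predictions."""
    def __init__(self, feat_dim: int = 1024, num_verbs: int = 97, num_nouns: int = 300):
        super().__init__()
        self.fc_verb = nn.Linear(feat_dim, num_verbs)
        self.fc_noun = nn.Linear(feat_dim, num_nouns)

    def forward(self, feat_video: torch.Tensor):
        # feat_video: (batch, feat_dim) aggregated video-level feature
        return self.fc_verb(feat_video), self.fc_noun(feat_video)

out_verb, out_noun = TwoHeadClassifier()(torch.randn(8, 1024))  # shapes: (8, 97), (8, 300)
~~~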
31 changes: 18 additions & 13 deletions config.py
@@ -12,7 +12,7 @@

_C = CN()

_C.TO_VALIDATE = True # choices = [True, False]
_C.TO_VALIDATE = False # choices = [True, False]


# -----------------------------------------------------------------------------
@@ -32,9 +32,6 @@
else:
_C.PATHS.VAL_DATASET_SOURCE= None
_C.PATHS.VAL_DATASET_TARGET= None
_C.PATHS.NUM_SOURCE= 16115 # number of training data (source)
_C.PATHS.NUM_TARGET= 26115 # number of training data (target)

_C.PATHS.PATH_DATA_SOURCE=os.path.join(_C.PATHS.PATH_DATA_ROOT, _C.PATHS.DATASET_SOURCE)
_C.PATHS.PATH_DATA_TARGET=os.path.join(_C.PATHS.PATH_DATA_ROOT, _C.PATHS.DATASET_TARGET)
if _C.TO_VALIDATE:
@@ -62,9 +59,13 @@
_C.DATASET = CN()
_C.DATASET.DATASET = "epic" # dataset choices = [hmdb_ucf, hmdb_ucf_small, ucf_olympic]
_C.DATASET.NUM_CLASSES = "97,300"
_C.DATASET.MODALITY = "ALL" # choices = [RGB ]
_C.DATASET.NUM_SOURCE= 16115 # number of training data (source)
_C.DATASET.NUM_TARGET= 26115 # number of training data (target)

_C.DATASET.MODALITY = "RGB" # choices = [ALL, RGB, Audio, Flow]
_C.DATASET.FRAME_TYPE = "feature" # choices = [frame]
_C.DATASET.NUM_SEGMENTS = 5 # sample frame # of each video for training
_C.DATASET.VAL_SEGMENTS = 5 # sample frame # of each video for validation
_C.DATASET.BASELINE_TYPE = "video" # choices = ['frame', 'tsn']
_C.DATASET.FRAME_AGGREGATION = "trn-m" # method to integrate the frame-level features. choices = [avgpool, trn, trn-m, rnn, temconv]

@@ -82,8 +83,8 @@
_C.MODEL.WEIGHTED_CLASS_LOSS_DA = "N" # choices = [Y, N]
_C.MODEL.WEIGHTED_CLASS_LOSS = "N" # choices = [Y, N]

_C.MODEL.DROPOUT_I = 0.8
_C.MODEL.DROPOUT_V = 0.8
_C.MODEL.DROPOUT_I = 0.5
_C.MODEL.DROPOUT_V = 0.5
_C.MODEL.NO_PARTIALBN = True


@@ -92,7 +93,7 @@
_C.MODEL.EXP_DA_NAME="baseline"
else:
_C.MODEL.EXP_DA_NAME="DA"
_C.MODEL.DIS_DA = None # choices = [DAN, JAN]
_C.MODEL.DIS_DA = "DAN" # choices = [DAN, CORAL, JAN]
_C.MODEL.ADV_POS_0 = "Y" # discriminator for relation features. choices = [Y, N]
_C.MODEL.ADV_DA = "RevGrad" # choices = [None]
_C.MODEL.ADD_LOSS_DA = "attentive_entropy" # choices = [None, target_entropy, attentive_entropy]
@@ -128,6 +129,7 @@
_C.TRAINER.SHARE_PARAMS = "Y" # choices = [Y, N]
_C.TRAINER.PRETRAIN_SOURCE = False
_C.TRAINER.VERBOSE = True
_C.TRAINER.DANN_WARMUP = True

# Learning configs
_C.TRAINER.LOSS_TYPE = 'nll'
@@ -137,7 +139,7 @@
_C.TRAINER.LR_STEPS = [10, 20]
_C.TRAINER.MOMENTUM = 0.9
_C.TRAINER.WEIGHT_DECAY = 0.0001
_C.TRAINER.BATCH_SIZE = [128, 128*(_C.DATASET.NUM_TARGET/_C.DATASET.NUM_SOURCE), 128]
_C.TRAINER.BATCH_SIZE = [128, int(128*_C.DATASET.NUM_TARGET/_C.DATASET.NUM_SOURCE), 128]
_C.TRAINER.OPTIMIZER_NAME = "SGD" # choices = [SGD, Adam]
_C.TRAINER.CLIP_GRADIENT = 20

@@ -152,7 +154,7 @@



_C.PATHS.EXP_PATH = os.path.join(_C.DATASET.PATH_EXP + '_' + _C.TRAINER.OPTIMIZER_NAME + '-share_params_' + _C.MODEL.SHARE_PARAMS + '-lr_' + str(_C.TRAINER.LR) + '-bS_' + str(_C.TRAINER.BATCH_SIZE[0]), _C.DATASET.DATASET + '-'+ str(_C.DATASET.NUM_SEGMENTS) + '-seg-disDA_' + _C.MODEL.DIS_DA + '-alpha_' + str(_C.HYPERPARAMETERS.ALPHA) + '-advDA_' + _C.MODEL.ADV_DA + '-beta_' + str(_C.HYPERPARAMETERS.BETA[0])+ '_'+ str(_C.HYPERPARAMETERS.BETA[1])+'_'+ str(_C.HYPERPARAMETERS.BETA[2])+"_gamma_" + str(_C.HYPERPARAMETERS.GAMMA) + "_mu_" + str(_C.HYPERPARAMETERS.MU))
_C.PATHS.EXP_PATH = os.path.join(_C.PATHS.PATH_EXP + '_' + _C.TRAINER.OPTIMIZER_NAME + '-share_params_' + _C.MODEL.SHARE_PARAMS + '-lr_' + str(_C.TRAINER.LR) + '-bS_' + str(_C.TRAINER.BATCH_SIZE[0]), _C.DATASET.DATASET + '-'+ str(_C.DATASET.NUM_SEGMENTS) + '-alpha_' + str(_C.HYPERPARAMETERS.ALPHA) + '-beta_' + str(_C.HYPERPARAMETERS.BETA[0])+ '_'+ str(_C.HYPERPARAMETERS.BETA[1])+'_'+ str(_C.HYPERPARAMETERS.BETA[2])+"_gamma_" + str(_C.HYPERPARAMETERS.GAMMA) + "_mu_" + str(_C.HYPERPARAMETERS.MU))


# ---------------------------------------------------------------------------- #
@@ -162,12 +164,14 @@

_C.TESTER.TEST_TARGET_DATA = os.path.join(_C.PATHS.PATH_DATA_ROOT, "target_test")

_C.TESTER.WEIGHTS = os.path.join(_C.EXP_PATH , "checkpoint.pth.tar")
_C.TESTER.WEIGHTS = os.path.join(_C.PATHS.EXP_PATH , "checkpoint.pth.tar")
_C.TESTER.NOUN_WEIGHTS = None
_C.TESTER.BATCH_SIZE = 512
_C.TESTER.NOUN_TARGET_DATA = None
_C.TESTER.RESULT_JSON = "test.json"
_C.TESTER.TEST_SEGMENTS = 5 # sample frame # of each video for testing
_C.TESTER.SAVE_SCORES = os.path.join(_C.EXP_PATH , "scores")
_C.TESTER.SAVE_CONFUSION = os.path.join(_C.EXP_PATH , "confusion_matrix")
_C.TESTER.SAVE_SCORES = os.path.join(_C.PATHS.EXP_PATH , "scores")
_C.TESTER.SAVE_CONFUSION = os.path.join(_C.PATHS.EXP_PATH , "confusion_matrix")

_C.TESTER.VERBOSE = True

@@ -180,6 +184,7 @@
_C.MODEL.N_DIRECTIONS = 1
_C.MODEL.N_TS = 5
_C.MODEL.TENSORBOARD = True
_C.MODEL.FLOW_PREFIX = ""
_C.TRAINER.JOBS = 2
_C.TRAINER.EF = 1
_C.TRAINER.PF = 50
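One of the bug fixes above is the `TRAINER.BATCH_SIZE` line: `128*(NUM_TARGET/NUM_SOURCE)` evaluates to a float, and PyTorch's `DataLoader` rejects non-integer batch sizes, so the new code wraps the expression in `int()`. With the `NUM_SOURCE`/`NUM_TARGET` values above this gives a target batch size of 207, matching the `bS_128_207` fragment in the checkpoint path of `configs/test_epic_RGM.yml` below. A small illustration (the toy dataset is only for demonstration):

~~~python
import torch
from torch.utils.data import DataLoader, TensorDataset

num_source, num_target = 16115, 26115             # DATASET.NUM_SOURCE / NUM_TARGET from config.py
bad_bs = 128 * (num_target / num_source)          # 207.43... (float) -> rejected by DataLoader
good_bs = int(128 * num_target / num_source)      # 207

dataset = TensorDataset(torch.zeros(1000, 3))     # toy dataset just for the demo
# DataLoader(dataset, batch_size=bad_bs)          # raises ValueError: batch_size should be a positive integer
loader = DataLoader(dataset, batch_size=good_bs)
print(good_bs, len(loader))                       # 207, 5 batches (ceil(1000 / 207))
~~~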
9 changes: 9 additions & 0 deletions configs/test_epic_RGM.yml
@@ -0,0 +1,9 @@
PATHS:
PATH_DATA_ROOT: "data/"
PATH_LABELS_ROOT: "annotations/"
DATASET:
DATASET: "epic"
MODALITY: "RGB"
TESTER:
WEIGHTS: "model/action-model/Testexp-SGD-share_params_Y-lr_3e-3-bS_128_207/epic-5seg-disDA_none-alpha_0-advDA_RevGrad-beta_0.75_0.75_0.5-useBN_none-addlossDA_attentive_entropy-gamma_0.003-ensDA_none-mu_0-useAttn_TransAttn-n_attn_1/RGB/checkpoint.pth.tar"

11 changes: 11 additions & 0 deletions configs/train_epic_RGB.yml
@@ -0,0 +1,11 @@
TO_VALIDATE: False
PATHS:
PATH_DATA_ROOT: "data/"
PATH_LABELS_ROOT: "annotations/"
DATASET:
DATASET: "epic"
MODALITY: "RGB"
TRAINER:
MIN_EPOCHS: 25
MAX_EPOCHS: 30
ACCELERATOR: 'ddp'
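
The `TRAINER` keys in this file mirror PyTorch Lightning `Trainer` arguments. A hedged sketch of how they might be forwarded (the actual wiring in `main_lightning.py` is not shown in this diff; `build_trainer` is a hypothetical helper):

~~~python
import pytorch_lightning as pl

def build_trainer(cfg):
    # cfg is the merged yacs config; key names come from configs/train_epic_RGB.yml.
    return pl.Trainer(
        min_epochs=cfg.TRAINER.MIN_EPOCHS,    # 25
        max_epochs=cfg.TRAINER.MAX_EPOCHS,    # 30
        accelerator=cfg.TRAINER.ACCELERATOR,  # 'ddp' in Lightning 1.x; newer releases use strategy='ddp'
    )
~~~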
34 changes: 17 additions & 17 deletions models_lightning.py
@@ -69,10 +69,10 @@ def __init__(self, num_class, baseline_type, frame_aggregation, modality,
train_segments=5, val_segments=25,
base_model='resnet101', path_pretrained='', new_length=None,
before_softmax=True,
dropout_i=0.5, dropout_v=0.5, use_bn='none', ens_DA='none',
dropout_i=0.5, dropout_v=0.5, use_bn=None, ens_DA=None,
crop_num=1, partial_bn=True, verbose=True, add_fc=1, fc_dim=1024,
n_rnn=1, rnn_cell='LSTM', n_directions=1, n_ts=5,
use_attn='TransAttn', n_attn=1, use_attn_frame='none',
use_attn='TransAttn', n_attn=1, use_attn_frame=None,
share_params='Y'):
super().__init__()
super(VideoModel, self).__init__()
@@ -238,7 +238,7 @@ def _prepare_DA(self, num_class, base_model, modality): # convert the model to D


# BN for the above layers
if self.use_bn != 'none': # S & T: use AdaBN (ICLRW 2017) approach
if self.use_bn != None: # S & T: use AdaBN (ICLRW 2017) approach
self.bn_shared_S = nn.BatchNorm1d(feat_shared_dim) # BN for the shared layers
self.bn_shared_T = nn.BatchNorm1d(feat_shared_dim)
self.bn_source_S = nn.BatchNorm1d(feat_frame_dim) # BN for the source feature layers
@@ -360,7 +360,7 @@ def _prepare_DA(self, num_class, base_model, modality): # convert the model to D
constant_(self.fc_classifier_video_noun_target.bias, 0)

# BN for the above layers
if self.use_bn != 'none': # S & T: use AdaBN (ICLRW 2017) approach
if self.use_bn != None: # S & T: use AdaBN (ICLRW 2017) approach
self.bn_source_video_S = nn.BatchNorm1d(feat_video_dim)
self.bn_source_video_T = nn.BatchNorm1d(feat_video_dim)
self.bn_source_video_2_S = nn.BatchNorm1d(feat_video_dim)
@@ -623,8 +623,8 @@ def forward(self, input_source, input_target, beta, mu, is_train, reverse):
feat_fc_target = self.fc_feature_shared_target(feat_base_target) if self.share_params == 'N' else self.fc_feature_shared_source(feat_base_target)

# adaptive BN
if self.use_bn != 'none':
feat_fc_source, feat_fc_target = self.domainAlign(feat_fc_source, feat_fc_target, is_train, 'shared', self.alpha.item(), num_segments, 1)
if self.use_bn != None:
feat_fc_source, feat_fc_target = self.domainAlign(feat_fc_source, feat_fc_target, is_train, 'shared', self.alpha, num_segments, 1)

feat_fc_source = self.relu(feat_fc_source)
feat_fc_target = self.relu(feat_fc_target)
@@ -666,7 +666,7 @@ def forward(self, input_source, input_target, beta, mu, is_train, reverse):
pred_domain_all_source.append(pred_fc_domain_frame_source.view((batch_source, num_segments) + pred_fc_domain_frame_source.size()[-1:]))
pred_domain_all_target.append(pred_fc_domain_frame_target.view((batch_target, num_segments) + pred_fc_domain_frame_target.size()[-1:]))

if self.use_attn_frame != 'none': # attend the frame-level features only
if self.use_attn_frame != None: # attend the frame-level features only
feat_fc_source = self.get_attn_feat_frame(feat_fc_source, pred_fc_domain_frame_source)
feat_fc_target = self.get_attn_feat_frame(feat_fc_target, pred_fc_domain_frame_target)

@@ -699,7 +699,7 @@ def forward(self, input_source, input_target, beta, mu, is_train, reverse):
pred_fc_domain_video_relation_target = self.domain_classifier_relation(feat_fc_video_relation_target, beta)

# transferable attention
if self.use_attn != 'none': # get the attention weighting
if self.use_attn != None: # get the attention weighting
feat_fc_video_relation_source, attn_relation_source = self.get_attn_feat_relation(feat_fc_video_relation_source, pred_fc_domain_video_relation_source, num_segments)
feat_fc_video_relation_target, attn_relation_target = self.get_attn_feat_relation(feat_fc_video_relation_target, pred_fc_domain_video_relation_target, num_segments)
else:
@@ -718,8 +718,8 @@ def forward(self, input_source, input_target, beta, mu, is_train, reverse):
feat_fc_video_source_3_1 = self.tcl_3_1(feat_fc_video_source)
feat_fc_video_target_3_1 = self.tcl_3_1(feat_fc_video_target)

if self.use_bn != 'none':
feat_fc_video_source_3_1, feat_fc_video_target_3_1 = self.domainAlign(feat_fc_video_source_3_1, feat_fc_video_target_3_1, is_train, 'temconv_1', self.alpha.item(), num_segments, 1)
if self.use_bn != None:
feat_fc_video_source_3_1, feat_fc_video_target_3_1 = self.domainAlign(feat_fc_video_source_3_1, feat_fc_video_target_3_1, is_train, 'temconv_1', self.alpha, num_segments, 1)

feat_fc_video_source = self.relu(feat_fc_video_source_3_1) # 16 x 1 x 5 x 512
feat_fc_video_target = self.relu(feat_fc_video_target_3_1) # 16 x 1 x 5 x 512
@@ -898,7 +898,7 @@ def training_step(self, train_batch, batch_idx):
loss = loss_verb # 0.5*(loss_verb+loss_noun)
else:
raise Exception("invalid metric to train")
#if args.ens_DA == 'MCD' and args.use_target != 'none':
#if args.ens_DA == 'MCD' and args.use_target != None:
# loss += criterion(out_source_2, label)

# compute gradient and do SGD step
Expand Down Expand Up @@ -941,7 +941,7 @@ def training_step(self, train_batch, batch_idx):

# 2. calculate the loss for DA
# (I) discrepancy-based approach: discrepancy loss
if self.dis_DA != 'none' and self.use_target != 'none':
if self.dis_DA != None and self.use_target != None:
loss_discrepancy = 0

kernel_muls = [2.0]*2
@@ -993,7 +993,7 @@ def training_step(self, train_batch, batch_idx):
loss += self.alpha * loss_discrepancy

# (II) adversarial discriminative model: adversarial loss
if self.adv_DA != 'none' and self.use_target != 'none':
if self.adv_DA != None and self.use_target != None:
self.loss_adversarial = 0
pred_domain_all = []
pred_domain_target_all = []
@@ -1029,7 +1029,7 @@ def training_step(self, train_batch, batch_idx):

# (III) other loss
# 1. entropy loss for target data
if self.add_loss_DA == 'target_entropy' and self.use_target != 'none':
if self.add_loss_DA == 'target_entropy' and self.use_target != None:
loss_entropy_verb = cross_entropy_soft(out_target[0])
loss_entropy_noun = cross_entropy_soft(out_target[1])

@@ -1044,7 +1044,7 @@ def training_step(self, train_batch, batch_idx):
#loss += gamma * 0.5*(loss_entropy_verb+loss_entropy_noun)

# 3. attentive entropy loss
if self.add_loss_DA == 'attentive_entropy' and self.use_attn != 'none' and self.use_target != 'none':
if self.add_loss_DA == 'attentive_entropy' and self.use_attn != None and self.use_target != None:
loss_entropy_verb = attentive_entropy(torch.cat((out_verb, out_target[0]),0), pred_domain_all[1])
loss_entropy_noun = attentive_entropy(torch.cat((out_noun, out_target[1]), 0), pred_domain_all[1])

@@ -1135,7 +1135,7 @@ def training_epoch_end(self, training_step_outputs):

if self.lr_adaptive == 'loss':
self.adjust_learning_rate_loss(self.optimizers(), self.lr_decay, losses_c, self.loss_c_previous, '>')
elif self.lr_adaptive == 'none' and self.current_epoch in self.lr_steps:
elif self.lr_adaptive == None and self.current_epoch in self.lr_steps:
self.adjust_learning_rate(self.optimizers(), self.lr_decay)

self.loss_c_previous = losses_c
@@ -1151,7 +1151,7 @@ def training_epoch_end(self, training_step_outputs):
self.writer_train.add_scalar("acc/verb", losses_c_verb, self.current_epoch)
self.writer_train.add_scalar("acc/noun", top1_noun, self.current_epoch)
self.writer_train.add_scalar("acc/action", top1_action, self.current_epoch)
if self.adv_DA != 'none' and self.use_target != 'none':
if self.adv_DA != None and self.use_target != None:
self.writer_train.add_scalar("loss/domain", self.loss_adversarial,self.current_epoch)

def accuracy(self, output, target, topk=(1,)):
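The diff is truncated at the `accuracy` helper. Its signature matches the standard top-k precision utility used in TSN-style codebases; a generic standalone version under that assumption (not necessarily identical to the method body hidden by the truncation):

~~~python
import torch

def accuracy(output: torch.Tensor, target: torch.Tensor, topk=(1,)):
    """Compute precision@k for each k: output is (batch, classes), target is (batch,)."""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # (batch, maxk)
    pred = pred.t()                                                # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
~~~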