# Identifier of this model configuration.
name: X3D
# Nested configuration tree; its upper-case sections follow the layout of the
# SlowFast config referenced below.
cfg:
  # Parameters from https://github.com/facebookresearch/SlowFast/blob/master/configs/Kinetics/X3D_L.yaml
  # NOTE(review): the values below (DEPTH_FACTOR 2.2, 13 frames, sampling rate 6,
  # crop size 160) look like the X3D_S preset rather than X3D_L -- confirm which
  # upstream file was actually used.
  # Model-level options.
  MODEL:
    # Fixed to 400 here. Alternative kept for reference: ${model.feature_size}
    # (in self-supervised learning, this is the feature size).
    NUM_CLASSES: 400
    DROPOUT_RATE: 0.5
    HEAD_ACT: 'softmax'
    FC_INIT_STD: 0.01
  # X3D-specific scaling factors, channel dimensions and switches.
  X3D:
    WIDTH_FACTOR: 2.0
    DEPTH_FACTOR: 2.2
    BOTTLENECK_FACTOR: 2.25
    DIM_C5: 2048
    DIM_C1: 12
    SCALE_RES2: false
    CHANNELWISE_3x3x3: true
    BN_LIN5: false
  # Input data settings. Each value is taken from the dataset config through
  # the `assert_value` resolver together with the literal expected for this
  # model (presumably the resolver fails when the two differ -- verify against
  # the resolver's implementation).
  DATA:
    NUM_FRAMES: '${assert_value:${dataset.dataset_params.num_frames},13}'
    SAMPLING_RATE: '${assert_value:${dataset.dataset_params.video_sampling_rate},6}'
    TRAIN_JITTER_SCALES: '${assert_value:${dataset.dataset_params.jitter_info.train_jitter_scales},[ 182, 228 ]}'
    TRAIN_CROP_SIZE: '${assert_value:${dataset.dataset_params.resolution},160}'
    INPUT_CHANNEL_NUM: [3]
  # Backbone (ResNet-style) options.
  RESNET:
    ZERO_INIT_FINAL_BN: true
    TRANS_FUNC: 'x3d_transform'
    STRIDE_1X1: false
    DEPTH: 50
  # Batch-normalization options.
  BN:
    USE_PRECISE_STATS: true
    NUM_BATCHES_PRECISE: 200
    WEIGHT_DECAY: 0.0
    NORM_TYPE: "batchnorm"
    # Was written as `BN.NUM_SPLITS`, which inside this mapping is a literal
    # dotted key (`BN -> "BN.NUM_SPLITS"`) instead of the `NUM_SPLITS` entry of
    # the BN section, unlike every sibling key here.
    NUM_SPLITS: 1
    NUM_SYNC_DEVICES: 1
  # Non-local block options; each list below has four top-level entries (the
  # POOL entries are labelled Res2..Res5).
  NONLOCAL:
    LOCATION: [[[]], [[]], [[]], [[]]]
    GROUP: [[1], [1], [1], [1]]
    INSTANTIATION: 'dot_product'
    POOL:
      # Res2
      - [[1, 2, 2], [1, 2, 2]]
      # Res3
      - [[1, 2, 2], [1, 2, 2]]
      # Res4
      - [[1, 2, 2], [1, 2, 2]]
      # Res5
      - [[1, 2, 2], [1, 2, 2]]