# GPUS: (0,)
# Directory that run outputs are written under. Named after the model selected
# by MODEL.NAME so that ViT-B/16 results are not mixed into a ViT-B/32 folder.
OUTPUT_DIR: '../../OUTPUT/VITB16_CLIP/'

INPUT:
  # Three-element normalization statistics. The values match the image
  # preprocessing constants published with OpenAI CLIP (presumably one entry
  # per RGB channel — confirm against the data pipeline).
  MEAN:
    - 0.48145466
    - 0.4578275
    - 0.40821073
  STD:
    - 0.26862954
    - 0.26130258
    - 0.27577711

MODEL:
  # Model identifier followed by descriptive metadata.
  NAME: 'ViT-B/16'
  # NOTE(review): 151.2 looks like the figure usually quoted for CLIP ViT-B/32;
  # confirm the parameter count that should be reported for ViT-B/16.
  NUM_PARAMS_IN_M: 151.2
  AUTHOR: 'OpenAI'
  PRETRAINED_DATA: 'CLIP-data'
  # Quoted so the value stays a string instead of being parsed as a date.
  CREATION_TIME: '2021-01-05'

  # Architecture description for the image and text encoders.
  SPEC:
    EMBED_DIM: 512
    VISION:
      MODEL: 'vit'
      PATCH_SIZE: 16
      # NOTE(review): ViT-B image encoders are normally 768 wide; confirm that
      # 384 is intended and how the consumer uses this field.
      WIDTH: 384
      LAYERS: 12
    TEXT:
      TOKENIZER: 'clip'
      STYLE: 'clip'
      CONTEXT_LENGTH: 77
      VOCAB_SIZE: 49408
      WIDTH: 512
      HEADS: 8
      LAYERS: 12

# Evaluation-time settings.
TEST:
  BATCH_SIZE_PER_GPU: 128
  # Empty string: this config does not name a model file.
  MODEL_FILE: ''

# Training-time settings.
TRAIN:
  BATCH_SIZE_PER_GPU: 64
  # Epoch range, plus the epoch count for the extra final training run.
  BEGIN_EPOCH: 0
  END_EPOCH: 10
  EXTRA_FINAL_TRAIN_EPOCH: 40
  # Optimizer selection and hyperparameters
  # (WD is presumably weight decay — confirm with the consumer).
  OPTIMIZER: 'sgd'
  WD: 0.0
  MOMENTUM: 0.9
  NESTEROV: false
  SHUFFLE: true
  # Learning-rate schedule; WARMUP_EPOCH is expressed in epochs.
  LR_SCHEDULER:
    METHOD: 'WarmupCosine'
    WARMUP_EPOCH: 5