# network architecture
# encoder related
elayers: 8
eunits: 2048
# NOTE(review): the next three keys use underscores while the other
# multi-word keys in this config use hyphens; confirm they match the option
# names declared by the module referenced in model-module.
elayers_cond: 1
eunits_cond: 1024
elayers_rec: 8
# decoder related
dlayers: 6
dunits: 2048
# attention related
adim: 256
aheads: 4

# hybrid CTC/attention
# NOTE(review): presumably the CTC/attention interpolation weight, in which
# case 1.0 would put all of the weight on the CTC loss; confirm this is
# intended against the model module.
mtlalpha: 1.0

# label smoothing
lsm-weight: 0.1

# minibatch related
# NOTE(review): the units of the two length thresholds below (frames,
# tokens, ...) are not stated in this file; confirm against the trainer.
batch-size: 12
maxlen-in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
# sortagrad: feed samples from shortest to longest.
#   -1: enabled for all epochs
#    0: disabled
#   other: enabled for 'other' epochs
sortagrad: 0
opt: noam
accum-grad: 2
grad-clip: 5
patience: 6
epochs: 100
dropout-rate: 0.1

# transformer specific setting
backend: pytorch
# model-module is written as "<python.module.path>:<ClassName>".
model-module: "espnet.nets.pytorch_backend.e2e_asr_mix_transformer_conditional:E2E"
transformer-input-layer: conv2d     # encoder architecture type
# NOTE(review): transformer-lr and transformer-warmup-steps are presumably
# consumed by the noam schedule selected via `opt`; confirm.
transformer-lr: 1.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch

# Report CER & WER
# Both reports are switched off in this configuration.
report-cer: false
report-wer: false

# NOTE(review): the options below have no section heading in the original;
# they appear to be specific to the module named in model-module. Confirm
# their meaning there before changing them.
sampling-probability: 0.1
use-stop-sign-ctc: false
use-stop-sign-bce: false
