---
# Dataset-side settings: image shape and preprocessing steps.
dataset:
    task:
        background_class:
            use: false
        preprocess:
            # Target image shape: 224 x 224 with 3 channels.
            shape: {height: 224, width: 224, channels: 3}
            # Validation transforms: resize to 256 x 256, then crop or pad
            # to the 224 x 224 target shape.
            validate:
                - type: resize
                  height: 256
                  width: 256
                  fill: true
                - type: crop_or_pad
                  height: 224
                  width: 224
            # Linear value mapping with scale 2.0 and shift -1.0
            # (presumably x * scale + shift -- confirm against the consumer).
            final_cpu:
                - type: linear_map
                  scale: 2.0
                  shift: -1.0
# Network definition: named layer templates/instances plus the graph that
# wires them together.
model:
    name: nasnet
    # NOTE(review): the value below is a plain (unquoted) multi-line scalar
    # whose first line ends in ":". A YAML parser therefore reads it as a
    # one-entry mapping keyed "NasNet-Mobile references:" whose value is the
    # four URLs folded into one space-separated string -- not as free text.
    # If a plain string was intended, a block scalar (`description: |`) is
    # needed; confirm what the consumer expects before changing it.
    description:
        NasNet-Mobile references::
            https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py
            https://github.com/veronikayurchuk/pretrained-models.pytorch/blob/master/pretrainedmodels/models/nasnet_mobile.py
            https://github.com/johannesu/NASNet-keras/blob/master/nasnet.py
            https://github.com/keras-team/keras-applications/blob/master/keras_applications/nasnet.py
    # Entries whose names start with "_" carry YAML anchors and are reused
    # through merge keys (`<<: *name`) by the entries that follow them.
    layers:
        # Shared weight-initializer settings; merged into `_conv` and `fc`
        # via `<<: *init`.
        _init: &init
            weights_initializer: &initializer
                type: tensorflow.variance_scaling_initializer
        # Base convolution template. Inherits the initializer from `_init`
        # and attaches an L2 weight regularizer with scale 0.0001.
        _conv: &conv
            <<: *init
            type: convolution
            # NOTE(review): null presumably means the layer has no bias
            # term -- confirm against the layer implementation.
            biases_initializer: null
            # Anchored so other layers can reference the same regularizer.
            weights_regularizer: &regularizer
                type: tensorflow.contrib.layers.l2_regularizer
                scale: 0.0001
            # No fused activation or normalizer: in this file ReLU and batch
            # normalization appear as separate layers (see `_sep`, `_adjust`).
            activation_fn: null
            normalizer_fn: null
            padding: same
        # Batch-normalization template merged into every `norm*` layer in
        # this file; no activation is attached (activation_fn: null).
        _norm: &norm
            type: batch_normalization
            scale: true
            decay: 0.9997
            epsilon: 0.001
            activation_fn: null
        # Depthwise-separable convolution module: a depthwise convolution
        # followed by a 1x1 pointwise convolution.
        _dsconv: &dsconv
            type: module
            # Module parameters, referenced below as ^(name).
            kwargs:
                stride: 1
                num_outputs: null
                kernel_size: 3
            layers:
                # Square kernel of side ^(kernel_size) applied with ^(stride).
                depthwise:
                    <<: *conv
                    type: depthwise_convolution
                    stride: ^(stride)
                    kernel_size:
                        - ^(kernel_size)
                        - ^(kernel_size)
                # 1x1, stride-1 convolution producing ^(num_outputs).
                pointwise:
                    <<: *conv
                    kernel_size: [1, 1]
                    stride: 1
                    num_outputs: ^(num_outputs)
                    weights_regularizer: *regularizer
            graph:
                from: input
                with:
                    - depthwise
                    - pointwise
                to: output
        # Separable-convolution stack: (ReLU -> `_dsconv` -> batch norm)
        # applied twice. Only the first `_dsconv` receives ^(stride); the
        # second always uses stride 1.
        _sep: &sep
            type: module
            kwargs:
                num_outputs: null
                kernel_size: null
                stride: 1
            layers:
                # Anchored as `activator`; merged into other ReLU layers.
                relu1: &activator
                    type: activation
                    mode: relu
                sep1:
                    <<: *dsconv
                    num_outputs: ^(num_outputs)
                    kernel_size: ^(kernel_size)
                    stride: ^(stride)
                norm1:
                    <<: *norm
                relu2:
                    <<: *activator
                sep2:
                    <<: *dsconv
                    num_outputs: ^(num_outputs)
                    kernel_size: ^(kernel_size)
                    stride: 1
                norm2:
                    <<: *norm
            graph:
                from: input
                with:
                    - relu1
                    - sep1
                    - norm1
                    - relu2
                    - sep2
                    - norm2
                to: output
        # Input-adjustment module used by both cell types. After a ReLU it
        # builds two candidate tensors and batch-normalizes the one named by
        # `mode` into the output:
        #   project -- a single 1x1 convolution to ^(num_outputs);
        #   reduce  -- two stride-2 paths of ^(num_outputs) // 2 each,
        #              concatenated on axis 3.
        # The module declares exactly one `graph` key. YAML mapping keys
        # must be unique, and the simple relu -> conv -> norm chain is the
        # same as the `project` path of the graph below.
        _adjust: &adjust
            type: module
            kwargs: {mode: null, num_outputs: null}
            layers:
                relu: {<<: *activator}
                # 1x1 window with stride 2 (first reduce path).
                pool1: &adjust_pool
                    type: average_pool
                    kernel_size: 1
                    stride: 2
                    padding: valid
                conv1: &adjust_conv
                    <<: *conv
                    kernel_size: 1
                    padding: same
                    num_outputs: !arith ^(num_outputs) // 2
                # pad + crop precede the second stride-2 pool; presumably
                # they shift the tensor by one pixel along both spatial axes
                # (confirm the [before, after] convention of these layers).
                pad:
                    type: pad
                    padding: [[0, 1], [0, 1]]
                crop:
                    type: crop
                    cropping: [[1, 0], [1, 0]]
                pool2: {<<: *adjust_pool}
                conv2:
                    <<: *conv
                    kernel_size: 1
                    padding: same
                    num_outputs: !arith ^(num_outputs) // 2
                # Same 1x1 convolution, but with the full ^(num_outputs).
                conv: {<<: *adjust_conv, num_outputs: ^(num_outputs)}
                concat: &concat {type: concat, axis: 3}
                norm: {<<: *norm}
            graph:
                - {from: input, with: relu, to: act}
                # reduce
                - {from: act, with: [pool1, conv1], to: post1}
                - {from: act, with: [pad, crop, pool2, conv2], to: post2}
                - {from: [post1, post2], with: concat, to: reduce}
                # project
                - {from: act, with: conv, to: project}
                # final: ^(mode) resolves to the tensor name `reduce` or
                # `project` produced above.
                - {from: ^(mode), with: norm, to: output}
        # Normal cell. Takes two inputs (`x`, `p`), forms five branches that
        # each add two operands, and concatenates the adjusted `p` with the
        # five branch sums. All pooling/separable layers here use stride 1.
        _normal_cell: &ncell
            type: module
            kwargs:
                num_outputs: null
                mode: project
            inputs:
                - x
                - p
            layers:
                # Applied to `x`; always uses the `project` path of `_adjust`.
                conv:
                    <<: *adjust
                    num_outputs: ^(num_outputs)
                    mode: project
                # Applied to `p`; the path is chosen by the instance's `mode`.
                adjust:
                    <<: *adjust
                    num_outputs: ^(num_outputs)
                    mode: ^(mode)
                b1a:
                    <<: *sep
                    kernel_size: 5
                    num_outputs: ^(num_outputs)
                b1b:
                    <<: *sep
                    kernel_size: 3
                    num_outputs: ^(num_outputs)
                b2a:
                    <<: *sep
                    kernel_size: 5
                    num_outputs: ^(num_outputs)
                b2b:
                    <<: *sep
                    kernel_size: 3
                    num_outputs: ^(num_outputs)
                # 3x3, stride-1 average pool; reused by b4a and b4b.
                b3a: &npool
                    type: average_pool
                    kernel_size: 3
                    stride: 1
                    padding: same
                b4a:
                    <<: *npool
                b4b:
                    <<: *npool
                b5a:
                    <<: *sep
                    kernel_size: 3
                    num_outputs: ^(num_outputs)
                add_b1:
                    type: add
                add_b2:
                    type: add
                add_b3:
                    type: add
                add_b4:
                    type: add
                add_b5:
                    type: add
                concat:
                    <<: *concat
            graph:
                # Prepare both inputs: p -> p1, x -> h.
                - {from: p, with: adjust, to: p1}
                - {from: x, with: conv, to: h}
                # Branch 1: sep 5x5 on h + sep 3x3 on p1.
                - {from: h, with: b1a, to: b1a}
                - {from: p1, with: b1b, to: b1b}
                - {from: [b1a, b1b], with: add_b1, to: b1}
                # Branch 2: sep 5x5 on p1 + sep 3x3 on p1.
                - {from: p1, with: b2a, to: b2a}
                - {from: p1, with: b2b, to: b2b}
                - {from: [b2a, b2b], with: add_b2, to: b2}
                # Branch 3: average pool of h + p1 itself.
                - {from: h, with: b3a, to: b3a}
                - {from: [b3a, p1], with: add_b3, to: b3}
                # Branch 4: two average pools of p1.
                - {from: p1, with: b4a, to: b4a}
                - {from: p1, with: b4b, to: b4b}
                - {from: [b4a, b4b], with: add_b4, to: b4}
                # Branch 5: sep 3x3 on h + h itself.
                - {from: h, with: b5a, to: b5a}
                - {from: [b5a, h], with: add_b5, to: b5}
                # Output: p1 together with all five branch sums.
                - {from: [p1, b1, b2, b3, b4, b5], with: concat, to: output}
        # Reduction cell. Takes two inputs (`x`, `p`); most of its branch
        # operations use stride 2. With `skip: true` the separable branches
        # that would read the adjusted `p` read the raw `x` instead.
        _reduction_cell: &rcell
            type: module
            inputs:
                - x
                - p
            kwargs:
                num_outputs: null
                mode: project
                skip: false
            layers:
                # Applied to `x`; always uses the `project` path of `_adjust`.
                conv:
                    <<: *adjust
                    num_outputs: ^(num_outputs)
                    mode: project
                # Applied to `p`; the path is chosen by the instance's `mode`.
                adjust:
                    <<: *adjust
                    num_outputs: ^(num_outputs)
                    mode: ^(mode)
                # Stride-2 variant of `_sep`, merged into the b*a/b*b layers.
                _rsep: &rsep
                    <<: *sep
                    stride: 2
                    num_outputs: ^(num_outputs)
                b1a:
                    <<: *rsep
                    kernel_size: 5
                b1b:
                    <<: *rsep
                    kernel_size: 7
                # 3x3, stride-2 max pool; base for the other pooling layers.
                b2a: &rpool
                    type: max_pool
                    kernel_size: 3
                    stride: 2
                    padding: same
                b2b:
                    <<: *rsep
                    kernel_size: 7
                b3a:
                    <<: *rpool
                    type: average_pool
                b3b:
                    <<: *rsep
                    kernel_size: 5
                # Stride 1: this pool reads b1, which is already reduced.
                b4a:
                    <<: *rpool
                    type: average_pool
                    stride: 1
                # Plain `_sep` with its default stride of 1; also reads b1.
                b5a:
                    <<: *sep
                    kernel_size: 3
                    num_outputs: ^(num_outputs)
                b5b:
                    <<: *rpool
                add_b1:
                    type: add
                add_b2:
                    type: add
                add_b3:
                    type: add
                add_b4:
                    type: add
                add_b5:
                    type: add
                concat:
                    <<: *concat
            graph:
                # Prepare both inputs: p -> p1, x -> h.
                - {from: p, with: adjust, to: p1}
                - {from: x, with: conv, to: h}
                # p2 is `x` when skip is set, otherwise `p1`. This edge has
                # no `with` layer.
                - {from: !arith "'x' if ^(skip) else 'p1'", to: p2}
                # Branch 1: sep 5x5 on h + sep 7x7 on p2.
                - {from: h, with: b1a, to: b1a}
                - {from: p2, with: b1b, to: b1b}
                - {from: [b1a, b1b], with: add_b1, to: b1}
                # Branch 2: max pool of h + sep 7x7 on p2.
                - {from: h, with: b2a, to: b2a}
                - {from: p2, with: b2b, to: b2b}
                - {from: [b2a, b2b], with: add_b2, to: b2}
                # Branch 3: average pool of h + sep 5x5 on p2.
                - {from: h, with: b3a, to: b3a}
                - {from: p2, with: b3b, to: b3b}
                - {from: [b3a, b3b], with: add_b3, to: b3}
                # Branch 4: b2 + average pool of b1.
                - {from: b1, with: b4a, to: b4a}
                - {from: [b2, b4a], with: add_b4, to: b4}
                # Branch 5: sep 3x3 on b1 + max pool of h.
                - {from: b1, with: b5a, to: b5a}
                - {from: h, with: b5b, to: b5b}
                - {from: [b5a, b5b], with: add_b5, to: b5}
                # Output: b1 is consumed internally and not concatenated.
                - {from: [b2, b3, b4, b5], with: concat, to: output}
        # First layers of the network: a 3x3, stride-2, valid-padded
        # convolution with 32 outputs, followed by batch normalization.
        conv1:
            <<: *conv
            kernel_size: 3
            stride: 2
            padding: valid
            num_outputs: 32
        norm1:
            <<: *norm
        # Two reduction cells at the start; s1 sets `skip: true`.
        s1:
            <<: *rcell
            num_outputs: 11
            skip: true
            mode: project
        s2:
            <<: *rcell
            num_outputs: 22
            mode: reduce
        # Stage with 44 outputs per cell operation. The first normal cell
        # of each stage uses `mode: reduce` for its `p` input.
        n0:
            <<: *ncell
            num_outputs: 44
            mode: reduce
        n1:
            <<: *ncell
            num_outputs: 44
        n2:
            <<: *ncell
            num_outputs: 44
        n3:
            <<: *ncell
            num_outputs: 44
        # Stage with 88 outputs, opened by a reduction cell.
        r4:
            <<: *rcell
            num_outputs: 88
        n5:
            <<: *ncell
            num_outputs: 88
            mode: reduce
        n6:
            <<: *ncell
            num_outputs: 88
        n7:
            <<: *ncell
            num_outputs: 88
        n8:
            <<: *ncell
            num_outputs: 88
        # Stage with 176 outputs, opened by a reduction cell.
        r8:
            <<: *rcell
            num_outputs: 176
        n9:
            <<: *ncell
            num_outputs: 176
            mode: reduce
        n10:
            <<: *ncell
            num_outputs: 176
        n11:
            <<: *ncell
            num_outputs: 176
        n12:
            <<: *ncell
            num_outputs: 176
        # Classifier head layers; the model graph applies them in the order
        # relu, avgpool, squeeze, fc.
        relu:
            <<: *activator
        avgpool:
            type: average_pool
            kernel_size: global
        # dropout: {type: dropout, keep_prob: 0.5}
        # One output per class, taken from the dataset configuration.
        fc:
            <<: *init
            type: fully_connected
            num_outputs: $(dataset.task.num_classes)
            activation_fn: null
        # Squeezes axes 1 and 2.
        squeeze:
            type: squeeze
            axis: [1, 2]
    # Top-level wiring. Every cell receives two tensors, matching its
    # `inputs: [x, p]`: the first listed is `x`, the second is `p`.
    # Each edge stores its result under the name of the layer that produced
    # it, so later edges can refer to it by that name. The first four
    # normal cells therefore write to n0..n3, the names the following
    # edges read.
    graph:
        - {from: input, with: [conv1, norm1], to: c1}
        # Initial reduction cells; s1 receives c1 for both inputs.
        - {from: [c1, c1], with: s1, to: s1}
        - {from: [s1, c1], with: s2, to: s2}
        # 44-output stage.
        - {from: [s2, s1], with: n0, to: n0}
        - {from: [n0, s2], with: n1, to: n1}
        - {from: [n1, n0], with: n2, to: n2}
        - {from: [n2, n1], with: n3, to: n3}
        # 88-output stage.
        - {from: [n3, n2], with: r4, to: r4}
        - {from: [r4, n3], with: n5, to: n5}
        - {from: [n5, r4], with: n6, to: n6}
        - {from: [n6, n5], with: n7, to: n7}
        - {from: [n7, n6], with: n8, to: n8}
        # 176-output stage.
        - {from: [n8, n7], with: r8, to: r8}
        - {from: [r8, n8], with: n9, to: n9}
        - {from: [n9, r8], with: n10, to: n10}
        - {from: [n10, n9], with: n11, to: n11}
        - {from: [n11, n10], with: n12, to: n12}
        # Classifier head.
        - from: n12
          with: [relu, avgpool, squeeze, fc]
          to: output
