---
dataset:
    task:
        # `use: false` -- presumably no extra background class is added to
        # the label set; confirm against the dataset loader.
        background_class:
            use: false
        preprocess:
            # Image shape given as height x width x channels.
            shape:
                height: 224
                width: 224
                channels: 3
            # Preprocessing steps listed for validation.
            validate:
                # alternative validate preprocessing pipeline
                # - {type: resize, height: 256, width: 256, fill: true}
                # - {type: crop_or_pad, height: 224, width: 224}
                - type: central_crop
                  fraction: 0.875
            # Steps listed under `final_cpu`; the name suggests they run on
            # the CPU as the last stage -- confirm against the consumer.
            final_cpu:
                - type: normalize_channels
            # No steps are configured under `final_gpu`.
            final_gpu: null
model:
    name: alexnet_bn
    description: |
        This AlexNet implementation is based on::
            One weird trick for parallelizing convolutional neural networks
            (Alex Krizhevsky, 2014)

        We use the following reference model::
            https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py

        Unfortunately there are some discrepancies between this pytorch model
        and tensorflow/slim, namely `conv4` uses different output sizes (384
        vs. 256), and the paddings are different.
    layers:
        # Entries prefixed with `_` are templates: each defines an anchor
        # that is merged (`<<`) into the concrete layers below. Keys written
        # explicitly in a layer override the merged-in values.

        # Shared bias initialisation, merged into both `_conv` and `_fc`.
        _init: &init
            biases_initializer:
                type: tensorflow.constant_initializer
                value: 0.01

        # Template for convolution layers: batch-norm normaliser, explicit
        # padding of 2, variance-scaling weights and L2 regularisation.
        _conv: &conv
            <<: *init
            type: convolution
            normalizer_fn: tensorflow.contrib.slim.batch_norm
            normalizer_params:
                scale: true
                decay: 0.997
                epsilon: 0.00001
            padding: 2
            # weights_initializer would default to xavier; it is set
            # explicitly to variance scaling here.
            # NOTE(review): the `&initializer` anchor is not aliased anywhere
            # in the visible file.
            weights_initializer: &initializer
                type: tensorflow.variance_scaling_initializer
            weights_regularizer:
                type: tensorflow.contrib.layers.l2_regularizer
                scale: 0.0001

        # Template for fully connected layers.
        _fc: &fc
            <<: *init
            type: fully_connected
            weights_initializer:
                type: tensorflow.truncated_normal_initializer
                stddev: 0.09

        conv1:
            <<: *conv
            kernel_size: 11
            stride: 4
            num_outputs: 64
        # Max-pooling definition reused by pool2 and pool5.
        pool1: &pool
            type: max_pool
            kernel_size: 3
            stride: 2
            padding: valid
        conv2:
            <<: *conv
            kernel_size: 5
            stride: 1
            num_outputs: 192
        pool2: *pool
        # conv3 overrides the template padding (2) with 1.
        conv3: &conv3
            <<: *conv
            kernel_size: 3
            stride: 1
            padding: 1
            num_outputs: 384
        # conv4 is conv3 with 256 outputs; conv5 reuses conv4 unchanged.
        conv4: &conv4
            <<: *conv3
            num_outputs: 256
        conv5: *conv4
        pool5: *pool
        # Dropout definition reused by dropout6.
        dropout5: &dropout
            type: dropout
            keep_prob: 0.5
        flatten5:
            type: flatten
        # fc7 reuses the fc6 definition unchanged.
        fc6: &fc6
            <<: *fc
            num_outputs: 4096
        dropout6: *dropout
        fc7: *fc6
        # Output layer: sized from the dataset's class count, with no
        # activation function and zero-initialised biases (overriding the
        # constant bias initialiser merged in through `_fc`).
        logits:
            <<: *fc
            num_outputs: $(dataset.task.num_classes)
            activation_fn: null
            biases_initializer:
                type: tensorflow.zeros_initializer
    graph:
        # Connects `input` to `output` through the layers named in `with`;
        # presumably they are chained in the listed order -- confirm against
        # the graph builder.
        from: input
        with:
            # convolution / pooling stages
            - conv1
            - pool1
            - conv2
            - pool2
            - conv3
            - conv4
            - conv5
            - pool5
            # classifier stages
            - dropout5
            - flatten5
            - fc6
            - dropout6
            - fc7
            - logits
        to: output
