# We used this schedule to train CIFAR10-VGG16 from scratch.
#
# time python3 compress_classifier.py --arch vgg16_cifar  ../../../data.cifar10 -p=50 --lr=0.05 --epochs=180 --compress=../baseline_networks/cifar/vgg16_cifar_baseline_training.yaml -j=1 --deterministic
#
# Parameters:
# +----------+---------------------------+------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
# |          | Name                      | Shape            |   NNZ (dense) |   NNZ (sparse) |   Cols (%) |   Rows (%) |   Ch (%) |   2D (%) |   3D (%) |   Fine (%) |     Std |     Mean |   Abs-Mean |
# |----------+---------------------------+------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------|
# |  0.00000 | features.module.0.weight  | (64, 3, 3, 3)    |          1728 |           1728 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.25026 | -0.00189 |    0.18302 |
# |  1.00000 | features.module.2.weight  | (64, 64, 3, 3)   |         36864 |          36864 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.07487 | -0.01098 |    0.05490 |
# |  2.00000 | features.module.5.weight  | (128, 64, 3, 3)  |         73728 |          73728 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06412 | -0.00765 |    0.04841 |
# |  3.00000 | features.module.7.weight  | (128, 128, 3, 3) |        147456 |         147456 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.05229 | -0.00949 |    0.04124 |
# |  4.00000 | features.module.10.weight | (256, 128, 3, 3) |        294912 |         294912 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.04503 | -0.00606 |    0.03530 |
# |  5.00000 | features.module.12.weight | (256, 256, 3, 3) |        589824 |         589824 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03495 | -0.00444 |    0.02725 |
# |  6.00000 | features.module.14.weight | (256, 256, 3, 3) |        589824 |         589824 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.03285 | -0.00550 |    0.02571 |
# |  7.00000 | features.module.17.weight | (512, 256, 3, 3) |       1179648 |        1179648 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.02082 | -0.00241 |    0.01615 |
# |  8.00000 | features.module.19.weight | (512, 512, 3, 3) |       2359296 |        2359296 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01364 | -0.00101 |    0.01070 |
# |  9.00000 | features.module.21.weight | (512, 512, 3, 3) |       2359296 |        2359296 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01194 | -0.00085 |    0.00941 |
# | 10.00000 | features.module.24.weight | (512, 512, 3, 3) |       2359296 |        2359296 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01060 | -0.00013 |    0.00842 |
# | 11.00000 | features.module.26.weight | (512, 512, 3, 3) |       2359296 |        2359296 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01031 | -0.00002 |    0.00821 |
# | 12.00000 | features.module.28.weight | (512, 512, 3, 3) |       2359296 |        2359296 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.01036 | -0.00019 |    0.00823 |
# | 13.00000 | classifier.weight         | (10, 512)        |          5120 |           5120 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.06897 | -0.00003 |    0.04909 |
# | 14.00000 | Total sparsity:           | -                |      14715584 |       14715584 |    0.00000 |    0.00000 |  0.00000 |  0.00000 |  0.00000 |    0.00000 | 0.00000 |  0.00000 |    0.00000 |
# +----------+---------------------------+------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
# Total sparsity: 0.00
#
# --- validate (epoch=179)-----------
# 5000 samples (256 per mini-batch)
# ==> Top1: 90.160    Top5: 99.260    Loss: 0.633
#
# Saving checkpoint to: logs/2018.07.13-234036/checkpoint.pth.tar
# --- test ---------------------
# 10000 samples (256 per mini-batch)
# ==> Top1: 90.930    Top5: 99.470    Loss: 0.669
#
#
# Log file for this run: /home/cvds_lab/nzmora/pytorch_workspace/distiller/examples/classifier_compression/logs/2018.07.13-234036/2018.07.13-234036.log
#
# real    49m11.296s
# user    82m20.495s
# sys     15m36.971s

# Learning-rate schedulers, keyed by instance name. Policies refer to an
# entry here through `instance_name`.
lr_schedulers:
  training_lr:
    # Presumably resolved to torch.optim.lr_scheduler.StepLR, which scales the
    # learning rate by `gamma` once every `step_size` epochs -- confirm
    # against the scheduler factory that consumes this file.
    class: StepLR
    gamma: 0.1
    step_size: 45

# Policies bind a scheduler instance to the epoch window in which it runs.
policies:
  # Drive the `training_lr` scheduler declared under `lr_schedulers`.
  - lr_scheduler:
      instance_name: training_lr
    # Begin on a multiple of the scheduler's step_size (45). The previous
    # value, 35, did not line up with the 45-epoch decay period.
    # NOTE(review): confirm against the run that produced the results quoted
    # in the file header.
    starting_epoch: 45
    # The command in the file header trains for 180 epochs, so this bound lies
    # past the end of that run.
    ending_epoch: 200
    # Invoke the policy on every epoch inside the window.
    frequency: 1
