{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This file uses a Gaussian mixture to train a model with hard labels.\n",
    "The model was then used as an oracle target function in perfect teacher distillation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "sys.path.append(os.path.realpath('../..'))\n",
    "import toy.ops as ops\n",
    "import toy.data as data\n",
    "import toy.net as net\n",
    "import toy.train as train\n",
    "import toy.ground_truth as gt\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import torch\n",
    "import pandas as pd\n",
    "import re\n",
    "import toy.ground_truth as gt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate the train/test input datasets and the initial student network,\n",
    "# then save all three under generate_dir so the training cell can load\n",
    "# them by path.\n",
    "\n",
    "# Use the first CUDA device when one is available; otherwise fall back to\n",
    "# the CPU so this cell still runs on a machine without a GPU.\n",
    "# NOTE(review): train.train may itself require CUDA -- confirm before\n",
    "# relying on the CPU fallback for a full run.\n",
    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "train_data = data.Input_Dataset(\n",
    "    input_std=5.0,\n",
    "    input_dim=2,\n",
    "    device=device,\n",
    "    datanum=4096,\n",
    "    online=True\n",
    ")\n",
    "test_data = data.Input_Dataset(\n",
    "    input_std=5.0,\n",
    "    input_dim=2,\n",
    "    device=device\n",
    ")\n",
    "\n",
    "generate_dir = '../../experiment/KD_training/Teacher_Gaussian_Stendent_Real_NN'\n",
    "# exist_ok=True creates the directory only when it is missing, without the\n",
    "# check-then-create race of a separate os.path.exists() test.\n",
    "os.makedirs(generate_dir, exist_ok=True)\n",
    "torch.save(train_data, generate_dir + '/train_data')\n",
    "torch.save(test_data, generate_dir + '/test_data')\n",
    "\n",
    "# Initial student network, saved before any training happens.\n",
    "init_net = net.Net(hidden_layer_num=5, hidden_layer_dim=1024, input_dim=2)\n",
    "torch.save(init_net, generate_dir + '/init_net')\n",
    "# NOTE(review): linear_net is not referenced again in this notebook --\n",
    "# confirm whether it is still needed.\n",
    "linear_net = net.Linear_Net(init_net)\n",
    "# Sanity check: shape and norm of init_net.vec().\n",
    "print(init_net.vec().shape, init_net.vec().norm())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Teacher: Gaussian, Student: NN. Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [],
   "source": [
    "# Assemble the settings for the run first, then hand everything to\n",
    "# train.train(). Dataset and initial-network paths point at the files\n",
    "# saved under generate_dir; the target function is read from the\n",
    "# Gaussian_Function experiment directory.\n",
    "model_config = {'rho': 0.0, 'T': 1.0, 'teacher_reduction': 1.0}\n",
    "\n",
    "training_strategy = dict(\n",
    "    batch_size=4096,\n",
    "    lr=0.0005,\n",
    "    epoch=128 * 8,\n",
    "    test_interval=64,\n",
    "    display_interval=16,\n",
    "    save_interval=64,\n",
    "    record_interval=8,\n",
    "    test_datanum=32768,\n",
    ")\n",
    "\n",
    "# The keyword 'training_strategry' (sic) is spelled exactly as this call\n",
    "# has always passed it.\n",
    "train.train(\n",
    "    dir=generate_dir,\n",
    "    target_function_path='../../experiment/KD_training/Gaussian_Function/function',\n",
    "    init_net_path=f'{generate_dir}/init_net',\n",
    "    train_dataset_path=f'{generate_dir}/train_data',\n",
    "    test_data_path=f'{generate_dir}/test_data',\n",
    "    model_config=model_config,\n",
    "    training_strategry=training_strategy,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python38364bit5172cfd22f324156974f51e47e17b07a",
   "display_name": "Python 3.8.3 64-bit"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}