{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "yAq6aHVh5oCH"
   },
   "source": [
    "Licensed under the Apache License, Version 2.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "Ynmlap5aip_f"
   },
   "source": [
    "## Overview\n",
    "\n",
    "### Generate synthetic datasets for various settings by sampling the original UCI_Adult dataset:\n",
    "\n",
    "Pre-requisite: Download the Adult train and test data files from https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data and https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test, and save them in the ./fairness_without_demographics/data/uci_adult folder.\n",
    "\n",
    "This notebook contains code to create synthetic datasets from the original uci_adult dataset. We provide code to control parameters such as \"base-rate\", \"group-size\", and \"label-noise\", and to generate synthetic datasets by over-sampling / under-sampling the original uci_adult training dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "oQXx_KOalSTH"
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import json\n",
    "import collections\n",
    "import os\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "d6xEXeTP4ztd"
   },
   "outputs": [],
   "source": [
    "sns.set_context('paper',font_scale=1.5)\n",
    "dataset_base_dir = './fairness_without_demographics/data/uci_adult/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "foh3NoCAx3Kn"
   },
   "outputs": [],
   "source": [
    "def sample_data(data_df, num=None, restrictions=None):\n",
    "  \"\"\"A recursive function that samples data according to the restrictions.\n",
    "\n",
    "  Sampling is done with replacement.\n",
    "\n",
    "  The restrictions determine how the data should be sampled.\n",
    "\n",
    "  For example, \n",
    "  column_types = ['sex:Female','income:>50K']\n",
    "  percentages =  [[0.33,0.6]]\n",
    "  Returns an object of type Restriction, which can be used to sample a dataset \n",
    "  in which fraction of females is 0.33, and female-base-rate (females with income >50k) is 0.66. \n",
    "  Data for non-female group is sampled uniformly at random, \n",
    "  hence will have similar base-rate as in the original distribution.\n",
    "\n",
    "  Args:\n",
    "    data_df: A pandas DataFrame containing the dataset that should be sampled.\n",
    "    num: The total number of samples needed.\n",
    "    restrictions: A list of restrictions. Each restriction contains a column\n",
    "      name, a list of types (a value for a bucket), and a list of percentages.\n",
    "  Returns:\n",
    "    A pandas DataFrame of size 'num' that holds up to each of the restrictions.\n",
    "  \"\"\"\n",
    "  if not num:\n",
    "    num = data_df.shape[0]\n",
    "  if not restrictions:\n",
    "    return data_df.sample(n=num,replace=True)\n",
    "  num_other = num\n",
    "  other_subset = np.full(data_df.shape[0], True)\n",
    "  subsamples = []\n",
    "  for tp, percent in zip(restrictions[0].types, restrictions[0].percentages):\n",
    "    subset = data_df[restrictions[0].column_name] == tp\n",
    "    num_samples = (int)(num * percent)\n",
    "    subsamples.append(\n",
    "        sample_data(data_df[subset], num_samples, restrictions[1:]))\n",
    "    num_other -= num_samples\n",
    "    other_subset = other_subset & (-subset)\n",
    "  if num_other > 0:\n",
    "    subsamples.append(\n",
    "        sample_data(data_df[other_subset], num_other, restrictions=None))\n",
    "  \n",
    "  # Concat and shuffle\n",
    "  sample_df = pd.concat(subsamples, ignore_index=True).sample(frac=1.0)\n",
    "  return sample_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "K4BIuy9Wvo_y"
   },
   "outputs": [],
   "source": [
    "def sample_data_and_flip_class_label(data_df, frac, flip_dict):\n",
    "  \"\"\" Samples specified fraction of samples and flips their class label.\n",
    "    flip_dict: A dictionary of the form {current_value:new_value}.\n",
    "      Example {'<=50K':'>50K','>50K':'<=50K'}\n",
    "  Returns:\n",
    "    A pandas DataFrame with specified fraction of column values flipped\n",
    "  \"\"\"\n",
    "  flip_df = data_df.sample(frac=frac)\n",
    "  flip_df.replace(to_replace=flip_dict,inplace=True)\n",
    "  keep_df = data_df.drop(flip_df.index).sample(frac=1.0)\n",
    "  # Concat and shuffle\n",
    "  sample_df = pd.concat([keep_df,flip_df]).sample(frac=1.0)\n",
    "  return sample_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "yEYdy2adyWrt"
   },
   "outputs": [],
   "source": [
    "Restriction = collections.namedtuple(\n",
    "    'Restriction', ['column_name', 'types', 'percentages'])\n",
    "\n",
    "def create_restrictions(column_types, percentages):\n",
    "  \"\"\"Creates a list of restrictions.\n",
    "\n",
    "  For example, \n",
    "  column_types = ['sex:Female','income:>50K']\n",
    "  percentages =  [[0.33,0.6]]\n",
    "  Returns an object of type Restriction, which can be used to sample a dataset \n",
    "  in which fraction of females is 0.33, and female-base-rate (females with income >50k) is 0.66. \n",
    "  Data for non-female group is sampled uniformly at random, hence will have similar base-rate as in the original distribution.\n",
    "\n",
    "  Args:\n",
    "    column_types: A list of column type strings. They must be of the form\n",
    "      '(column name):(type name)'.\n",
    "    percentages: A list of percentages for each column type.\n",
    "  Returns:\n",
    "    A list of restrictions\n",
    "  \"\"\"\n",
    "  # Parse through the lists and create columns. We can have multiple types for\n",
    "  # one column name.\n",
    "  restrictions_dict = collections.defaultdict(list)\n",
    "  percentages_dict = collections.defaultdict(list)\n",
    "  if column_types and percentages:\n",
    "    for column_type, percentage in zip(column_types, percentages):\n",
    "      column, tp = column_type.split(':')\n",
    "      restrictions_dict[column].append(tp)\n",
    "      percentages_dict[column].append(percentage)\n",
    "  # Now create the restrictions.\n",
    "  restrictions = []\n",
    "  for k in restrictions_dict:\n",
    "    restrictions.append(\n",
    "        Restriction(k, restrictions_dict[k], percentages_dict[k]))\n",
    "  return restrictions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "AqX6EQ96mBAo"
   },
   "source": [
    "## Load Training Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "oA1tezgRlVXT"
   },
   "outputs": [],
   "source": [
    "TRAIN_FILE = os.path.join(dataset_base_dir,'adult.data')\n",
    "feature_names = [\n",
    "    \"age\", \"workclass\", \"fnlwgt\", \"education\", \"education-num\",\n",
    "    \"marital-status\", \"occupation\", \"relationship\", \"race\", \"sex\",\n",
    "    \"capital-gain\", \"capital-loss\", \"hours-per-week\", \"native-country\", \"income\"\n",
    "]\n",
    "\n",
    "# The file has no header row, so the column names are supplied explicitly.\n",
    "# The handle gets its own name so that TRAIN_FILE keeps holding the path\n",
    "# instead of being rebound to a file object that is closed after this block.\n",
    "with open(TRAIN_FILE, \"r\") as train_file:\n",
    "  train_df = pd.read_csv(train_file,sep=',',names=feature_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 338,
     "status": "ok",
     "timestamp": 1579569038496,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "-jGuNakm1hnU",
    "outputId": "208b8638-2ed1-45a1-f0c1-d78d290800de"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "      <th>income</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>77516</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>83311</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38</td>\n",
       "      <td>Private</td>\n",
       "      <td>215646</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53</td>\n",
       "      <td>Private</td>\n",
       "      <td>234721</td>\n",
       "      <td>11th</td>\n",
       "      <td>7</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28</td>\n",
       "      <td>Private</td>\n",
       "      <td>338409</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>Cuba</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age         workclass  fnlwgt  ... hours-per-week  native-country income\n",
       "0   39         State-gov   77516  ...             40   United-States  <=50K\n",
       "1   50  Self-emp-not-inc   83311  ...             13   United-States  <=50K\n",
       "2   38           Private  215646  ...             40   United-States  <=50K\n",
       "3   53           Private  234721  ...             40   United-States  <=50K\n",
       "4   28           Private  338409  ...             40            Cuba  <=50K\n",
       "\n",
       "[5 rows x 15 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {
      "tags": []
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "pUlJmY8fqkam"
   },
   "source": [
    "## Sample data to have a specified group-size: Testing if the data is sampled as expected:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "S_FEpomKqka2"
   },
   "source": [
    "#### Original data distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 85
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 367,
     "status": "ok",
     "timestamp": 1579569910907,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "FZ7DoqAfqkas",
    "outputId": "2d9e1d69-5a55-4b3f-b482-6fde2bbd455d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Female group-size in original train-data: 0.33079450876815825\n",
      "Male group-size  in original data: 0.6692054912318418\n",
      "Female base-rate in original train-data: 0.10946058861758426\n",
      "Male base-rate in original data: 0.3057365764111978\n"
     ]
    }
   ],
   "source": [
    "# Split once by group; base-rate is the share of '>50K' rows within a group.\n",
    "female_rows = train_df[train_df.sex=='Female']\n",
    "male_rows = train_df[train_df.sex!='Female']\n",
    "print('Female group-size in original train-data: {}'.format(len(female_rows)/len(train_df)))\n",
    "print('Male group-size  in original data: {}'.format(len(male_rows)/len(train_df)))\n",
    "print('Female base-rate in original train-data: {}'.format(len(female_rows[female_rows.income == '>50K'])/len(female_rows)))\n",
    "print('Male base-rate in original data: {}'.format(len(male_rows[male_rows.income == '>50K'])/len(male_rows)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 218
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 583,
     "status": "ok",
     "timestamp": 1579569912460,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "7oSLYrlWqka3",
    "outputId": "7d86eef6-3d35-410e-b7cb-3a6ab0ebf56d"
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAASQAAADJCAYAAACKVE8EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAH+lJREFUeJzt3XtUVFX/P/D3gNxvCjKgqFwNHjBR\nMFBRbiMSQkup9FFTREwuTngB74w1UN7SxzS8ZYlQovHQg8mDUV5AtKyBIC9oy8sT5F1Q1BagBMz+\n/eGP83UEdFCGOTif11pnxZy9Z8/es/K99jlzzj4CxhgDIYTwgJa6O0AIIS0okAghvEGBRAjhDQok\nQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbxBgdQBAoFA3V0g5KVGgUQI4Q0KJEII\nb1AgEUJ4o0sDKScnByKRCGZmZq3Oxxw9ehQCgUBhGzJkiEKd2tpaREZGwtTUFBYWFkhMTERzc7NC\nnV27dsHe3h4GBgYICAjApUuXFMovXLgAf39/GBgYwN7eHunp6SoZKyGk47o0kOrr6xEYGIilS5e2\nW+fGjRvcduTIEYUysViMkpISHD58GNnZ2di7dy9WrlzJlRcUFCAmJgYSiQQlJSUQCoUIDQ1FU1MT\nAKCxsRGhoaGwsrJCSUkJJBIJoqOjUVRUpJoBE0I6hqlBYWEhe/Kj29r3uJqaGqatrc0KCgq4fTt3\n7mRCoZA1NzczxhgLDw9nERERXHltbS0zMDBg//3vfxljjO3fv58ZGBiw2tpars706dPZW2+9pVS/\n1fR1EaIxeqg5D1uxs7ODXC7HyJEj8fHHH2PAgAEAgNLSUggEAvj6+nJ1RSIRqqqqUFFRAUdHRxQX\nFyvMmIyMjODt7Q2ZTIawsDAUFxfD29sbRkZGCm1IJJIX6rPnoi9f6P3PUrouQqXtE8IXvDmp3adP\nH+zcuRP79+9HRkYGqqqqEBAQgAcPHgAAqqqqYG5uDm1tbe49lpaWXFnLf4VCoUK7lpaWSpc/TiqV\ntjqnRQhRLd4EkrOzM6KiouDu7o6AgADs378fNTU1yMvLAwCwNpb+7mhItNVGe6RSKRhjChshRLV4\nE0hPMjExgZOTEyorKwEAVlZWqKmpUfhVrWVm0zLrEQqFrWY71dXVXLmVldVTywkh6sXbQHrw4AH+\n+OMP2NraAgA8PDzAGMPx48e5OgUFBRAKhbC3twcAeHl5obCwkCuvr6+HTCaDt7c3Vy6TyVBfX6/Q\nRks5IUS9uvSkdk1NDS5fvsxdG3Ty5EkAgKurK9LT02FtbQ03Nzfcu3cPycnJMDY2xrhx4wAA5ubm\nmDp1KuLj47Fz507U1dVBIpFALBZDS+tRrorFYoSEhMDX1xfe3t748MMP0b9/fwQHBwMAXn/9dfTt\n2xezZs2CRCKBTCbD3r17cejQoa78Gggh7ejSQMrNzcXMmTO510OHDgUAVFRUoLGxEQkJCbh69SrM\nzMzg4+ODI0eOwNjYmKu/detWiMViiEQi6OjoIDIyEklJSVy5SCTC9u3bkZKSgps3b2L48OHIy8uD\njo4OAEBXVxcHDhxATEwMPD09YW1tjR07dsDPz6+LvgFCyNMIGJ2tVZpAIGjz5Db97E9I5+DtOSRC\niOahQCKE8AYFEiGENyiQCCG8QYFECOENCiRCCG9QIBFCeIMCiRDCGxRIhBDeoEAihPAGBRIhhDco\nkAghvEGBRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbxBgUQI4Q0KJEII\nb1AgEUJ4gwKJEMIbFEiEEN6gQCKE8AYFEiGENyiQCCG8QYFECOENCiRCCG9QIBFCeIMCiRDCGxRI\nhBDeoEAihPBGlwZSTk4ORCIRzMzMIB
AIWpXLZDIMGzYM+vr6cHV1RX5+vkJ5bW0tIiMjYWpqCgsL\nCyQmJqK5uVmhzq5du2Bvbw8DAwMEBATg0qVLCuUXLlyAv78/DAwMYG9vj/T09E4fJyHk+XRpINXX\n1yMwMBBLly5tVXbnzh2EhITAx8cHZWVlmD59OsLDw3Hx4kWujlgsRklJCQ4fPozs7Gzs3bsXK1eu\n5MoLCgoQExMDiUSCkpISCIVChIaGoqmpCQDQ2NiI0NBQWFlZoaSkBBKJBNHR0SgqKlL94AkhzyRg\njLGu/tCjR48iICAAj3/0p59+ig0bNqCiooKbPfn6+sLLywvr16/H3bt3YWlpiUOHDiEgIAAAkJaW\nhmXLluHGjRvQ0tLCm2++CRMTE2RkZAAA6urqYGlpiX//+98ICwtDbm4uJk+ejOrqahgZGQEAIiIi\nUF9fj2+++eaZ/RYIBGjr6/Jc9OULfydPU7ouQqXtE8IXvDmHVFxcjICAAIVDOZFIBJlMBgAoLS2F\nQCCAr6+vQnlVVRUqKiq4NgIDA7lyIyMjeHt7c20UFxfD29ubC6MnP4MQol5KB9Lly5fbnB0wxnD5\n8uUX7khVVRWEQqHCPktLS1RVVXHl5ubm0NbWVihvKVO2jaeVP04qlUIgEChshBDVUjqQ7O3tUV1d\n3Wp/TU0N7O3tX7gjzzpybKu8oyHRkaNTqVQKxpjCRghRLaUDqb1/kPX19dDT03vhjlhZWbWaqVRX\nV3MzGisrK9TU1Cj8qtZSv6WOUCh8ZhtPKyeEqFePZ1VISUkB8Gg2sn79ehgbG3Nlzc3N+Omnn+Dm\n5vbCHfHy8sInn3yisK+goADe3t4AAA8PDzDGcPz4cfj7+3PlQqGQm6F5eXmhsLAQM2bMAPAoLGUy\nGRITE7nyDRs2oL6+HoaGhq0+g68up7yq0vYHvH9Gpe0ToqxnBtJXX30F4NEM6ZtvvlE4h6Orqwt7\ne3usXbtWqQ+rqanB5cuXuWuDTp48CQBwdXXFO++8A6lUivnz5yMmJga5ubmQyWT44osvAADm5uaY\nOnUq4uPjsXPnTtTV1UEikUAsFkNL69FETywWIyQkBL6+vvD29saHH36I/v37Izg4GADw+uuvo2/f\nvpg1axYkEglkMhn27t2LQ4cOKft9EUJU6JmB1HIdUEBAAHJyctCrV6/n/rDc3FzMnDmTez106FAA\nQEVFBezs7PDdd99BLBZj27ZtcHBwQE5ODgYOHMjV37p1K8RiMUQiEXR0dBAZGYmkpCSuXCQSYfv2\n7UhJScHNmzcxfPhw5OXlQUdHB8CjAD1w4ABiYmLg6ekJa2tr7NixA35+fs89JkJI51HLdUjdlbqu\nQ9pnsk6l7dMhG+GLZ86QHldUVISDBw/i1q1bkMvlCmVpaWmd2jFCiOZROpDWrVuHJUuWwNnZGTY2\nNnRdDiGk0ykdSKmpqdi0aRPi4+NV2R9CiAZT+jqke/fuISwsTJV9IYRoOKUDacKECSgoKFBlXwgh\nGk7pQBoxYgQkEgkWLFiA9PR07NmzR2EjhHSO9evXo3///jA2NoatrS0yMzMBPLom0M3NDT179sTo\n0aNx9uxZAEBlZSXMzc3x448/AgDu378POzs77N69W21jeF5K/+zfcvFhm40IBK0WSnsZ0c/+RNUu\nXLiAoUOHoqysDM7Ozrhx4wZqampQUVGB9957D7m5uXBzc8OOHTuwbt06/P7779DT08Pu3buxYsUK\nnDp1CnFxcWCMdcuJgtIzJLlc3u6mCWFESFfQ1taGXC5HeXk56urq0KdPH7i5uWHr1q1YsmQJBg8e\nDG1tbcTFxQEAfvnlFwDAtGnTMHLkSIwaNQo//fQTtm3bps5hPDferIdECAEcHR2RkZGBTz/9FNbW\n1nj99ddx7tw5VFZWYtGiRejZsye33bx5E9evX+feGxMTgzNnziA6OhpmZmZqHMXzU/qQreUm2/a8\n//
77ndIhPqNDNtKV6urqsGTJEpw8eRLGxsaYPHkyIiMj26z78OFDeHp6YsSIEcjJyUFZWRns7Oy6\ntL+dQenrkFpusm3R2NiIa9euQV9fH3369NGIQCJE1c6fP48rV65g1KhR0NfXR8+ePdGjRw/ExsZi\n8eLFcHd3x5AhQ1BXV4fCwkL4+/vDxMQECxcuhJ2dHb744gtYWVlh2rRpKCoqUrgZvjtQOpAeX2y/\nRVVVFWbMmIGYmJhO7RQhmqqhoQFJSUk4d+4cevToAQ8PD2zfvh0uLi548OABZs6ciYqKChgaGmL0\n6NHw9/dHfn4+srOzcfr0aQBAcnIyRo4ciVWrVmHFihVqHlHHvPDNtaWlpZg6dSrOnz/fWX3iLTpk\nI0S1Xvikto6OjsKJNUIIeV5KH7KdOHFC4TVjDNevX8fHH3+MYcOGdXrHCCGaR+lAGjVqVJuHLD4+\nPvj88887vWOEEM2jdCC1PPushZaWFiwtLaGvr9/pnSKEaCalA8nW1laV/SCEkI6d1L5w4QLeffdd\njBgxAiNHjsTs2bNx4cIFVfWNEKJhlA6kQ4cO4dVXX8Vvv/2G4cOHw8vLC2VlZRg8eDCOHDmiyj4S\nQjSE0odsy5cvR1xcHDZu3Kiwf968eVi2bBmKi4s7vXOEEM2i9AypvLycu8P4cXPmzMGZM3RhHSHk\nxSkdSCYmJrhy5Uqr/X/++SdMTU07tVOEENVIT0/n9U23Sh+yhYeHIzo6Gtu2bcPo0aMBAMeOHcOc\nOXPw5ptvqqyDhF9UfZtM6boIlbb/NKoe2+M6Ok5/f38UFRVhz549mDJlCrf/6tWrsLW1hVwub/O2\npu5G6RnSv/71L3h6eiIkJAQmJiYwMTFBaGgoXnvtNXz88ceq7CMhBICNjU2rVTcyMzPRt29fNfWo\n8ykdSMbGxsjOzsbFixfx7bff4ttvv8XFixeRlZUFExMTVfaREALgrbfewrFjx1BVVcXty8zMxNSp\nU7nXJ06cgL+/P3r27AlLS0tMmTIFt2/ffmq7n376KRwcHGBoaIjXXnsNx44dU9kYnkXpQJo0aRJW\nrVoFR0dHvPHGG3jjjTfg4OCANWvW4J///Kcq+0gIAdCrVy+MHTsWX3/9NQDg1KlTuHXrFoKCgrg6\ntbW1mDNnDn799Vfk5+fj2rVrmDNnTrttpqWlITU1Fdu2bUN5eTkiIiIwbty4Ns8XdwWlA6moqAjj\nxo1rtT8kJEStiUqIJpk2bRr3NJHdu3dj8uTJ6NHj/04Fjx07FpMmTYKTkxOGDRuGDRs2YN++fe2u\ne//RRx8hNTUVwcHBcHBwQHx8PEaOHMk96aSrKX1S+/79+zA2Nm6139DQEHfv3u3UThFC2hYWFoZ3\n330Xv//+O/bu3Yt9+/ahrq6OK79+/TqWLl2KH3/8EdXV1ZDL5WhqasLNmzdhY2Oj0FZtbS0qKirw\n1ltvQSAQcPsbGhrU9kuc0oHk6OiIQ4cOwcnJSWH/oUOHYG9v3+kdI4S0pquri7fffhvR0dEwNjbG\na6+9hqNHj3LlkZGRaGxsxOeffw4bGxtcvnwZwcHBaGxsbNVWS5BlZWXBxcVFoUxdl/IoHUhz5szB\nkiVL8PDhQwQFBUEgEOCHH36AVCrFypUrVdlHQshjpk2bBj8/vzYfvHHixAl8/fXXEIlEAIDffvut\n3XaEQiGsra1x5coVhIWFqay/HaF0IInFYlRVVSEpKQkLFy4EAOjp6SExMRHx8fEq6yAhRJGvry+q\nq6vbfNSRo6Mj0tPT4eLigkuXLuGjjz5qtx2BQIDly5cjKSkJBgYGGD16NO7evYuDBw/Cx8cHfn5+\nqhxGm5QOJODR4uFLlizhHuHr6uoKIyMjlXSMkK6mzosyO6p3795t7v/iiy/w7rvvYtCgQRg0aBBW\nrlyJ8PDwdtuJj4+Hnp4e1q5di+joaFhYWGDkyJGYOHGiqrr+VC+8
yL8moUX+X+4rtYn68erJtZGR\nkRAIBArbk6sLyGQyDBs2DPr6+nB1dUV+fr5CeW1tLSIjI2FqagoLCwskJia2+slz165dsLe3h4GB\nAQICAnDp0iWVj40Q8my8CiTg0QWYN27c4Lbo6Giu7M6dOwgJCYGPjw/Kysowffp0hIeHKzwzTiwW\no6SkBIcPH0Z2djb27t2rcNK9oKAAMTExkEgkKCkpgVAoRGhoKJqamrp0nISQ1ngXSAYGBrC2tuY2\nQ0NDriwzMxOmpqbYuHEjXF1dsWzZMnh5eeGzzz4DANy9exeZmZnYvHkzvLy8EBgYiI8++ghbtmyB\nXC4HAGzevBlTpkzBrFmzMGjQIKSlpeHKlSv4/vvv1TJeQsj/4V0g5ebmwtLSEoMHD8bq1asVZi7F\nxcUICAhQuIhLJBJBJpMBePTQSoFAAF9fX4Xyqqoq7iEFxcXFCAwM5MqNjIzg7e3NtUEIUR9eBdK4\nceOwZ88eFBQUICEhARs2bMD777/PlVdVVUEoFCq8x9LSkrvZsKqqCubm5grPM7e0tOTKlGmjhVQq\nbXU+ixCiWh362V/VJk2axP396quvQltbG3FxcVi5cmW7v3A9rq3y5w0SqVQKqVTaKW0RQpTDqxnS\nkzw8PFBXV8ctn2BlZdVqJlNdXc3NeKysrFBTU6Pwq1pL/ZY6QqHwqW0QQtSH14FUXl4OQ0ND7iIw\nLy8vhft2gEe/mnl7ewN4FGCMMRw/flyhXCgUcvfbeXl5obCwkCuvr6+HTCbj2iCEqA+vAikhIQEy\nmQyVlZXIyclBQkICYmJiuEOld955B/fv38f8+fPx+++/Y+3atZDJZIiJiQEAmJubY+rUqYiPj0dx\ncTEKCwshkUggFouhpfVoqGKxGHv27EFaWhrOnj2LqKgo9O/fH8HBwWobNyHkEV6dQzp37hzCwsLw\n119/YcCAAYiJicGSJUu4cgsLC3z33XcQi8XYtm0bHBwckJOTg4EDB3J1tm7dCrFYDJFIBB0dHURG\nRiIpKYkrF4lE2L59O1JSUnDz5k0MHz4ceXl50NHR6dKxEv65nPJql31WR66O5yOBQIDCwkL4+/t3\naru8CiRlrgUaPnw4SktL2y03NjZGRkYGMjIy2q0TFRWFqKio5+ojIerQssj/44KDg1+66+d4FUiE\nkPYlJiZyK20Aj1bbeNnw6hwSIaR9xsbGCncx9OrVCwBQVlYGf39/GBgYwM7ODsnJyQq/NAsEAqSn\np8PX15e7f/P27dvYs2cPbG1tYWlpiTVr1nD1GxoaEBERgX79+sHIyAienp4oKCh4at+e1QdlUSAR\n0o3duXMHY8eORVhYGM6cOYP09HTs3r271U3pK1euRFJSEmQyGW7cuIGJEyciJycH+fn5+OSTT7B8\n+XKUl5cDAJqamvDKK68gLy8Pp06dwoQJEzB+/PhWl8t0tA/KoEAipJtYtWoVjI2NuS0vLw9btmzB\nmDFjsHDhQjg5OcHf3x/Jycn4/PPPFd4rFosRHByMwYMHY+bMmTh+/Dh27NgBV1dXTJs2Dc7Oztzl\nMkZGRpBIJBgyZAicnJywYsUK2Nratnu+Stk+KIPOIRHSTcTGxmLu3Lnc6z59+iAjIwP79+9XeABH\nc3Nzq8MlNzc37m+hUAgrKyuYm5sr7KuuruZeb9iwARkZGbhy5Qr+/vtvPHjwoN1HI505c0apPiiD\nAomQbsLc3LzVQzZqa2sxbdo0LF++/KnvffyyFoFA0OoyF4FAwK2IsWfPHiQnJyM1NRXu7u4wMjLC\nxIkT23xQQEf6oAwKJEK6MXd3dxw9erRVUL2IEydOICgoCBERj1bvrK+vx59//tklfaBzSIR0Y2Kx\nGOfPn0dsbCxOnTqF8+fPIysrC6tWrXruNh0dHXH8+HEcP34cZ8+eRWRk5FMXMOzMPtAMiZD/rzte\nPd2/f38cO3YMixYtgo+PD7S0
tODi4qJwrqmjYmNjUVJSgtDQUBgZGWHx4sXt/sLW2X2gRf47gBb5\np0X+iWrRIRshhDcokAghvEGBRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAI\nIbxBgUQI4Q0KJEIIb1AgEUJ4gwKJEMIbFEiEEN6gQCKE8AYFEiGEN2gJW8Irl1NeVWn73XGZWk1C\nMyRCCG9QIBFCeIMCiRDCGxRIhBDeoEAihPAGBRIhhDc0NpBWr16Nvn37wtDQEBMmTHjqkzkJIV1D\nIwNp165dWLVqFbZs2YITJ07g3r17mDJlirq7RYjG08gLI1NTU5GQkIDw8HAAQFpaGhwdHVFeXo5B\ngwapuXeEaC6NmyE1NDTg1KlTCAwM5PY5ODjAzs4OMplMjT0jhGjcDOnOnTuQy+UQCoUK+y0tLRXO\nI0mlUiQnJ7d6v0AgUHkfn2Sr6g/4oOvH1B51jZUxpupPJkrQuBmSsv/jSaVSMMZ4sbX0WxM2dY2V\n8IPGBVLv3r2hpaXV6le16urqVrMmQkjX0rhA0tPTg7u7OwoLC7l9FRUVqKyshLe3txp7RggRMA2c\nr6alpWH+/Pn46quvYGdnhwULFgAACgoK1NyztgkEAo05rNCksZLWNO6kNgBERUXh1q1biI2Nxf37\n9xEUFIQdO3aou1vt+uCDD9TdhS6jSWMlrWnkDIkQwk8adw6JEMJfFEiEEN6gQCKE8AYFEs9FRkZC\nIBAobBs3blSoI5PJMGzYMOjr68PV1RX5+fkK5QKBAEePHuVeX79+HS4uLhg/fjz+/vvvrhiGgpdx\nTKRzUCB1oWvXrj3X+yZNmoQbN25wW3R0NFd2584dhISEwMfHB2VlZZg+fTrCw8Nx8eLFNtu6cuUK\n/Pz84Obmhm+++Qa6urrP1ae2dGR83WVMpGtRIKlYVVUV1q1bB2dnZ0gkkudqw8DAANbW1txmaGjI\nlWVmZsLU1BQbN26Eq6srli1bBi8vL3z22Wet2vnzzz/h5+cHT09PZGVlQUdH57nH1RaJRAJnZ2es\nW7fumetLdZcxka5FgaQCcrkcBw8exMSJE9GvXz9kZ2cjISEBmzZtAgDExsbC2Ni43S02Nlahvdzc\nXFhaWmLw4MFYvXo1mpqauLLi4mIEBAQo3PQrEolarVzwxx9/wNfXFz4+PsjMzESPHp1/CdqmTZuQ\nkJCA7Oxs9OvXDxMnTsTBgwchl8tb1e0uYyJdjJFOc/XqVZaSksLs7OxY79692bx589jp06db1bt1\n6xa7ePFiu9utW7e4ullZWSw/P5+dPn2a7dq1i/Xu3ZstW7aMKw8KCmKLFy9WaH/Lli3slVde4V4D\nYLq6ukwkErHm5mYVjLy106dPs3nz5jELCwtmZ2fHUlJS2NWrV7v1mIjqUSB1ohkzZjAAbPbs2ezB\ngwcq+Ywvv/ySGRkZMblczhhjbMyYMUr9450wYQLT0dFheXl5KulXe+rr61lUVBQDwGbMmNFmne42\nJqI6dMjWiZKSkpCQkIB9+/bBxcUFUqkUlZWVrep19JDtcR4eHqirq8Pt27cBAFZWVkqtXDBv3jxI\nJBK8/fbbOHLkyIsP9hkqKyvxwQcfwNnZGbm5uUhISEBSUlKbdbvLmEgXUHcivowaGhpYVlYWCwoK\nYtra2kwkErHMzExu1tSRQ7Ynff3118zQ0JCbTWzatInZ2dkp1PH19WWJiYncawCssLCQMcbY0qVL\nmZGREfvxxx87edSMPXjwgGVmZjKRSMS0tbVZUFAQy8rKYg0NDU99H5/HRLoWBZKKVVRUMIlEwmxs\nbFhUVFSH379gwQL2yy+/sIqKCvaf//yH9e3bly1YsIArv337NuvVqxebN28eO3fuHFuzZg3T09Nj\nFy5c4Oo8/o+XMcbmzp3LzMzM2K+//vpCY3tSVFQU69evH5NIJKyiouKlGBPpWhRIXaSpqYldun
Sp\nw+8LDg5mvXv3Zrq6uszJyYklJyezhw8fKtT5+eefmYeHB9PV1WUuLi7swIEDCuVP/uOVy+Vs9uzZ\nzMLCgp05c+a5xtOWS5cusaampmfW605jIl2L7vYnhPAGndQmhPAGBRIhhDcokAghvEGBRAjhDQok\nQghvUCARQniDAqkbioyMxJgxY9TdDUI6HV2H1A3dv38fcrkcvXr1UndXCOlUFEiEEN6gQ7Zu6PFD\ntpa/d+zYAVtbW5iammL8+PGorq5WeM/hw4cxevRoGBoawszMDH5+fvjf//4HAGCMYf369XBwcICu\nri4cHR1brXFtZ2eHFStWIC4uDmZmZhAKhdi8eTMaGhoQHx+PXr16wcbGBps3b1Z4X21tLebNmwcb\nGxsYGhpi6NChyMnJUeG3Q7ozCqSXQElJCQoLC3HgwAF8//33OHnyJBYuXMiVHz58GMHBwfD09MTP\nP/8MmUyGiIgINDY2AgC2bt2KFStWYOnSpTh79iwWLVqEpUuXYufOnQqfk5qaioEDB6K0tBRz587F\n3LlzER4eDnt7e5SUlOC9997D3Llzce7cOQCPgu6NN97AqVOnkJWVhfLycsTFxWHy5Mm0XAhpm/pu\noyPPa8aMGUwkEnF/9+7dW+Hm1NWrVzNra2vu9ahRo1hoaGi77fXr148tWrRIYd/8+fOZvb0999rW\n1paNHz+ee93c3MxMTExYWFiYwr6ePXuy1NRUxhhjhYWFTE9Pj927d0+h7ZkzZyq0RUgLmiG9BP7x\nj39AT0+Pe21jY4Nbt25xr0tLSzF27Ng23/vXX3/h6tWr8PX1Vdjv5+eHyspK1NfXc/vc3d25v7W0\ntLg1sR/fJxQKucXVSkpK8Pfff8PGxkZhAbrdu3e3+wQRotloVfSXwJOP/REIBGBP/Fbx+IL5bXmy\n/Mn3A2j1RA+BQNDmvpZF/eVyOczMzFBSUvLMPhMC0DkkjeDp6YkffvihzTJTU1P069cPRUVFCvuP\nHTsGe3t7hccTddSwYcNw7949PHz4EE5OTgrbgAEDnrtd8vKiGZIGWLFiBUJCQjB//nxERUVBT08P\nP//8M0aMGAFnZ2csW7YMiYmJGDhwIPz9/VFQUIBt27Zhy5YtL/S5gYGBGDNmDN58802sXbsW7u7u\nuHv3Lk6cOAF9fX3Mnj27k0ZIXhYUSBpg7Nix+O677yCVSvHZZ59BV1cXHh4e3HmjuLg41NXVYdWq\nVZgzZw769++PNWvWYNasWS/0uQKBALm5uUhOTkZCQgKuXbsGc3NzDBkyBIsXL+6MoZGXDF0YSQjh\nDTqHRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbzx/wAGhSYGiuMWSAAA\nAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432.984x300 with 1 Axes>"
      ]
     },
     "metadata": {
      "tags": []
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Count of rows per income bucket, split by sex ('height' sets the facet height in inches).\n",
    "sns.catplot(data=train_df,x='income',hue='sex',kind='count',height=3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "KAHLyF0eqka5"
   },
   "source": [
    "#### Synthetic data distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 382,
     "status": "ok",
     "timestamp": 1579569914533,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "TxDRJe-pqkaw",
    "outputId": "0ec76dc8-6468-45c3-b3e7-9d416f62e00b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampling data with restrictions: [Restriction(column_name='sex', types=['Female'], percentages=[0.4])]\n"
     ]
    }
   ],
   "source": [
    "column_types = ['sex:Female']\n",
    "percentages = [0.4]\n",
    "restrictions = create_restrictions(column_types,percentages)\n",
    "print('Sampling data with restrictions: {}'.format(restrictions))\n",
    "sampled_train_df = sample_data(train_df, restrictions=restrictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 85
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 358,
     "status": "ok",
     "timestamp": 1579569915235,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "kN7c1CKqqka0",
    "outputId": "3b6254b6-1605-4006-8d85-824b06e3f83f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Female group-size in synthetic data: 0.3999877153650072\n",
      "Male group-size in synthetic data: 0.6000122846349928\n",
      "Female base-rate in synthetic data: 0.10910626535626536\n",
      "Male base-rate in synthetic data: 0.3041920458616983\n"
     ]
    }
   ],
   "source": [
    "# Base-rate remains similar as in original distribution, group-size changes\n",
    "sampled_female_rows = sampled_train_df[sampled_train_df.sex=='Female']\n",
    "sampled_male_rows = sampled_train_df[sampled_train_df.sex!='Female']\n",
    "print('Female group-size in synthetic data: {}'.format(len(sampled_female_rows)/len(sampled_train_df)))\n",
    "print('Male group-size in synthetic data: {}'.format(len(sampled_male_rows)/len(sampled_train_df)))\n",
    "print('Female base-rate in synthetic data: {}'.format(len(sampled_female_rows[sampled_female_rows.income == '>50K'])/len(sampled_female_rows)))\n",
    "print('Male base-rate in synthetic data: {}'.format(len(sampled_male_rows[sampled_male_rows.income == '>50K'])/len(sampled_male_rows)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 218
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 621,
     "status": "ok",
     "timestamp": 1579569928511,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "JiylceYXqka6",
    "outputId": "3b6fb408-6b33-4f97-dab6-cd87c947caa8"
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAASQAAADJCAYAAACKVE8EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHKhJREFUeJzt3XlUFFf+NvCnUfZNURZFw6bBUaNR\nFFxZbNEYyVGS6KhxQaKAdnBBcQMTcOIWHUclSmIyqImYMDqYcDQkasAtmgY1oqDHZQaMO7hmBMVg\n3/cPf9RrC2ij3VDYz+ecOoeuW139vSQ83qquuqUQQggQEcmASX0XQERUiYFERLLBQCIi2WAgEZFs\nMJCISDYYSEQkGwwkIpINBhIRyQYDiYhkg4FERLLBQKoFhUJR3yUQvdQYSEQkGwwkIpINBhIRyQYD\niYhkg4FERLLBQCIi2Whc3wW8DHxivzLo/o8sG2vQ/RPJBUdIRCQbDCQikg0GEhHJBgOJiGSDgURE\nssFAIiLZYCARkWwwkIhINhhIRCQbDCQikg0GEhHJBgOJiGSDgUREssFAIiLZYCARkWwwkIhINhhI\nRCQbDCQikg0GEhHJBufUbgB+X/CaQff/yocnDLp/Il1xhEREssFAIiLZYCARkWwwkIhINhhIRCQb\nDCQikg0GEhHJBgOJiGSDgUREssFAIiLZYCARkWzUaSClp6dDqVTC3t4eCoWiSrtarUa3bt1gYWGB\n9u3bIzMzU6v97t27CAsLg52dHZo1a4YZM2bg4cOHWtusX78eHh4esLS0RFBQEM6dO6fVfubMGQQG\nBsLS0hIeHh7YsGGD3vtJRM+nTgOprKwM/fr1w5w5c6q03bhxA4MGDULv3r1x9OhRjBkzBqGhoTh7\n9qy0jUqlQm5uLnbv3o0tW7bgm2++wcKFC6X2rKwsREZGIj4+Hrm5uXBycsLgwYNRUVEBAPjzzz8x\nePBgODs7Izc3F/Hx8YiIiMDevXsN33kieiaFEELU9Yfu2bMHQUFBePyjV69ejRUrVqCwsFAaPfn7\n+8PX1xfLly/HrVu34OjoiF27diEoKAgAkJKSgrlz5+LKlSswMTHB22+/DVtbW2zcuBEAUFpaCkdH\nR/zrX/9CSEgIMjIyMGLECJSUlMDa2hoAMHbsWJSVlWHr1q3PrFuhUKC6X5dP7Fcv/Dt5mm22ywy6\nf97tT3Ihm3NIOTk5CAoK0jqUUyqVUKvVAIAjR45AoVDA399fq724uBiFhYXSPvr16ye1W1tbw8/P\nT9pHTk4O/Pz8pDB68jOIqH7JJpCKi4vh5OSktc7R0RHFxcVSu4ODAxo1aqTVXtmm6z6e1v64hIQE\nKBQKrYWIDEs2gfSsI8fq2msbErU5Ok1ISIAQQmshIsOSTSA5OztXGamUlJRIIxpnZ2fcvHlT61u1\nyu0rt3FycnrmPp7WTkT1SzaB5Ovriz179mity8rKgp+fHwCga9euEEJg//79Wu1OTk7w8PCQ9pGd\nnS21l5WVQa1WS/vw9fWFWq1GWVlZtZ9BRPWrTgPp5s2bOHbsmHRt0LFjx3Ds2DE8ePAA7733Hu7c\nuYNp06bh1KlTWLp0KdRqNSIjIwEADg4OGDVqFKKjo5GTk4Ps7GzEx8dDpVLBxORRN1QqFTZv3oyU\nlBQUFBQgPDwcrVu3xsCBAwEAb7zxBlq2bIn3338fBQUFSElJwTfffIPo6Oi6/DUQUQ3qdJL/jIwM\njB8/XnrdpUsXAEBhYSHc3d3xww8/QKVSITk5GZ6enkhPT0fbtm2l7deuXQuVSgWlUglTU1OEhYUh\nLi5Oalcqlfjss8+wYMECXL16FT169MD27dthamoKADAzM8OOHTsQGRkJHx8fuLi4YN26dQgICKij\n3wARPU29XIfUUPE6JCLDks05JCIiBh
IRyQYDiYhkQ+dA+v3336s9fyKEwO+//67XoojIOOkcSB4e\nHigpKamy/ubNm9J1QEREL0LnQKrpy7iysjKYm5vrrSAiMl7PvA5pwYIFAB595b18+XLY2NhIbQ8f\nPsQvv/yCDh06GK5CIjIazwykr7/+GsCjEdLWrVu17rY3MzODh4cHli5dargKichoPDOQKmdsDAoK\nQnp6Opo2bWrwoojIOOl868jjN60SERlCre5l27t3L3bu3Ilr165Bo9FotaWkpOi1MCIyPjoH0rJl\nyzB79mx4e3vD1dWVMygSkd7pHEhJSUlYtWoVp+ogIoPR+Tqk27dvIyQkxJC1EJGR0zmQhg4diqys\nLEPWQkRGTudA6tmzJ+Lj4zF9+nRs2LABmzdv1lqISD+WL1+O1q1bw8bGBm5ubkhNTQXw6JrADh06\noEmTJujbty8KCgoAAEVFRXBwcMCBAwcAAHfu3IG7uzs2bdpUb314XjpP0FY5TWy1O1EoqjzS+mXE\nCdrI0M6cOYMuXbrg6NGj8Pb2xpUrV3Dz5k0UFhbigw8+QEZGBjp06IB169Zh2bJlOHXqFMzNzbFp\n0ybMnz8feXl5mDRpEoQQDXKgoPMISaPR1LgYQxgR1YVGjRpBo9EgPz8fpaWlaNGiBTp06IC1a9di\n9uzZ6NSpExo1aoRJkyYBAH799VcAwOjRo9GrVy/06dMHv/zyC5KTk+uzG8+N8yERyYiXlxc2btyI\n1atXw8XFBW+88QZOnjyJoqIixMbGokmTJtJy9epVXL58WXpvZGQkTpw4gYiICNjb29djL56fzods\nlTfZ1uTDDz/US0FyxkM2qkulpaWYPXs2jh07BhsbG4wYMQJhYWHVbnv//n34+PigZ8+eSE9Px9Gj\nR+Hu7l6n9eqDztchVd5kW+nPP//EpUuXYGFhgRYtWhhFIBEZ2unTp3HhwgX06dMHFhYWaNKkCRo3\nboyoqCjMmjULnTt3xuuvv47S0lJkZ2cjMDAQtra2mDlzJtzd3fHll1/C2dkZo0ePxt69e7Vuhm8I\ndA6kyptsH1dcXIxx48ZJz04johdTXl6OuLg4nDx5Eo0bN0bXrl3x2WefoV27drh37x7Gjx+PwsJC\nWFlZoW/fvggMDERmZia2bNmC48ePAwASExPRq1cvLFq0CPPnz6/nHtXOCz8G6ciRIxg1ahROnz6t\nr5pki4dsRIb1wie1TU1NtU6sERE9L50P2Q4ePKj1WgiBy5cv45NPPkG3bt30XhgRGR+dA6lPnz7V\nHrL07t0bX3zxhd4LIyLjo3MgFRYWar02MTGBo6MjLCws9F4UERknnQPJzc3NkHUQEdXupPaZM2cw\nYcIE9OzZE7169cLEiRNx5swZQ9VGREZG50DatWsXXnvtNfz222/o0aMHfH19cfToUXTq1Ak///yz\nIWskIiOh8yHbvHnzMGnSJKxcuVJr/dSpUzF37lzk5OTovTgiMi46j5Dy8/OlO4wfN3nyZJw4wQvr\niOjF6RxItra2uHDhQpX158+fh52dnV6LIiLD2LBhg6xvutX5kC00NBQRERFITk5G3759AQD79u3D\n5MmT8fbbbxusQJIXQ98mc2TZWIPu/2kM3bfH1bafgYGB2Lt3LzZv3oyRI0dK6y9evAg3NzdoNJpq\nb2tqaHQeIf3973+Hj48PBg0aBFtbW9ja2mLw4MHo3r07PvnkE0PWSEQAXF1dq8y6kZqaipYtW9ZT\nRfqncyDZ2Nhgy5YtOHv2LL777jt89913OHv2LNLS0mBra2vIGokIwDvvvIN9+/ahuLhYWpeamopR\no0ZJrw8ePIjAwEA0adIEjo6OGDlyJK5fv/7U/a5evRqenp6wsrJC9+7dsW/fPoP14Vl0DqThw4dj\n0aJF8PLywltvvYW33noLnp6eWLJkCf76178askYiAtC0aVMMGDAA3377LQAgLy8P165dQ3BwsLTN\n3b
t3MXnyZBw+fBiZmZm4dOkSJk+eXOM+U1JSkJSUhOTkZOTn52Ps2LF48803qz1fXBd0DqS9e/fi\nzTffrLJ+0KBB9ZqoRMZk9OjR0tNENm3ahBEjRqBx4/9/KnjAgAEYPnw42rRpg27dumHFihXYtm1b\njfPef/zxx0hKSsLAgQPh6emJ6Oho9OrVS3rSSV3T+aT2nTt3YGNjU2W9lZUVbt26pdeiiKh6ISEh\nmDBhAk6dOoVvvvkG27ZtQ2lpqdR++fJlzJkzBwcOHEBJSQk0Gg0qKipw9epVuLq6au3r7t27KCws\nxDvvvAOFQiGtLy8vr7dv4nQeIXl5eWHXrl1V1u/atQseHh56KSYsLAwKhUJrefJCTLVajW7dusHC\nwgLt27dHZmamVvvdu3cRFhYGOzs7NGvWDDNmzKjyr8P69evh4eEBS0tLBAUF4dy5c3qpn8jQzMzM\n8O677yIiIgI2Njbo3r27VntYWBguXLiAL774Arm5udi2bRuAR1NOP6kyyNLS0nDs2DFpOXXqFD7+\n+GPDd6YaOo+QJk+ejNmzZ+P+/fsIDg6GQqHATz/9hISEBCxcuFBvBQ0fPhyrVq2SXj9+jdONGzcw\naNAgjBkzBl999RW+//57hIaG4sSJE2jbti0AQKVS4fDhw9i9ezfu3r2L0aNHw97eXprzOysrC5GR\nkUhOToafnx/+9re/YfDgwSgoKNAa+hLJ1ejRoxEQEFDtgzcOHjyIb7/9FkqlEgDw22+/1bgfJycn\nuLi44MKFCwgJCTFYvbWh81+gSqVCcXEx4uLiMHPmTACAubk5ZsyYgejoaL0VZGlpCRcXl2rbUlNT\nYWdnh5UrV0KhUEgjpM8//xzLly/HrVu3kJqail27dsHX1xfAo2PkuXPnIj4+HiYmJvj0008xcuRI\nvP/++wAendRzdHTEjz/+KJv/KERP4+/vj5KSkmofdeTl5YUNGzagXbt2OHfu3FNHOgqFAvPmzUNc\nXBwsLS3Rt29f3Lp1Czt37kTv3r0REBBgyG5Uq1ZDgsTERMyePVt6hG/79u1hbW2t14IyMjLg6OiI\nFi1aYOTIkYiNjZVGLjk5OQgKCtI63lUqldi9ezeAR/N7KxQK+Pv7a7UXFxejsLAQXl5eyMnJ0RrR\nWVtbw8/PD2q1moFk5Orzoszaat68ebXrv/zyS0yYMAEdO3ZEx44dsXDhQoSGhta4n+joaJibm2Pp\n0qWIiIhAs2bN0KtXLwwbNsxQpT9VrY9RKq9VMIQ333wTI0aMgKurK44cOYLY2Fj873//w6JFiwA8\nespJly5dtN7j6OgoXZdRXFwMBwcHrUe/ODo6Sm1eXl4oLi6Gk5NTjfuolJCQgMTERL33keh57Nmz\np8a2wMBA6Srt7t27Iy8vT6v98Su4w8LCqjzbLSIiAhEREXqr9UXI6qTJ8OHDpZ9fe+016ZHBCxcu\nrPGJH4+rrv3x0VRtJCQkICEhQS/7IiLdyPpR2l27dkVpaal0pamzs3OVkUxJSYk04nF2dsbNmze1\nvlWr3L5yGycnp6fug4jqj6wDKT8/H1ZWVtLxsq+vb5Wha1ZWFvz8/AA8CjAhBPbv36/V7uTkJF2a\n4Ovri+zsbKm9rKwMarVa2gcR1R9ZBVJMTAzUajWKioqQnp6OmJgYREZGSodK7733Hu7cuYNp06bh\n1KlTWLp0KdRqtfTkXAcHB4waNQrR0dHIyclBdnY24uPjoVKpYGLyqKsqlQqbN29GSkoKCgoKEB4e\njtatW2PgwIH11m8iekRW55BOnjyJkJAQ/PHHH3jllVcQGRmJ2bNnS+3NmjXDDz/8AJVKheTkZHh6\neiI9PV26BgkA1q5dC5VKBaVSCVNTU4SFhSEuLk5qVyqV+Oyzz7BgwQJcvXoVPXr0wPbt22Fqalqn\nfSWiql74UdrGhI/SfrnnQ6L6J6tDNiIybgwkIpINBhIRyYasTmoT
1affF7xWZ59Vm/N2cqRQKJCd\nnY3AwEC97pcjJKIGIDAwsMrUPG+88UZ9l6V3HCERNRAzZsyQZtoAHs228bLhCImogbCxsYGLi4u0\nNG3aFABw9OhRBAYGwtLSEu7u7khMTNS6fUqhUGDDhg3w9/eXJiW8fv06Nm/eDDc3Nzg6OmLJkiXS\n9uXl5Rg7dixatWoFa2tr+Pj4ICsr66m1PasGXTGQiBqwGzduYMCAAQgJCcGJEyewYcMGbNq0qcpM\nqwsXLkRcXBzUajWuXLmCYcOGIT09HZmZmfjHP/6BefPmIT8/HwBQUVGBV199Fdu3b0deXh6GDh2K\nIUOGVLkHtLY16IKBRNRALFq0CDY2NtKyfft2rFmzBv3798fMmTPRpk0bBAYGIjExEV988YXWe1Uq\nFQYOHIhOnTph/Pjx2L9/P9atW4f27dtj9OjR8Pb2lu4Btba2Rnx8PF5//XW0adMG8+fPh5ubG378\n8cdq69K1Bl3wHBJRAxEVFYUpU6ZIr1u0aIGNGzfi+++/13oAx8OHD6scLnXo0EH62cnJCc7OznBw\ncNBaV1JSIr1esWIFNm7ciAsXLuDBgwe4d+9ejY9GOnHihE416IKBRNRAODg4oE2bNlrrKueNnzdv\n3lPf+/i9mgqFosq9mwqFAhqNBgCwefNmJCYmIikpCZ07d4a1tTWGDRtW7YMCalODLhhIRA1Y586d\nsWfPnipB9SIOHjyI4OBgjB376L7CsrIynD9/vk5q4DkkogZMpVLh9OnTiIqKQl5eHk6fPo20tDRp\n2ufn4eXlhf3792P//v0oKChAWFgYKioq6qQGjpCI/k9DvHq6devW2LdvH2JjY9G7d2+YmJigXbt2\nWueaaisqKgq5ubkYPHgwrK2tMWvWrBq/YdN3DZx+pBY4/QinHyHD4iEbEckGA4mIZIOBRESywUAi\nItlgIBGRbDCQiEg2GEhEJBsMJCKSDQYSEckGA4mIZIOBRESywUAiItlgIBGRbHD6EZIVQz+ssSFO\nMWJMOEIiItlgIBGRbDCQiEg2GEhEJBsMJCKSDQYSEckGA4mIZIOBRESywUAiItlgIBGRbBhtIC1e\nvBgtW7aElZUVhg4d+tQncxJR3TDKQFq/fj0WLVqENWvW4ODBg7h9+zZGjhxZ32URGT2jvLk2KSkJ\nMTExCA0NBQCkpKTAy8sL+fn56NixYz1XR2S8jG6EVF5ejry8PPTr109a5+npCXd3d6jV6nqsjIiM\nboR048YNaDQaODk5aa13dHTUOo+UkJCAxMTEKu9XKBQGr/FJbob+gI/qvk81qa++CiEM/cmkA6Mb\nIen6P15CQgKEELJYKus2hqW++kryYHSB1Lx5c5iYmFT5Vq2kpKTKqImI6pbRBZK5uTk6d+6M7Oxs\naV1hYSGKiorg5+dXj5URkUIY4Xg1JSUF06ZNw9dffw13d3dMnz4dAJCVlVXPlVVPoVAYzWGFMfWV\nqjK6k9oAEB4ejmvXriEqKgp37txBcHAw1q1bV99l1eijjz6q7xLqjDH1laoyyhESEcmT0Z1DIiL5\nYiARkWwwkIhINhhIMhcWFgaFQqG1rFy5UmsbtVqNbt26wcLCAu3bt0dmZqZWu0KhwJ49e6TXly9f\nRrt27TBkyBA8ePCgLrqh5WXsE+kHA6kOXbp06bneN3z4cFy5ckVaIiIipLYbN25g0KBB6N27N44e\nPYoxY8YgNDQUZ8+erXZfFy5cQEBAADp06ICtW7fCzMzsuWqqTm3611D6RHWLgWRgxcXFWLZsGby9\nvREfH/9c+7C0tISLi4u0WFlZSW2pqamws7PDypUr0b59e8ydOxe+vr74/PPPq+zn/PnzCAgIgI+P\nD9LS0mBqavrc/apOfHw8vL29sWzZsmfOL9VQ+kR1i4FkABqNBjt37sSwYcPQqlUrbNmyBTExMVi1\nahUAICoqCjY2NjUuUVFRWvvL
yMiAo6MjOnXqhMWLF6OiokJqy8nJQVBQkNZNv0qlssrMBf/973/h\n7++P3r17IzU1FY0b6/8StFWrViEmJgZbtmxBq1atMGzYMOzcuRMajabKtg2lT1THBOnNxYsXxYIF\nC4S7u7to3ry5mDp1qjh+/HiV7a5duybOnj1b43Lt2jVp27S0NJGZmSmOHz8u1q9fL5o3by7mzp0r\ntQcHB4tZs2Zp7X/NmjXi1VdflV4DEGZmZkKpVIqHDx8aoOdVHT9+XEydOlU0a9ZMuLu7iwULFoiL\nFy826D6R4TGQ9GjcuHECgJg4caK4d++eQT7jq6++EtbW1kKj0QghhOjfv79Of7xDhw4VpqamYvv2\n7QapqyZlZWUiPDxcABDjxo2rdpuG1icyHB6y6VFcXBxiYmKwbds2tGvXDgkJCSgqKqqyXW0P2R7X\ntWtXlJaW4vr16wAAZ2dnnWYumDp1KuLj4/Huu+/i559/fvHOPkNRURE++ugjeHt7IyMjAzExMYiL\ni6t224bSJ6oD9Z2IL6Py8nKRlpYmgoODRaNGjYRSqRSpqanSqKk2h2xP+vbbb4WVlZU0mli1apVw\nd3fX2sbf31/MmDFDeg1AZGdnCyGEmDNnjrC2thYHDhzQc6+FuHfvnkhNTRVKpVI0atRIBAcHi7S0\nNFFeXv7U98m5T1S3GEgGVlhYKOLj44Wrq6sIDw+v9funT58ufv31V1FYWCj+/e9/i5YtW4rp06dL\n7devXxdNmzYVU6dOFSdPnhRLliwR5ubm4syZM9I2j//xCiHElClThL29vTh8+PAL9e1J4eHholWr\nViI+Pl4UFha+FH2iusVAqiMVFRXi3LlztX7fwIEDRfPmzYWZmZlo06aNSExMFPfv39fa5tChQ6Jr\n167CzMxMtGvXTuzYsUOr/ck/Xo1GIyZOnCiaNWsmTpw48Vz9qc65c+dERUXFM7drSH2iusW7/YlI\nNnhSm4hkg4FERLLBQCIi2WAgEZFsMJCISDYYSEQkGwykBigsLAz9+/ev7zKI9I7XITVAd+7cgUaj\nQdOmTeu7FCK9YiARkWzwkK0BevyQrfLndevWwc3NDXZ2dhgyZAhKSkq03rN792707dsXVlZWsLe3\nR0BAAP7zn/8AAIQQWL58OTw9PWFmZgYvL68qc1y7u7tj/vz5mDRpEuzt7eHk5IRPP/0U5eXliI6O\nRtOmTeHq6opPP/1U6313797F1KlT4erqCisrK3Tp0gXp6ekG/O1QQ8ZAegnk5uYiOzsbO3bswI8/\n/ohjx45h5syZUvvu3bsxcOBA+Pj44NChQ1Cr1Rg7diz+/PNPAMDatWsxf/58zJkzBwUFBYiNjcWc\nOXPwz3/+U+tzkpKS0LZtWxw5cgRTpkzBlClTEBoaCg8PD+Tm5uKDDz7AlClTcPLkSQCPgu6tt95C\nXl4e0tLSkJ+fj0mTJmHEiBGcLoSqV3+30dHzGjdunFAqldLPzZs317o5dfHixcLFxUV63adPHzF4\n8OAa99eqVSsRGxurtW7atGnCw8NDeu3m5iaGDBkivX748KGwtbUVISEhWuuaNGkikpKShBBCZGdn\nC3Nzc3H79m2tfY8fP15rX0SVOEJ6CfzlL3+Bubm59NrV1RXXrl2TXh85cgQDBgyo9r1//PEHLl68\nCH9/f631AQEBKCoqQllZmbSuc+fO0s8mJibSnNiPr3NycpImV8vNzcWDBw/g6uqqNQHdpk2banyC\nCBk3zor+EnjysT8KhQLiie8qHp8wvzpPtj/5fgBVnuihUCiqXVc5qb9Go4G9vT1yc3OfWTMRwHNI\nRsHHxwc//fRTtW12dnZo1aoV9u7dq7V+37598PDw0Ho8UW1169YNt2/fxv3799GmTRut5ZVXXnnu\n/dLLiyMkIzB//nwMGjQI06ZNQ3h4OMzNzXHo0CH07NkT3t7emDt3LmbMmIG2bdsiMDAQWVlZSE
5O\nxpo1a17oc/v164f+/fvj7bffxtKlS9G5c2fcunULBw8ehIWFBSZOnKinHtLLgoFkBAYMGIAffvgB\nCQkJ+Pzzz2FmZoauXbtK540mTZqE0tJSLFq0CJMnT0br1q2xZMkSvP/++y/0uQqFAhkZGUhMTERM\nTAwuXboEBwcHvP7665g1a5Y+ukYvGV4YSUSywXNIRCQbDCQikg0GEhHJBgOJiGSDgUREssFAIiLZ\nYCARkWwwkIhINv4fuY+NcaGaKlUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432.984x300 with 1 Axes>"
      ]
     },
     "metadata": {
      "tags": []
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Group sizes change; base-rates remain the same.\n",
    "sns.catplot(data=sampled_train_df,x='income',hue='sex',hue_order=['Male','Female'],order=['<=50K','>50K'],kind='count',size=3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "D2ttHNbMks2_"
   },
   "source": [
    "## Sample data to have a specified group base-rate: Testing if the data is sampled as expected:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "om2IYVW8qD84"
   },
   "source": [
    "#### Original data distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 85
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 369,
     "status": "ok",
     "timestamp": 1579569930094,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "huNVldIfjbUV",
    "outputId": "74fac090-ae5c-467e-ae91-406189514121"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Female group-size in original data: 0.33079450876815825\n",
      "Male group-size in original data: 0.6692054912318418\n",
      "Female base-rate in original train-data: 0.10946058861758426\n",
      "Male base-rate in original data: 0.3057365764111978\n"
     ]
    }
   ],
   "source": [
    "print('Female group-size in original data: {}'.format(len(train_df[(train_df.sex=='Female')])/len(train_df)))\n",
    "print('Male group-size in original data: {}'.format(len(train_df[(train_df.sex!='Female')])/len(train_df)))\n",
    "print('Female base-rate in original train-data: {}'.format(len(train_df[(train_df.sex=='Female') & (train_df.income == '>50K')])/len(train_df[(train_df.sex=='Female')])))\n",
    "print('Male base-rate in original data: {}'.format(len(train_df[(train_df.sex!='Female') & (train_df.income == '>50K')])/len(train_df[(train_df.sex!='Female')])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 218
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 567,
     "status": "ok",
     "timestamp": 1579569931236,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "8XNpjHdAokl3",
    "outputId": "107ee91e-0962-45fd-dcae-ed169a9be40b"
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAASQAAADJCAYAAACKVE8EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAH+lJREFUeJzt3XtUVFX/P/D3gNxvCjKgqFwNHjBR\nMFBRbiMSQkup9FFTREwuTngB74w1UN7SxzS8ZYlQovHQg8mDUV5AtKyBIC9oy8sT5F1Q1BagBMz+\n/eGP83UEdFCGOTif11pnxZy9Z8/es/K99jlzzj4CxhgDIYTwgJa6O0AIIS0okAghvEGBRAjhDQok\nQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbxBgdQBAoFA3V0g5KVGgUQI4Q0KJEII\nb1AgEUJ4o0sDKScnByKRCGZmZq3Oxxw9ehQCgUBhGzJkiEKd2tpaREZGwtTUFBYWFkhMTERzc7NC\nnV27dsHe3h4GBgYICAjApUuXFMovXLgAf39/GBgYwN7eHunp6SoZKyGk47o0kOrr6xEYGIilS5e2\nW+fGjRvcduTIEYUysViMkpISHD58GNnZ2di7dy9WrlzJlRcUFCAmJgYSiQQlJSUQCoUIDQ1FU1MT\nAKCxsRGhoaGwsrJCSUkJJBIJoqOjUVRUpJoBE0I6hqlBYWEhe/Kj29r3uJqaGqatrc0KCgq4fTt3\n7mRCoZA1NzczxhgLDw9nERERXHltbS0zMDBg//3vfxljjO3fv58ZGBiw2tpars706dPZW2+9pVS/\n1fR1EaIxeqg5D1uxs7ODXC7HyJEj8fHHH2PAgAEAgNLSUggEAvj6+nJ1RSIRqqqqUFFRAUdHRxQX\nFyvMmIyMjODt7Q2ZTIawsDAUFxfD29sbRkZGCm1IJJIX6rPnoi9f6P3PUrouQqXtE8IXvDmp3adP\nH+zcuRP79+9HRkYGqqqqEBAQgAcPHgAAqqqqYG5uDm1tbe49lpaWXFnLf4VCoUK7lpaWSpc/TiqV\ntjqnRQhRLd4EkrOzM6KiouDu7o6AgADs378fNTU1yMvLAwCwNpb+7mhItNVGe6RSKRhjChshRLV4\nE0hPMjExgZOTEyorKwEAVlZWqKmpUfhVrWVm0zLrEQqFrWY71dXVXLmVldVTywkh6sXbQHrw4AH+\n+OMP2NraAgA8PDzAGMPx48e5OgUFBRAKhbC3twcAeHl5obCwkCuvr6+HTCaDt7c3Vy6TyVBfX6/Q\nRks5IUS9uvSkdk1NDS5fvsxdG3Ty5EkAgKurK9LT02FtbQ03Nzfcu3cPycnJMDY2xrhx4wAA5ubm\nmDp1KuLj47Fz507U1dVBIpFALBZDS+tRrorFYoSEhMDX1xfe3t748MMP0b9/fwQHBwMAXn/9dfTt\n2xezZs2CRCKBTCbD3r17cejQoa78Gggh7ejSQMrNzcXMmTO510OHDgUAVFRUoLGxEQkJCbh69SrM\nzMzg4+ODI0eOwNjYmKu/detWiMViiEQi6OjoIDIyEklJSVy5SCTC9u3bkZKSgps3b2L48OHIy8uD\njo4OAEBXVxcHDhxATEwMPD09YW1tjR07dsDPz6+LvgFCyNMIGJ2tVZpAIGjz5Db97E9I5+DtOSRC\niOahQCKE8AYFEiGENyiQCCG8QYFECOENCiRCCG9QIBFCeIMCiRDCGxRIhBDeoEAihPAGBRIhhDco\nkAghvEGBRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbxBgUQI4Q0KJEII\nb1AgEUJ4gwKJEMIbFEiEEN6gQCKE8AYFEiGENyiQCCG8QYFECOENCiRCCG9QIBFCeIMCiRDCGxRI\nhBDeoEAihPBGlwZSTk4ORCIRzMzMIB
AIWpXLZDIMGzYM+vr6cHV1RX5+vkJ5bW0tIiMjYWpqCgsL\nCyQmJqK5uVmhzq5du2Bvbw8DAwMEBATg0qVLCuUXLlyAv78/DAwMYG9vj/T09E4fJyHk+XRpINXX\n1yMwMBBLly5tVXbnzh2EhITAx8cHZWVlmD59OsLDw3Hx4kWujlgsRklJCQ4fPozs7Gzs3bsXK1eu\n5MoLCgoQExMDiUSCkpISCIVChIaGoqmpCQDQ2NiI0NBQWFlZoaSkBBKJBNHR0SgqKlL94AkhzyRg\njLGu/tCjR48iICAAj3/0p59+ig0bNqCiooKbPfn6+sLLywvr16/H3bt3YWlpiUOHDiEgIAAAkJaW\nhmXLluHGjRvQ0tLCm2++CRMTE2RkZAAA6urqYGlpiX//+98ICwtDbm4uJk+ejOrqahgZGQEAIiIi\nUF9fj2+++eaZ/RYIBGjr6/Jc9OULfydPU7ouQqXtE8IXvDmHVFxcjICAAIVDOZFIBJlMBgAoLS2F\nQCCAr6+vQnlVVRUqKiq4NgIDA7lyIyMjeHt7c20UFxfD29ubC6MnP4MQol5KB9Lly5fbnB0wxnD5\n8uUX7khVVRWEQqHCPktLS1RVVXHl5ubm0NbWVihvKVO2jaeVP04qlUIgEChshBDVUjqQ7O3tUV1d\n3Wp/TU0N7O3tX7gjzzpybKu8oyHRkaNTqVQKxpjCRghRLaUDqb1/kPX19dDT03vhjlhZWbWaqVRX\nV3MzGisrK9TU1Cj8qtZSv6WOUCh8ZhtPKyeEqFePZ1VISUkB8Gg2sn79ehgbG3Nlzc3N+Omnn+Dm\n5vbCHfHy8sInn3yisK+goADe3t4AAA8PDzDGcPz4cfj7+3PlQqGQm6F5eXmhsLAQM2bMAPAoLGUy\nGRITE7nyDRs2oL6+HoaGhq0+g68up7yq0vYHvH9Gpe0ToqxnBtJXX30F4NEM6ZtvvlE4h6Orqwt7\ne3usXbtWqQ+rqanB5cuXuWuDTp48CQBwdXXFO++8A6lUivnz5yMmJga5ubmQyWT44osvAADm5uaY\nOnUq4uPjsXPnTtTV1UEikUAsFkNL69FETywWIyQkBL6+vvD29saHH36I/v37Izg4GADw+uuvo2/f\nvpg1axYkEglkMhn27t2LQ4cOKft9EUJU6JmB1HIdUEBAAHJyctCrV6/n/rDc3FzMnDmTez106FAA\nQEVFBezs7PDdd99BLBZj27ZtcHBwQE5ODgYOHMjV37p1K8RiMUQiEXR0dBAZGYmkpCSuXCQSYfv2\n7UhJScHNmzcxfPhw5OXlQUdHB8CjAD1w4ABiYmLg6ekJa2tr7NixA35+fs89JkJI51HLdUjdlbqu\nQ9pnsk6l7dMhG+GLZ86QHldUVISDBw/i1q1bkMvlCmVpaWmd2jFCiOZROpDWrVuHJUuWwNnZGTY2\nNnRdDiGk0ykdSKmpqdi0aRPi4+NV2R9CiAZT+jqke/fuISwsTJV9IYRoOKUDacKECSgoKFBlXwgh\nGk7pQBoxYgQkEgkWLFiA9PR07NmzR2EjhHSO9evXo3///jA2NoatrS0yMzMBPLom0M3NDT179sTo\n0aNx9uxZAEBlZSXMzc3x448/AgDu378POzs77N69W21jeF5K/+zfcvFhm40IBK0WSnsZ0c/+RNUu\nXLiAoUOHoqysDM7Ozrhx4wZqampQUVGB9957D7m5uXBzc8OOHTuwbt06/P7779DT08Pu3buxYsUK\nnDp1CnFxcWCMdcuJgtIzJLlc3u6mCWFESFfQ1taGXC5HeXk56urq0KdPH7i5uWHr1q1YsmQJBg8e\nDG1tbcTFxQEAfvnlFwDAtGnTMHLkSIwaNQo//fQTtm3bps5hPDferIdECAEcHR2RkZGBTz/9FNbW\n1nj99ddx7tw5VFZWYtGiRejZsye33bx5E9evX+feGxMTgzNnziA6OhpmZmZqHMXzU/qQreUm2/a8\n//
77ndIhPqNDNtKV6urqsGTJEpw8eRLGxsaYPHkyIiMj26z78OFDeHp6YsSIEcjJyUFZWRns7Oy6\ntL+dQenrkFpusm3R2NiIa9euQV9fH3369NGIQCJE1c6fP48rV65g1KhR0NfXR8+ePdGjRw/ExsZi\n8eLFcHd3x5AhQ1BXV4fCwkL4+/vDxMQECxcuhJ2dHb744gtYWVlh2rRpKCoqUrgZvjtQOpAeX2y/\nRVVVFWbMmIGYmJhO7RQhmqqhoQFJSUk4d+4cevToAQ8PD2zfvh0uLi548OABZs6ciYqKChgaGmL0\n6NHw9/dHfn4+srOzcfr0aQBAcnIyRo4ciVWrVmHFihVqHlHHvPDNtaWlpZg6dSrOnz/fWX3iLTpk\nI0S1Xvikto6OjsKJNUIIeV5KH7KdOHFC4TVjDNevX8fHH3+MYcOGdXrHCCGaR+lAGjVqVJuHLD4+\nPvj88887vWOEEM2jdCC1PPushZaWFiwtLaGvr9/pnSKEaCalA8nW1laV/SCEkI6d1L5w4QLeffdd\njBgxAiNHjsTs2bNx4cIFVfWNEKJhlA6kQ4cO4dVXX8Vvv/2G4cOHw8vLC2VlZRg8eDCOHDmiyj4S\nQjSE0odsy5cvR1xcHDZu3Kiwf968eVi2bBmKi4s7vXOEEM2i9AypvLycu8P4cXPmzMGZM3RhHSHk\nxSkdSCYmJrhy5Uqr/X/++SdMTU07tVOEENVIT0/n9U23Sh+yhYeHIzo6Gtu2bcPo0aMBAMeOHcOc\nOXPw5ptvqqyDhF9UfZtM6boIlbb/NKoe2+M6Ok5/f38UFRVhz549mDJlCrf/6tWrsLW1hVwub/O2\npu5G6RnSv/71L3h6eiIkJAQmJiYwMTFBaGgoXnvtNXz88ceq7CMhBICNjU2rVTcyMzPRt29fNfWo\n8ykdSMbGxsjOzsbFixfx7bff4ttvv8XFixeRlZUFExMTVfaREALgrbfewrFjx1BVVcXty8zMxNSp\nU7nXJ06cgL+/P3r27AlLS0tMmTIFt2/ffmq7n376KRwcHGBoaIjXXnsNx44dU9kYnkXpQJo0aRJW\nrVoFR0dHvPHGG3jjjTfg4OCANWvW4J///Kcq+0gIAdCrVy+MHTsWX3/9NQDg1KlTuHXrFoKCgrg6\ntbW1mDNnDn799Vfk5+fj2rVrmDNnTrttpqWlITU1Fdu2bUN5eTkiIiIwbty4Ns8XdwWlA6moqAjj\nxo1rtT8kJEStiUqIJpk2bRr3NJHdu3dj8uTJ6NHj/04Fjx07FpMmTYKTkxOGDRuGDRs2YN++fe2u\ne//RRx8hNTUVwcHBcHBwQHx8PEaOHMk96aSrKX1S+/79+zA2Nm6139DQEHfv3u3UThFC2hYWFoZ3\n330Xv//+O/bu3Yt9+/ahrq6OK79+/TqWLl2KH3/8EdXV1ZDL5WhqasLNmzdhY2Oj0FZtbS0qKirw\n1ltvQSAQcPsbGhrU9kuc0oHk6OiIQ4cOwcnJSWH/oUOHYG9v3+kdI4S0pquri7fffhvR0dEwNjbG\na6+9hqNHj3LlkZGRaGxsxOeffw4bGxtcvnwZwcHBaGxsbNVWS5BlZWXBxcVFoUxdl/IoHUhz5szB\nkiVL8PDhQwQFBUEgEOCHH36AVCrFypUrVdlHQshjpk2bBj8/vzYfvHHixAl8/fXXEIlEAIDffvut\n3XaEQiGsra1x5coVhIWFqay/HaF0IInFYlRVVSEpKQkLFy4EAOjp6SExMRHx8fEq6yAhRJGvry+q\nq6vbfNSRo6Mj0tPT4eLigkuXLuGjjz5qtx2BQIDly5cjKSkJBgYGGD16NO7evYuDBw/Cx8cHfn5+\nqhxGm5QOJODR4uFLlizhHuHr6uoKIyMjlXSMkK6mzosyO6p3795t7v/iiy/w7rvvYtCgQRg0aBBW\nrlyJ8PDwdtuJj4+Hnp4e1q5di+joaFhYWGDkyJGYOHGiqrr+VC+8
yL8moUX+X+4rtYn68erJtZGR\nkRAIBArbk6sLyGQyDBs2DPr6+nB1dUV+fr5CeW1tLSIjI2FqagoLCwskJia2+slz165dsLe3h4GB\nAQICAnDp0iWVj40Q8my8CiTg0QWYN27c4Lbo6Giu7M6dOwgJCYGPjw/Kysowffp0hIeHKzwzTiwW\no6SkBIcPH0Z2djb27t2rcNK9oKAAMTExkEgkKCkpgVAoRGhoKJqamrp0nISQ1ngXSAYGBrC2tuY2\nQ0NDriwzMxOmpqbYuHEjXF1dsWzZMnh5eeGzzz4DANy9exeZmZnYvHkzvLy8EBgYiI8++ghbtmyB\nXC4HAGzevBlTpkzBrFmzMGjQIKSlpeHKlSv4/vvv1TJeQsj/4V0g5ebmwtLSEoMHD8bq1asVZi7F\nxcUICAhQuIhLJBJBJpMBePTQSoFAAF9fX4Xyqqoq7iEFxcXFCAwM5MqNjIzg7e3NtUEIUR9eBdK4\nceOwZ88eFBQUICEhARs2bMD777/PlVdVVUEoFCq8x9LSkrvZsKqqCubm5grPM7e0tOTKlGmjhVQq\nbXU+ixCiWh362V/VJk2axP396quvQltbG3FxcVi5cmW7v3A9rq3y5w0SqVQKqVTaKW0RQpTDqxnS\nkzw8PFBXV8ctn2BlZdVqJlNdXc3NeKysrFBTU6Pwq1pL/ZY6QqHwqW0QQtSH14FUXl4OQ0ND7iIw\nLy8vhft2gEe/mnl7ewN4FGCMMRw/flyhXCgUcvfbeXl5obCwkCuvr6+HTCbj2iCEqA+vAikhIQEy\nmQyVlZXIyclBQkICYmJiuEOld955B/fv38f8+fPx+++/Y+3atZDJZIiJiQEAmJubY+rUqYiPj0dx\ncTEKCwshkUggFouhpfVoqGKxGHv27EFaWhrOnj2LqKgo9O/fH8HBwWobNyHkEV6dQzp37hzCwsLw\n119/YcCAAYiJicGSJUu4cgsLC3z33XcQi8XYtm0bHBwckJOTg4EDB3J1tm7dCrFYDJFIBB0dHURG\nRiIpKYkrF4lE2L59O1JSUnDz5k0MHz4ceXl50NHR6dKxEv65nPJql31WR66O5yOBQIDCwkL4+/t3\naru8CiRlrgUaPnw4SktL2y03NjZGRkYGMjIy2q0TFRWFqKio5+ojIerQssj/44KDg1+66+d4FUiE\nkPYlJiZyK20Aj1bbeNnw6hwSIaR9xsbGCncx9OrVCwBQVlYGf39/GBgYwM7ODsnJyQq/NAsEAqSn\np8PX15e7f/P27dvYs2cPbG1tYWlpiTVr1nD1GxoaEBERgX79+sHIyAienp4oKCh4at+e1QdlUSAR\n0o3duXMHY8eORVhYGM6cOYP09HTs3r271U3pK1euRFJSEmQyGW7cuIGJEyciJycH+fn5+OSTT7B8\n+XKUl5cDAJqamvDKK68gLy8Pp06dwoQJEzB+/PhWl8t0tA/KoEAipJtYtWoVjI2NuS0vLw9btmzB\nmDFjsHDhQjg5OcHf3x/Jycn4/PPPFd4rFosRHByMwYMHY+bMmTh+/Dh27NgBV1dXTJs2Dc7Oztzl\nMkZGRpBIJBgyZAicnJywYsUK2Nratnu+Stk+KIPOIRHSTcTGxmLu3Lnc6z59+iAjIwP79+9XeABH\nc3Nzq8MlNzc37m+hUAgrKyuYm5sr7KuuruZeb9iwARkZGbhy5Qr+/vtvPHjwoN1HI505c0apPiiD\nAomQbsLc3LzVQzZqa2sxbdo0LF++/KnvffyyFoFA0OoyF4FAwK2IsWfPHiQnJyM1NRXu7u4wMjLC\nxIkT23xQQEf6oAwKJEK6MXd3dxw9erRVUL2IEydOICgoCBERj1bvrK+vx59//tklfaBzSIR0Y2Kx\nGOfPn0dsbCxOnTqF8+fPIysrC6tWrXruNh0dHXH8+HEcP34cZ8+eRWRk5FMXMOzMPtAMiZD/rzte\nPd2/f38cO3YMixYtgo+PD7S0
tODi4qJwrqmjYmNjUVJSgtDQUBgZGWHx4sXt/sLW2X2gRf47gBb5\np0X+iWrRIRshhDcokAghvEGBRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAI\nIbxBgUQI4Q0KJEIIb1AgEUJ4gwKJEMIbFEiEEN6gQCKE8AYFEiGEN2gJW8Irl1NeVWn73XGZWk1C\nMyRCCG9QIBFCeIMCiRDCGxRIhBDeoEAihPAGBRIhhDc0NpBWr16Nvn37wtDQEBMmTHjqkzkJIV1D\nIwNp165dWLVqFbZs2YITJ07g3r17mDJlirq7RYjG08gLI1NTU5GQkIDw8HAAQFpaGhwdHVFeXo5B\ngwapuXeEaC6NmyE1NDTg1KlTCAwM5PY5ODjAzs4OMplMjT0jhGjcDOnOnTuQy+UQCoUK+y0tLRXO\nI0mlUiQnJ7d6v0AgUHkfn2Sr6g/4oOvH1B51jZUxpupPJkrQuBmSsv/jSaVSMMZ4sbX0WxM2dY2V\n8IPGBVLv3r2hpaXV6le16urqVrMmQkjX0rhA0tPTg7u7OwoLC7l9FRUVqKyshLe3txp7RggRMA2c\nr6alpWH+/Pn46quvYGdnhwULFgAACgoK1NyztgkEAo05rNCksZLWNO6kNgBERUXh1q1biI2Nxf37\n9xEUFIQdO3aou1vt+uCDD9TdhS6jSWMlrWnkDIkQwk8adw6JEMJfFEiEEN6gQCKE8AYFEs9FRkZC\nIBAobBs3blSoI5PJMGzYMOjr68PV1RX5+fkK5QKBAEePHuVeX79+HS4uLhg/fjz+/vvvrhiGgpdx\nTKRzUCB1oWvXrj3X+yZNmoQbN25wW3R0NFd2584dhISEwMfHB2VlZZg+fTrCw8Nx8eLFNtu6cuUK\n/Pz84Obmhm+++Qa6urrP1ae2dGR83WVMpGtRIKlYVVUV1q1bB2dnZ0gkkudqw8DAANbW1txmaGjI\nlWVmZsLU1BQbN26Eq6srli1bBi8vL3z22Wet2vnzzz/h5+cHT09PZGVlQUdH57nH1RaJRAJnZ2es\nW7fumetLdZcxka5FgaQCcrkcBw8exMSJE9GvXz9kZ2cjISEBmzZtAgDExsbC2Ni43S02Nlahvdzc\nXFhaWmLw4MFYvXo1mpqauLLi4mIEBAQo3PQrEolarVzwxx9/wNfXFz4+PsjMzESPHp1/CdqmTZuQ\nkJCA7Oxs9OvXDxMnTsTBgwchl8tb1e0uYyJdjJFOc/XqVZaSksLs7OxY79692bx589jp06db1bt1\n6xa7ePFiu9utW7e4ullZWSw/P5+dPn2a7dq1i/Xu3ZstW7aMKw8KCmKLFy9WaH/Lli3slVde4V4D\nYLq6ukwkErHm5mYVjLy106dPs3nz5jELCwtmZ2fHUlJS2NWrV7v1mIjqUSB1ohkzZjAAbPbs2ezB\ngwcq+Ywvv/ySGRkZMblczhhjbMyYMUr9450wYQLT0dFheXl5KulXe+rr61lUVBQDwGbMmNFmne42\nJqI6dMjWiZKSkpCQkIB9+/bBxcUFUqkUlZWVrep19JDtcR4eHqirq8Pt27cBAFZWVkqtXDBv3jxI\nJBK8/fbbOHLkyIsP9hkqKyvxwQcfwNnZGbm5uUhISEBSUlKbdbvLmEgXUHcivowaGhpYVlYWCwoK\nYtra2kwkErHMzExu1tSRQ7Ynff3118zQ0JCbTWzatInZ2dkp1PH19WWJiYncawCssLCQMcbY0qVL\nmZGREfvxxx87edSMPXjwgGVmZjKRSMS0tbVZUFAQy8rKYg0NDU99H5/HRLoWBZKKVVRUMIlEwmxs\nbFhUVFSH379gwQL2yy+/sIqKCvaf//yH9e3bly1YsIArv337NuvVqxebN28eO3fuHFuzZg3T09Nj\nFy5c4Oo8/o+XMcbmzp3LzMzM2K+//vpCY3tSVFQU69evH5NIJKyiouKlGBPpWhRIXaSpqYldun
Sp\nw+8LDg5mvXv3Zrq6uszJyYklJyezhw8fKtT5+eefmYeHB9PV1WUuLi7swIEDCuVP/uOVy+Vs9uzZ\nzMLCgp05c+a5xtOWS5cusaampmfW605jIl2L7vYnhPAGndQmhPAGBRIhhDcokAghvEGBRAjhDQok\nQghvUCARQniDAqkbioyMxJgxY9TdDUI6HV2H1A3dv38fcrkcvXr1UndXCOlUFEiEEN6gQ7Zu6PFD\ntpa/d+zYAVtbW5iammL8+PGorq5WeM/hw4cxevRoGBoawszMDH5+fvjf//4HAGCMYf369XBwcICu\nri4cHR1brXFtZ2eHFStWIC4uDmZmZhAKhdi8eTMaGhoQHx+PXr16wcbGBps3b1Z4X21tLebNmwcb\nGxsYGhpi6NChyMnJUeG3Q7ozCqSXQElJCQoLC3HgwAF8//33OHnyJBYuXMiVHz58GMHBwfD09MTP\nP/8MmUyGiIgINDY2AgC2bt2KFStWYOnSpTh79iwWLVqEpUuXYufOnQqfk5qaioEDB6K0tBRz587F\n3LlzER4eDnt7e5SUlOC9997D3Llzce7cOQCPgu6NN97AqVOnkJWVhfLycsTFxWHy5Mm0XAhpm/pu\noyPPa8aMGUwkEnF/9+7dW+Hm1NWrVzNra2vu9ahRo1hoaGi77fXr148tWrRIYd/8+fOZvb0999rW\n1paNHz+ee93c3MxMTExYWFiYwr6ePXuy1NRUxhhjhYWFTE9Pj927d0+h7ZkzZyq0RUgLmiG9BP7x\nj39AT0+Pe21jY4Nbt25xr0tLSzF27Ng23/vXX3/h6tWr8PX1Vdjv5+eHyspK1NfXc/vc3d25v7W0\ntLg1sR/fJxQKucXVSkpK8Pfff8PGxkZhAbrdu3e3+wQRotloVfSXwJOP/REIBGBP/Fbx+IL5bXmy\n/Mn3A2j1RA+BQNDmvpZF/eVyOczMzFBSUvLMPhMC0DkkjeDp6YkffvihzTJTU1P069cPRUVFCvuP\nHTsGe3t7hccTddSwYcNw7949PHz4EE5OTgrbgAEDnrtd8vKiGZIGWLFiBUJCQjB//nxERUVBT08P\nP//8M0aMGAFnZ2csW7YMiYmJGDhwIPz9/VFQUIBt27Zhy5YtL/S5gYGBGDNmDN58802sXbsW7u7u\nuHv3Lk6cOAF9fX3Mnj27k0ZIXhYUSBpg7Nix+O677yCVSvHZZ59BV1cXHh4e3HmjuLg41NXVYdWq\nVZgzZw769++PNWvWYNasWS/0uQKBALm5uUhOTkZCQgKuXbsGc3NzDBkyBIsXL+6MoZGXDF0YSQjh\nDTqHRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbzx/wAGhSYGiuMWSAAA\nAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432.984x300 with 1 Axes>"
      ]
     },
     "metadata": {
      "tags": []
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.catplot(data=train_df,x='income',hue='sex',kind='count',size=3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "9rbnk2RfqJMI"
   },
   "source": [
    "#### Synthetic data distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 403,
     "status": "ok",
     "timestamp": 1579569932783,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "rYddZ57fo-cT",
    "outputId": "6114958e-0e74-469f-e227-bafff5a54225"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampling data with restrictions: [Restriction(column_name='sex', types=['Female'], percentages=[0.33]), Restriction(column_name='income', types=['>50K'], percentages=[0.3])]\n"
     ]
    }
   ],
   "source": [
    "column_types = ['sex:Female','income:>50K']\n",
    "percentages = [0.33,0.30]\n",
    "restrictions = create_restrictions(column_types,percentages)\n",
    "print('Sampling data with restrictions: {}'.format(restrictions))\n",
    "sampled_train_df = sample_data(train_df, restrictions=restrictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 85
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 345,
     "status": "ok",
     "timestamp": 1579569933432,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "MheqnB9yogzx",
    "outputId": "0bcb2ae6-a721-4941-ce13-02f8b2ef0ab8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Female group-size in synthetic data: 0.32999600749362734\n",
      "Male group-size in synthetic data: 0.6700039925063727\n",
      "Female base-rate in synthetic data: 0.29995346672871104\n",
      "Male base-rate in synthetic data: 0.3092225889255592\n"
     ]
    }
   ],
   "source": [
    "print('Female group-size in synthetic data: {}'.format(len(sampled_train_df[(sampled_train_df.sex=='Female')])/len(sampled_train_df)))\n",
    "print('Male group-size in synthetic data: {}'.format(len(sampled_train_df[(sampled_train_df.sex!='Female')])/len(sampled_train_df)))\n",
    "print('Female base-rate in synthetic data: {}'.format(len(sampled_train_df[(sampled_train_df.sex=='Female') & (sampled_train_df.income == '>50K')])/len(sampled_train_df[(sampled_train_df.sex=='Female')])))\n",
    "print('Male base-rate in synthetic data: {}'.format(len(sampled_train_df[(sampled_train_df.sex!='Female') & (sampled_train_df.income == '>50K')])/len(sampled_train_df[(sampled_train_df.sex!='Female')])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "height": 218
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 623,
     "status": "ok",
     "timestamp": 1579569938347,
     "user": {
      "displayName": "Preethi Lahoti",
      "photoUrl": "",
      "userId": "13345756409883217889"
     },
     "user_tz": 480
    },
    "id": "pcv_QZ8Onx8I",
    "outputId": "9b2a4fa8-e501-4754-f952-c8a4a6e71bdb"
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAASQAAADJCAYAAACKVE8EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAH+RJREFUeJzt3XlUVHX/B/D3gOybgjOgqKwGD5gL\nEKgo24iE0FEqfdRcEJPFCRdwZ6yBXNPHNNyyRCjReOjB4sEoFxAtayDIBe24/ILEFRS1AygB8/39\n4eE+joAOyjAX+bzOuUfmfu985/PlyPt87527CBhjDIQQwgNami6AEEKaUSARQniDAokQwhsUSIQQ\n3qBAIoTwBgUSIYQ3KJAIIbxBgUQI4Q0KJEIIb1AgEUJ4gwKpHQQCgaZLIOSlRoFECOENCiRCCG9Q\nIBFCeKNTAykrKwtisRhmZmYtjsccO3YMAoFAaRk6dKjSNjU1NQgPD4epqSksLCwQHx+PpqYmpW32\n7NkDOzs7GBgYwN/fH5cvX1Zqv3jxIvz8/GBgYAA7OzukpqaqZayEkPbr1ECqq6tDQEAAli1b1uY2\nN27c4JajR48qtUkkEhQVFeHIkSPIzMzE/v37sXr1aq49Ly8PUVFRkEqlKCoqgkgkQkhICBobGwEA\nDQ0NCAkJgaWlJYqKiiCVShEZGYmCggL1DJgQ0j5MA/Lz89mTH93ausdVV1czbW1tlpeXx63bvXs3\nE4lErKmpiTHGWFhYGJsxYwbXXlNTwwwMDNh///tfxhhj3377LTMwMGA1NTXcNtOnT2dvvfWWSnVr\n6NdFSLfRQ8N52IKtrS0UCgVGjhyJjz76CAMGDAAAFBcXQyAQwMfHh9tWLBajsrISZWVlcHBwQGFh\nodKMycjICF5eXpDL5QgNDUVhYSG8vLxgZGSk1IdUKm1Rh0wmQ2Jioko1uy/+4nmHq5LiDTPU2j8h\nfMGbg9p9+vTB7t278e233yItLQ2VlZXw9/fHgwcPAACVlZUwNzeHtrY29x6hUMi1Nf8rEomU+hUK\nhSq3P04mk4ExprQQQtSLNzMkJycnODk5ca89PDwwYMAA5OTkYOLEia0GQntPVKRQIYTfeDNDepKJ\niQkcHR1RXl4OALC0tER1dbXSt2rNM5vmWY9IJGox26mqquLaLS0tn9pOCNEs3gbSgwcP8Mcff8DG\nxgYA4ObmBsYYTpw4wW2Tl5cHkUgEOzs7AICnpyfy8/O59rq6Osjlcnh5eXHtcrkcdXV1Sn00txNC\nNKtTd9mqq6tx5coV7tygU6dOAQBcXFyQmpoKKysruLq64t69e0hMTISxsTHGjRsHADA3N8fUqVMR\nGxuL3bt3o7a2FlKpFBKJBFpaj3JVIpEgODgYPj4+8PLywocffoj+/fsjKCgIAPD666+jb9++mD17\nNqRSKeRyOfbv34/Dhw935q+BENKGTg2k7OxszJo1i3s9bNgwAEBZWRkaGhoQFxeHq1evwszMDN7e\n3jh69CiMjY257bdv3w6JRAKxWAwdHR2Eh4cjISGBaxeLxdi5cyeSkpJw8+ZNDB8+HDk5OdDR0QEA\n6Orq4uDBg4iKioK7uzusrKywa9cu+Pr6dtJvgBDyNAJGR3pVJhAIWj0wTl/7E9IxeHsMiRDS/VAg\nEUJ4gwKJEMIbFEiEEN6gQCKE8AYFEiGENyiQCCG8QYFECOENCiRCCG9QIBFCeIMCiRDCGxRIhBDe\noEAihPAGBRIhhDcokAghvEGBRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAI\nIbxBgUQI4Q0KJEIIb1AgEUJ4gwKJEMIbFEiEEN6gQCKE8AYFEiGENyiQCCG8QYFECOENCiRCCG9Q\nIBFCeKNTAykrKwtisRhmZmYQCAQt2u
VyOTw8PKCvrw8XFxfk5uYqtdfU1CA8PBympqawsLBAfHw8\nmpqalLbZs2cP7OzsYGBgAH9/f1y+fFmp/eLFi/Dz84OBgQHs7OyQmpra4eMkhDyfTg2kuro6BAQE\nYNmyZS3a7ty5g+DgYHh7e6OkpATTp09HWFgYLl26xG0jkUhQVFSEI0eOIDMzE/v378fq1au59ry8\nPERFRUEqlaKoqAgikQghISFobGwEADQ0NCAkJASWlpYoKiqCVCpFZGQkCgoK1D94QsgzCRhjrLM/\n9NixY/D398fjH/3JJ59g06ZNKCsr42ZPPj4+8PT0xMaNG3H37l0IhUIcPnwY/v7+AICUlBQsX74c\nN27cgJaWFt58802YmJggLS0NAFBbWwuhUIh///vfCA0NRXZ2NiZPnoyqqioYGRkBAGbMmIG6ujp8\n/fXXz6xbIBCgtV+X++IvXvh38jTFG2aotX9C+II3x5AKCwvh7++vtCsnFoshl8sBAMXFxRAIBPDx\n8VFqr6ysRFlZGddHQEAA125kZAQvLy+uj8LCQnh5eXFh9ORnPE4mk0EgECgthBD1UjmQrly50urs\ngDGGK1euvHAhlZWVEIlESuuEQiEqKyu5dnNzc2hrayu1N7ep2sfT2h8nk8nAGFNaCCHqpXIg2dnZ\noaqqqsX66upq2NnZvXAhz/qDb629vbMWChVC+E3lQGrrj7murg56enovXIilpWWLmUpVVRU3o7G0\ntER1dbXSt2rN2zdvIxKJntnH09oJIZrV41kbJCUlAXg0G9m4cSOMjY25tqamJvz0009wdXV94UI8\nPT3x8ccfK63Ly8uDl5cXAMDNzQ2MMZw4cQJ+fn5cu0gk4mZonp6eyM/Px8yZMwE8Cku5XI74+Hiu\nfdOmTairq4OhoWGLzyCEaNYzA+nLL78E8GiG9PXXXysdw9HV1YWdnR3Wr1+v0odVV1fjypUr3LlB\np06dAgC4uLjgnXfegUwmw4IFCxAVFYXs7GzI5XJ8/vnnAABzc3NMnToVsbGx2L17N2prayGVSiGR\nSKCl9WiiJ5FIEBwcDB8fH3h5eeHDDz9E//79ERQUBAB4/fXX0bdvX8yePRtSqRRyuRz79+/H4cOH\nVf19EULU6JmB1HwekL+/P7KystCrV6/n/rDs7GzMmjWLez1s2DAAQFlZGWxtbfHdd99BIpFgx44d\nsLe3R1ZWFgYOHMhtv337dkgkEojFYujo6CA8PBwJCQlcu1gsxs6dO5GUlISbN29i+PDhyMnJgY6O\nDoBHAXrw4EFERUXB3d0dVlZW2LVrF3x9fZ97TISQjqOR85C6KjoPiRD1euYM6XEFBQU4dOgQbt26\nBYVCodSWkpLSoYURQroflQNpw4YNWLp0KZycnGBtbU0nChJCOpzKgZScnIwtW7YgNjZWnfUQQrox\nlc9DunfvHkJDQ9VZCyGkm1M5kCZMmIC8vDx11kII6eZUDqQRI0ZAKpVi4cKFSE1Nxb59+5QWQkjH\n2LhxI/r37w9jY2PY2NggPT0dwKNzAl1dXdGzZ0+MHj0a586dAwCUl5fD3NwcP/74IwDg/v37sLW1\nxd69ezU2huel8tf+zScfttqJQNDiRmkvI/ran6jbxYsXMWzYMJSUlMDJyQk3btxAdXU1ysrK8N57\n7yE7Oxuurq7YtWsXNmzYgN9//x16enrYu3cvVq5cidOnTyMmJgaMsS45UVB5hqRQKNpcukMYEdIZ\ntLW1oVAoUFpaitraWvTp0weurq7Yvn07li5disGDB0NbWxsxMTEAgF9++QUAMG3aNIwcORKjRo3C\nTz/9hB07dmhyGM+NN/dDIoQADg4OSEtLwyeffAIrKyu8/vrrOH/+PMrLy7F48WL07NmTW27evInr\n169z742KisLZs2cRGRkJMzMzDY7i+am8y9Z8kW1b3n///Q4piM9ol410ptraWixduhSnTp2CsbEx\nJk
+ejPDw8Fa3ffjwIdzd3TFixAhkZWWhpKQEtra2nVpvR1D5PKTmi2ybNTQ04Nq1a9DX10efPn26\nRSARom4XLlxARUUFRo0aBX19ffTs2RM9evRAdHQ0lixZgiFDhmDo0KGora1Ffn4+/Pz8YGJigkWL\nFsHW1haff/45LC0tMW3aNBQUFChdDN8VqBxIj99sv1llZSVmzpyJqKioDi2KkO6qvr4eCQkJOH/+\nPHr06AE3Nzfs3LkTzs7OePDgAWbNmoWysjIYGhpi9OjR8PPzQ25uLjIzM3HmzBkAQGJiIkaOHIk1\na9Zg5cqVGh5R+7zwxbXFxcWYOnUqLly40FE18RbtshGiXi98UFtHR0fpwBohhDwvlXfZTp48qfSa\nMYbr16/jo48+goeHR4cXRgjpflQOpFGjRrW6y+Lt7Y3PPvuswwsj/3Ml6VW19j/g/bNq7Z8QVakc\nSM3PPmumpaUFoVAIfX39Di+KENI9qRxINjY26qyDEELad1D74sWLePfddzFixAiMHDkSc+bMwcWL\nF9VVGyGkm1E5kA4fPoxXX30Vv/32G4YPHw5PT0+UlJRg8ODBOHr0qDprJIR0Eyrvsq1YsQIxMTHY\nvHmz0vr58+dj+fLlKCws7PDiCCHdi8ozpNLSUu4K48fNnTsXZ8/StzSEkBen8gzJxMQEFRUVcHJy\nUlr/559/wtTUtMMLI/xEZ6V3bampqZDJZCgvL9d0Ka1SOZDCwsIQGRmJHTt2YPTo0QCA48ePY+7c\nuXjzzTfVViAhnUXdYfu49gavn58fCgoKsG/fPkyZMoVbf/XqVdjY2EChULR6WVNXo/Iu27/+9S+4\nu7sjODgYJiYmMDExQUhICF577TV89NFH6qyREALA2tq6xV030tPT0bdvXw1V1PFUDiRjY2NkZmbi\n0qVL+Oabb/DNN9/g0qVLyMjIgImJiTprJIQAeOutt3D8+HFUVlZy69LT0zF16lTu9cmTJ+Hn54ee\nPXtCKBRiypQpuH379lP7/eSTT2Bvbw9DQ0O89tprOH78uNrG8CwqB9KkSZOwZs0aODg44I033sAb\nb7wBe3t7rFu3Dv/85z/VWSMhBECvXr0wduxYfPXVVwCA06dP49atWwgMDOS2qampwdy5c/Hrr78i\nNzcX165dw9y5c9vsMyUlBcnJydixYwdKS0sxY8YMjBs3DhUVFWofT2tUDqSCggKMGzeuxfrg4GCN\nJioh3cm0adO4p4ns3bsXkydPRo8e/zsUPHbsWEyaNAmOjo7w8PDApk2bcODAgTbve79q1SokJycj\nKCgI9vb2iI2NxciRI7knnXQ2lQ9q379/H8bGxi3WGxoa4u7dux1aFCGkdaGhoXj33Xfx+++/Y//+\n/Thw4ABqa2u59uvXr2PZsmX48ccfUVVVBYVCgcbGRty8eRPW1tZKfdXU1KCsrAxvvfUWBAIBt76+\nvl5jt79VOZAcHBxw+PBhODo6Kq0/fPgw7OzsOrwwQkhLurq6ePvttxEZGQljY2O89tprOHbsGNce\nHh6OhoYGfPbZZ7C2tsaVK1cQFBSEhoaGFn01B1lGRgacnZ2V2jR1Ko/KgTR37lwsXboUDx8+RGBg\nIAQCAX744QfIZDKsXr1anTUSQh4zbdo0+Pr6tvrgjZMnT+Krr76CWCwGAPz2229t9iMSiWBlZYWK\nigqEhoaqrd72UDmQJBIJKisrkZCQgEWLFgEA9PT0EB8fj9jYWLUVSAhR5uPjg6qqqlYfdeTg4IDU\n1FQ4Ozvj8uXLWLVqVZv9CAQCrFixAgkJCTAwMMDo0aNx9+5dHDp0CN7e3vD19VXnMFqlciABj24e\nvnTpUu4Rvi4uLjAyMlJLYYR0tq50lnjv3r1bXf/555/j3XffxaBBgzBo0CCsXr0aYWFhbfYTGxsL\nPT09rF+/HpGRkbCwsMDIkSMxceJEdZX+VC98k//uRFM3+T9gskGt
/bfnjpF06QhRJ149uTY8PBwC\ngUBpefLuAnK5HB4eHtDX14eLiwtyc3OV2mtqahAeHg5TU1NYWFggPj6+xVeee/bsgZ2dHQwMDODv\n74/Lly+rfWyEkGfjVSABj07AvHHjBrdERkZybXfu3EFwcDC8vb1RUlKC6dOnIywsTOmZcRKJBEVF\nRThy5AgyMzOxf/9+pYPueXl5iIqKglQqRVFREUQiEUJCQtDY2Nip4ySEtMS7QDIwMICVlRW3GBoa\ncm3p6ekwNTXF5s2b4eLiguXLl8PT0xOffvopAODu3btIT0/H1q1b4enpiYCAAKxatQrbtm2DQqEA\nAGzduhVTpkzB7NmzMWjQIKSkpKCiogLff/+9RsZLCPkf3gVSdnY2hEIhBg8ejLVr1yrNXAoLC+Hv\n7690EpdYLIZcLgfw6KGVAoEAPj4+Su2VlZXcQwoKCwsREBDAtRsZGcHLy4vro5lMJmux+0gIUS9e\nBdK4ceOwb98+5OXlIS4uDps2bcL777/PtVdWVkIkEim9RygUchcbVlZWwtzcXOl55kKhkGtTpY9m\nMpkMjDGlhRCiXu362l/dJk2axP386quvQltbGzExMVi9enWb33A9rrV2mtkQ0nXwaob0JDc3N9TW\n1nK3T7C0tGwxk6mqquJmPJaWlqiurlb6Vq15++ZtRCLRU/sghGgOrwOptLQUhoaG3Elgnp6eStft\nAI++NfPy8gLwKMAYYzhx4oRSu0gk4q638/T0RH5+PtdeV1cHuVzO9UEI0RxeBVJcXBzkcjnKy8uR\nlZWFuLg4REVFcbtd77zzDu7fv48FCxbg999/x/r16yGXyxEVFQUAMDc3x9SpUxEbG4vCwkLk5+dD\nKpVCIpFAS+vRUCUSCfbt24eUlBScO3cOERER6N+/P4KCgjQ2bkLII7w6hnT+/HmEhobir7/+woAB\nAxAVFYWlS5dy7RYWFvjuu+8gkUiwY8cO2NvbIysrCwMHDuS22b59OyQSCcRiMXR0dBAeHo6EhASu\nXSwWY+fOnUhKSsLNmzcxfPhw5OTkQEdHp1PHSvjnStKrnfZZ7Tk7no8EAgHy8/Ph5+fXof3yKpBU\nORdo+PDhKC4ubrPd2NgYaWlpSEtLa3ObiIgIREREPFeNhGhC803+HxcUFPTSnT/Hq0AihLQtPj6e\nu9MG8OhuGy8bXh1DIoS0zdjYWOkqhl69egEASkpK4OfnBwMDA9ja2iIxMVHpm2aBQIDU1FT4+Phw\n12/evn0b+/btg42NDYRCIdatW8dtX19fjxkzZqBfv34wMjKCu7s78vLynlrbs2pQFQUSIV3YnTt3\nMHbsWISGhuLs2bNITU3F3r17W1yUvnr1aiQkJEAul+PGjRuYOHEisrKykJubi48//hgrVqxAaWkp\nAKCxsRGvvPIKcnJycPr0aUyYMAHjx49vcbpMe2tQBQUSIV3EmjVrYGxszC05OTnYtm0bxowZg0WL\nFsHR0RF+fn5ITEzEZ599pvReiUSCoKAgDB48GLNmzcKJEyewa9cuuLi4YNq0aXBycuJOlzEyMoJU\nKsXQoUPh6OiIlStXwsbGps3jVarWoAo6hkRIFxEdHY158+Zxr/v06YO0tDR8++23Sg/gaGpqarG7\n5Orqyv0sEolgaWkJc3NzpXVVVVXc602bNiEtLQ0VFRX4+++/8eDBgzYfjXT27FmValAFBRIhXYS5\nuXmLh2zU1NRg2rRpWLFixVPf+/hpLQKBoMVpLgKBgLsjxr59+5CYmIjk5GQMGTIERkZGmDhxYqsP\nCmhPDaqgQCKkCxsyZAiOHTvWIqhexMmTJxEYGIgZMx7dvbOurg5//vlnp9RAx5AI6cIkEgkuXLiA\n6OhonD59GhcuXEBGRgbWrFnz3H06ODjgxIkTOHHiBM6dO4fw8PCn3sCwI2ugGRLhFXWfLf20M6S7\n4tnT/fv3x/Hjx7F48WJ4e3tD
S0sLzs7OSsea2is6OhpFRUUICQmBkZERlixZ0uY3bB1dA93kvx3o\nJv/da6yk89EuGyGENyiQCCG8QYFECOENCiRCCG9QIBFCeIMCiRDCGxRIhBDeoEAihPAGBRIhhDco\nkAghvEGBRAjhDQokQghvUCARQniDAokQwhsUSIQQ3qBAIoTwBgUSIYQ3KJAIIbxBgUQI4Q0KJEII\nb1AgEUJ4gwKJEMIbFEiEEN6gQCKE8Ea3DaS1a9eib9++MDQ0xIQJE576ZE5CSOfoloG0Z88erFmz\nBtu2bcPJkydx7949TJkyRdNlEdLt9dB0AZqQnJyMuLg4hIWFAQBSUlLg4OCA0tJSDBo0SMPVEdJ9\ndbsZUn19PU6fPo2AgABunb29PWxtbSGXyzVYGSGk282Q7ty5A4VCAZFIpLReKBQqHUeSyWRITExs\n8X6BQKD2Gp9ko+4P+KDzx9QWTY2VMabuTyYq6HYzJFX/48lkMjDGeLE0190dFk2NlfBDtwuk3r17\nQ0tLq8W3alVVVS1mTYSQztXtAklPTw9DhgxBfn4+t66srAzl5eXw8vLSYGWEEAHrhvPVlJQULFiw\nAF9++SVsbW2xcOFCAEBeXp6GK2udQCDoNrsV3WmspKVud1AbACIiInDr1i1ER0fj/v37CAwMxK5d\nuzRdVps++OADTZfQabrTWElL3XKGRAjhp253DIkQwl8USIQQ3qBAIoTwBgUSz4WHh0MgECgtmzdv\nVtpGLpfDw8MD+vr6cHFxQW5urlK7QCDAsWPHuNfXr1+Hs7Mzxo8fj7///rszhqHkZRwT6RgUSJ3o\n2rVrz/W+SZMm4caNG9wSGRnJtd25cwfBwcHw9vZGSUkJpk+fjrCwMFy6dKnVvioqKuDr6wtXV1d8\n/fXX0NXVfa6aWtOe8XWVMZHORYGkZpWVldiwYQOcnJwglUqfqw8DAwNYWVlxi6GhIdeWnp4OU1NT\nbN68GS4uLli+fDk8PT3x6aeftujnzz//hK+vL9zd3ZGRkQEdHZ3nHldrpFIpnJycsGHDhmfeX6qr\njIl0LgokNVAoFDh06BAmTpyIfv36ITMzE3FxcdiyZQsAIDo6GsbGxm0u0dHRSv1lZ2dDKBRi8ODB\nWLt2LRobG7m2wsJC+Pv7K130KxaLW9y54I8//oCPjw+8vb2Rnp6OHj06/hS0LVu2IC4uDpmZmejX\nrx8mTpyIQ4cOQaFQtNi2q4yJdDJGOszVq1dZUlISs7W1Zb1792bz589nZ86cabHdrVu32KVLl9pc\nbt26xW2bkZHBcnNz2ZkzZ9iePXtY79692fLly7n2wMBAtmTJEqX+t23bxl555RXuNQCmq6vLxGIx\na2pqUsPIWzpz5gybP38+s7CwYLa2tiwpKYldvXq1S4+JqB8FUgeaOXMmA8DmzJnDHjx4oJbP+OKL\nL5iRkRFTKBSMMcbGjBmj0h/vhAkTmI6ODsvJyVFLXW2pq6tjERERDACbOXNmq9t0tTER9aFdtg6U\nkJCAuLg4HDhwAM7OzpDJZCgvL2+xXXt32R7n5uaG2tpa3L59GwBgaWmp0p0L5s+fD6lUirfffhtH\njx598cE+Q3l5OT744AM4OTkhOzsbcXFxSEhIaHXbrjIm0gk0nYgvo/r6epaRkcECAwOZtrY2E4vF\nLD09nZs1tWeX7UlfffUVMzQ05GYTW7ZsYba2tkrb+Pj4sPj4eO41AJafn88YY2zZsmXMyMiI/fjj\njx08asYePHjA0tPTmVgsZtra2iwwMJBlZGSw+vr6p76Pz2MinYsCSc3KysqYVCpl1tbWLCIiot3v\nX7hwIfvll19YWVkZ+89//sP69u3LFi5cyLXfvn2b9erVi82fP5+dP3+erVu3junp6bGLFy9y2zz+\nx8sYY/PmzWNmZmbs119/faGxPSkiIoL169ePSaVSVlZW9lKMiXQuCqRO0tjYyC5fvtzu9wUFBb
He\nvXszXV1d5ujoyBITE9nDhw+Vtvn555+Zm5sb09XVZc7OzuzgwYNK7U/+8SoUCjZnzhxmYWHBzp49\n+1zjac3ly5dZY2PjM7frSmMinYuu9ieE8AYd1CaE8AYFEiGENyiQCCG8QYFECOENCiRCCG9QIBFC\neIMCqQsKDw/HmDFjNF0GIR2OzkPqgu7fvw+FQoFevXppuhRCOhQFEiGEN2iXrQt6fJet+eddu3bB\nxsYGpqamGD9+PKqqqpTec+TIEYwePRqGhoYwMzODr68v/u///g8AwBjDxo0bYW9vD11dXTg4OLS4\nx7WtrS1WrlyJmJgYmJmZQSQSYevWraivr0dsbCx69eoFa2trbN26Vel9NTU1mD9/PqytrWFoaIhh\nw4YhKytLjb8d0pVRIL0EioqKkJ+fj4MHD+L777/HqVOnsGjRIq79yJEjCAoKgru7O37++WfI5XLM\nmDEDDQ0NAIDt27dj5cqVWLZsGc6dO4fFixdj2bJl2L17t9LnJCcnY+DAgSguLsa8efMwb948hIWF\nwc7ODkVFRXjvvfcwb948nD9/HsCjoHvjjTdw+vRpZGRkoLS0FDExMZg8eTLdLoS0TnOX0ZHnNXPm\nTCYWi7mfe/furXRx6tq1a5mVlRX3etSoUSwkJKTN/vr168cWL16stG7BggXMzs6Oe21jY8PGjx/P\nvW5qamImJiYsNDRUaV3Pnj1ZcnIyY4yx/Px8pqenx+7du6fU96xZs5T6IqQZzZBeAv/4xz+gp6fH\nvba2tsatW7e418XFxRg7dmyr7/3rr79w9epV+Pj4KK339fVFeXk56urquHVDhgzhftbS0uLuif34\nOpFIxN1craioCH///Tesra2VbkC3d+/eNp8gQro3uiv6S+DJx/4IBAKwJ76rePyG+a15sv3J9wNo\n8UQPgUDQ6rrmm/orFAqYmZmhqKjomTUTAtAxpG7B3d0dP/zwQ6ttpqam6NevHwoKCpTWHz9+HHZ2\ndkqPJ2ovDw8P3Lt3Dw8fPoSjo6PSMmDAgOful7y8aIbUDaxcuRLBwcFYsGABIiIioKenh59//hkj\nRoyAk5MTli9fjvj4eAwcOBB+fn7Iy8vDjh07sG3bthf63ICAAIwZMwZvvvkm1q9fjyFDhuDu3bs4\nefIk9PX1MWfOnA4aIXlZUCB1A2PHjsV3330HmUyGTz/9FLq6unBzc+OOG8XExKC2thZr1qzB3Llz\n0b9/f6xbtw6zZ89+oc8VCATIzs5GYmIi4uLicO3aNZibm2Po0KFYsmRJRwyNvGToxEhCCG/QMSRC\nCG9QIBFCeIMCiRDCGxRIhBDeoEAihPAGBRIhhDcokAghvEGBRAjhjf8HfxQuPFWSGUUAAAAASUVO\nRK5CYII=\n",
      "text/plain": [
       "<Figure size 432.984x300 with 1 Axes>"
      ]
     },
     "metadata": {
      "tags": []
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Group sizes remain the same, base-rate changes.\n",
    "sns.catplot(data=sampled_train_df,x='income',hue='sex',hue_order=['Male','Female'],order=['<=50K','>50K'],kind='count',size=3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "-4bKIvkMmwKL"
   },
   "source": [
    "## Sample data with specified label-noise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "T0PdZDo45W9J"
   },
   "outputs": [],
   "source": [
    "# Write 10 versions (0-9) of the training set for every label-flip fraction.\n",
    "fractions = [0.1, 0.2, 0.3, 0.4, 0.5]\n",
    "for frac in fractions:\n",
    "  for version in np.arange(10):\n",
    "    # flip_dict maps each income label to the other one.\n",
    "    # NOTE(review): `frac` is presumably the share of rows whose label gets flipped -- confirm\n",
    "    # against sample_data_and_flip_class_label, which is defined outside this cell.\n",
    "    sampled_train_df = sample_data_and_flip_class_label(train_df, frac=frac, flip_dict={'<=50K':'>50K','>50K':'<=50K'})\n",
    "    # Build the file name first so the join/format parentheses stay balanced.\n",
    "    output_file_name = 'income_flip_labels{}_version{}_train.csv'.format(frac,version)\n",
    "    output_file_path = os.path.join(dataset_base_dir,output_file_name)\n",
    "    # The `with` block closes the file on exit, so no explicit close() is needed.\n",
    "    with open(output_file_path, mode='w') as output_file:\n",
    "      # Only the `feature_names` columns are written, without an index column or header row.\n",
    "      sampled_train_df.to_csv(output_file,index=False,columns=feature_names,header=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "AICIIZkgmmwY"
   },
   "source": [
    "## Sample data with specified group-sizes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "K_lpbfUM5ZHq"
   },
   "outputs": [],
   "source": [
    "# Write 10 versions (0-9) of the training set for every 'sex:Female' setting from 0.1 to 0.9.\n",
    "column_types = ['sex:Female']\n",
    "_percentages =  [[0.1],[0.2],[0.3],[0.4],[0.5],[0.6],[0.7],[0.8],[0.9]]\n",
    "for percentages in _percentages:\n",
    "  for version in np.arange(10):\n",
    "    # NOTE(review): each value is presumably the target share of 'sex:Female' rows in the\n",
    "    # sample -- confirm against create_restrictions / sample_data, defined outside this cell.\n",
    "    restrictions = create_restrictions(column_types,percentages)\n",
    "    sampled_train_df = sample_data(train_df, restrictions=restrictions)\n",
    "\n",
    "    # Build the file name first so the join/format parentheses stay balanced.\n",
    "    output_file_name = 'female_groupsize{}_version{}_train.csv'.format(percentages[0],version)\n",
    "    output_file_path = os.path.join(dataset_base_dir,output_file_name)\n",
    "    # The `with` block closes the file on exit, so no explicit close() is needed.\n",
    "    with open(output_file_path, mode='w') as output_file:\n",
    "      # Only the `feature_names` columns are written, without an index column or header row.\n",
    "      sampled_train_df.to_csv(output_file,index=False,columns=feature_names,header=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "Dj7g4Uj75gRY"
   },
   "source": [
    "## Sample data with specified base-rates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "PNFiR4Ge5bZn"
   },
   "outputs": [],
   "source": [
    "# Write 10 versions (0-9) of the training set for every 'income:>50K' setting from 0.1 to 0.9,\n",
    "# keeping the 'sex:Female' setting fixed at 0.33.\n",
    "column_types = ['sex:Female','income:>50K']\n",
    "_percentages =  [[0.33,0.1],[0.33,0.2],[0.33,0.3],[0.33,0.4],[0.33,0.5],[0.33,0.6],[0.33,0.7],[0.33,0.8],[0.33,0.9]]\n",
    "for percentages in _percentages:\n",
    "  for version in np.arange(10):\n",
    "    # NOTE(review): the two values are presumably target shares for the matching entries of\n",
    "    # column_types -- confirm against create_restrictions / sample_data, defined outside this cell.\n",
    "    restrictions = create_restrictions(column_types,percentages)\n",
    "    sampled_train_df = sample_data(train_df, restrictions=restrictions)\n",
    "\n",
    "    # The file name records percentages[1], i.e. the value paired with 'income:>50K'.\n",
    "    # Build the file name first so the join/format parentheses stay balanced.\n",
    "    output_file_name = 'female_baserate{}_version{}_train.csv'.format(percentages[1],version)\n",
    "    output_file_path = os.path.join(dataset_base_dir,output_file_name)\n",
    "    # The `with` block closes the file on exit, so no explicit close() is needed.\n",
    "    with open(output_file_path, mode='w') as output_file:\n",
    "      # Only the `feature_names` columns are written, without an index column or header row.\n",
    "      sampled_train_df.to_csv(output_file,index=False,columns=feature_names,header=False)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "last_runtime": {
    "build_target": "//research/colab/notebook:notebook_backend_py3",
    "kind": "private"
   },
   "name": "CreateUCISyntheticDataset.ipynb",
   "provenance": [
    {
     "file_id": "1EYOGQiordiZp7CQMm8UJl33_tH9iB8rN",
     "timestamp": 1579567890036
    }
   ]
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
