"""
Helpers to load, and optionally standardize, the ACIC 2018 data (LBIDD).
Dataset description: https://www.researchgate.net/publication/11523952_Infant_Mortality_Statistics_from_the_1999_Period_Linked_BirthInfant_Death_Data_Set
"""
import os
import pandas as pd
import numpy as np
from sklearn import preprocessing


def load_and_format_covariates(file_path='~/ml/IBM-Causal-Inference-Benchmarking-Framework/data/LBIDD/x.csv'):
    """Read the covariate table from a comma-separated file.

    Args:
        file_path: Location of the CSV file. Its first row is used as the
            header and it must contain a ``sample_id`` column.

    Returns:
        A ``pandas.DataFrame`` indexed by ``sample_id``; no further
        formatting is applied to the columns.
    """
    return pd.read_csv(file_path, sep=',', header=0, index_col='sample_id')


def load_treatment_and_outcome(covariates, file_path, standardize=True):
    """Load treatment and outcome columns and align them with the covariates.

    The CSV at ``file_path`` is read with ``sample_id`` as its index and
    inner-joined onto ``covariates``, so only samples present in both tables
    are returned.

    Args:
        covariates: DataFrame of covariates indexed by ``sample_id``.
        file_path: CSV file containing at least the columns ``sample_id``,
            ``z`` (read as the treatment) and ``y`` (read as the outcome).
        standardize: When true, the covariate matrix is passed through
            ``sklearn.preprocessing.StandardScaler.fit_transform``.

    Returns:
        A tuple ``(t, y, index, x)`` where ``t`` and ``y`` are column
        vectors of shape ``(n, 1)``, ``index`` is the index of the joined
        table, and ``x`` is the 2-D array of covariate values.

    Raises:
        KeyError: If the joined table has no ``z`` or ``y`` column.
    """
    output = pd.read_csv(file_path, index_col='sample_id', header=0, sep=',')

    # Inner join: keep only the samples that appear in both tables.
    dataset = covariates.join(output, how='inner')
    t = dataset['z'].values
    y = dataset['y'].values
    # Select the covariate columns by name rather than by dropping the last
    # two columns positionally, so extra columns in the treatment/outcome
    # file never leak into x. Indexing dataset.values keeps the same common
    # dtype the positional slice produced.
    is_covariate = dataset.columns.isin(covariates.columns)
    x = dataset.values[:, is_covariate]
    if standardize:
        normal_scalar = preprocessing.StandardScaler()
        x = normal_scalar.fit_transform(x)
    return t.reshape(-1, 1), y.reshape(-1, 1), dataset.index, x


if __name__ == '__main__':
    # Intentionally a no-op: running this module as a script does nothing.

    pass
