
# Pre-process the IHDP data (extra cleaning and recoding) and export it as plain text readable by MATLAB

load("example.data") # expected to supply the `ihdp` data frame used below

# Columns intentionally left out of the positional selection below:
#   - bwg: we are not sure what it is
#   - livwho: we are not sure how it is encoded
#   - dose400, othstudy, site*: believed not relevant for the "population study" simulated here

sel_vars <- c(
  1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17,
  19, 20, 21, 23, 24, 25, 26, 35, 36, 37, 38
)

# Keep only rows with treat == 1 (rows where treat is NA are dropped too),
# restricted to the selected columns.
ihdp_simple <- ihdp[ihdp$treat %in% 1, sel_vars]

# Discard rows whose iqsb.36 value is missing.
sel_rows <- !is.na(ihdp_simple$iqsb.36)
ihdp_simple <- ihdp_simple[sel_rows, , drop = FALSE]

# Disabled alternative: filter on iqsb.36 only, without the treat == 1 restriction.
#sel_rows <- !is.na(ihdp$iqsb.36)
#ihdp_simple <- ihdp[sel_rows, sel_vars]

# Remaining variables (description taken from Hill (2011), supplementary material):

#  1. iqsb.36: IQ measure at 3 years (outcome)
#  2. ncdctt: number of days of treatment (treatment)
#  3. bw: birthweight of child in grams
#  4. momage: mom's age when she gave birth to the child
#  5. nnhealth: neo-natal health index: some function of birth variables (number of days in hospital, gestational age, ...) that is supposed to measure neonatal health
#  6. birth.o: birth order
#  7. parity: number of children the mother has given birth to
#  8. moreprem: number of other children mom has given birth to prematurely
#  9. cigs: did mom smoke cigarettes when pregnant
# 10. alcohol: did mom consume alcohol when pregnant
# 11. ppvt.imp: mother's ppvt measured 1 year post-birth (some values imputed)
# 12. female: female indicator
# 13. mlt.birt: number of multiple births the mother has had
# 14. b.marry: indicator for whether mom was married when child born
# 15. language: primary language spoken at home
# 16. whenpren: what trimester did the mother begin prenatal care
# 17. drugs: did mom ever use drugs when pregnant
# 18-21. mom.lths, mom.hs, mom.scoll, mom.coll: indicators of mom's education level
# 22-24. momblack, momhisp, momwhite: indicators of mom's race
# 25. workdur.imp: indicator for whether mom worked during her pregnancy

### Some binarizations

# Recode several covariates of `ihdp_simple` in place as 0/1 indicators.
# `which()` is used for the index so that rows where the tested column is NA
# are left untouched; values that already match the target coding are kept.
ihdp_simple$birth.o[which(ihdp_simple$birth.o > 1)]   <- 0 # Binarize birth order, first (1) / not first (0)
ihdp_simple$parity[which(ihdp_simple$parity > 1)]     <- 0 # Binarize parity, single child (1) / more than one (0)
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1 # Binarize moreprem, yes (1) / no (0); values above 1 collapse to 1
ihdp_simple$cigs[which(ihdp_simple$cigs > 0)]         <- 1 # Binarize cigs, some (1) / none (0)
ihdp_simple$alcohol[which(ihdp_simple$alcohol > 0)]   <- 1 # Binarize alcohol, some (1) / none (0)
ihdp_simple$mlt.birt[which(ihdp_simple$mlt.birt > 0)] <- 1 # Binarize mlt.birt, some (1) / none (0)
ihdp_simple$language[which(ihdp_simple$language > 1)] <- 0 # Binarize language, English (1) / not English (0)

# Shift drugs down by one so that it becomes no (1) / yes (0).
# NOTE(review): this assumes the raw column is coded 1/2 -- confirm against the data dictionary.
ihdp_simple$drugs <- ihdp_simple$drugs - 1

# A statement that used to follow here,
#   ihdp_simple$drugs[which(ihdp_simple$language == 1)] <- 0
# was a copy-paste of the language recode and overwrote `drugs` with 0 for
# every row whose language is English. It has been removed so that `drugs`
# keeps the coding produced by the subtraction above.

### Collapse the one-of-N encoding

# Row positions for each of the four one-of-N education indicators.
momed1.idx <- which(ihdp_simple$mom.lths == 1)
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)

# Ordinal code per row: 0 by default, then 1 (mom.hs), 2 (mom.scoll),
# 3 (mom.coll). Assignments run in this order, so a later level overrides an
# earlier one if a row has more than one indicator set.
momed <- numeric(nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3

# Stored column is the two-level version: codes 0-1 become 0, codes 2-3 become 1.
ihdp_simple$momed <- as.numeric(momed >= 2)

### Re-select columns and take note of binary encodings for consultation, if useful

# Drop the four one-of-N education indicators (the `momed` column replaces
# them) together with momblack and momhisp, leaving momwhite as the only race
# indicator. The lookup previously used the misspelled name "mom.scholl",
# which matched nothing and left mom.scoll in the exported data.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll", "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
ihdp_simple <- ihdp_simple[, sel_vars, drop = FALSE]

# Column positions (after the drop above) of the variables treated as binary,
# in the order listed; names that are not present are skipped.
# NOTE(review): momed and workdur.imp are not listed here -- confirm whether
# that is intentional.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol",
               "female", "mlt.birt", "b.marry", "language", "drugs",
               "momwhite")
bin_vars <- match(bin_names, ns)
bin_vars <- bin_vars[!is.na(bin_vars)]

### Export

# Data matrix: one row per record, space-separated, no header or row labels.
write.table(
  x = ihdp_simple,
  file = "ihdp.dat",
  sep = " ",
  row.names = FALSE,
  col.names = FALSE
)

# Column names of the exported data, one per line, in the same order as the
# columns of ihdp.dat.
write.table(
  x = names(ihdp_simple),
  file = "ihdp.names",
  sep = " ",
  row.names = FALSE,
  col.names = FALSE
)
