#  5. nnhealth: neo-natal health index, some function of birth variables (number of days in hospital, gestational age, ...) that is supposed to measure neonatal health
#  6. birth.o: birth order
#  7. parity: number of children the mother has given birth to
#  8. moreprem: number of other children mom has given birth to prematurely
#  9. cigs: did mom smoke cigarettes when pregnant
# 10. alcohol: did mom consume alcohol when pregnant
# 11. ppvt.imp: mother’s ppvt measured 1 year post-birth (some values imputed)
# 12. female: female indicator
# 13. mlt.birt: number of multiple births the mother has had
# 14. b.marry: indicator for whether mom was married when child born
# 15. language: primary language spoken at home
# 16. whenpren: what trimester did the mother begin prenatal care
# 17. drugs: did mom ever use drugs when pregnant
# 18-21. mom.lths, mom.hs, mom.scoll, mom.coll: indicators of mom's education level
# 22-24. momblack, momhisp, momwhite: indicators of mom's race
# 25. workdur.imp: indicator for whether mom worked during her pregnancy
### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1, overwriting the drug indicator for every
# English-speaking household. It has been removed.

### Collapse the one-of-N encoding
# momed is an ordinal code built from the four education dummies:
# 0 = default (rows flagged by mom.lths keep it), 1 = mom.hs,
# 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed

### Re-select columns, reselect rows, and take note of binary encodings
# Drop the education dummies (now summarised by momed) and two of the three
# race dummies. The original list misspelled "mom.scoll" as "mom.scholl", so
# that column was silently kept.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll",
                "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
# Keep only rows with an observed outcome.
sel_rows <- !is.na(ihdp_simple$iqsb.36)
ihdp_simple <- ihdp_simple[sel_rows, sel_vars]
# Positions of the binary columns in the reduced data frame.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol", "female",
               "mlt.birt", "b.marry", "language", "drugs", "momwhite")
bin_vars <- unlist(lapply(bin_names, function(nm) which(ns == nm)))
# Sanity checks: size and number of missing cells.
dim(ihdp_simple)
sum(is.na(ihdp_simple))
sum(is.na(ihdp))
# Rebuild the working data: treated units only, selected columns (positions
# refer to the columns of ihdp), rows with an observed outcome.
sel_vars <- c(1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 35, 36, 37, 38)
ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
sel_rows <- !is.na(ihdp_simple$iqsb.36)
ihdp_simple <- ihdp_simple[sel_rows, ]
sum(is.na(ihdp_simple))

### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
# (The original wrote to a non-existent column "lbirth.o" instead of birth.o.)
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1; it has been removed.

### Collapse the one-of-N encoding
# momed: 0 = default (mom.lths rows), 1 = mom.hs, 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed
# Missing-value checks after the recoding.
sum(is.na(ihdp_simple))
sum(is.na(momed))
# Rebuild the working data (treated units, selected columns, observed outcome)
# and check that no missing cells remain.
sel_vars <- c(1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 35, 36, 37, 38)
ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
sel_rows <- !is.na(ihdp_simple$iqsb.36)
ihdp_simple <- ihdp_simple[sel_rows, ]
sum(is.na(ihdp_simple))
# Binarize birth order, first (1) / not first (0). The original assigned into
# "lbirth.o", a column that does not exist, instead of recoding birth.o.
ihdp_simple$birth.o[which(ihdp_simple$birth.o > 1)] <- 0
sum(is.na(ihdp_simple))
# Start again from a fresh copy of the selected data.
ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
sel_rows <- !is.na(ihdp_simple$iqsb.36)
ihdp_simple <- ihdp_simple[sel_rows, ]
# Remaining variables (description taken from Hill (2011), supplementary material):
#  1. iqsb.36: IQ measure at 3 years
#  2. ncdctt: number of days of treatment
#  3. bw: birthweight of child in grams
#  4. momage: mom's age when she gave birth to the child
#  5. nnhealth: neo-natal health index, some function of birth variables (number of days in hospital, gestational age, ...) that is supposed to measure neonatal health
#  6. birth.o: birth order
#  7. parity: number of children the mother has given birth to
#  8. moreprem: number of other children mom has given birth to prematurely
#  9. cigs: did mom smoke cigarettes when pregnant
# 10. alcohol: did mom consume alcohol when pregnant
# 11. ppvt.imp: mother’s ppvt measured 1 year post-birth (some values imputed)
# 12. female: female indicator
# 13. mlt.birt: number of multiple births the mother has had
# 14. b.marry: indicator for whether mom was married when child born
# 15. language: primary language spoken at home
# 16. whenpren: what trimester did the mother begin prenatal care
# 17. drugs: did mom ever use drugs when pregnant
# 18-21. mom.lths, mom.hs, mom.scoll, mom.coll: indicators of mom's education level
# 22-24. momblack, momhisp, momwhite: indicators of mom's race
# 25. workdur.imp: indicator for whether mom worked during her pregnancy
### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1; it has been removed.

### Collapse the one-of-N encoding
# momed: 0 = default (mom.lths rows), 1 = mom.hs, 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed

### Re-select columns, reselect rows, and take note of binary encodings
# Drop the education dummies (now summarised by momed) and two of the three
# race dummies. The original list misspelled "mom.scoll" as "mom.scholl", so
# that column was silently kept.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll",
                "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
ihdp_simple <- ihdp_simple[, sel_vars]
# Positions of the binary columns in the reduced data frame.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol", "female",
               "mlt.birt", "b.marry", "language", "drugs", "momwhite")
bin_vars <- unlist(lapply(bin_names, function(nm) which(ns == nm)))
sum(is.na(ihdp_simple))
# Manual inspection of the prepared data. fix() opens an editor, so it is
# only invoked in an interactive session.
if (interactive()) {
  fix(ihdp_simple)
}
hist(ihdp_simple$whenpren)
ihdp_simple$whenpren
# Help lookups for the misspelled "cvwrite"/"csvwrite" were dropped;
# write.table is the function actually used below.
?write.table
# Export the data (no header, space separated) and, separately, the column
# names held in ns.
write.table(ihdp_simple, "ihdp.dat", sep = " ", col.names = FALSE, row.names = FALSE)
ns
write.table(ns, "ihdp.names", sep = " ", col.names = FALSE, row.names = FALSE)
# Sizes of the prepared and raw data, and the number of treated units.
dim(ihdp_simple)
dim(ihdp)
sum(ihdp$treat == 1)
# Range checks. The original also called the undefined function mas() and
# took max() of the raw outcome without na.rm; both were removed in favour of
# the corrected calls that followed them.
max(ihdp_simple$ncdctt)
ihdp_simple$iqsb.36
min(ihdp_simple$iqsb.36)
max(ihdp_simple$iqsb.36)
max(ihdp$iqsb.36, na.rm = TRUE)
min(ihdp$iqsb.36, na.rm = TRUE)
# NOTE(review): machine-specific absolute path; the script only runs where
# this file exists. Consider a path relative to the project directory.
source('C:/Users/ricardo/Desktop/nips_2016/data/hill/code/functions.R')
# Covariate names, split into continuous and categorical groups.
covs.cont <- c("bw","momage","nnhealth","birth.o","parity","moreprem","cigs","alcohol","ppvt.imp")
covs.cat <- c("bwg","female","mlt.birt","b.marry","livwho","language","whenpren","drugs","othstudy","mom.lths","mom.hs","mom.coll","mom.scoll","site1","site2","site3","site4","site5","site6","site7","site8","momblack","momhisp","momwhite","workdur.imp")
covs <- c(covs.cont, covs.cat)
ncovs <- length(covs)
treat <- ihdp$treat

#### FIRST OLS
# lm() is given a data frame; its first column (iqsb.36) is the response and
# the remaining columns are the regressors.
use <- ihdp[, c("iqsb.36", "treat", covs)]
summary(lm(use))$coefficients[1:3, ]
# Add the number of treatment days, then its square, and plot residuals
# against treatment days.
# NOTE(review): the x vector only aligns with the residuals if iqsb.36 is the
# only column with missing values -- confirm.
mod <- lm(ihdp[, c("iqsb.36", "treat", "ncdctt", covs)])
summary(mod)$coefficients[1:3, ]
plot(x = ihdp$ncdctt[!is.na(ihdp$iqsb.36)], y = mod$residuals)
ihdp$ncdct2 <- ihdp$ncdctt^2
mod <- lm(ihdp[, c("iqsb.36", "treat", "ncdctt", "ncdct2", covs)])
summary(mod)$coefficients[1:4, ]
plot(x = ihdp$ncdctt[!is.na(ihdp$iqsb.36)], y = mod$residuals)

#### BART
use <- ihdp[, c("iqsb.36", "treat", "ncdctt", covs)]
# Training inputs: complete cases, outcome column removed.
xt <- as.matrix(na.omit(use)[, -1])
# Test inputs: treated units as observed (xp1) stacked on top of a copy whose
# first two columns (treat, ncdctt) are set to zero (xp2).
xp1 <- as.matrix(use[use$treat == 1, -1])
xp2 <- xp1
xp2[, c(1:2)] <- 0
# The zeroing must happen before the stacking; the original bound xp first,
# which made both halves identical and every estimated effect exactly zero.
xp <- rbind(xp1, xp2)
nt <- sum(use$treat == 1)
y <- as.numeric(na.omit(use)[, 1])
# Install BayesTree only when it is missing, then attach it (the original
# called library() before install.packages()).
if (!requireNamespace("BayesTree", quietly = TRUE)) {
  install.packages("BayesTree")
}
library(BayesTree)
bart.tot <- bart(x.train = xt, y.train = y, x.test = xp)
# Mean difference between predictions under the observed treatment and under
# zero treatment, for the treated units.
tmpa <- mean(bart.tot$yhat.test.mean[1:nt]) - mean(bart.tot$yhat.test.mean[(nt + 1):(2 * nt)])
tmpa
ndraws <- nrow(bart.tot$yhat.test)
# Same difference per posterior draw; its spread gives a normal-approximation
# 95% interval around tmpa.
tmp <- apply(bart.tot$yhat.test[, 1:nt] - bart.tot$yhat.test[, (nt + 1):(2 * nt)], 1, mean)
sd <- sqrt(var(tmp)) # name kept from the original; calls to sd() still resolve to the function
ci <- c(tmpa - 1.96 * sd, tmpa + 1.96 * sd)
ci
set.seed(3847293)
############# first load the data
load("example.data")
# NOTE(review): the original additionally sourced
# 'C:/Users/ricardo/Desktop/nips_2016/data/hill/code/functions.R' and repeated
# the covariate definitions three times; presumably the same file, so a single
# relative source() is kept -- confirm.
source("functions.R")
# Covariate names, split into continuous and categorical groups.
covs.cont <- c("bw","momage","nnhealth","birth.o","parity","moreprem","cigs","alcohol","ppvt.imp")
covs.cat <- c("bwg","female","mlt.birt","b.marry","livwho","language","whenpren","drugs","othstudy","mom.lths","mom.hs","mom.coll","mom.scoll","site1","site2","site3","site4","site5","site6","site7","site8","momblack","momhisp","momwhite","workdur.imp")
covs <- c(covs.cont, covs.cat)
ncovs <- length(covs)
treat <- ihdp$treat

#### FIRST OLS
# lm() is given a data frame; its first column (iqsb.36) is the response and
# the remaining columns are the regressors.
use <- ihdp[, c("iqsb.36", "treat", covs)]
summary(lm(use))$coefficients[1:3, ]
# NOTE(review): the x vector in the residual plots only aligns with the
# residuals if iqsb.36 is the only column with missing values -- confirm.
mod <- lm(ihdp[, c("iqsb.36", "treat", "ncdctt", covs)])
summary(mod)$coefficients[1:3, ]
plot(x = ihdp$ncdctt[!is.na(ihdp$iqsb.36)], y = mod$residuals)
ihdp$ncdct2 <- ihdp$ncdctt^2
mod <- lm(ihdp[, c("iqsb.36", "treat", "ncdctt", "ncdct2", covs)])
summary(mod)$coefficients[1:4, ]
plot(x = ihdp$ncdctt[!is.na(ihdp$iqsb.36)], y = mod$residuals)
# Same two models fitted on the treated units only.
summary(lm(ihdp[, c("iqsb.36", "treat", "ncdctt", covs)], subset = treat == 1))$coefficients[1:3, ]
summary(lm(ihdp[, c("iqsb.36", "treat", "ncdctt", "ncdct2", covs)], subset = treat == 1))$coefficients[1:4, ]

#### BART
use <- ihdp[, c("iqsb.36", "treat", "ncdctt", covs)]
# Training inputs: complete cases, outcome column removed.
xt <- as.matrix(na.omit(use)[, -1])
# Test inputs: treated units as observed (xp1) stacked on top of a copy whose
# first two columns (treat, ncdctt) are set to zero (xp2).
xp1 <- as.matrix(use[use$treat == 1, -1])
xp2 <- xp1
xp2[, c(1:2)] <- 0
xp <- rbind(xp1, xp2)
nt <- sum(use$treat == 1)
y <- as.numeric(na.omit(use)[, 1])
library(BayesTree)
bart.tot <- bart(x.train = xt, y.train = y, x.test = xp)
# Mean difference between predictions under the observed treatment and under
# zero treatment, for the treated units.
tmpa <- mean(bart.tot$yhat.test.mean[1:nt]) - mean(bart.tot$yhat.test.mean[(nt + 1):(2 * nt)])
tmpa
ndraws <- nrow(bart.tot$yhat.test)
# Same difference per posterior draw; its spread gives a normal-approximation
# 95% interval around tmpa.
tmp <- apply(bart.tot$yhat.test[, 1:nt] - bart.tot$yhat.test[, (nt + 1):(2 * nt)], 1, mean)
sd <- sqrt(var(tmp)) # name kept from the original; calls to sd() still resolve to the function
ci <- c(tmpa - 1.96 * sd, tmpa + 1.96 * sd)
ci
# Inspect the BART inputs.
xp
length(y)
dim(xt)
# xt is a matrix, so its column labels come from colnames(); names() returns
# NULL for a matrix.
colnames(xt)
# First two columns of the training matrix (treat and ncdctt). The original
# also evaluated x[,1], although no x is defined before this point in the
# visible script, and a bare literal 1; both were removed.
xt[, 1]
xt[, 2]
sum(xt[, 1])
# Posterior draws of the predictions: first nt test columns are the treated
# units as observed, the next nt are the same units with treatment zeroed.
pp.draws1 <- bart.tot$yhat.test[, 1:nt]
pp.draws0 <- bart.tot$yhat.test[, (nt + 1):(2 * nt)]
# Central 95% interval of a sample: returns the 2.5% and 97.5% empirical
# quantiles of `a` as a named numeric vector of length two.
ci.fun <- function(a) {
  quantile(a, probs = c(.025, .975))
}
# Per-unit 95% posterior intervals of the predictions with and without
# treatment (columns are units, rows are posterior draws).
cis1 <- apply(pp.draws1, 2, ci.fun)
cis0 <- apply(pp.draws0, 2, ci.fun)
# Per-unit treatment effects: posterior mean and 95% interval of the
# draw-wise difference between the two sets of predictions.
tmp <- bart.tot$yhat.test[, 1:nt] - bart.tot$yhat.test[, (nt + 1):(2 * nt)]
tes <- apply(tmp, 2, mean)
te.cis <- apply(tmp, 2, ci.fun)

# Two-panel figure, both panels smoothed with lowess against xp1[,2]
# (the ncdctt column). The original history repeated these calls several
# times and left some attempts without the upper band or the zero line; only
# one complete version is kept.
par(mfrow = c(1, 2))
# Left: predicted outcome with observed treatment (red) and with treatment
# set to zero (black).
plot(lowess(xp1[, 2], bart.tot$yhat.test.mean[1:nt]), pch = 20, xlab = "Number of CDC days (100)", ylab = "IQ at age 3", col = "red", ylim = c(75, 105))
points(lowess(xp1[, 2], bart.tot$yhat.test.mean[(nt + 1):(2 * nt)]), pch = 20)
# Right: conditional treatment effects with dashed 95% bounds and a zero line.
plot(lowess(xp1[, 2], tes), xlab = "Number of CDC days (100)", ylab = "Conditional treatment effects", ylim = c(-5, 20))
points(lowess(xp1[, 2], te.cis[1, ]), type = "l", lty = 2)
points(lowess(xp1[, 2], te.cis[2, ]), type = "l", lty = 2)
abline(h = 0)

# Raw values behind the left panel.
bart.tot$yhat.test.mean[1:nt]
xp1[, 2]
# Load the raw data; the code below expects it to provide the data frame ihdp.
load("example.data")
# Columns left out of the selection below:
#  - bwg: removed because we are not sure what it is
#  - livwho: removed because we are not sure how it is encoded
#  - dose400, othstudy, site*: removed because we believe they are not
#    relevant for the "population study" we simulate here
sel_vars <- as.numeric(c(1:2, 5:13, 15:17, 19:21, 23:26, 35:38))
# Treated units only, then only those with an observed outcome.
ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
sel_rows <- !is.na(ihdp_simple[["iqsb.36"]])
ihdp_simple <- ihdp_simple[sel_rows, , drop = FALSE]
# Remaining variables (description taken from Hill (2011), supplementary material):
#  1. iqsb.36: IQ measure at 3 years (outcome)
#  2. ncdctt: number of days of treatment (treatment)
#  3. bw: birthweight of child in grams
#  4. momage: mom's age when she gave birth to the child
#  5. nnhealth: neo-natal health index, some function of birth variables (number of days in hospital, gestational age, ...) that is supposed to measure neonatal health
#  6. birth.o: birth order
#  7. parity: number of children the mother has given birth to
#  8. moreprem: number of other children mom has given birth to prematurely
#  9. cigs: did mom smoke cigarettes when pregnant
# 10. alcohol: did mom consume alcohol when pregnant
# 11. ppvt.imp: mother’s ppvt measured 1 year post-birth (some values imputed)
# 12. female: female indicator
# 13. mlt.birt: number of multiple births the mother has had
# 14. b.marry: indicator for whether mom was married when child born
# 15. language: primary language spoken at home
# 16. whenpren: what trimester did the mother begin prenatal care
# 17. drugs: did mom ever use drugs when pregnant
# 18-21. mom.lths, mom.hs, mom.scoll, mom.coll: indicators of mom's education level
# 22-24. momblack, momhisp, momwhite: indicators of mom's race
# 25. workdur.imp: indicator for whether mom worked during her pregnancy
### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1; it has been removed.

### Collapse the one-of-N encoding
# momed: 0 = default (mom.lths rows), 1 = mom.hs, 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed

### Re-select columns and take note of binary encodings for consultation, if desired
# Drop the education dummies (now summarised by momed) and two of the three
# race dummies. The original list misspelled "mom.scoll" as "mom.scholl", so
# that column was silently kept.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll",
                "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
ihdp_simple <- ihdp_simple[, sel_vars]
# Positions of the binary columns in the reduced data frame.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol", "female",
               "mlt.birt", "b.marry", "language", "drugs", "momwhite")
bin_vars <- unlist(lapply(bin_names, function(nm) which(ns == nm)))

# Cumulative shares of the education codes, used to pick the split point.
# (The original first looked at ihdp$momed, but momed is only added to
# ihdp_simple.)
ihdp_simple$momed
mean(ihdp_simple$momed <= 2)
mean(ihdp_simple$momed <= 1)
mean(ihdp_simple$momed <= 0)
# Binarize momed: codes 0-1 become 0, codes 2-3 become 1.
ihdp_simple$momed[which(ihdp_simple$momed <= 1)] <- 0
ihdp_simple$momed[which(ihdp_simple$momed >= 2)] <- 1
hist(ihdp_simple$momed)

# Export the data and, separately, the column names. The original first wrote
# the whole data frame to "ihdp.names" before overwriting it with the names;
# only the correct call is kept.
write.table(ihdp_simple, "ihdp.dat", sep = " ", col.names = FALSE, row.names = FALSE)
write.table(names(ihdp_simple), "ihdp.names", sep = " ", col.names = FALSE, row.names = FALSE)
dir()
# Rebuild the working data from ALL units (treated and untreated) with an
# observed outcome; column positions refer to ihdp.
sel_vars <- c(1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 35, 36, 37, 38)
#ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
#sel_rows <- !is.na(ihdp_simple$iqsb.36)
#ihdp_simple <- ihdp_simple[sel_rows, ]
# The row mask must come from ihdp itself; the original computed it from the
# previous ihdp_simple, whose rows do not line up with those of ihdp.
sel_rows <- !is.na(ihdp$iqsb.36)
ihdp_simple <- ihdp[sel_rows, sel_vars]

### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1; it has been removed.

### Collapse the one-of-N encoding
# momed: 0 = default (mom.lths rows), 1 = mom.hs, 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed
# Binarize momed: codes 0-1 become 0, codes 2-3 become 1.
ihdp_simple$momed[which(ihdp_simple$momed <= 1)] <- 0
ihdp_simple$momed[which(ihdp_simple$momed >= 2)] <- 1

### Re-select columns and take note of binary encodings
# The original ran this step twice, the first time with a stray "d" after
# names(ihdp_simple) (a syntax error), and misspelled "mom.scoll" as
# "mom.scholl" so that column was never dropped.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll",
                "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
ihdp_simple <- ihdp_simple[, sel_vars]
# Positions of the binary columns in the reduced data frame.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol", "female",
               "mlt.birt", "b.marry", "language", "drugs", "momwhite")
bin_vars <- unlist(lapply(bin_names, function(nm) which(ns == nm)))

### Export
write.table(ihdp_simple, "ihdp.dat", sep = " ", col.names = FALSE, row.names = FALSE)
write.table(names(ihdp_simple), "ihdp.names", sep = " ", col.names = FALSE, row.names = FALSE)
sum(is.na(ihdp_simple))
# Rebuild the working data from ALL units (treated and untreated) with an
# observed outcome; column positions refer to ihdp.
sel_vars <- c(1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 35, 36, 37, 38)
#ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
#sel_rows <- !is.na(ihdp_simple$iqsb.36)
#ihdp_simple <- ihdp_simple[sel_rows, ]
sel_rows <- !is.na(ihdp$iqsb.36)
ihdp_simple <- ihdp[sel_rows, sel_vars]

### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1; it has been removed.

### Collapse the one-of-N encoding
# momed: 0 = default (mom.lths rows), 1 = mom.hs, 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed
# Binarize momed: codes 0-1 become 0, codes 2-3 become 1.
ihdp_simple$momed[which(ihdp_simple$momed <= 1)] <- 0
ihdp_simple$momed[which(ihdp_simple$momed >= 2)] <- 1

### Re-select columns and take note of binary encodings for consultation, if useful
# Drop the education dummies (now summarised by momed) and two of the three
# race dummies. The original list misspelled "mom.scoll" as "mom.scholl", so
# that column was silently kept.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll",
                "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
ihdp_simple <- ihdp_simple[, sel_vars]
# Positions of the binary columns in the reduced data frame.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol", "female",
               "mlt.birt", "b.marry", "language", "drugs", "momwhite")
bin_vars <- unlist(lapply(bin_names, function(nm) which(ns == nm)))
sum(is.na(ihdp_simple))

### Export
write.table(ihdp_simple, "ihdp.dat", sep = " ", col.names = FALSE, row.names = FALSE)
write.table(names(ihdp_simple), "ihdp.names", sep = " ", col.names = FALSE, row.names = FALSE)
# Check which columns xp2[, c(1:2)] refers to. xp1 is created with
# as.matrix() earlier in this script, so its labels come from colnames();
# names() returns NULL for a matrix.
c(1:2)
colnames(xp1)
names(use)
dim(xp1)
# Rebuild the working data: treated units only, selected columns (positions
# refer to ihdp), rows with an observed outcome.
sel_vars <- c(1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 35, 36, 37, 38)
ihdp_simple <- ihdp[which(ihdp$treat == 1), sel_vars]
sel_rows <- !is.na(ihdp_simple$iqsb.36)
ihdp_simple <- ihdp_simple[sel_rows, ]
# Alternative kept for reference: all units with an observed outcome.
#sel_rows <- !is.na(ihdp$iqsb.36)
#ihdp_simple <- ihdp[sel_rows, sel_vars]
# Remaining variables (description taken from Hill (2011), supplementary material):
#  1. iqsb.36: IQ measure at 3 years (outcome)
#  2. ncdctt: number of days of treatment (treatment)
#  3. bw: birthweight of child in grams
#  4. momage: mom's age when she gave birth to the child
#  5. nnhealth: neo-natal health index, some function of birth variables (number of days in hospital, gestational age, ...) that is supposed to measure neonatal health
#  6. birth.o: birth order
#  7. parity: number of children the mother has given birth to
#  8. moreprem: number of other children mom has given birth to prematurely
#  9. cigs: did mom smoke cigarettes when pregnant
# 10. alcohol: did mom consume alcohol when pregnant
# 11. ppvt.imp: mother’s ppvt measured 1 year post-birth (some values imputed)
# 12. female: female indicator
# 13. mlt.birt: number of multiple births the mother has had
# 14. b.marry: indicator for whether mom was married when child born
# 15. language: primary language spoken at home
# 16. whenpren: what trimester did the mother begin prenatal care
# 17. drugs: did mom ever use drugs when pregnant
# 18-21. mom.lths, mom.hs, mom.scoll, mom.coll: indicators of mom's education level
# 22-24. momblack, momhisp, momwhite: indicators of mom's race
# 25. workdur.imp: indicator for whether mom worked during her pregnancy
### Some binarizations
# birth.o, parity, language: values above 1 become 0, so that 1 marks
# first born / single child / English and 0 marks everything else.
for (v in c("birth.o", "parity", "language")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 1)] <- 0
}
# moreprem: counts above 1 are capped at 1 (yes = 1 / no = 0).
ihdp_simple$moreprem[which(ihdp_simple$moreprem > 1)] <- 1
# cigs, alcohol, mlt.birt: any positive value becomes 1 (some = 1 / none = 0).
for (v in c("cigs", "alcohol", "mlt.birt")) {
  ihdp_simple[[v]][which(ihdp_simple[[v]] > 0)] <- 1
}
# drugs: shift the coding down by one (intended result: no = 1 / yes = 0;
# presumably the raw coding is 1/2 -- TODO confirm against the codebook).
ihdp_simple$drugs <- ihdp_simple$drugs - 1
# NOTE(review): a stray copy of the language line used to follow here and set
# drugs to 0 wherever language == 1, overwriting the drug indicator for every
# English-speaking household. It has been removed.

### Collapse the one-of-N encoding
# momed: 0 = default (mom.lths rows), 1 = mom.hs, 2 = mom.scoll, 3 = mom.coll.
momed1.idx <- which(ihdp_simple$mom.lths == 1) # kept for inspection only
momed2.idx <- which(ihdp_simple$mom.hs == 1)
momed3.idx <- which(ihdp_simple$mom.scoll == 1)
momed4.idx <- which(ihdp_simple$mom.coll == 1)
momed <- rep(0, nrow(ihdp_simple))
momed[momed2.idx] <- 1
momed[momed3.idx] <- 2
momed[momed4.idx] <- 3
ihdp_simple$momed <- momed
# Binarize momed: codes 0-1 become 0, codes 2-3 become 1.
ihdp_simple$momed[which(ihdp_simple$momed <= 1)] <- 0
ihdp_simple$momed[which(ihdp_simple$momed >= 2)] <- 1

### Re-select columns and take note of binary encodings for consultation, if useful
# Drop the education dummies (now summarised by momed) and two of the three
# race dummies. The original list misspelled "mom.scoll" as "mom.scholl", so
# that column was silently kept and exported.
ns <- names(ihdp_simple)
drop_names <- c("mom.lths", "mom.hs", "mom.scoll", "mom.coll",
                "momblack", "momhisp")
sel_vars <- which(!(ns %in% drop_names))
ihdp_simple <- ihdp_simple[, sel_vars]
# Positions of the binary columns in the reduced data frame.
ns <- names(ihdp_simple)
bin_names <- c("birth.o", "parity", "moreprem", "cigs", "alcohol", "female",
               "mlt.birt", "b.marry", "language", "drugs", "momwhite")
bin_vars <- unlist(lapply(bin_names, function(nm) which(ns == nm)))

### Export
# Data without header or row names, space separated; column names go to a
# separate file, one per line.
write.table(ihdp_simple, "ihdp.dat", sep = " ", col.names = FALSE, row.names = FALSE)
write.table(names(ihdp_simple), "ihdp.names", sep = " ", col.names = FALSE, row.names = FALSE)
# Small simulation: a straight line through x = 0, ..., n - 1 whose noise
# standard deviation grows linearly from sd0 at the smallest x to het * sd0
# at the largest x.
n <- 100
sd0 <- 10
het <- 3
x <- seq_len(n) - 1L
noise <- rnorm(n) * sd0
spread <- 1 + (het - 1) * x / max(x)
y <- x + noise * spread
# Scatter plot with the least-squares line.
plot(x, y)
out <- lm(y ~ x)
abline(out)
# Studentized residuals against fitted values (studres comes from MASS).
library(MASS)
plot(fitted(out), studres(out))
# Unrelated console checks of the digamma function.
digamma
digamma(17)
