library(tidyverse)
library(purrr)
library(ggplot2)
library(pBrackets)
library(PSweight)
library(nleqslv)
library(ggpubr)
library(gridExtra)

# Fix the RNG seed so any random draws made after this point are reproducible.
set.seed(1234)

# Load the cirrhosis data and keep only the rows with a recorded treatment arm.
dat <- read.csv("cirrhosis.csv")
dat <- dat[!is.na(dat$Drug), ]

# Median-impute the missing lab values, one column at a time. Each median is
# taken over the observed (non-missing) entries of the rows retained above.
# NOTE: "Tryglicerides" is the column name this script reads from the data;
# do not "correct" the spelling here without renaming the column as well.
for (lab_col in c("Cholesterol", "Copper", "Tryglicerides", "Platelets")) {
  dat[[lab_col]][is.na(dat[[lab_col]])] <- median(dat[[lab_col]], na.rm = TRUE)
}


# Recode the treatment arm as a 0/1 indicator: 1 for "D-penicillamine",
# 0 for every other recorded value.
dat$Drug <- as.numeric(dat$Drug == "D-penicillamine")
unique(dat$Drug)  # echo the distinct coded values as a sanity check

# Working vectors: treatment indicator, outcome (square root of N_Days), and
# the stratification covariate (Age).
D <- dat$Drug
Y <- sqrt(dat$N_Days)
X <- dat$Age

# Partition Age into five right-closed strata:
#   S1: X <= 15695, S2: (15695, 17082], S3: (17082, 20440],
#   S4: (20440, 21900], S5: X > 21900.
# findInterval(..., left.open = TRUE) returns 0..4 for these intervals, so
# adding 1 gives the stratum number; a missing Age stays NA in every mask.
age_stratum <- findInterval(
  X,
  c(15695, 17082, 20440, 21900),
  left.open = TRUE
) + 1L
S1 <- age_stratum == 1L
S2 <- age_stratum == 2L
S3 <- age_stratum == 3L
S4 <- age_stratum == 4L
S5 <- age_stratum == 5L

# Per-stratum summaries of the outcome Y by treatment arm, computed once and
# reused for everything below.
strata <- list(S1, S2, S3, S4, S5)
arm_stat <- function(stat, arm) {
  # Apply `stat` to the outcomes of arm `arm` within each stratum.
  vapply(strata, function(s) stat(Y[s & (D == arm)]), numeric(1))
}
mu1_vec <- arm_stat(mean, 1)
mu0_vec <- arm_stat(mean, 0)
sd1_vec <- arm_stat(sd, 1)
sd0_vec <- arm_stat(sd, 0)

# Echo the arm means and store the difference in means for each stratum.
mu1_vec[1] # E[Y(1)|S1]
mu0_vec[1] # E[Y(0)|S1]
tau.s1 <- mu1_vec[1] - mu0_vec[1]
tau.s1
mu1_vec[2] # E[Y(1)|S2]
mu0_vec[2] # E[Y(0)|S2]
tau.s2 <- mu1_vec[2] - mu0_vec[2]
tau.s2
mu1_vec[3] # E[Y(1)|S3]
mu0_vec[3] # E[Y(0)|S3]
tau.s3 <- mu1_vec[3] - mu0_vec[3]
tau.s3
mu1_vec[4] # E[Y(1)|S4]
mu0_vec[4] # E[Y(0)|S4]
tau.s4 <- mu1_vec[4] - mu0_vec[4]
tau.s4
mu1_vec[5] # E[Y(1)|S5]
mu0_vec[5] # E[Y(0)|S5]
tau.s5 <- mu1_vec[5] - mu0_vec[5]
tau.s5
tau <- c(tau.s1, tau.s2, tau.s3, tau.s4, tau.s5)

# Stratum sizes (both arms together) and their share of the sample.
n <- vapply(strata, sum, integer(1))
true_p <- n / length(Y)

# Inputs to the pairwise pvalue() comparisons: the stratum effects and a
# per-stratum spread obtained by adding the two arm variances.
# NOTE(review): sd_vec is not scaled by the arm sizes and n counts both arms;
# confirm this is the intended standard-error input for the pairwise test.
mu_vec <- tau
sd_vec <- sqrt(sd1_vec^2 + sd0_vec^2)

# One-sided tail probability for the difference between two estimates.
#
# Forms t = (mu1 - mu2) / sqrt(sd1^2 / n1 + sd2^2 / n2) and returns the
# Student-t tail area beyond t in the direction of its sign (upper tail when
# t > 0, lower tail otherwise), with df = n1 + n2 - 2. The value is NOT
# doubled, so for finite t it lies in [0, 0.5] and equals 0.5 when mu1 == mu2.
#
# Arguments:
#   mu1, mu2  the two estimates being compared.
#   sd1, sd2  standard deviations associated with each estimate.
#   n1, n2    sample sizes associated with each estimate.
# All arguments may be scalars or vectors (recycled elementwise).
#
# Returns: a numeric vector of tail probabilities. An undefined statistic
# (e.g. 0 / 0 when both sd are 0) yields NaN instead of raising an error.
pvalue <- function(mu1, mu2, sd1, sd2, n1, n2) {
  t_stat <- (mu1 - mu2) / sqrt(sd1^2 / n1 + sd2^2 / n2)
  df <- n1 + n2 - 2
  # The t distribution is symmetric about 0, so the lower tail at a negative
  # t equals the upper tail at |t|. Using abs() removes the scalar-only
  # if/else, which failed on NaN/NA statistics and on vector input.
  pt(q = abs(t_stat), df = df, lower.tail = FALSE)
}

# Pairwise comparison matrix: p[i, j] is pvalue() for stratum i against
# stratum j. The dimension is taken from mu_vec rather than hard-coded, so
# the matrix stays in step with the number of strata; it is preallocated as
# double to match the values written into it.
p <- matrix(NA_real_, nrow = length(mu_vec), ncol = length(mu_vec))
for (i in seq_along(mu_vec)) {
  for (j in seq_along(mu_vec)) {
    p[i, j] <- pvalue(mu_vec[i], mu_vec[j], sd_vec[i], sd_vec[j], n[i], n[j])
  }
}
