model.Fy <- cv.ncvreg(x, Fy-1/2, penalty=penalty, nfolds=nfolds)
lambda.Fy <- model.Fy$lambda.min
model.Fy <- ncvfit(x, Fy-1/2, penalty=penalty, lambda=lambda.Fy)
beta.hat <- as.numeric(model.Fy$beta)
return(beta.hat)
}
if(est == "beta.y") {
model.y <- cv.ncvreg(x, y, penalty=penalty, nfolds=nfolds)
lambda.y <- model.y$lambda.min
model.y <- ncvfit(x, y, penalty=penalty, lambda=lambda.y)
beta.y.hat <- as.numeric(model.y$beta)
return(beta.y.hat)
}
}
if(method == "proposed") {
temp <- mclapply(1:loops, function(i) {
cat(i, "\r")
set.seed(proc.time()[1]*1000 + i)
outdata <- genedata(i)
x <<- outdata$x
y <<- outdata$y
Fy <<- outdata$Fy
beta.hat <- fit_func(penalty, est="beta")
interest <- c(ceiling(2*sparsity/5):sparsity, (sparsity+4):(sparsity+53), (p-149):p)
stat.record <- c(); M.G <- matrix(NA, M, length(interest))
for (l in 1:length(interest)) {
j <- interest[l]
xj <<- x[,j]
z <<- x[,-j]
theta.hat <- fit_func(penalty, est="theta")
gamma.hat <- beta.hat[-j]
e.hat <- Fy-1/2-z%*%gamma.hat
my.hat.func <- function(yi) {
1/n * sum((xj-z%*%theta.hat)*(ifelse(y>=yi, 1, 0)-Fy))
}
stat.T <- 1/sqrt(n) * sum(e.hat*(xj-z%*%theta.hat))
my.hat <- c()
for (k in 1:n) { my.hat[k] <- my.hat.func(y[k]) }
Sig.hat <- 1/n * sum(((xj-z%*%theta.hat)*e.hat + my.hat)^2)
stat.S <- stat.T/sqrt(Sig.hat)
stat.record[l] <- stat.S
## Gaussian approximate
hj.hat_func <- function(o) {
hj.hat.oo <- c(); k.oo <- 1;
for (oo in 1:n) {
if(oo != o) {
hj.hat.oo[k.oo] <- 1/2*((ifelse(y[oo]<=y[o], 1, 0)-1/2-z[o,]%*%gamma.hat)*(x[o,j]-z[o,]%*%theta.hat) +
(ifelse(y[o]<=y[oo], 1, 0)-1/2-x[oo,-j]%*%gamma.hat)*(x[oo,j]-x[oo,-j]%*%theta.hat))
k.oo <- k.oo + 1
}
}
hj.hat.o <- sum(hj.hat.oo)
return(hj.hat.o)
}
hj.hat <- c()
for (o in 1:n) {hj.hat[o] <- hj.hat_func(o)}
for (m in 1:M) {
set.seed(m+0417)
e <- rnorm(n)
M.G[m,l] <- (2/(sqrt(Sig.hat)*sqrt(n))*sum(((1/(n-1))*hj.hat-(1/sqrt(n))*stat.T)*e))^2
}
}
G1 <- c(5:6, 200:204); T1.inf <- max(stat.record[G1]^2)
G2 <- c(1:4, G1); T2.inf <- max(stat.record[G2]^2)
G4 <- c(5:204); T4.inf <- max(stat.record[G4]^2)
G5 <- 1:204; T5.inf <- max(stat.record[G5]^2)
stat.boot1 <- apply(M.G[,G1], 1, max)
stat.boot2 <- apply(M.G[,G2], 1, max)
stat.boot4 <- apply(M.G[,G4], 1, max)
stat.boot5 <- apply(M.G[,G5], 1, max)
reject.boot <- c(T1.inf>=quantile(stat.boot1, 1-alpha),
T2.inf>=quantile(stat.boot2, 1-alpha),
T4.inf>=quantile(stat.boot4, 1-alpha),
T5.inf>=quantile(stat.boot5, 1-alpha))
return(reject.boot)
}, mc.cores=ncores)
reject.inf <- Reduce("rbind", temp)
res <- colMeans(reject.inf)
return(res)
}
}
## Simulation example
method="proposed"
beta.type=2;
p=800; rho=0.5;
outlier.multi=10;
penalty="lasso"; nfolds=10; alpha=0.05;
loops=500; M=500
global.df <- data.frame(method=NA, model=NA, n=NA, p=NA, x_type=NA, sparsity=NA,
err_type=NA, outprop=NA, G1=NA, G2=NA, G4=NA, G5=NA)
row <- 1
for (n in c(200, 500)) {
for (model in c(2, 1)) {
cat("#--------model (n, p) =", model, c(n, p), "--------#", "\n")
for (x.type in 1) {
for (sparsity in 6) {
# cat("sparsity =", sparsity, "\n")
for (err.type in (1:2)) {
# cat("err.type =", err.type, "\n")
if(err.type == 1) {
for (outlier.prop in c(0.1, 0)) {
# cat("outlier.prop =", outlier.prop, "\n")
out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
x.type, rho, err.type, model,
outlier.prop, outlier.multi,
penalty, nfolds, alpha,
method, loops, M)
global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
outlier.prop, out.res)
cat("row =", row, "\n")
row <- row + 1
}
}
if(err.type == 2) {
outlier.prop <- 0
out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
x.type, rho, err.type, model,
outlier.prop, outlier.multi,
penalty, nfolds, alpha,
method, loops, M)
global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
outlier.prop, out.res)
cat("row =", row, "\n")
row <- row + 1
}
}
}
}
}
}
View(global.df)
library(MASS)
library(ncvreg)
library(glmnet)
library(Matrix) # block diagonal matrix
library(SIS)
library(scalreg)
library(parallel)
library(hdi)
ncores <- 8
SIM_global <- function(n=200, p=400, sparsity=10, beta.type=1,
delta0=1, delta1=0.1, delta2=2,
x.type=1, rho=0.1, err.type=1, model=1,
outlier.prop=0, outlier.multi=10,
penalty="lasso", nfolds=10, alpha=0.05,
method="proposed", loops=3) {
# n=200; p=400; sparsity=10; beta.type=1;
# delta0=1; delta1=0.1; delta2=1;
# x.type=2; rho=0.1; err.type=1; model=1;
# outlier.prop=0; outlier.multi=10;
# penalty="lasso"; nfolds=10; alpha=0.05;
# method="proposed"; loops=3;
## generate data
# beta.true
if(beta.type == 1) {
beta.true <- c(rep(delta0, sparsity), rep(0, p-sparsity))
}
if(beta.type == 2) {
beta.true <- c(seq(delta1, delta2, length.out=sparsity), rep(0, p-sparsity))
}
# x
genedata <- function(i) {
y <- Inf
while (length(which(y == Inf)) != 0) {
if(x.type == 1) {
p.sub <- p/10
list.temp <- NULL
for (ii in 1:10) {
list.temp[[ii]] <- toeplitz((0.1*ii-0.1)^seq(0, p.sub-1))
}
Sig <- as.matrix(bdiag(list.temp))
x <- mvrnorm(n, mu=rep(0, p), Sigma=Sig)
}
if(x.type == 2) {
p.sub <- p/10
list.temp <- NULL
for (ii in 1:10) {
list.temp[[ii]] <- toeplitz((0.1*ii-0.1)^seq(0, p.sub-1))
}
Sig <- as.matrix(bdiag(list.temp))
for (i in c(1:sparsity, (sparsity+1):(sparsity+5))) {
for (j in c(1:sparsity, (sparsity+1):(sparsity+5))) {
Sig[i,j] <- 0.8
}
}
diag(Sig) <- 1
x <- mvrnorm(n, mu=rep(0, p), Sigma=Sig)
}
# error
if(err.type == 1) { error <- rnorm(n) }
if(err.type == 2) { error <- rt(n, 1) }
# linear model
if(model == 1) { y <- x%*%beta.true + 1*error }
# nonlinear model
if(model == 2) {
y <- exp(x%*%beta.true + error)
}
# add outliers
outlier.pos <- sample(1:n, n*outlier.prop)
y[outlier.pos] <- y[outlier.pos] + outlier.multi*max(y)
Fy <- (rank(y))/length(y)
}
return(list(x=x, y=y, Fy=Fy))
}
## penalized estimator
fit_func <- function(penalty, est="theta") {
if(est == "theta") {
model.x <- cv.ncvreg(z, xj, penalty=penalty, nfolds=nfolds)
lambda.x <- model.x$lambda.min
model.x <- ncvfit(z, xj, penalty=penalty, lambda=lambda.x)
theta.hat <- as.numeric(model.x$beta)
return(theta.hat)
}
if(est == "beta") {
model.Fy <- cv.ncvreg(x, Fy-1/2, penalty=penalty, nfolds=nfolds)
lambda.Fy <- model.Fy$lambda.min
model.Fy <- ncvfit(x, Fy-1/2, penalty=penalty, lambda=lambda.Fy)
beta.hat <- as.numeric(model.Fy$beta)
return(beta.hat)
}
if(est == "beta.y") {
model.y <- cv.ncvreg(x, y, penalty=penalty, nfolds=nfolds)
lambda.y <- model.y$lambda.min
model.y <- ncvfit(x, y, penalty=penalty, lambda=lambda.y)
beta.y.hat <- as.numeric(model.y$beta)
return(beta.y.hat)
}
}
if(method == "proposed") {
temp <- mclapply(1:loops, function(i) {
cat(i, "\r")
set.seed(proc.time()[1]*1000 + i)
outdata <- genedata(i)
x <<- outdata$x
y <<- outdata$y
Fy <<- outdata$Fy
beta.hat <- fit_func(penalty, est="beta")
q.alpha <- -log(pi)-2*log(log(1/(1-alpha)))
interest <- c(ceiling(2*sparsity/5):sparsity, (sparsity+4):(sparsity+53), (p-149):p)
stat.record <- c();
for (l in 1:length(interest)) {
j <- interest[l]
xj <<- x[,j]
z <<- x[,-j]
theta.hat <- fit_func(penalty, est="theta")
gamma.hat <- beta.hat[-j]
e.hat <- Fy-1/2-z%*%gamma.hat
my.hat.func <- function(yi) {
1/n * sum((xj-z%*%theta.hat)*(ifelse(y>=yi, 1, 0)-Fy))
}
stat.T <- 1/sqrt(n) * sum(e.hat*(xj-z%*%theta.hat))
my.hat <- c()
for (k in 1:n) { my.hat[k] <- my.hat.func(y[k]) }
Sig.hat <- 1/n * sum(((xj-z%*%theta.hat)*e.hat + my.hat)^2)
stat.S <- stat.T/sqrt(Sig.hat)
stat.record[l] <- stat.S
}
G1 <- c(5:6, 200:204); T1.inf <- max(stat.record[G1]^2)
G2 <- c(1:4, G1); T2.inf <- max(stat.record[G2]^2)
G4 <- c(5:204); T4.inf <- max(stat.record[G4]^2)
G5 <- 1:204; T5.inf <- max(stat.record[G5]^2)
reject.inf <- c(T1.inf>=2*log(length(G1))-log(log(length(G1)))+q.alpha,
T2.inf>=2*log(length(G2))-log(log(length(G2)))+q.alpha,
T4.inf>=2*log(length(G4))-log(log(length(G4)))+q.alpha,
T5.inf>=2*log(length(G5))-log(log(length(G5)))+q.alpha)
return(reject.inf)
}, mc.cores=ncores)
reject.inf <- Reduce("rbind", temp)
res <- colMeans(reject.inf)
return(res)
}
}
## Simulation example
method="proposed"
beta.type=2;
p=800; rho=0.5;
outlier.multi=10;
penalty="lasso"; nfolds=10; alpha=0.05;
loops=500;
global.df <- data.frame(method=NA, model=NA, n=NA, p=NA, x_type=NA, sparsity=NA,
err_type=NA, outprop=NA, G1=NA, G2=NA, G4=NA, G5=NA)
row <- 1
for (n in c(200, 500)) {
for (model in c(2, 1)) {
cat("#--------model (n, p) =", model, c(n, p), "--------#", "\n")
for (x.type in 1) {
for (sparsity in 6) {
# cat("sparsity =", sparsity, "\n")
for (err.type in (1:2)) {
# cat("err.type =", err.type, "\n")
if(err.type == 1) {
for (outlier.prop in c(0.1, 0)) {
# cat("outlier.prop =", outlier.prop, "\n")
out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
x.type, rho, err.type, model,
outlier.prop, outlier.multi,
penalty, nfolds, alpha,
method, loops)
global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
outlier.prop, out.res)
cat("row =", row, "\n")
row <- row + 1
}
}
if(err.type == 2) {
outlier.prop <- 0
out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
x.type, rho, err.type, model,
outlier.prop, outlier.multi,
penalty, nfolds, alpha,
method, loops)
global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
outlier.prop, out.res)
cat("row =", row, "\n")
row <- row + 1
}
}
}
}
}
}
library(MASS)
library(ncvreg)
library(glmnet)
library(Matrix) # block diagonal matrix
library(SIS)
library(scalreg)
library(parallel)
library(hdi)
ncores <- 8
SIM_global <- function(n=200, p=400, sparsity=10, beta.type=1,
delta0=1, delta1=0.1, delta2=2,
x.type=1, rho=0.1, err.type=1, model=1,
outlier.prop=0, outlier.multi=10,
penalty="lasso", nfolds=10, alpha=0.05,
method="proposed", loops=3) {
# n=200; p=400; sparsity=10; beta.type=1;
# delta0=1; delta1=0.1; delta2=1;
# x.type=2; rho=0.1; err.type=1; model=1;
# outlier.prop=0; outlier.multi=10;
# penalty="lasso"; nfolds=10; alpha=0.05;
# method="proposed"; loops=3;
## generate data
# beta.true
if(beta.type == 1) {
beta.true <- c(rep(delta0, sparsity), rep(0, p-sparsity))
}
if(beta.type == 2) {
beta.true <- c(seq(delta1, delta2, length.out=sparsity), rep(0, p-sparsity))
}
# x
genedata <- function(i) {
y <- Inf
while (length(which(y == Inf)) != 0) {
if(x.type == 1) {
p.sub <- p/10
list.temp <- NULL
for (ii in 1:10) {
list.temp[[ii]] <- toeplitz((0.1*ii-0.1)^seq(0, p.sub-1))
}
Sig <- as.matrix(bdiag(list.temp))
x <- mvrnorm(n, mu=rep(0, p), Sigma=Sig)
}
if(x.type == 2) {
p.sub <- p/10
list.temp <- NULL
for (ii in 1:10) {
list.temp[[ii]] <- toeplitz((0.1*ii-0.1)^seq(0, p.sub-1))
}
Sig <- as.matrix(bdiag(list.temp))
for (i in c(1:sparsity, (sparsity+1):(sparsity+5))) {
for (j in c(1:sparsity, (sparsity+1):(sparsity+5))) {
Sig[i,j] <- 0.8
}
}
diag(Sig) <- 1
x <- mvrnorm(n, mu=rep(0, p), Sigma=Sig)
}
# error
if(err.type == 1) { error <- rnorm(n) }
if(err.type == 2) { error <- rt(n, 1) }
# linear model
if(model == 1) { y <- x%*%beta.true + 1*error }
# nonlinear model
if(model == 2) {
y <- exp(x%*%beta.true + error)
}
# add outliers
outlier.pos <- sample(1:n, n*outlier.prop)
y[outlier.pos] <- y[outlier.pos] + outlier.multi*max(y)
Fy <- (rank(y))/length(y)
}
return(list(x=x, y=y, Fy=Fy))
}
## penalized estimator
fit_func <- function(penalty, est="theta") {
if(est == "theta") {
model.x <- cv.ncvreg(z, xj, penalty=penalty, nfolds=nfolds)
lambda.x <- model.x$lambda.min
model.x <- ncvfit(z, xj, penalty=penalty, lambda=lambda.x)
theta.hat <- as.numeric(model.x$beta)
return(theta.hat)
}
if(est == "beta") {
model.Fy <- cv.ncvreg(x, Fy-1/2, penalty=penalty, nfolds=nfolds)
lambda.Fy <- model.Fy$lambda.min
model.Fy <- ncvfit(x, Fy-1/2, penalty=penalty, lambda=lambda.Fy)
beta.hat <- as.numeric(model.Fy$beta)
return(beta.hat)
}
if(est == "beta.y") {
model.y <- cv.ncvreg(x, y, penalty=penalty, nfolds=nfolds)
lambda.y <- model.y$lambda.min
model.y <- ncvfit(x, y, penalty=penalty, lambda=lambda.y)
beta.y.hat <- as.numeric(model.y$beta)
return(beta.y.hat)
}
}
if(method == "proposed") {
temp <- mclapply(1:loops, function(i) {
cat(i, "\r")
set.seed(proc.time()[1]*1000 + i)
outdata <- genedata(i)
x <<- outdata$x
y <<- outdata$y
Fy <<- outdata$Fy
beta.hat <- fit_func(penalty, est="beta")
q.alpha <- -log(pi)-2*log(log(1/(1-alpha)))
interest <- c(ceiling(2*sparsity/5):sparsity, (sparsity+4):(sparsity+53), (p-149):p)
stat.record <- c();
for (l in 1:length(interest)) {
j <- interest[l]
xj <<- x[,j]
z <<- x[,-j]
theta.hat <- fit_func(penalty, est="theta")
gamma.hat <- beta.hat[-j]
e.hat <- Fy-1/2-z%*%gamma.hat
my.hat.func <- function(yi) {
1/n * sum((xj-z%*%theta.hat)*(ifelse(y>=yi, 1, 0)-Fy))
}
stat.T <- 1/sqrt(n) * sum(e.hat*(xj-z%*%theta.hat))
my.hat <- c()
for (k in 1:n) { my.hat[k] <- my.hat.func(y[k]) }
Sig.hat <- 1/n * sum(((xj-z%*%theta.hat)*e.hat + my.hat)^2)
stat.S <- stat.T/sqrt(Sig.hat)
stat.record[l] <- stat.S
}
G1 <- c(5:6, 200:204); T1.inf <- max(stat.record[G1]^2)
G2 <- c(1:4, G1); T2.inf <- max(stat.record[G2]^2)
G4 <- c(5:204); T4.inf <- max(stat.record[G4]^2)
G5 <- 1:204; T5.inf <- max(stat.record[G5]^2)
reject.inf <- c(T1.inf>=2*log(length(G1))-log(log(length(G1)))+q.alpha,
T2.inf>=2*log(length(G2))-log(log(length(G2)))+q.alpha,
T4.inf>=2*log(length(G4))-log(log(length(G4)))+q.alpha,
T5.inf>=2*log(length(G5))-log(log(length(G5)))+q.alpha)
return(reject.inf)
}, mc.cores=ncores)
reject.inf <- Reduce("rbind", temp)
res <- colMeans(reject.inf)
return(res)
}
}
## Simulation example
method="proposed"
beta.type=2;
p=800; rho=0.5;
outlier.multi=10;
penalty="lasso"; nfolds=10; alpha=0.05;
loops=10;
global.df <- data.frame(method=NA, model=NA, n=NA, p=NA, x_type=NA, sparsity=NA,
err_type=NA, outprop=NA, G1=NA, G2=NA, G4=NA, G5=NA)
row <- 1
for (n in c(200, 500)) {
for (model in c(2, 1)) {
cat("#--------model (n, p) =", model, c(n, p), "--------#", "\n")
for (x.type in 1) {
for (sparsity in 6) {
# cat("sparsity =", sparsity, "\n")
for (err.type in (1:2)) {
# cat("err.type =", err.type, "\n")
if(err.type == 1) {
for (outlier.prop in c(0.1, 0)) {
# cat("outlier.prop =", outlier.prop, "\n")
out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
x.type, rho, err.type, model,
outlier.prop, outlier.multi,
penalty, nfolds, alpha,
method, loops)
global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
outlier.prop, out.res)
cat("row =", row, "\n")
row <- row + 1
}
}
if(err.type == 2) {
outlier.prop <- 0
out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
x.type, rho, err.type, model,
outlier.prop, outlier.multi,
penalty, nfolds, alpha,
method, loops)
global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
outlier.prop, out.res)
cat("row =", row, "\n")
row <- row + 1
}
}
}
}
}
}
global.df
