n = 1000 # sample size n
p = 100 # dimension
N_o = round(log(n/10)/log(1.2)) # number of ...
#o = round(5*1.2^(1:N_o)) # number of outliers
o = (1:(0.03*n))*10;
N_o = length(o);
Ntrials = 100
beta = 0*(1:p)
#install.packages("glmnet", repos = "http://cran.us.r-project.org")
library(glmnet)
error = matrix(0,N_o,Ntrials)
print(o)
[1] 10 20 30 40 50 60 70 80 90 100 110 120 130 140
[15] 150 160 170 180 190 200 210 220 230 240 250 260 270 280
[29] 290 300
write.csv(o,'n1000p100grid_o.csv')
for (s in c(5,15,25))
{
beta[1:s] = 10;
file_name = paste0("n",n,"p",p,"s",s,"N",Ntrials,".RData")
file_name_csv = paste0("n",n,"p",p,"s",s,"N",Ntrials,".csv")
print(file_name)
for (itrial in 1:Ntrials)
{
if (itrial%%5==0)
cat("*** Starting trial no ", itrial,"\n")
for (i_nboutlier in 1:N_o)
{
ocurrent = o[i_nboutlier]
theta = 10 +0*(1:ocurrent)
X = matrix(rnorm(n*p),n,p) # Gaussian desing
#XX = (t(X)%*%X)/N_o; # Gram matrix
y = X[,1:s]%*%beta[1:s] + rnorm(n);
y[1:ocurrent] = y[1:ocurrent] + sqrt(n)*theta;
lambda0 = sqrt((8/n)*(log(p/s) + log(n/ocurrent)));
fit = glmnet(cbind(X,sqrt(n)*diag(n)), y, intercept = FALSE, alpha = 1,standardize=FALSE,standardize.response = FALSE);
est_beta = coef(fit,lambda0)
if (i_nboutlier==1 && itrial%%10==0)
plot(1:p,est_beta[2:(p+1)],'p')
currenterror = sqrt(sum((est_beta[2:(p+1)]-beta)^2))
error[i_nboutlier,itrial] = currenterror;
}
}
save(error, file = file_name)
write.csv(error,file_name_csv)
av_error = apply(error,1,mean)
plot(o,av_error,'l')
}
[1] "n1000p100s5N100.RData"
*** Starting trial no 5
*** Starting trial no 10
*** Starting trial no 15
*** Starting trial no 20
*** Starting trial no 25
*** Starting trial no 30
*** Starting trial no 35
*** Starting trial no 40
*** Starting trial no 45
*** Starting trial no 50
*** Starting trial no 55
*** Starting trial no 60
*** Starting trial no 65
*** Starting trial no 70
*** Starting trial no 75
*** Starting trial no 80
*** Starting trial no 85
*** Starting trial no 90
*** Starting trial no 95
*** Starting trial no 100
[1] "n1000p100s15N100.RData"
*** Starting trial no 5
*** Starting trial no 10
*** Starting trial no 15
*** Starting trial no 20
*** Starting trial no 25
*** Starting trial no 30
*** Starting trial no 35
*** Starting trial no 40
*** Starting trial no 45
*** Starting trial no 50
*** Starting trial no 55
*** Starting trial no 60
*** Starting trial no 65
*** Starting trial no 70
*** Starting trial no 75
*** Starting trial no 80
*** Starting trial no 85
*** Starting trial no 90
*** Starting trial no 95
*** Starting trial no 100
[1] "n1000p100s25N100.RData"
*** Starting trial no 5
*** Starting trial no 10
*** Starting trial no 15
*** Starting trial no 20
*** Starting trial no 25
*** Starting trial no 30
*** Starting trial no 35
*** Starting trial no 40
*** Starting trial no 45
*** Starting trial no 50
*** Starting trial no 55
*** Starting trial no 60
*** Starting trial no 65
*** Starting trial no 70
*** Starting trial no 75
*** Starting trial no 80
*** Starting trial no 85
*** Starting trial no 90
*** Starting trial no 95
*** Starting trial no 100
load('n1000p100s5N100.RData')
error1 = error;
load('n1000p100s15N100.RData')
error2 = error;
load('n1000p100s25N100.RData')
error3 = error;
av_error1 = apply(error3,1,mean)
av_error2 = apply(error2,1,mean)
av_error3 = apply(error1,1,mean)
plot(o,av_error1,'l',col='black')
points(o,av_error2,'l',col='red')
points(o,av_error3,'l',col='blue')
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.