function [Y,iter]=admm_bicluster(W,lambda,b0,b1,epsilon,tau_penalty,max_iter)
%ADMM_BICLUSTER  ADMM solver for nuclear-norm regularized bi-clustering.
%
%   Solves
%       maximize   <W,Y> - lambda*\|X\|_*
%       s.t.       X - Y = 0  and  b0 <= Y <= b1
%
%   [Y,iter] = admm_bicluster(W,lambda)
%   [Y,iter] = admm_bicluster(W,lambda,b0,b1,epsilon,tau_penalty,max_iter)
%
%  Inputs:
%    W            m-by-n weight matrix.
%    lambda       weight of the nuclear-norm penalty (required).
%    b0, b1       lower / upper bound applied elementwise to Y
%                 (defaults 0 and 1; each is defaulted independently).
%    epsilon      relative tolerance of the stopping test (default 1e-4).
%    tau_penalty  residual-balancing ratio: rho is changed when one
%                 residual exceeds tau_penalty times the other (default 2).
%    max_iter     maximum number of iterations (default 1000).
%    Any optional argument may be omitted or passed as [] to get its default.
%
%  Outputs:
%    Y            m-by-n solution, clipped to [b0,b1].
%    iter         number of iterations performed (0 if max_iter < 1).
%
%  Toy Example:
%
%  Y_star=[ones(50,100),zeros(50,50); zeros(100,100),ones(100,50)];
%  noise=0.2;
%  r=(rand(150)<noise);
%  L=(1-r).*(Y_star)+r.*(1-Y_star);
%  W=log((1-noise)/noise)*L+log(noise/(1-noise))*(1-L);
%  Y=admm_bicluster(W,sqrt(300));
%  figure(1); clf; imagesc(L); colormap gray
%  figure(2); clf; imagesc(Y); colormap gray
%
%
% by S.H.Lim
%
% Implementation of Algorithm 1 in
% S.H.Lim, Y.Chen, H.Xu, "A Convex Optimization Framework for Bi-Clustering",ICML 2015.
%
if nargin<2
    error('admm_bicluster:notEnoughInputs','W and lambda are required.');
end
% Each optional argument is defaulted on its own, so that e.g. a
% three-argument call keeps the caller's b0 instead of discarding it.
if nargin<3 || isempty(b0)
    b0=0;
end
if nargin<4 || isempty(b1)
    b1=1;
end
if nargin<5 || isempty(epsilon)
    epsilon=1e-4;
end
if nargin<6 || isempty(tau_penalty)
    tau_penalty=2;
end
if nargin<7 || isempty(max_iter)
    max_iter=1000;
end

rho=1;                  % ADMM penalty parameter, adapted below
[m,n]=size(W);
Y=zeros(m,n);
Q=zeros(m,n);           % scaled dual variable for the constraint X-Y=0
last_delta_rho=0;       % direction of the last rho change (+1 up, -1 down)
rho_switch_count=0;     % number of times rho changed direction
rho_switch_thres=20;    % direction switches tolerated before relaxing tau_penalty

for iter=1:max_iter
    % X-update: singular value soft-thresholding of Y-Q at level lambda/rho.
    [U,sigma,V]=svd(Y-Q,'econ');
    d=diag(sigma);
    sigma=diag( max(d-lambda/rho,0)  );
    X=U*sigma*V';

    % Y-update: closed-form maximizer, projected onto the box [b0,b1].
    Yprev=Y;
    Y=min(max(X+Q+(1/rho)*W,b0),b1);

    % Scaled dual ascent step.
    Q=Q+(X-Y);

    % Primal and dual residuals.
    epsilon_R=norm(X-Y,'fro');
    epsilon_S=norm(rho*(Y-Yprev),'fro');

    % Relative stopping thresholds.
    primal_thres=epsilon*max(norm(X,'fro'),norm(Y,'fro'));
    dual_thres=epsilon*rho*norm(Q,'fro');
    if epsilon_R<=primal_thres && epsilon_S<=dual_thres
        break;
    end

    % Residual balancing: update rho. Q is rescaled by the inverse factor
    % so that the product rho*Q is unchanged.
    if epsilon_R>tau_penalty*epsilon_S
        rho=2*rho;
        Q=Q/2;
        if last_delta_rho<0
            rho_switch_count=rho_switch_count+1;
        end
        last_delta_rho=1;
    elseif epsilon_S>tau_penalty*epsilon_R
        rho=rho/2;
        Q=2*Q;
        if last_delta_rho>0
            rho_switch_count=rho_switch_count+1;
        end
        last_delta_rho=-1;
    end
    % If rho keeps flipping direction, widen the balancing band so that
    % rho changes less often.
    if rho_switch_count>=rho_switch_thres
        tau_penalty=2*tau_penalty;
        rho_switch_count=0;
    end
end

% An empty loop range leaves the loop variable empty; report zero
% iterations in that case.
if isempty(iter)
    iter=0;
end

