Contents

    Parameters and defaults
    Data preprocessing and initialization
    Main iterations

function [x,out] = lbreg_accel_w_reset(A,b,alpha,opts)
% lbreg_accel_w_reset: linearized Bregman iteration with Nesterov's acceleration and reset (restart or skip)
%   minimize |x|_1 + 1/(2*alpha) |x|_2^2
%   subject to Ax = b
%
% input:
%       A: constraint matrix
%       b: constraint vector
%       alpha: smoothing parameter, typical value: 1 to 10 times estimated norm(x,inf)
%       opts.
%           lip: the estimated Lipschitz constant of the gradient of the dual objective, default: alpha*normest(A*A',1e-2)
%           tol: stop iteration once norm(Ax-b)<tol*norm(b), default: 1e-4
%           maxit: max number of iterations, default: 500
%           maxT:  max running time in seconds, default: 1000
%           x_ref: if provided, norm(x^k - x_ref) is computed and saved in out.hist_err, default: []
%
% output:
%       x: solution vector
%       out.
%           iter: number of iterations
%           hist_obj: objective value at each iteration
%           hist_res: |Ax-b|_2 at each iteration
%           hist_err: if opts.x_ref is provided, contains norm(x^k - x_ref); otherwise, will be set to []
%           reset: Nesterov's acceleration restart (theta is reset) or skip (theta is not reset)
%                  0: no restart or skip
%                  1: restart if dual objective is non-monotonic
%                  2: skip    if dual objective is non-monotonic
%                  3: restart if residual is sufficiently non-monotonic
%                  4: skip    if residual is sufficiently non-monotonic
%                  5: restart according to the gradient test on P6 of http://www-stat.stanford.edu/~candes/papers/adap_restart_paper.pdf
%                  6: skip    according to the gradient test
%                 10: restart according to user input opts.reset_intvl; e.g. [50 10 10 10 10] means restart at 50th, 60th, 70th, 80th, and 90th iterations
%                 11: skip    according to user input opts.reset_intvl; e.g. [50 10 10 10 10] means skip    at 50th, 60th, 70th, 80th, and 90th iterations
%
% Algorithm:
%   Linearized Bregman is the dual gradient ascent iteration.
%   The dual problem objective is:
%     b'y - alpha/2 |shrink(A'y,1)|^2,  where shrink(z,1) = z - proj_[-1,1](z) = sign(z).*max(abs(z)-1,0)
%
%   Let y be the dual variable. The gradient ascent iteration is
%     y^{k+1} = y^k + stepsize (b - alpha A shrink(A'y^k,1));
%   Primal variable x is obtained as x^k = alpha shrink(A'y^k,1)
%
%   The accelerated gradient ascent iteration is
%     beta^k = (1-theta^k)*(sqrt(theta^k*theta^k+4) - theta^k)/2;  (extrapolation weight computation)
%     z^{k+1} = y^k + stepsize (b - alpha A shrink(A'y^k,1));      (gradient step, we choose stepsize = 1 / lip)
%     y^{k+1} = z^{k+1} + beta^k (z^{k+1} - z^k);                  (extrapolation step)
%     theta^{k+1} = theta^k*(sqrt(theta^k*theta^k+4) - theta^k)/2; (update of theta)
%
%   Restart at k: beta^k is set to 0, and theta^k is reset to 1.
%   Skip at k: beta^k is set to 0, and theta^k is not affected.
%   Restarting or skipping helps eliminate the (unwanted) oscillations of Nesterov's acceleration when the underlying function is locally strongly convex.
%
% How to set alpha:
%   There exists alpha0 so that any alpha >= alpha0 gives the solution to minimize |x|_1 subject to Ax = b.
%   The threshold alpha0 depends on the data, but a typical value of alpha is 1 to 10 times an estimate of norm(solution_x,inf).
%
% How is the algorithm stopped: see "% stopping" below
%
% More information can be found at
% http://www.caam.rice.edu/~optimization/linearized_bregman
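%
% Example (a minimal sketch; the random sparse test problem below is
% hypothetical; only the call signature and the opts fields above are
% taken from this file):
%   m = 250; n = 1000; s = 25;        % problem sizes
%   A = randn(m,n);                   % Gaussian sensing matrix
%   x0 = zeros(n,1); p = randperm(n);
%   x0(p(1:s)) = randn(s,1);          % s-sparse ground truth
%   b = A*x0;
%   alpha = 5*norm(x0,inf);           % 1 to 10 times norm(x,inf), as above
%   opts.reset = 5;                   % restart by the gradient test
%   opts.x_ref = x0;                  % record norm(x^k - x0) in out.hist_err
%   [x,out] = lbreg_accel_w_reset(A,b,alpha,opts);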

Parameters and defaults

if isfield(opts,'lip'),    lip = opts.lip;     else lip = alpha*normest(A*A',1e-2); end
if isfield(opts,'tol'),    tol = opts.tol;     else tol = 1e-4;   end
if isfield(opts,'maxit'),  maxit = opts.maxit; else maxit = 500;  end
if isfield(opts,'maxT'),   maxT = opts.maxT;   else maxT = 1e3;   end
if isfield(opts,'x_ref'),  x_ref = opts.x_ref; else x_ref = []; out.hist_err = [];  end
if isfield(opts,'reset'),  reset = opts.reset; else reset = 0;    end
if reset; out.hist_reset = []; end

Data preprocessing and initialization

m = size(A,1);

y = zeros(m,1);  % variable y in Nesterov's method
z = zeros(m,1);  % variable x in Nesterov's method
res = b; % residual (b - Ax)
norm_b = norm(b);

shrink = @(z) sign(z).*max(0,abs(z)-1);
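% e.g., shrink(2.5) = 1.5 and shrink(-0.3) = 0; it acts componentwise on vectors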

theta = 1; % extrapolation auxiliary parameter, initialized to 1

Main iterations

start_time = tic;

for k = 1:maxit

    % --- extrapolation parameter ---
    beta = (1-theta)*(sqrt(theta*theta+4) - theta)/2;   % computes beta_k in P.80 (2.2.9) of Nesterov's 2004 textbook
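    % theta starts at 1 and decreases (roughly like 2/k), so beta grows from 0 toward 1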

    % --- restart or skip extrapolation ---
    switch reset
        case 1 % restart when the dual objective decreases
            if k > 2 && out.hist_obj(k-1) < out.hist_obj(k-2);      beta = 0; theta = 1; end
        case 2 % skip    when the dual objective decreases
            if k > 2 && out.hist_obj(k-1) < out.hist_obj(k-2);      beta = 0;            end
        case 3 % restart by residual size
            if k > 2 && out.hist_res(k-1) > 1.5*out.hist_res(k-2);  beta = 0; theta = 1; end
        case 4 % skip    by residual size
            if k > 2 && out.hist_res(k-1) > 1.5*out.hist_res(k-2);  beta = 0;            end
        case 5 % restart by gradient scheme on P6 of http://www-stat.stanford.edu/~candes/papers/adap_restart_paper.pdf
            if k > 2 && res.'*(y + res/lip - z) < 0;                beta = 0; theta = 1; end
        case 6 % skip    by gradient scheme
            if k > 2 && res.'*(y + res/lip - z) < 0;                beta = 0;            end
        case 10 % user manual restart
            if ismember(k, cumsum(opts.reset_intvl));               beta = 0; theta = 1; end
        case 11 % user manual skip
            if ismember(k, cumsum(opts.reset_intvl));               beta = 0;            end
    end
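    % (in cases 5/6, res is the dual gradient at the current y and
    %  y + res/lip - z is the upcoming momentum direction z_new - z,
    %  so the test fires when the gradient opposes the momentum)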
    if beta == 0 && k > 2; out.hist_reset = [out.hist_reset k]; end

    % --- y-update ---
    z_new = y + res/lip;            % step 1a in P.80 of Nesterov's 2004 textbook
    y = z_new + beta*(z_new - z);   % step 1b in P.80 of Nesterov's 2004 textbook
    z = z_new;

    % --- x-update ---
    x = alpha * shrink(y.'*A).';
    Ax = A*x;
    res = b - Ax; % res will be used in next y-update

    % --- theta-update ---
    theta = theta*(sqrt(theta*theta+4) - theta)/2;      % computes alpha_{k+1} in P.80 (2.2.9) of Nesterov's 2004 textbook

    % --- diagnostics, reporting, stopping checks ---
    % reporting
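    % note: alpha/2*|shrink(A'y,1)|^2 = norm(x)^2/(2*alpha), since x = alpha*shrink(A'y,1)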
    out.hist_obj(k) = b.'*y - norm(x)^2/alpha/2; % dual objective
    out.hist_res(k) = norm(res); % residual size |b - Ax|_2
    if ~isempty(x_ref); out.hist_err(k) = norm(x - x_ref); end

    % stopping
    if toc(start_time) > maxT; break; end % running time check
    if k > 1 && norm(res) < tol*norm_b; break; end % relative primal residual check

end

out.iter = k;
