function hessfun = approxhessianFD(problem, options)
% Hessian approx. fnctn handle based on finite differences of the gradient.
%
% function hessfun = approxhessianFD(problem)
% function hessfun = approxhessianFD(problem, options)
%
% Input:
%
% A Manopt problem structure (already containing the manifold and enough
% information to compute the cost gradient) and an options structure
% (optional), containing one option:
%    options.stepsize (positive double; default: 2^-14).
%
% If the gradient cannot be computed or approximated on 'problem',
% a warning is issued.
%
% Output:
% 
% Returns a function handle, encapsulating a generic finite difference
% approximation of the Hessian of the problem cost. The finite difference
% is based on computations of the gradient.
% 
% The returned hessfun has this calling pattern:
% 
%   function hessfd = hessfun(x, xdot)
%   function hessfd = hessfun(x, xdot, storedb)
%   function hessfd = hessfun(x, xdot, storedb, key)
% 
% x is a point on the manifold problem.M, xdot is a tangent vector to that
% manifold at x, storedb is a StoreDB object, and key is the StoreDB key to
% point x.
%
% Usage:
%
% Typically, the user will set problem.M and other fields to define the
% cost and the gradient (typically, problem.cost and problem.grad or
% problem.egrad). Then, to use this generic purpose Hessian approximation:
%
%   problem.approxhess = approxhessianFD(problem, options);
%
% See also: trustregions

% The Riemannian Trust-Region method, used in combination with the present
% Hessian approximation, is called RTR-FD. Some convergence theory for it
% is available in this paper:
%
% @incollection{boumal2015rtrfd
%   author={Boumal, N.},
%   title={Riemannian trust regions with finite-difference Hessian approximations are globally convergent},
%   year={2015},
%   booktitle={Geometric Science of Information}
% }

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 8, 2015.
% Contributors: 
% Change log: 
%
%   Feb. 19, 2015 (NB):
%       It is sufficient to ensure positive radial linearity to guarantee
%       (together with other assumptions) that this approximation of the
%       Hessian will confer global convergence to the trust-regions method.
%       Formerly, in-code comments referred to the necessity of having
%       complete radial linearity, and that this was harder to achieve.
%       This appears not to be necessary after all, which simplifies the
%       code.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   April 8, 2015 (NB):
%       Changed to approxhessianFD, which now returns a function handle
%       that encapsulates the getHessianFD functionality. Will be better
%       aligned with the other Hessian approximations to come (which may
%       want to use storedb.internal), and now allows specifying the step
%       size.
%
%   March 17, 2020 (NB):
%       Following a bug report by Marco Sutti, added the instruction
%           storedb.remove(key1);
%       to avoid memory usage ramping up when many inner iterations are
%       needed inside of tCG for trustregions. The deciding factor is that
%       there is no need to cache the gradient at the artificially produced
%       point used here for finite differencing, as this point is not
%       visible outside of this function: there is no reason we would visit
%       it again.

    % This Hessian approximation is based on the gradient:
    % check availability.
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        warning('manopt:approxhessianFD:nogradient', ...
                'approxhessianFD requires the gradient to be computable.');
    end

    % Set local defaults here, and merge with user options, if any.
    localdefaults.stepsize = 2^-14;
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);
    
    % Finite-difference parameter: how far do we look?
    stepsize = options.stepsize;
                   
    % Build and return the function handle here. This extra construct via
    % funhandle makes it possible to make storedb and key optional.
    hessfun = @funhandle;
    function hessfd = funhandle(x, xdot, storedb, key)
        % Allow omission of the key, and even of storedb.
        if ~exist('key', 'var')
            if ~exist('storedb', 'var')
                storedb = StoreDB();
            end
            key = storedb.getNewKey();
        end 
        hessfd = hessianFD(stepsize, problem, x, xdot, storedb, key);
    end
    
end


function hessfd = hessianFD(stepsize, problem, x, xdot, storedb, key)
% This function does the actual work.
%
% Original code: Dec. 30, 2012 (NB).
    
    % Extract the input vector norm.
    norm_xdot = problem.M.norm(x, xdot);
    
    % First, check whether the step xdot is not too small.
    if norm_xdot < eps
        hessfd = problem.M.zerovec(x);
        return;
    end
    
    % Determine how far to retract xdot, so that the point reached does not
    % depend on the norm of xdot. This is what ensures radial linearity of
    % this present Hessian approximation.
    c = stepsize / norm_xdot;
    
    % Compute the gradient at the current point.
    grad = getGradient(problem, x, storedb, key);
    
    % Compute a point a little further along xdot, and the gradient there.
    % Since this is a new point, we need a new key for it, for storedb.
    x1 = problem.M.retr(x, xdot, c);
    key1 = storedb.getNewKey();
    grad1 = getGradient(problem, x1, storedb, key1);
    
    % Remove any caching associated to that new point, since there is no
    % reason we would be visiting it again.
    storedb.remove(key1);
    
    % Transport grad1 back from x1 to x.
    grad1 = problem.M.transp(x1, x, grad1);
    
    % Return the finite difference of them: (grad1 - grad)/c.
    hessfd = problem.M.lincomb(x, 1/c, grad1, -1/c, grad);
    
end
