function M = sympositivedefinitefactory(n)
% Manifold of n-by-n symmetric positive definite matrices with
% the bi-invariant geometry.
%
% function M = sympositivedefinitefactory(n)
%
% A point X on the manifold is represented as a symmetric positive definite
% matrix X (nxn). Tangent vectors are symmetric matrices of the same size
% (but not necessarily definite).
%
% The Riemannian metric is the bi-invariant metric, described notably in
% Chapter 6 of the 2007 book "Positive definite matrices"
% by Rajendra Bhatia, Princeton University Press.
%
%
% The retraction / exponential map involves expm (the matrix exponential).
% If too large a vector is retracted / exponentiated (e.g., a solver tries
% to make too big a step), this may result in NaN's in the returned point,
% which most likely would lead to NaN's in the cost / gradient / ... and
% will result in failure of the optimization. For trustregions, this can be
% controlled by setting options.Delta0 and options.Delta_bar, to prevent
% too large steps.
%
%
% Note also that many of the functions involve solving linear systems in X
% (a point on the manifold), taking matrix exponential and logarithms, etc.
% It could therefore be beneficial to do some precomputation on X (an
% eigenvalue decomposition for example) and store both X and the
% preprocessing in a structure. This would require modifying the present
% factory to work with such structures to represent both points and tangent
% vectors. We omit this in favor of simplicity, but it may be good to keep
% this in mind if efficiency becomes an issue in your application.

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, August 29, 2013.
% Contributors: Nicolas Boumal
% Change log:
%
%   March 5, 2014 (NB)
%       There were a number of mistakes in the code owing to the tacit
%       assumption that if X and eta are symmetric, then X\eta is
%       symmetric too, which is not the case. See discussion on the Manopt
%       forum started on Jan. 19, 2014. Functions norm, dist, exp and log
%       were modified accordingly. Furthermore, they only require matrix
%       inversion (as well as matrix log or matrix exp), not matrix square
%       roots or their inverse.
% 
%   July 28, 2014 (NB)
%       The dim() function returned n*(n-1)/2 instead of n*(n+1)/2.
%       Implemented proper parallel transport from Sra and Hosseini (not
%       used by default).
%       Also added symmetrization in exp and log (to be sure).
% 
%   April 3, 2015 (NB):
%       Replaced trace(A*B) by a faster equivalent that does not compute
%       the whole product A*B, for inner product, norm and distance.
%
%   May 23, 2017 (NB):
%       As seen in a talk of Wen Huang at the SIAM Optimization Conference
%       today, replaced the retraction of this factory (which was simply
%       equal to the exponential map) with a simpler, second-order
%       retraction. That this retraction is second order can be verified
%       numerically with checkretraction(sympositivedefinitefactory(5));
%       Notice that, for this retraction, it would be cheap to evaluate for
%       many values of t, that is, it is cheap to retract many points along
%       the same tangent direction. This could in principle be exploited to
%       speed up line-searches.
    
    symm = @(X) .5*(X+X');
    
    M.name = @() sprintf('Symmetric positive definite geometry of %dx%d matrices', n, n);
    
    M.dim = @() n*(n+1)/2;
    
    % Helpers to avoid computing full matrices simply to extract their trace
    vec  = @(A) A(:);
    trAB = @(A, B) vec(A')'*vec(B);  % = trace(A*B)
    trAA = @(A) sqrt(trAB(A, A));    % = sqrt(trace(A^2))
    
    % Choice of the metric on the orthonormal space is motivated by the
    % symmetry present in the space. The metric on the positive definite
    % cone is its natural bi-invariant metric.
    % The result is equal to: trace( (X\eta) * (X\zeta) )
    M.inner = @(X, eta, zeta) trAB(X\eta, X\zeta);
    
    % Notice that X\eta is *not* symmetric in general.
    % The result is equal to: sqrt(trace((X\eta)^2))
    % There should be no need to take the real part, but rounding errors
    % may cause a small imaginary part to appear, so we discard it.
    M.norm = @(X, eta) real(trAA(X\eta));
    
    % Same here: X\Y is not symmetric in general.
    % Same remark about taking the real part.
    M.dist = @(X, Y) real(trAA(real(logm(X\Y))));
    
    
    M.typicaldist = @() sqrt(n*(n+1)/2);
    
    
    M.egrad2rgrad = @egrad2rgrad;
    function eta = egrad2rgrad(X, eta)
        eta = X*symm(eta)*X;
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Directional derivatives of the Riemannian gradient
        Hess = X*symm(ehess)*X + 2*symm(eta*symm(egrad)*X);
        
        % Correction factor for the non-constant metric
        Hess = Hess - symm(eta*symm(egrad)*X);
    end
    
    
    M.proj = @(X, eta) symm(eta);
    
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        if nargin < 3
            teta = eta;
        else
            teta = t*eta;
        end
        % The symm() call is mathematically unnecessary but numerically
        % necessary.
        Y = symm(X + teta + .5*teta*(X\teta));
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        if nargin < 3
            t = 1.0;
        end
        % The symm() and real() calls are mathematically not necessary but
        % are numerically necessary.
        Y = symm(X*real(expm(X\(t*eta))));
    end
    
    M.log = @logarithm;
    function H = logarithm(X, Y)
        % Same remark regarding the calls to symm() and real().
        H = symm(X*real(logm(X\Y)));
    end
    
    M.hash = @(X) ['z' hashmd5(X(:))];
    
    % Generate a random symmetric positive definite matrix following a
    % certain distribution. The particular choice of a distribution is of
    % course arbitrary, and specific applications might require different
    % ones.
    M.rand = @random;
    function X = random()
        D = diag(1+rand(n, 1));
        [Q, R] = qr(randn(n)); %#ok
        X = Q*D*Q';
    end
    
    % Generate a uniformly random unit-norm tangent vector at X.
    M.randvec = @randomvec;
    function eta = randomvec(X)
        eta = symm(randn(n));
        nrm = M.norm(X, eta);
        eta = eta / nrm;
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(X) zeros(n);
    
    % Poor man's vector transport: exploit the fact that all tangent spaces
    % are the set of symmetric matrices, so that the identity is a sort of
    % vector transport. It may perform poorly if the origin and target (X1
    % and X2) are far apart though. This should not be the case for typical
    % optimization algorithms, which perform small steps.
    M.transp = @(X1, X2, eta) eta;
    
    % For reference, a proper vector transport is given here, following
    % work by Sra and Hosseini: "Conic geometric optimisation on the
    % manifold of positive definite matrices", in SIAM J. Optim.
    % in 2015; also available here: http://arxiv.org/abs/1312.1039
    % This will not be used by default. To force the use of this transport,
    % execute "M.transp = M.paralleltransp;" on your M returned by the
    % present factory.
    M.paralleltransp = @parallel_transport;
    function zeta = parallel_transport(X, Y, eta)
        E = sqrtm(Y/X);
        zeta = E*eta*E';
    end
    
    % vec and mat are not isometries, because of the unusual inner metric.
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, n, n);
    M.vecmatareisometries = @() false;
    
end
