function M = stiefelgeneralizedfactory(n, p, B)
% Returns a manifold structure of "scaled" orthonormal matrices.
%
% function M = stiefelgeneralizedfactory(n, p)
% function M = stiefelgeneralizedfactory(n, p, B)
%
% The generalized Stiefel manifold is the set of "scaled" orthonormal
% nxp matrices X such that X'*B*X is the identity. B must be positive
% definite. If B is the identity, then this is the standard Stiefel
% manifold.
%
% The generalized Stiefel manifold is endowed with a scaled metric
% by making it a Riemannian submanifold of the Euclidean space,
% again endowed with the scaled inner product.
%
% Some notions (not all) are from Section 4.5 of the paper
% "The geometry of algorithms with orthogonality constraints",
% A. Edelman, T. A. Arias, S. T. Smith, SIMAX, 1998.
%
% Paper link: http://arxiv.org/abs/physics/9806030.
%
% Note: egrad2rgrad and ehess2rhess involve solving linear systems in B.
% If this is a bottleneck for a specific application, a way forward is to
% create a modified version of this file which preprocesses B to speed
% this up (typically, by computing a Cholesky factorization of it, then
% calling an appropriate solver, or by exploiting sparsity).
%
% See also: stiefelfactory grassmannfactory grassmanngeneralizedfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, June 30, 2015.
% Contributors: Hiroyuki Sato and Kensuke Aihara, Dec. 27, 2018.
%
% Change log:
%
%   Sep. 6, 2018 (NB):
%       Removed M.exp() as it was not implemented.
%
%   Dec. 27, 2018 (NB):
%       Added retraction M.retr_qr (based on QR factorization) as an
%       alternative to the previous retraction, which is now accessible
%       as M.retr_polar. The new retraction should be more efficient:
%       it is now the default, as M.retr = M.retr_qr.
%       This new retraction is a contribution of H. Sato and K. Aihara,
%       as per their paper:
%       'Cholesky QR-based retraction on the generalized Stiefel manifold'
%       https://link.springer.com/article/10.1007%2Fs10589-018-0046-7


    if ~exist('B', 'var') || isempty(B)
        B = speye(n); % Standard Stiefel manifold.
    end

    M.name = @() sprintf('Generalized Stiefel manifold St(%d, %d)', n, p);

    M.dim = @() (n*p - .5*p*(p+1));

    M.inner = @(X, eta, zeta) trace(eta'*(B*zeta)); % Scaled metric.

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @(X, Y) error('stiefelgeneralizedfactory.dist not implemented yet.');

    M.typicaldist = @() sqrt(p);

    % Orthogonal projection of an ambient vector U to the tangent space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        BX = B*X;

        % Projection onto the tangent space.
        Up = U - X*symm(BX'*U);
    end

    M.tangent = M.proj;

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)

        % First, scale egrad according to the scaled metric in the
        % Euclidean space. Ideally, B should be preprocessed to ease
        % solving linear systems, e.g., via Cholesky factorization.
        egrad_scaled = B\egrad;

        % Second, project onto the tangent space.
        % rgrad = egrad_scaled - X*symm((B*X)'*egrad_scaled);
        %
        % One can verify that symm(BX'*egrad_scaled) = symm(X'*egrad).

        rgrad = egrad_scaled - X*symm(X'*egrad);
    end


    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        egraddot = ehess;
        Xdot = H;

        % Directional derivative of the Riemannian gradient.
        egrad_scaleddot = B\egraddot;
        rgraddot = egrad_scaleddot - Xdot*symm(X'*egrad) ...
                                   - X*symm(Xdot'*egrad) ...
                                   - X*symm(X'*egraddot);

        % Project onto the tangent space.
        rhess = M.proj(X, rgraddot);
    end


    M.retr_polar = @retraction_polar;
    function Y = retraction_polar(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y = guf(X + t*U); % Ensures Y'*B*Y is identity.
    end

    M.retr_qr = @retraction_qr;
    function Y = retraction_qr(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y = gqf(X + t*U); % Ensures Y'*B*Y is identity.
    end

    % By default, we use the QR retraction.
    M.retr = M.retr_qr;

    M.hash = @(X) ['z' hashmd5(X(:))];

    M.rand = @random;
    function X = random()
        X = guf(randn(n, p)); % Ensures X'*B*X is identity.
        % TODO: test if this can be replaced by gqf.
    end

    M.randvec = @randomvec;
    function U = randomvec(X)
        U = projection(X, randn(n, p));
        U = U / norm(U(:));
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(X) zeros(n, p);

    % This transport is compatible with the generalized polar retraction.
    M.transp = @(X1, X2, d) projection(X2, d);

    M.vec = @(X, u_mat) u_mat(:);
    M.mat = @(X, u_vec) reshape(u_vec, [n, p]);
    M.vecmatareisometries = @() false;

    % Some auxiliary functions.
    symm = @(D) (D + D')/2;


    function X = gqf(Y)
        % Generalized QR decomposition of an n-by-p matrix Y.
        % X'*B*X is identity. See Sato and Aihara 2018,
        % Cholesky QR-based retraction on the generalized Stiefel manifold,
        % https://link.springer.com/article/10.1007%2Fs10589-018-0046-7
        %
        % Comment by Nicolas Boumal, Dec. 27, 2018, following discussions
        % with Hiroyuki Sato, Kensuke Aihara and Bamdev Mishra:
        %
        % In principle, to orthonormalize the columns of Y against the
        % inner product defined by B, it would be better to implement
        % (for example) a modified Gram-Schmidt algorithm, possibly run
        % twice. Indeed, the latter would be affected by the condition
        % number of sqrtm(B)*Y.
        % In contrast, the method below first squares the data, hence is
        % affected by the condition number of Y'*B*Y, which is the square
        % of that of sqrtm(B)*Y. Fortunately, it is easily shown that,
        % when Y = X + U for a tangent vector U at X, the condition
        % number of sqrtm(B)*Y is upper bounded by the square root of
        % 1 + ||U||_X^2. In Riemannian optimization, it is seldom
        % necessary to retract very large vectors, hence it is expected
        % that the condition numbers encountered in practice will be
        % reasonable. As a result, the implementation below, which calls
        % directly upon low-level routines (Cholesky and a small
        % triangular system solve), is expected to run faster in practice
        % than a home-made implementation of modified Gram-Schmidt, which
        % would involve a loop, and still be sufficiently accurate.
        R = chol(symm(Y'*(B*Y)));
        X = Y / R;
    end

    function X = guf(Y)
        % Generalized polar decomposition of an n-by-p matrix Y.
        % X'*B*X is identity.

        % Method 1
        [u, ~, v] = svd(Y, 0);

        % Instead of the following three steps, an equivalent but more
        % expensive way is to do X = u*(sqrtm(u'*(B*u))\(v')).
        [q, ssquare] = eig(u'*(B*u));
        qsinv = q/sparse(diag(sqrt(diag(ssquare))));
        X = u*((qsinv*q')*v'); % X'*B*X is identity.


        % Another computation using restricted_svd:
        % [u, ~, v] = restricted_svd(Y);
        % X = u*v'; % X'*B*X is identity.

    end

    function [u, s, v] = restricted_svd(Y) %#ok<DEFNU>
        % We compute a thin svd-like decomposition of an n-by-p matrix Y
        % into matrices u, s, and v such that u is an n-by-p matrix
        % with u'*B*u being identity, s is a p-by-p diagonal matrix
        % with positive entries, and v is a p-by-p orthogonal matrix,
        % such that Y = u*s*v'.
        [v, ssquare] = eig(symm(Y'*(B*Y))); % Y'*B*Y is positive definite.
        ssquarevec = diag(ssquare);

        s = sparse(diag(abs(sqrt(ssquarevec))));
        u = Y*(v/s); % u'*B*u is identity.
    end

end
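
% Example usage (a sketch, not part of the original file): the generalized
% Stiefel manifold is a natural search space for the extreme generalized
% eigenvalue problem, min trace(X'*A*X) subject to X'*B*X = I. With
% Manopt's trustregions solver, this could look as follows (the matrices
% A and B below are illustrative placeholders):
%
%     n = 100; p = 3;
%     A = randn(n); A = .5*(A + A');       % Symmetric matrix.
%     L = randn(n); B = L*L' + n*eye(n);   % Positive definite matrix.
%     problem.M = stiefelgeneralizedfactory(n, p, B);
%     problem.cost  = @(X) trace(X'*(A*X));
%     problem.egrad = @(X) 2*A*X;          % Euclidean gradient.
%     problem.ehess = @(X, H) 2*A*H;       % Euclidean Hessian along H.
%     X = trustregions(problem);           % X satisfies X'*B*X = I.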