Description of low_rank_matrix

0001 function low_rank_matrix_completion()
0002 % Given partial observation of a low rank matrix, attempts to complete it.
0003 %
0004 % function low_rank_matrix_completion()
0005 %
0006 % This example demonstrates how to use the geometry factory for the
0007 % embedded submanifold of fixed-rank matrices, fixedrankembeddedfactory.
0008 % This geometry is described in the paper
0009 % "Low-rank matrix completion by Riemannian optimization"
0010 % Bart Vandereycken - SIAM Journal on Optimization, 2013.
0011 %
0012 % This can be a starting point for many optimization problems of the form:
0013 %
0014 % minimize f(X) such that rank(X) = k, size(X) = [m, n].
0015 %
0016 % Note that the code is long because it showcases quite a few features of
0017 % Manopt: most of the code is optional.
0018 %
0019 % Input:  None. This example file generates random data.
0020 %
0021 % Output: None.
0022 
0023 % This file is part of Manopt and is copyrighted. See the license file.
0024 %
0025 % Main author: Nicolas Boumal, July 15, 2014
0026 % Contributors: Bart Vandereycken
0027 %
0028 % Change log:
0029 %
0030 %    Xiaowen Jiang Aug. 20, 2021
0031 %       Added AD to compute the egrad and the ehess
0032     
0033     % Random data generation. First, choose the size of the problem.
0034     % We will complete a matrix of size mxn of rank k.
0035     m = 200;
0036     n = 200;
0037     k = 5;
0038     % Generate a random mxn matrix A of rank k
0039     L = randn(m, k);
0040     R = randn(n, k);
0041     A = L*R';
0042     % Generate a random mask for observed entries: P(i, j) = 1 if the entry
0043     % (i, j) of A is observed, and 0 otherwise.
0044     fraction = 4 * k*(m+n-k)/(m*n);
0045     P = sparse(rand(m, n) <= fraction);
0046     % Hence, we know the nonzero entries in PA:
0047     PA = P.*A;
0048     
0049     
0050     
0051     
0052     
0053     
0054     
0055     % Pick the manifold of matrices of size mxn of fixed rank k.
0056     problem.M = fixedrankembeddedfactory(m, n, k);
0057 
0058     % Define the problem cost function. The input X is a structure with
0059     % fields U, S, V representing a rank k matrix as U*S*V'.
0060     % f(X) = 1/2 * || P.*(X-A) ||^2
0061     problem.cost = @cost;
0062     function f = cost(X)
0063         % Note that it is very much inefficient to explicitly construct the
0064         % matrix X in this way. Seen as we only need to know the entries
0065         % of Xmat corresponding to the mask P, it would be far more
0066         % efficient to compute those only.
0067         Xmat = X.U*X.S*X.V';
0068         f = .5*norm( P.*Xmat - PA , 'fro')^2;
0069     end
0070 
0071     % Define the Euclidean gradient of the cost function, that is, the
0072     % gradient of f(X) seen as a standard function of X.
0073     % nabla f(X) = P.*(X-A)
0074     problem.egrad = @egrad;
0075     function G = egrad(X)
0076         % Same comment here about Xmat.
0077         Xmat = X.U*X.S*X.V';
0078         G = P.*Xmat - PA;
0079     end
0080 
0081     % This is optional, but it's nice if you have it.
0082     % Define the Euclidean Hessian of the cost at X, along H, where H is
0083     % represented as a tangent vector: a structure with fields Up, Vp, M.
0084     % This is the directional derivative of nabla f(X) at X along Xdot:
0085     % nabla^2 f(X)[Xdot] = P.*Xdot
0086     problem.ehess = @euclidean_hessian;
0087     function ehess = euclidean_hessian(X, H)
0088         % The function tangent2ambient transforms H (a tangent vector) into
0089         % its equivalent ambient vector representation. The output is a
0090         % structure with fields U, S, V such that U*S*V' is an mxn matrix
0091         % corresponding to the tangent vector H. Note that there are no
0092         % additional guarantees about U, S and V. In particular, U and V
0093         % are not orthonormal.
0094         ambient_H = problem.M.tangent2ambient(X, H);
0095         Xdot = ambient_H.U*ambient_H.S*ambient_H.V';
0096         % Same comment here about explicitly constructing the ambient
0097         % vector as an mxn matrix Xdot: we only need its entries
0098         % corresponding to the mask P, and this could be computed
0099         % efficiently.
0100         ehess = P.*Xdot;
0101     end
0102     
0103     % An alternative way to compute the egrad and the ehess is to use
0104     % automatic differentiation provided in the deep learning toolbox (slower)
0105     % Notice that the function norm is not supported for AD so far.
0106     % Replace norm(...,'fro')^2 with cnormsqfro described in the file
0107     % manoptADhelp.m. Also, operations between sparse matrices and dlarrays
0108     % are not supported for now. convert PA and P into full matrices.
0109     % PA_full = full(PA);
0110     % P_full = full(P);
0111     % problem.cost = @cost_AD;
0112     %    function f = cost_AD(X)
0113     %        Xmat = X.U*X.S*X.V';
0114     %        f = .5*cnormsqfro(P_full.*Xmat - PA_full);
0115     %    end
0116     
0117     % Computating the ehess for the set of fixed-rank matrices with
0118     % an embedded geometry is currently not supported.
0119     % Call manoptAD to automatically obtain the grad
0120     % problem = manoptAD(problem);
0121 
0122 
0123     % Check consistency of the gradient and the Hessian. Useful if you
0124     % adapt this example for a new cost function and you would like to make
0125     % sure there is no mistake.
0126     % warning('off', 'manopt:fixedrankembeddedfactory:exp');
0127     % checkgradient(problem); pause;
0128     % checkhessian(problem); pause;
0129     
0130     
0131     
0132     
0133     
0134     % Compute an initial guess. Points on the manifold are represented as
0135     % structures with three fields: U, S and V. U and V need to be
0136     % orthonormal, S needs to be diagonal.
0137     [U, S, V] = svds(PA, k);
0138     X0.U = U;
0139     X0.S = S;
0140     X0.V = V;
0141     
0142     % Minimize the cost function using Riemannian trust-regions, starting
0143     % from the initial guess X0.
0144     X = trustregions(problem, X0);
0145     
0146     % The reconstructed matrix is X, represented as a structure with fields
0147     % U, S and V.
0148     Xmat = X.U*X.S*X.V';
0149     fprintf('||X-A||_F = %g\n', norm(Xmat - A, 'fro'));
0150     
0151     
0152     
0153     
0154     
0155     
0156     % Alternatively, we could decide to use a solver such as
0157     % steepestdescent or conjugategradient. These solvers need to solve a
0158     % line-search problem at each iteration. Standard line searches in
0159     % Manopt have generic purpose systems to do this. But for the problem
0160     % at hand, it so happens that we can rather accurately guess how far
0161     % the line-search should look, and it would be a waste to not use that.
0162     % Look up the paper referenced above for the mathematical explanation
0163     % of the code below.
0164     %
0165     % To tell Manopt about this special information, we specify the
0166     % linesearch hint function in the problem structure. Notice that this
0167     % is not the same thing as specifying a linesearch function in the
0168     % options structure.
0169     %
0170     % Both the SD and the CG solvers will detect that we
0171     % specify the hint function below, and they will use an appropriate
0172     % linesearch algorithm by default, as a result. Typically, they will
0173     % try the step t*H first, then if it does not satisfy an Armijo
0174     % criterion, they will decrease t geometrically until satisfaction or
0175     % failure.
0176     %
0177     % Just like the cost, egrad and ehess functions, the linesearch
0178     % function could use a store structure if you like. The present code
0179     % does not use the store structure, which means quite a bit of the
0180     % computations are made redundantly, and as a result a better method
0181     % could appear slower. See the Manopt tutorial about caching when you
0182     % are ready to switch from a proof-of-concept code to an efficient
0183     % code.
0184     %
0185     % The inputs are X (a point on the manifold) and H, a tangent vector at
0186     % X that is assumed to be a descent direction. That is, there exists a
0187     % positive t such that f(Retraction_X(tH)) < f(X). The function below
0188     % is supposed to output a "t" that it is a good "guess" at such a t.
0189     problem.linesearch = @linesearch_helper;
0190     function t = linesearch_helper(X, H)
0191         % Note that you would not usually need the Hessian for this.
0192         residual_omega = nonzeros(problem.egrad(X));
0193         dir_omega      = nonzeros(problem.ehess(X, H));
0194         t = - dir_omega \ residual_omega ;
0195     end
0196 
0197     % Notice that for this solver, the Hessian is not needed.
0198     [Xcg, xcost, info, options] = conjugategradient(problem, X0); %#ok<ASGLU>
0199     
0200     fprintf('Take a look at the options that CG used:\n');
0201     disp(options);
0202     fprintf('And see how many trials were made at each line search call:\n');
0203     info_ls = [info.linesearch];
0204     disp([info_ls.costevals]);
0205 
0206     
0207     
0208     
0209     
0210     
0211     
0212     
0213     fprintf('Try it again without the linesearch helper.\n');
0214     
0215     % Remove the linesearch helper from the problem structure.
0216     problem = rmfield(problem, 'linesearch');
0217     
0218     [Xcg, xcost, info, options] = conjugategradient(problem, X0); %#ok<ASGLU>
0219     
0220     fprintf('Take a look at the options that CG used:\n');
0221     disp(options);
0222     fprintf('And see how many trials were made at each line search call:\n');
0223     info_ls = [info.linesearch];
0224     disp([info_ls.costevals]);
0225     
0226     
0227     
0228     
0229 
0230     % If the problem has a small enough dimension, we may (for analysis
0231     % purposes) compute the spectrum of the Hessian at a point X. This may
0232     % help in studying the conditioning of a problem. If you don't provide
0233     % the Hessian, Manopt will approximate the Hessian with finite
0234     % differences of the gradient and try to estimate its "spectrum" (it's
0235     % not a proper linear operator). This can give some intuition, but
0236     % should not be relied upon.
0237     if exist('OCTAVE_VERSION', 'builtin') == 0
0238         hstgrm = @histogram;
0239     else
0240         hstgrm = @hist;
0241     end
0242     if problem.M.dim() < 2000
0243         fprintf('Computing the spectrum of the Hessian...\n');
0244         s = hessianspectrum(problem, X);
0245         subplot(1, 2, 1);
0246         hstgrm(s);
0247         title('Histogram of eigenvalues of the Hessian at the solution');
0248         subplot(1, 2, 2);
0249         hstgrm(log10(abs(s)));
0250         title('Histogram of log_{10}(|eigenvalues|) of the Hessian at the solution');
0251     end
0252     
0253     
0254     
0255     
0256 end
low_rank_matrix_completion

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SUBFUNCTIONS

SOURCE CODE