Modify eigendecomposition solver

abt8601 · abt8601 · commit 4eba5ddb1c57 · 2021-12-10T21:43:16.000+08:00
- Use the "reprojection" technique instead of the regular annihilation
  technique to avoid numerical instability.
- Remove Aitken's Δ² method, because it is almost always stopped before the
  eigenvector converges.
- API change.
diff --git a/include/diffusion_maps/internal/eig_solver.hpp b/include/diffusion_maps/internal/eig_solver.hpp
@@ -12,23 +12,44 @@ namespace diffusion_maps {
 namespace internal {
 
 /**
- * Find the dominant eigenvalue and its corresponding eigenvector of a symmetric
- * matrix using the symmetric power method.
+ * Find an eigenvalue and its corresponding eigenvector of a symmetric matrix.
+ * <p>
+ * The numerical method used is the symmetric power method with "reprojection",
+ * i.e., during each iteration, the eigenvector is orthogonalised against the
+ * previously found eigenvectors. This is done so that we can find the k-th
+ * eigenvalue/eigenvector after we have found the first k-1.
+ * <p>
+ * The reprojection is meant to solve the numerical instability problem of the
+ * annihilation technique. Say that A has eigenvalues λ₁, λ₂, ..., λₙ with the
+ * corresponding eigenvectors being β₁, β₂, ..., βₙ and we have found the first
+ * k-1 of them. The annihilation technique chooses the initial guess for the
+ * eigenvector to be x₀ = (A - λₖ₋₁ I) (A - λₖ₋₂ I) ... (A - λ₁ I) x where x is
+ * some random vector. Observe that if we write x₀ = ∑ᵢ cᵢ βᵢ, then c₁ = c₂
+ * = ... = cₖ₋₁ = 0. If we run the standard symmetric power method on x₀, then
+ * the components for β₁, β₂, ..., βₖ₋₁ in the eigenvector should stay zero and
+ * the eigenvector should converge to βₖ. However, in the presence of rounding
+ * error, the components for β₁, β₂, ..., βₖ₋₁ may start to appear and the
+ * standard symmetric power method will converge to a different eigenvector.
+ * Reprojection solves this problem by orthogonalising the eigenvector against
+ * β₁, β₂, ..., βₖ₋₁ each time after it is multiplied by the matrix. In
+ * effect, this actively suppresses the components for β₁, β₂, ..., βₖ₋₁ in the
+ * eigenvector.
  *
  * @param a The matrix.
  * @param x0 The initial guess for the eigenvector.
- * @param tol The tolerance for the eigenvector.
+ * @param betas The array of previously found eigenvectors, all normalised with
+ * respect to the Euclidean norm.
+ * @param n_betas The number of previously found eigenvectors.
+ * @param tol The tolerance on the Euclidean norm of the eigenvector's change.
  * @param max_iters The maximum number of iterations.
- * @param epsilon The denominator cut-off for the Aitken's Δ² method. If the
- * absolute value of the denominator is less than this value, the method will
- * stop.
- * @return The dominant eigenvalue and its corresponding eigenvector. Or nullopt
- * if the maximum number of iterations is exceeded.
- * @throws std::invalid_argument if the dimensions are incorrect.
+ * @return An eigenvalue and its corresponding eigenvector, or nullopt if the
+ * method does not converge within the maximum number of iterations.
+ * @exception std::invalid_argument If the dimensions are incorrect.
  */
 std::optional<std::pair<double, Vector>>
-symmetric_power_method(const SparseMatrix &a, const Vector &x0, double tol,
-                       unsigned max_iters, double epsilon = 1e-16);
+symmetric_power_method(const SparseMatrix &a, const Vector &x0,
+                       const Vector *betas, std::size_t n_betas, double tol,
+                       unsigned max_iters);
 
 /**
  * Find @p k dominant eigenvalues and their corresponding eigenvectors of a
@@ -46,9 +67,9 @@ symmetric_power_method(const SparseMatrix &a, const Vector &x0, double tol,
  * @throws std::invalid_argument if @p k is greater than the number of rows in
  * @p a.
  */
-std::vector<std::pair<double, Vector>> eigsh(const SparseMatrix &a, unsigned k,
-                                             double tol, unsigned max_iters,
-                                             unsigned max_restarts);
+std::pair<std::vector<double>, std::vector<Vector>>
+eigsh(const SparseMatrix &a, unsigned k, double tol, unsigned max_iters,
+      unsigned max_restarts);
 
 } // namespace internal
 
diff --git a/src/diffusion_maps.cpp b/src/diffusion_maps.cpp
@@ -89,13 +89,13 @@ diffusion_maps::diffusion_maps(
 
   // Step 3: Compute the eigenvalues and eigenvectors of the diffusion matrix.
 
-  const auto eigen_pairs =
+  const auto [eigenvalues, eigenvectors] =
       internal::eigsh(kernel_matrix, n_components + 1, eig_solver_tol,
                       eig_solver_max_iter, eig_solver_max_restarts);
 
   // Step 4: Compute the diffusion maps.
 
-  const std::size_t n_eigenvalues = eigen_pairs.size();
+  const std::size_t n_eigenvalues = eigenvalues.size();
   Matrix::Buffer dm_buffer(n_samples, n_eigenvalues - 1);
   Matrix diffusion_maps(dm_buffer);
 
@@ -104,8 +104,8 @@ diffusion_maps::diffusion_maps(
       // We drop the first eigenpair because the eigenvector is constant in all
       // dimensions.
 
-      const double lambda = eigen_pairs[j + 1].first;
-      const double psi_i = invsqrt_row_sum[i] * eigen_pairs[j + 1].second[i];
+      const double lambda = eigenvalues[j + 1];
+      const double psi_i = invsqrt_row_sum[i] * eigenvectors[j + 1][i];
       diffusion_maps(i, j) = std::pow(lambda, diffusion_time) * psi_i;
     }
   }
diff --git a/src/eig_solver.cpp b/src/eig_solver.cpp
@@ -4,11 +4,9 @@
 #include <stdexcept>
 
 std::optional<std::pair<double, diffusion_maps::Vector>>
-diffusion_maps::internal::symmetric_power_method(const SparseMatrix &a,
-                                                 const Vector &x0,
-                                                 const double tol,
-                                                 const unsigned max_iters,
-                                                 const double epsilon) {
+diffusion_maps::internal::symmetric_power_method(
+    const SparseMatrix &a, const Vector &x0, const Vector *const betas,
+    const std::size_t n_betas, const double tol, const unsigned max_iters) {
   if (a.n_rows() != a.n_cols()) { // a is not square.
     throw std::invalid_argument("matrix is not square");
   }
@@ -17,15 +15,16 @@ diffusion_maps::internal::symmetric_power_method(const SparseMatrix &a,
   }
 
   Vector x = x0 / x0.l2_norm();
-  double mu_0 = 0, mu_1 = 0;
 
   for (unsigned k = 0; k < max_iters; ++k) {
     Vector y = a * x;
+
+    // Orthogonalise y against betas.
+    for (std::size_t i = 0; i < n_betas; ++i) {
+      y -= betas[i] * betas[i].dot(y);
+    }
+
     const double mu = x.dot(y);
-    const double denom = (mu - mu_1) - (mu_1 - mu_0);
-    const double mu_hat = std::abs(denom) < epsilon
-                              ? mu_0
-                              : mu_0 - ((mu_1 - mu_0) * (mu_1 - mu_0)) / denom;
 
     const double l2_norm_y = y.l2_norm();
     if (l2_norm_y == 0) { // a has eigenvalue 0.
@@ -35,18 +34,15 @@ diffusion_maps::internal::symmetric_power_method(const SparseMatrix &a,
     y /= l2_norm_y;
     const double err = (x - y).l2_norm();
     x = y;
-    if (k >= 3 && err < tol) { // Success.
-      return std::make_pair(mu_hat, x);
+    if (err < tol) { // Success.
+      return std::make_pair(mu, x);
     }
-
-    mu_0 = mu_1;
-    mu_1 = mu;
   }
 
   return std::nullopt; // Failed to converge.
 }
 
-std::vector<std::pair<double, diffusion_maps::Vector>>
+std::pair<std::vector<double>, std::vector<diffusion_maps::Vector>>
 diffusion_maps::internal::eigsh(const SparseMatrix &a, const unsigned k,
                                 const double tol, const unsigned max_iters,
                                 const unsigned max_restarts) {
@@ -57,42 +53,39 @@ diffusion_maps::internal::eigsh(const SparseMatrix &a, const unsigned k,
     throw std::invalid_argument("k cannot be larger than the number of rows");
   }
 
-  std::vector<std::pair<double, Vector>> eig_pairs;
-  eig_pairs.reserve(k);
+  std::default_random_engine gen;
+  std::uniform_real_distribution<double> dist(0, 1);
+
+  std::vector<double> eigenvalues;
+  std::vector<Vector> eigenvectors;
+  eigenvalues.reserve(k);
+  eigenvectors.reserve(k);
 
   for (std::size_t i = 0; i < k; ++i) {
     for (unsigned restarts = 0; restarts < max_restarts; ++restarts) {
-      // Construct the initial vector for the symmetric power method
-      //   x0 = (A - λᵢ₋₁ I) … (A - λ₁ I) (A - λ₀ I) x
-      // where x is a random vector. (Annihilation technique.)
+      // Generate the initial guess for the eigenvector.
 
       Vector x0(a.n_rows());
-
-      std::default_random_engine gen;
-      std::uniform_real_distribution<double> dist(0, 1);
       for (std::size_t i = 0; i < x0.size(); ++i) {
         x0[i] = dist(gen);
       }
 
-      for (std::size_t j = 0; j < i; j++) {
-        const double lambda_j = eig_pairs[j].first;
-        x0 = a * x0 - x0 * lambda_j;
-      }
-
       // Use the symmetric power method to find the i-th eigenvalue and
       // eigenvector.
 
-      const auto eig_pair = symmetric_power_method(a, x0, tol, max_iters);
+      const auto eig_pair = symmetric_power_method(
+          a, x0, eigenvectors.data(), eigenvectors.size(), tol, max_iters);
 
       // Restart if the method does not converge or finds an eigenvalue 0.
       if (!eig_pair.has_value() || eig_pair->first == 0) {
         continue;
       }
 
-      eig_pairs.push_back(eig_pair.value());
+      eigenvalues.push_back(eig_pair->first);
+      eigenvectors.push_back(eig_pair->second);
       break;
     }
   }
 
-  return eig_pairs;
+  return std::make_pair(eigenvalues, eigenvectors);
 }
diff --git a/test/test_eig_solver.cpp b/test/test_eig_solver.cpp
@@ -28,7 +28,7 @@ Test(eig_solver, symmetric_power_method_simple) {
   const unsigned max_iters = 100;
 
   const auto result = diffusion_maps::internal::symmetric_power_method(
-      matrix, x0, tol, max_iters);
+      matrix, x0, nullptr, 0, tol, max_iters);
 
   cr_assert(result.has_value(), "Fail to converge");
 
@@ -67,25 +67,27 @@ Test(eig_solver, eigsh_simple) {
   const unsigned max_iters = 100;
   const unsigned max_restarts = 3;
 
-  const auto result =
+  const auto [eigenvalues, eigenvectors] =
       diffusion_maps::internal::eigsh(matrix, k, tol, max_iters, max_restarts);
 
-  cr_assert_eq(result.size(), k, "eigsh does not find all eigenvalues");
+  cr_assert_eq(eigenvalues.size(), k, "eigsh does not find all eigenvalues");
+  cr_assert_eq(eigenvectors.size(), k, "eigsh does not find all eigenvectors");
 
   const std::vector<std::pair<double, diffusion_maps::Vector>> expected_result =
       {{6, diffusion_maps::Vector{1, -1, 1} / std::sqrt(3)},
        {3, diffusion_maps::Vector{-2, -1, 1} / std::sqrt(6)},
        {1, diffusion_maps::Vector{0, 1, 1} / std::sqrt(2)}};
 
   for (std::size_t i = 0; i < k; ++i) {
-    const auto [eigenvalue, eigenvector] = result[i];
+    const double eigenvalue = eigenvalues[i];
+    const diffusion_maps::Vector eigenvector = eigenvectors[i];
     const auto [expected_eigenvalue, expected_eigenvector] = expected_result[i];
-    cr_assert_float_eq(
-        eigenvalue, expected_eigenvalue, tol,
-        "Calculated eigenvalue %lf does not match expected eigenvalue %lf",
-        eigenvalue, expected_eigenvalue);
+    cr_assert_float_eq(eigenvalue, expected_eigenvalue, tol,
+                       "%zu-th calculated eigenvalue %lf does not match "
+                       "expected eigenvalue %lf",
+                       i, eigenvalue, expected_eigenvalue);
     cr_assert_lt(std::min((eigenvector - expected_eigenvector).l2_norm(),
                           (eigenvector - (-expected_eigenvector)).l2_norm()),
-                 tol, "Calculated eigenvector is incorrect");
+                 tol, "%zu-th calculated eigenvector is incorrect", i);
   }
 }