Skip to content
This repository was archived by the owner on Jan 28, 2023. It is now read-only.

Commit 4eba5dd

Browse files
committed
Modify eigendecomposition solver
- Use the "reprojection" technique instead of the regular annihilation technique to avoid numerical instability. - Remove the Aitken's Δ² method, because it will almost always be stopped before the eigenvector converges. - API change.
1 parent 2ca0256 commit 4eba5dd

File tree

4 files changed

+75
-59
lines changed

4 files changed

+75
-59
lines changed

include/diffusion_maps/internal/eig_solver.hpp

+35-14
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,44 @@ namespace diffusion_maps {
1212
namespace internal {
1313

1414
/**
15-
* Find the dominant eigenvalue and its corresponding eigenvector of a symmetric
16-
* matrix using the symmetric power method.
15+
* Find an eigenvalue and its corresponding eigenvector of a symmetric matrix.
16+
* <p>
17+
* The numerical method used is the symmetric power method with "reprojection",
18+
* i.e., during each iteration, the eigenvector is orthogonalised against the
19+
* previously found eigenvectors. This is done so that we can find the k-th
20+
* eigenvalue/eigenvector after we have found the first k-1.
21+
* <p>
22+
* The reprojection is meant to solve the numerical instability problem of the
23+
* annihilation technique. Say that A has eigenvalues λ₁, λ₂, ..., λₙ with the
24+
* corresponding eigenvectors being β₁, β₂, ..., βₙ and we have found the first
25+
* k-1 of them. The annihilation technique chooses the initial guess for the
26+
* eigenvector to be x₀ = (A - λₖ₋₁ I) (A - λₖ₋₂ I) ... (A - λ₁ I) x where x is
27+
* some random vector. Observe that if we write x₀ = ∑ᵢ cᵢ βᵢ, then c₁ = c₂
28+
* = ... = cₖ₋₁ = 0. If we run the standard symmetric power method on x₀, then
29+
* the components for β₁, β₂, ..., βₖ₋₁ in the eigenvector should stay zero and
30+
* the eigenvector should converge to βₖ. However, with the presence of rounding
31+
* error, the components for β₁, β₂, ..., βₖ₋₁ may start to appear and the
32+
* standard symmetric power method will converge to a different eigenvector.
33+
* Reprojection solves this problem by orthogonalising the eigenvector against
34+
* β₁, β₂, ..., βₖ₋₁ after each time the eigenvector is multiplied by the
35+
* matrix. Basically, this actively suppresses the components for β₁, β₂, ...,
36+
* βₖ₋₁ in the eigenvector.
1737
*
1838
* @param a The matrix.
1939
* @param x0 The initial guess for the eigenvector.
20-
* @param tol The tolerance for the eigenvector.
40+
* @param betas The array of previously found eigenvectors, all normalised with
41+
* respect to the Euclidean norm.
42+
* @param n_betas The number of previously found eigenvectors.
43+
* @param tol The tolerance for the Euclidean norm of the eigenvector.
2144
* @param max_iters The maximum number of iterations.
22-
* @param epsilon The denominator cut-off for the Aitken's Δ² method. If the
23-
* absolute value of the denominator is less than this value, the method will
24-
* stop.
25-
* @return The dominant eigenvalue and its corresponding eigenvector. Or nullopt
26-
* if the maximum number of iterations is exceeded.
27-
* @throws std::invalid_argument if the dimensions are incorrect.
45+
* @return An eigenvalue and its corresponding eigenvector. Or nullopt if the
46+
* maximum number of iterations is exceeded.
47+
* @exception std::invalid_argument If the dimensions are incorrect.
2848
*/
2949
std::optional<std::pair<double, Vector>>
30-
symmetric_power_method(const SparseMatrix &a, const Vector &x0, double tol,
31-
unsigned max_iters, double epsilon = 1e-16);
50+
symmetric_power_method(const SparseMatrix &a, const Vector &x0,
51+
const Vector *betas, std::size_t n_betas, double tol,
52+
unsigned max_iters);
3253

3354
/**
3455
* Find @p k dominant eigenvalues and their corresponding eigenvectors of a
@@ -46,9 +67,9 @@ symmetric_power_method(const SparseMatrix &a, const Vector &x0, double tol,
4667
* @throws std::invalid_argument if @p k is greater than the number of rows in
4768
* @p a.
4869
*/
49-
std::vector<std::pair<double, Vector>> eigsh(const SparseMatrix &a, unsigned k,
50-
double tol, unsigned max_iters,
51-
unsigned max_restarts);
70+
std::pair<std::vector<double>, std::vector<Vector>>
71+
eigsh(const SparseMatrix &a, unsigned k, double tol, unsigned max_iters,
72+
unsigned max_restarts);
5273

5374
} // namespace internal
5475

src/diffusion_maps.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,13 @@ diffusion_maps::diffusion_maps(
8989

9090
// Step 3: Compute the eigenvalues and eigenvectors of the diffusion matrix.
9191

92-
const auto eigen_pairs =
92+
const auto [eigenvalues, eigenvectors] =
9393
internal::eigsh(kernel_matrix, n_components + 1, eig_solver_tol,
9494
eig_solver_max_iter, eig_solver_max_restarts);
9595

9696
// Step 4: Compute the diffusion maps.
9797

98-
const std::size_t n_eigenvalues = eigen_pairs.size();
98+
const std::size_t n_eigenvalues = eigenvalues.size();
9999
Matrix::Buffer dm_buffer(n_samples, n_eigenvalues - 1);
100100
Matrix diffusion_maps(dm_buffer);
101101

@@ -104,8 +104,8 @@ diffusion_maps::diffusion_maps(
104104
// We drop the first eigenpair because the eigenvector is constant in all
105105
// dimensions.
106106

107-
const double lambda = eigen_pairs[j + 1].first;
108-
const double psi_i = invsqrt_row_sum[i] * eigen_pairs[j + 1].second[i];
107+
const double lambda = eigenvalues[j + 1];
108+
const double psi_i = invsqrt_row_sum[i] * eigenvectors[j + 1][i];
109109
diffusion_maps(i, j) = std::pow(lambda, diffusion_time) * psi_i;
110110
}
111111
}

src/eig_solver.cpp

+25-32
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@
44
#include <stdexcept>
55

66
std::optional<std::pair<double, diffusion_maps::Vector>>
7-
diffusion_maps::internal::symmetric_power_method(const SparseMatrix &a,
8-
const Vector &x0,
9-
const double tol,
10-
const unsigned max_iters,
11-
const double epsilon) {
7+
diffusion_maps::internal::symmetric_power_method(
8+
const SparseMatrix &a, const Vector &x0, const Vector *const betas,
9+
const std::size_t n_betas, const double tol, const unsigned max_iters) {
1210
if (a.n_rows() != a.n_cols()) { // a is not square.
1311
throw std::invalid_argument("matrix is not square");
1412
}
@@ -17,15 +15,16 @@ diffusion_maps::internal::symmetric_power_method(const SparseMatrix &a,
1715
}
1816

1917
Vector x = x0 / x0.l2_norm();
20-
double mu_0 = 0, mu_1 = 0;
2118

2219
for (unsigned k = 0; k < max_iters; ++k) {
2320
Vector y = a * x;
21+
22+
// Orthogonalise y against betas.
23+
for (std::size_t i = 0; i < n_betas; ++i) {
24+
y -= betas[i] * betas[i].dot(y);
25+
}
26+
2427
const double mu = x.dot(y);
25-
const double denom = (mu - mu_1) - (mu_1 - mu_0);
26-
const double mu_hat = std::abs(denom) < epsilon
27-
? mu_0
28-
: mu_0 - ((mu_1 - mu_0) * (mu_1 - mu_0)) / denom;
2928

3029
const double l2_norm_y = y.l2_norm();
3130
if (l2_norm_y == 0) { // a has eigenvalue 0.
@@ -35,18 +34,15 @@ diffusion_maps::internal::symmetric_power_method(const SparseMatrix &a,
3534
y /= l2_norm_y;
3635
const double err = (x - y).l2_norm();
3736
x = y;
38-
if (k >= 3 && err < tol) { // Success.
39-
return std::make_pair(mu_hat, x);
37+
if (err < tol) { // Success.
38+
return std::make_pair(mu, x);
4039
}
41-
42-
mu_0 = mu_1;
43-
mu_1 = mu;
4440
}
4541

4642
return std::nullopt; // Failed to converge.
4743
}
4844

49-
std::vector<std::pair<double, diffusion_maps::Vector>>
45+
std::pair<std::vector<double>, std::vector<diffusion_maps::Vector>>
5046
diffusion_maps::internal::eigsh(const SparseMatrix &a, const unsigned k,
5147
const double tol, const unsigned max_iters,
5248
const unsigned max_restarts) {
@@ -57,42 +53,39 @@ diffusion_maps::internal::eigsh(const SparseMatrix &a, const unsigned k,
5753
throw std::invalid_argument("k cannot be larger than the number of rows");
5854
}
5955

60-
std::vector<std::pair<double, Vector>> eig_pairs;
61-
eig_pairs.reserve(k);
56+
std::default_random_engine gen;
57+
std::uniform_real_distribution<double> dist(0, 1);
58+
59+
std::vector<double> eigenvalues;
60+
std::vector<Vector> eigenvectors;
61+
eigenvalues.reserve(k);
62+
eigenvectors.reserve(k);
6263

6364
for (std::size_t i = 0; i < k; ++i) {
6465
for (unsigned restarts = 0; restarts < max_restarts; ++restarts) {
65-
// Construct the initial vector for the symmetric power method
66-
// x0 = (A - λᵢ₋₁ I) … (A - λ₁ I) (A - λ₀ I) x
67-
// where x is a random vector. (Annihilation technique.)
66+
// Generate the initial guess for the eigenvector.
6867

6968
Vector x0(a.n_rows());
70-
71-
std::default_random_engine gen;
72-
std::uniform_real_distribution<double> dist(0, 1);
7369
for (std::size_t i = 0; i < x0.size(); ++i) {
7470
x0[i] = dist(gen);
7571
}
7672

77-
for (std::size_t j = 0; j < i; j++) {
78-
const double lambda_j = eig_pairs[j].first;
79-
x0 = a * x0 - x0 * lambda_j;
80-
}
81-
8273
// Use the symmetric power method to find the i-th eigenvalue and
8374
// eigenvector.
8475

85-
const auto eig_pair = symmetric_power_method(a, x0, tol, max_iters);
76+
const auto eig_pair = symmetric_power_method(
77+
a, x0, eigenvectors.data(), eigenvectors.size(), tol, max_iters);
8678

8779
// Restart if the method does not converge or finds an eigenvalue 0.
8880
if (!eig_pair.has_value() || eig_pair->first == 0) {
8981
continue;
9082
}
9183

92-
eig_pairs.push_back(eig_pair.value());
84+
eigenvalues.push_back(eig_pair->first);
85+
eigenvectors.push_back(eig_pair->second);
9386
break;
9487
}
9588
}
9689

97-
return eig_pairs;
90+
return std::make_pair(eigenvalues, eigenvectors);
9891
}

test/test_eig_solver.cpp

+11-9
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Test(eig_solver, symmetric_power_method_simple) {
2828
const unsigned max_iters = 100;
2929

3030
const auto result = diffusion_maps::internal::symmetric_power_method(
31-
matrix, x0, tol, max_iters);
31+
matrix, x0, nullptr, 0, tol, max_iters);
3232

3333
cr_assert(result.has_value(), "Fail to converge");
3434

@@ -67,25 +67,27 @@ Test(eig_solver, eigsh_simple) {
6767
const unsigned max_iters = 100;
6868
const unsigned max_restarts = 3;
6969

70-
const auto result =
70+
const auto [eigenvalues, eigenvectors] =
7171
diffusion_maps::internal::eigsh(matrix, k, tol, max_iters, max_restarts);
7272

73-
cr_assert_eq(result.size(), k, "eigsh does not find all eigenvalues");
73+
cr_assert_eq(eigenvalues.size(), k, "eigsh does not find all eigenvalues");
74+
cr_assert_eq(eigenvectors.size(), k, "eigsh does not find all eigenvectors");
7475

7576
const std::vector<std::pair<double, diffusion_maps::Vector>> expected_result =
7677
{{6, diffusion_maps::Vector{1, -1, 1} / std::sqrt(3)},
7778
{3, diffusion_maps::Vector{-2, -1, 1} / std::sqrt(6)},
7879
{1, diffusion_maps::Vector{0, 1, 1} / std::sqrt(2)}};
7980

8081
for (std::size_t i = 0; i < k; ++i) {
81-
const auto [eigenvalue, eigenvector] = result[i];
82+
const double eigenvalue = eigenvalues[i];
83+
const diffusion_maps::Vector eigenvector = eigenvectors[i];
8284
const auto [expected_eigenvalue, expected_eigenvector] = expected_result[i];
83-
cr_assert_float_eq(
84-
eigenvalue, expected_eigenvalue, tol,
85-
"Calculated eigenvalue %lf does not match expected eigenvalue %lf",
86-
eigenvalue, expected_eigenvalue);
85+
cr_assert_float_eq(eigenvalue, expected_eigenvalue, tol,
86+
"%z-th calculated eigenvalue %lf does not match "
87+
"expected eigenvalue %lf",
88+
i, eigenvalue, expected_eigenvalue);
8789
cr_assert_lt(std::min((eigenvector - expected_eigenvector).l2_norm(),
8890
(eigenvector - (-expected_eigenvector)).l2_norm()),
89-
tol, "Calculated eigenvector is incorrect");
91+
tol, "%z-th calculated eigenvector is incorrect", i);
9092
}
9193
}

0 commit comments

Comments
 (0)