Skip to content

Commit

Permalink
Allocations tests for CGLS
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison authored and dpo committed Feb 6, 2019
1 parent 3bf2b0f commit 9bbaf39
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
14 changes: 7 additions & 7 deletions src/cgls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ function cgls(A :: AbstractLinearOperator, b :: AbstractVector{T};
r = copy(b)
bNorm = @knrm2(m, r) # Marginally faster than norm(b);
bNorm == 0 && return x, SimpleStats(true, false, [0.0], [0.0], "x = 0 is a zero-residual solution");
s = A' * M * r;
Mr = M * r
s = A.tprod(Mr)
p = copy(s);
γ = @kdot(n, s, s) # Faster than γ = dot(s, s);
iter = 0;
Expand All @@ -71,7 +72,8 @@ function cgls(A :: AbstractLinearOperator, b :: AbstractVector{T};

while ! (solved || tired)
q = A * p;
δ = @kdot(m, q, M * q) # Faster than α = γ / dot(q, q);
Mq = M * q
δ = @kdot(m, q, Mq) # Faster than α = γ / dot(q, q);
λ > 0 &&+= λ * @kdot(n, p, p))
α = γ / δ;

Expand All @@ -84,14 +86,12 @@ function cgls(A :: AbstractLinearOperator, b :: AbstractVector{T};

@kaxpy!(n, α, p, x) # Faster than x = x + α * p;
@kaxpy!(m, -α, q, r) # Faster than r = r - α * q;
s = A' * M * r;
Mr = M * r
s = A.tprod(Mr);
λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x;
γ_next = @kdot(n, s, s) # Faster than γ_next = dot(s, s);
β = γ_next / γ;
@kscal!(n, β, p)
@kaxpy!(n, 1.0, s, p) # Faster than p = s + β * p;
# The combined BLAS calls tend to trigger some gc.
# BLAS.axpy!(n, 1.0, s, 1, BLAS.scal!(n, β, p, 1), 1);
@kaxpby!(n, 1.0, s, β, p) # Faster than p = s + β * p;
γ = γ_next;
rNorm = @knrm2(m, r) # Marginally faster than norm(r);
ArNorm = sqrt(γ);
Expand Down
10 changes: 10 additions & 0 deletions test/test_alloc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,13 @@ expected_cgne_bytes = storage_cgne_bytes(n, m)
(x, stats) = cgne(Au, c, M=N) # warmup
actual_cgne_bytes = @allocated cgne(Au, c, M=N)
@test actual_cgne_bytes 1.1 * expected_cgne_bytes

# without preconditioner and with (Ap, Aᵀq) preallocated, CGLS needs:
# - 2 m-vectors: x, p
# - 1 n-vector: r
storage_cgls(n, m) = 2*m + n
storage_cgls_bytes(n, m) = 8 * storage_cgls(n, m)
expected_cgls_bytes = storage_cgls_bytes(n, m)
(x, stats) = cgls(Ao, b, M=M) # warmup
actual_cgls_bytes = @allocated cgls(Ao, b, M=M)
@test actual_cgls_bytes 1.1 * expected_cgls_bytes

0 comments on commit 9bbaf39

Please sign in to comment.