diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 000000000..92ac58c74 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,71 @@ +steps: + - label: "Nvidia GPUs -- CUDA.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9 + agents: + queue: "juliagpu" + cuda: "*" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("CUDA") + Pkg.add("LinearOperators") + Pkg.instantiate() + using CUDA + # CUDA.set_runtime_version!(v"11.8")' + + julia --color=yes --project -e ' + include("test/gpu/nvidia.jl")' + timeout_in_minutes: 30 + + # - label: "AMD GPUs -- AMDGPU.jl" + # plugins: + # - JuliaCI/julia#v1: + # version: 1.9 + # agents: + # queue: "juliagpu" + # rocm: "*" + # rocmgpu: "gfx1031" + # env: + # JULIA_AMDGPU_CORE_MUST_LOAD: "1" + # JULIA_AMDGPU_HIP_MUST_LOAD: "1" + # JULIA_AMDGPU_DISABLE_ARTIFACTS: "1" + # command: | + # julia --color=yes --project -e ' + # using Pkg + # Pkg.add("AMDGPU") + # Pkg.instantiate() + # include("test/gpu/amd.jl")' + # timeout_in_minutes: 30 + + - label: "Intel GPUs -- oneAPI.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9 + agents: + queue: "juliagpu" + intel: "*" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("oneAPI") + Pkg.instantiate() + include("test/gpu/intel.jl")' + timeout_in_minutes: 30 + + - label: "Apple M1 GPUs -- Metal.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9 + agents: + queue: "juliaecosystem" + os: "macos" + arch: "aarch64" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("Metal") + Pkg.instantiate() + include("test/gpu/metal.jl")' + timeout_in_minutes: 30 diff --git a/.cirrus.yml b/.cirrus.yml index d559cf609..792aad121 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,15 +1,36 @@ -freebsd_instance: - image: freebsd-13-0-release-amd64 task: - name: FreeBSD - env: - matrix: - - JULIA_VERSION: 1.6 - - JULIA_VERSION: 1 - - JULIA_VERSION: nightly - allow_failures: $JULIA_VERSION == 'nightly' - install_script: - - sh 
-c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)" + matrix: + - name: FreeBSD + freebsd_instance: + image_family: freebsd-13-1 + env: + matrix: + - JULIA_VERSION: 1.6 + - JULIA_VERSION: 1 + - name: musl Linux + container: + image: alpine:3.14 + env: + - JULIA_VERSION: 1 + - name: MacOS M1 + macos_instance: + image: ghcr.io/cirruslabs/macos-monterey-base:latest + env: + - JULIA_VERSION: 1 + install_script: | + URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh" + set -x + if [ "$(uname -s)" = "Linux" ] && command -v apt; then + apt update + apt install -y curl + fi + if command -v curl; then + sh -c "$(curl ${URL})" + elif command -v wget; then + sh -c "$(wget ${URL} -q -O-)" + elif command -v fetch; then + sh -c "$(fetch ${URL} -o -)" + fi build_script: - cirrusjl build test_script: diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 000000000..e3469746f --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,11 @@ +# Drops on the order 0.01% are typical even when no change occurs +# Having the threshold set a little higher (0.5%) than that makes it +# a little more tolerant to fluctuations +coverage: + status: + project: + default: + threshold: 0.5% + patch: + default: + threshold: 0.5% diff --git a/.github/workflows/Aqua.yml b/.github/workflows/Aqua.yml new file mode 100644 index 000000000..da872e225 --- /dev/null +++ b/.github/workflows/Aqua.yml @@ -0,0 +1,17 @@ +name: Aqua +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: julia-actions/setup-julia@latest + with: + version: '1' + - name: Aqua.jl + run: julia --color=yes -e 'using Pkg; Pkg.add("Aqua"); Pkg.develop(path="."); using Aqua, Krylov; Aqua.test_all(Krylov)' diff --git a/.github/workflows/Breakage.yml b/.github/workflows/Breakage.yml index 266eed3cc..4a907d631 100644 --- 
a/.github/workflows/Breakage.yml +++ b/.github/workflows/Breakage.yml @@ -19,19 +19,20 @@ jobs: "JuliaSmoothOptimizers/JSOSolvers.jl", "JuliaSmoothOptimizers/LLSModels.jl", "JuliaSmoothOptimizers/Percival.jl", - "JuliaSmoothOptimizers/RipQP.jl" + "JuliaSmoothOptimizers/RipQP.jl", + "SciML/LinearSolve.jl" ] pkgversion: [latest, stable] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Install Julia - uses: julia-actions/setup-julia@v1 with: version: '1' arch: x64 - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: @@ -85,7 +86,7 @@ jobs: end; end' - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: pr path: pr/ @@ -94,9 +95,9 @@ jobs: needs: break runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: pr path: pr/ @@ -127,7 +128,7 @@ jobs: fi done >> MSG - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: pr path: pr/ diff --git a/.github/workflows/CI_M1.yml b/.github/workflows/CI_M1.yml deleted file mode 100644 index 6f9aa720b..000000000 --- a/.github/workflows/CI_M1.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: CI_M1 -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - name: Julia ${{ matrix.version }} - macOS - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: self-hosted - strategy: - fail-fast: false - matrix: - version: - - '1' - arch: - - aarch64 - steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - name: Version Info - shell: julia --color=yes {0} - run: | - using InteractiveUtils - versioninfo() - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml 
index 14f6dcd47..043113f74 100644 --- a/.github/workflows/CommentPR.yml +++ b/.github/workflows/CommentPR.yml @@ -39,16 +39,36 @@ jobs: - run: unzip pr.zip - name: 'Comment on PR' - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | - var fs = require('fs'); - var issue_number = Number(fs.readFileSync('./NR')); - var msg = fs.readFileSync('./MSG', 'utf8'); - await github.issues.createComment({ + var fs = require('fs') + var issue_number = Number(fs.readFileSync('./NR')) + var msg = fs.readFileSync('./MSG', 'utf8') + + // Get the existing comments. + const {data: comments} = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: issue_number, - body: msg - }); + issue_number: issue_number + }) + + // Find any comment already made by the bot. + const botComment = comments.find(comment => comment.user.id === 41898282) + + if (botComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: msg + }) + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue_number, + body: msg + }) + } diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index b546a8082..7a9c79fd4 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,19 +1,44 @@ name: CompatHelper - on: schedule: - - cron: '00 00 * * *' - + - cron: 0 0 * * * + workflow_dispatch: +permissions: + contents: write + pull-requests: write jobs: CompatHelper: runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@latest + - name: Check if Julia is already available in the PATH + id: julia_in_path + run: which julia + continue-on-error: true + - name: Install Julia, but only if it is not already available in the PATH + uses: julia-actions/setup-julia@v1 with: version: 
'1' - - name: CompatHelper - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - - name: CompatHelper.main() + arch: ${{ runner.arch }} + if: steps.julia_in_path.outcome != 'success' + - name: "Add the General registry via Git" + run: | + import Pkg + ENV["JULIA_PKG_SERVER"] = "" + Pkg.Registry.add("General") + shell: julia --color=yes {0} + - name: "Install CompatHelper" + run: | + import Pkg + name = "CompatHelper" + uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" + version = "3" + Pkg.add(; name, uuid, version) + shell: julia --color=yes {0} + - name: "Run CompatHelper" + run: | + import CompatHelper + CompatHelper.main() + shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: julia -e 'using CompatHelper; CompatHelper.main()' + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index be0b86584..406f15e0d 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -10,12 +10,12 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@latest with: version: '1' - name: Install dependencies - run: julia --project=docs -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + run: julia --project=docs --color=yes -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml new file mode 100644 index 000000000..b0c37e05f --- /dev/null +++ b/.github/workflows/Invalidations.yml @@ -0,0 +1,43 @@ +name: Invalidations +# Uses SnoopCompile to evaluate number of invalidations caused by `using` the package +# using https://github.com/julia-actions/julia-invalidations +# Based on https://github.com/julia-actions/julia-invalidations + +on: + pull_request: + 
+concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: always. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + evaluate: + # Only run on PRs to the default branch. + # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch + if: github.base_ref == github.event.repository.default_branch + runs-on: ubuntu-latest + steps: + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - uses: actions/checkout@v3 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_pr + + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.repository.default_branch }} + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_default + + - name: Report invalidation counts + run: | + echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + - name: Check if the PR does increase number of invalidations + if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total + run: exit 1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 409e0d146..9e1791f48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,12 +31,12 @@ jobs: arch: x64 allow_failure: true steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: @@ -49,6 +49,6 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: 
codecov/codecov-action@v1 + - uses: codecov/codecov-action@v3 with: - file: lcov.info + files: lcov.info diff --git a/LICENSE.md b/LICENSE.md index 1533671ce..befba1c4d 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ -Copyright (c) 2015-2019: Dominique Orban +Copyright (c) 2015-present: Alexis Montoison, Dominique Orban, and other contributors -Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.0/). +[Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.0/). ## License @@ -11,83 +11,83 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. -------------- 1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. 1.2. "Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" - means Covered Software of a particular Contributor. + means Covered Software of a particular Contributor. 1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. 1.5. 
"Incompatible With Secondary Licenses" - means + means - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. 1.6. "Executable Form" - means any form of the work other than Source Code Form. + means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. 1.8. "License" - means this document. + means this document. 1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. 1.10. "Modifications" - means any of the following: + means any of the following: - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or - (b) any new file in Source Code Form that contains any Covered - Software. + (b) any new file in Source Code Form that contains any Covered + Software. 1.11. 
"Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. 1.12. "Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. 1.13. "Source Code Form" - means the form of the work preferred for making modifications. + means the form of the work preferred for making modifications. 1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. 2. License Grants and Conditions -------------------------------- @@ -98,14 +98,14 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. non-exclusive license: (a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. 2.2. Effective Date @@ -122,15 +122,15 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. 
Contributor: (a) for any code that a Contributor has removed from Covered Software; - or + or (b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or (c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. + its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with @@ -178,15 +178,15 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. 3.3. 
Distribution of a Larger Work @@ -363,7 +363,7 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. + file, You can obtain one at https://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE diff --git a/Project.toml b/Project.toml index a91e07b8a..c711f565c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,18 +1,33 @@ name = "Krylov" uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" -version = "0.8.3" +version = "0.9.2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +[weakdeps] +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[extensions] +KrylovComponentArraysExt = "ComponentArrays" +KrylovFillArraysExt = "FillArrays" +KrylovStaticArraysExt = "StaticArrays" + [compat] +PackageExtensionCompat = "1.0.1" julia = "^1.6.0" [extras] +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Random", "Test"] +test = ["ComponentArrays", "FillArrays", "Random", "StaticArrays", "Test"] diff --git a/README.md b/README.md index a4664e187..57bcd1d81 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ should be solved when **_b_** is not in the range of **_A_** (inconsistent syste * **_A_** is square and singular, * **_A_** is tall 
and thin. -Underdetermined sytems are less common but also occur. +Underdetermined systems are less common but also occur. If there are infinitely many such **_x_** (because **_A_** is column rank-deficient), one with minimum norm is identified @@ -61,32 +61,32 @@ If there are infinitely many such **_x_** (because **_A_** is column rank-defici minimize ‖x‖   subject to   Ax = b

-sould be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. +should be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. This situation mainly occurs when * **_A_** is square and singular, * **_A_** is short and wide. -Overdetermined sytems are less common but also occur. +Overdetermined systems are less common but also occur. 4. Adjoint systems

- Ax = b   and   Aᵀy = c + Ax = b   and   Aᴴy = c

where **_A_** can have any shape. -5. Saddle-point and symmetric quasi-definite (SQD) systems +5. Saddle-point and Hermitian quasi-definite systems

[M     A]  [x] = [b]
- [Aᵀ   -N]  [y]    [c] + [Aᴴ   -N]  [y]    [c]

where **_A_** can have any shape. -6. Generalized saddle-point and unsymmetric partitioned systems +6. Generalized saddle-point and non-Hermitian partitioned systems

[M   A]  [x] = [b] @@ -94,7 +94,7 @@ where **_A_** can have any shape. [B   N]  [y]    [c]

-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**. +where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**. **_A_**, **_B_**, **_b_** and **_c_** must be all nonzero. Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because: @@ -121,3 +121,10 @@ julia> ] pkg> add Krylov pkg> test Krylov ``` + +## Bug reports and discussions + +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues). +Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please. + +If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome. 
diff --git a/docs/make.jl b/docs/make.jl index 57ad87cd2..441ddb3ee 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,23 +6,26 @@ makedocs( linkcheck = true, strict = true, format = Documenter.HTML(assets = ["assets/style.css"], - ansicolor=true, + ansicolor = true, prettyurls = get(ENV, "CI", nothing) == "true", collapselevel = 1), sitename = "Krylov.jl", pages = ["Home" => "index.md", "API" => "api.md", - "Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md", - "Symmetric indefinite linear systems" => "solvers/sid.md", - "Unsymmetric linear systems" => "solvers/unsymmetric.md", + "Krylov processes" => "processes.md", + "Krylov methods" => ["Hermitian positive definite linear systems" => "solvers/spd.md", + "Hermitian indefinite linear systems" => "solvers/sid.md", + "Non-Hermitian square linear systems" => "solvers/unsymmetric.md", "Least-norm problems" => "solvers/ln.md", "Least-squares problems" => "solvers/ls.md", "Adjoint systems" => "solvers/as.md", - "Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md", - "Generalized saddle-point and unsymmetric partitioned systems" => "solvers/gsp.md"], + "Saddle-point and Hermitian quasi-definite systems" => "solvers/sp_sqd.md", + "Generalized saddle-point and non-Hermitian partitioned systems" => "solvers/gsp.md"], "In-place methods" => "inplace.md", + "Preconditioners" => "preconditioners.md", + "Storage requirements" => "storage.md", "GPU support" => "gpu.md", - "Warm start" => "warm_start.md", + "Warm-start" => "warm-start.md", "Factorization-free operators" => "factorization-free.md", "Callbacks" => "callbacks.md", "Performance tips" => "tips.md", diff --git a/docs/src/api.md b/docs/src/api.md index 7f2f4dff7..238c86f1a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -48,6 +48,7 @@ LnlqSolver CraigSolver CraigmrSolver GpmrSolver +FgmresSolver ``` ## Utilities @@ -60,4 +61,6 @@ Krylov.vec2str Krylov.ktypeof Krylov.kzeros Krylov.kones 
+Krylov.vector_to_matrix +Krylov.matrix_to_vector ``` diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md index f44018687..91e0b521c 100644 --- a/docs/src/callbacks.md +++ b/docs/src/callbacks.md @@ -1,43 +1,80 @@ -## Callbacks +# [Callbacks](@id callbacks) -Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. The callback should return `true` if the main loop should terminate, and `false` otherwise. +Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. +The callback should return `true` if the main loop should terminate, and `false` otherwise. If the method terminated because of the callback, the output status will be `"user-requested exit"`. -For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using +For example, if the user defines `minres_callback(solver::MinresSolver)`, it can be passed to the solver using ```julia -(x, stats) = minres(A, b, callback = my_callback) +(x, stats) = minres(A, b, callback = minres_callback) ``` -If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure: +If you need to write a callback that uses variables that are not in a `KrylovSolver`, use a closure: ```julia -function my_callback2(solver::MinresSolver, A, b, storage_vec, tol::Float64) - mul!(storage_vec, A, solver.x) - storage_vec .-= b - return norm(storage_vec) ≤ tol # tolerance based on the 2-norm of the residual +function custom_stopping_condition(solver::KrylovSolver, A, b, r, tol) + mul!(r, A, solver.x) + r .-= b # r := b - Ax + bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual + return bool end -storage_vec = similar(b) -(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, storage_vec, 0.1)) +cg_callback(solver) = custom_stopping_condition(solver, A, b, r, tol) +(x, stats) = cg(A, b, callback = cg_callback) ``` Alternatively, use a structure and 
make it callable: ```julia -mutable struct MyCallback3{S, M} - A::M - b::S - storage_vec::S - tol::Float64 +mutable struct CallbackWorkspace{T} + A::Matrix{T} + b::Vector{T} + r::Vector{T} + tol::T end -MyCallback3(A, b; tol = 0.1) = MyCallback3(A, b, similar(b), tol) -function (my_cb::MyCallback3)(solver) - mul!(my_cb.storage_vec, my_cb.A, solver.x) - my_cb.storage_vec .-= my_cb.b - return norm(my_cb.storage_vec) ≤ my_cb.tol # tolerance based on the 2-norm of the residual +function (workspace::CallbackWorkspace)(solver::KrylovSolver) + mul!(workspace.r, workspace.A, solver.x) + workspace.r .-= workspace.b + bool = norm(workspace.r) ≤ workspace.tol + return bool end -my_cb = MyCallback3(A, b; tol = 0.1) -(x, stats) = minres(A, b, callback = my_cb) +bicgstab_callback = CallbackWorkspace(A, b, r, tol) +(x, stats) = bicgstab(A, b, callback = bicgstab_callback) +``` + +Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method along the iterations. +We now illustrate how to store all iterates $x_k$ of the GMRES method. 
+ +```julia +S = Krylov.ktypeof(b) +global X = S[] # Storage for GMRES iterates + +function gmres_callback(solver) + z = solver.z + k = solver.inner_iter + nr = sum(1:k) + V = solver.V + R = solver.R + y = copy(z) + + # Solve Rk * yk = zk + for i = k : -1 : 1 + pos = nr + i - k + for j = k : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] + pos = pos - j + 1 + end + y[i] = y[i] / R[pos] + end + + # xk = Vk * yk + xk = sum(V[i] * y[i] for i = 1:k) + push!(X, xk) + + return false # We don't want to add new stopping conditions +end + +(x, stats) = gmres(A, b, callback = gmres_callback) ``` diff --git a/docs/src/examples/tricg.md b/docs/src/examples/tricg.md index e981c2f7e..61750de5f 100644 --- a/docs/src/examples/tricg.md +++ b/docs/src/examples/tricg.md @@ -14,7 +14,7 @@ N = diagm(0 => [5.0 * i for i = 1:n]) c = -b # [I A] [x] = [b] -# [Aᵀ -I] [y] [c] +# [Aᴴ -I] [y] [c] (x, y, stats) = tricg(A, b, c) K = [eye(m) A; A' -eye(n)] B = [b; c] @@ -23,7 +23,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [-I A] [x] = [b] -# [ Aᵀ I] [y] [c] +# [ Aᴴ I] [y] [c] (x, y, stats) = tricg(A, b, c, flip=true) K = [-eye(m) A; A' eye(n)] B = [b; c] @@ -32,7 +32,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [I A] [x] = [b] -# [Aᵀ I] [y] [c] +# [Aᴴ I] [y] [c] (x, y, stats) = tricg(A, b, c, spd=true) K = [eye(m) A; A' eye(n)] B = [b; c] @@ -41,7 +41,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [-I A] [x] = [b] -# [ Aᵀ -I] [y] [c] +# [ Aᴴ -I] [y] [c] (x, y, stats) = tricg(A, b, c, snd=true) K = [-eye(m) A; A' -eye(n)] B = [b; c] @@ -50,7 +50,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [τI A] [x] = [b] -# [ Aᵀ νI] [y] [c] +# [ Aᴴ νI] [y] [c] (τ, ν) = (1e-4, 1e2) (x, y, stats) = tricg(A, b, c, τ=τ, ν=ν) K = [τ*eye(m) A; A' ν*eye(n)] @@ -60,7 +60,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [M⁻¹ A ] [x] = [b] -# [Aᵀ -N⁻¹] [y] [c] +# [Aᴴ -N⁻¹] [y] [c] (x, 
y, stats) = tricg(A, b, c, M=M, N=N, verbose=1) K = [inv(M) A; A' -inv(N)] H = BlockDiagonalOperator(M, N) diff --git a/docs/src/examples/trimr.md b/docs/src/examples/trimr.md index 2aa48be1e..adc4e82e5 100644 --- a/docs/src/examples/trimr.md +++ b/docs/src/examples/trimr.md @@ -14,7 +14,7 @@ m, n = size(A) c = -b # [D A] [x] = [b] -# [Aᵀ 0] [y] [c] +# [Aᴴ 0] [y] [c] llt_D = cholesky(D) opD⁻¹ = LinearOperator(Float64, 5, 5, true, true, (y, v) -> ldiv!(y, llt_D, v)) opH⁻¹ = BlockDiagonalOperator(opD⁻¹, eye(n)) @@ -34,7 +34,7 @@ N = diagm(0 => [5.0 * i for i = 1:n]) c = -b # [I A] [x] = [b] -# [Aᵀ -I] [y] [c] +# [Aᴴ -I] [y] [c] (x, y, stats) = trimr(A, b, c) K = [eye(m) A; A' -eye(n)] B = [b; c] @@ -43,7 +43,7 @@ resid = norm(r) @printf("TriMR: Relative residual: %8.1e\n", resid) # [M A] [x] = [b] -# [Aᵀ -N] [y] [c] +# [Aᴴ -N] [y] [c] ldlt_M = ldl(M) ldlt_N = ldl(N) opM⁻¹ = LinearOperator(Float64, size(M,1), size(M,2), true, true, (y, v) -> ldiv!(y, ldlt_M, v)) diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md index aa0f51f07..0bff49d4c 100644 --- a/docs/src/factorization-free.md +++ b/docs/src/factorization-free.md @@ -1,3 +1,32 @@ +```@raw html + +``` + ## [Factorization-free operators](@id factorization-free) All methods are factorization-free, which means that you only need to provide operator-vector products. @@ -10,8 +39,11 @@ Some methods only require `A * v` products, whereas other ones also require `A' |:--------------------------------------:|:----------------------------------------:| | CG, CR | CGLS, CRLS, CGNE, CRMR | | SYMMLQ, CG-LANCZOS, MINRES, MINRES-QLP | LSLQ, LSQR, LSMR, LNLQ, CRAIG, CRAIGMR | -| DIOM, FOM, DQGMRES, GMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR | -| CGS, BICGSTAB | TriCG, TriMR, USYMLQR | +| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR | +| CGS, BICGSTAB | TriCG, TriMR | + +!!! info + GPMR is the only method that requires `A * v` and `B * w` products. 
Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`. @@ -27,9 +59,9 @@ where * `type` is the operator element type; * `nrow` and `ncol` are its dimensions; * `symmetric` and `hermitian` should be set to `true` or `false`; -* `prod(y, v)`, `tprod(y, w)` and `ctprod(u, w)` are called when writing `mul!(y, A, v)`, `mul!(y, tranpose(A), w)`, and `mul!(y, A', u)`, respectively. +* `prod(y, v)`, `tprod(y, w)` and `ctprod(u, w)` are called when writing `mul!(y, A, v)`, `mul!(y, transpose(A), w)`, and `mul!(y, A', u)`, respectively. -See the [tutorial](https://juliasmoothoptimizers.github.io/tutorials/introduction-to-linear-operators/) and the detailed [documentation](https://juliasmoothoptimizers.github.io/LinearOperators.jl/dev/) for more informations on `LinearOperators.jl`. +See the [tutorial](https://juliasmoothoptimizers.github.io/tutorials/introduction-to-linear-operators/) and the detailed [documentation](https://juliasmoothoptimizers.github.io/LinearOperators.jl/dev/) for more information on `LinearOperators.jl`. ## Examples diff --git a/docs/src/gpu.md b/docs/src/gpu.md index 4c9887f24..33b76b421 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -1,38 +1,51 @@ -## GPU support +# [GPU support](@id gpu) -All solvers in Krylov.jl can be used with `CuArrays` and allow computations with Nvidia GPU. Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to GPU format (`CuMatrix` and `CuVector`). +Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are highly parallelizable. + +The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia. +Those allow the implementations to be specialized automatically by the compiler for both CPU and GPU. 
+Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl). + +## Nvidia GPUs + +All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations on Nvidia GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`CuMatrix` and `CuVector`). ```julia using CUDA, Krylov -# CPU Arrays -A_cpu = rand(20, 20) -b_cpu = rand(20) +if CUDA.functional() + # CPU Arrays + A_cpu = rand(20, 20) + b_cpu = rand(20) -# GPU Arrays -A_gpu = CuMatrix(A_cpu) -b_gpu = CuVector(b_cpu) + # GPU Arrays + A_gpu = CuMatrix(A_cpu) + b_gpu = CuVector(b_cpu) -# Solve a square and dense system on GPU -x, stats = bilq(A_gpu, b_gpu) + # Solve a square and dense system on an Nivida GPU + x, stats = bilq(A_gpu, b_gpu) +end ``` -Sparse matrices have a specific storage on GPU (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`): +Sparse matrices have a specific storage on Nvidia GPUs (`CuSparseMatrixCSC`, `CuSparseMatrixCSR` or `CuSparseMatrixCOO`): ```julia using CUDA, Krylov using CUDA.CUSPARSE, SparseArrays -# CPU Arrays -A_cpu = sprand(200, 100, 0.3) -b_cpu = rand(200) +if CUDA.functional() + # CPU Arrays + A_cpu = sprand(200, 100, 0.3) + b_cpu = rand(200) -# GPU Arrays -A_gpu = CuSparseMatrixCSC(A_cpu) -b_gpu = CuVector(b_cpu) + # GPU Arrays + A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) -# Solve a rectangular and sparse system on GPU -x, stats = lsmr(A_gpu, b_gpu) + # Solve a rectangular and sparse system on an Nvidia GPU + x, stats = lsmr(A_gpu, b_gpu) +end ``` Optimized operator-vector products that exploit GPU features can be also used by means of linear operators. 
@@ -46,64 +59,168 @@ can be applied directly on GPU thanks to efficient operators that take advantage using SparseArrays, Krylov, LinearOperators using CUDA, CUDA.CUSPARSE -# Transfer the linear system from the CPU to the GPU -A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu) -b_gpu = CuVector(b_cpu) +if CUDA.functional() + # Transfer the linear system from the CPU to the GPU + A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + + # IC(0) decomposition LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices + P = ic02(A_gpu) + + # Additional vector required for solving triangular systems + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + + # Solve Py = x + function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, LowerTriangular(P), x) # Forward substitution with L + ldiv!(y, LowerTriangular(P)', z) # Backward substitution with Lᴴ + return y + end + + function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, UpperTriangular(P)', x) # Forward substitution with L + ldiv!(y, UpperTriangular(P), z) # Backward substitution with Lᴴ + return y + end + + # Operator that model P⁻¹ + symmetric = hermitian = true + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z)) + + # Solve an Hermitian positive definite system with an IC(0) preconditioner on GPU + x, stats = cg(A_gpu, b_gpu, M=opM) +end +``` -# LLᵀ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices -P = ic02(A_gpu, 'O') +### Example with a general square system + +```julia +using SparseArrays, Krylov, LinearOperators +using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER -# Solve Py = x -function ldiv!(y, P, x) - copyto!(y, x) # Variant for CuSparseMatrixCSR - sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O') - sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O') - return y +if CUDA.functional() + # Optional -- Compute a permutation vector p such that A[:,p] has no zero 
diagonal + p = zfd(A_cpu) + p .+= 1 + A_cpu = A_cpu[:,p] + + # Transfer the linear system from the CPU to the GPU + A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + + # ILU(0) decomposition LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices + P = ilu02(A_gpu) + + # Additional vector required for solving triangular systems + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + + # Solve Py = x + function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, UnitLowerTriangular(P), x) # Forward substitution with L + ldiv!(y, UpperTriangular(P), z) # Backward substitution with U + return y + end + + function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, LowerTriangular(P), x) # Forward substitution with L + ldiv!(y, UnitUpperTriangular(P), z) # Backward substitution with U + return y + end + + # Operator that model P⁻¹ + symmetric = hermitian = false + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z)) + + # Solve a non-Hermitian system with an ILU(0) preconditioner on GPU + x̄, stats = bicgstab(A_gpu, b_gpu, M=opM) + + # Recover the solution of Ax = b with the solution of A[:,p]x̄ = b + invp = invperm(p) + x = x̄[invp] end +``` + +## AMD GPUs + +All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`ROCMatrix` and `ROCVector`). 
+ +```julia +using Krylov, AMDGPU -# Operator that model P⁻¹ -n = length(b_gpu) -T = eltype(b_gpu) -symmetric = hermitian = true -opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x)) +if AMDGPU.functional() + # CPU Arrays + A_cpu = rand(ComplexF64, 20, 20) + A_cpu = A_cpu + A_cpu' + b_cpu = rand(ComplexF64, 20) -# Solve a symmetric positive definite system with an incomplete Cholesky preconditioner on GPU -(x, stats) = cg(A_gpu, b_gpu, M=opM) + A_gpu = ROCMatrix(A_cpu) + b_gpu = ROCVector(b_cpu) + + # Solve a dense Hermitian system on an AMD GPU + x, stats = minres(A_gpu, b_gpu) +end ``` -### Example with a general square system +!!! info + The library `rocSPARSE` is not interfaced yet in AMDGPU.jl and only dense linear systems are supported. + +## Intel GPUs + +All solvers in Krylov.jl can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations on Intel GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`oneMatrix` and `oneVector`). 
```julia -using SparseArrays, Krylov, LinearOperators -using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER +using Krylov, oneAPI + +if oneAPI.functional() + T = Float32 # oneAPI.jl also works with ComplexF32 + m = 20 + n = 10 + + # CPU Arrays + A_cpu = rand(T, m, n) + b_cpu = rand(T, m) -# Optional -- Compute a permutation vector p such that A[p,:] has no zero diagonal -p = zfd(A_cpu, 'O') -p .+= 1 -A_cpu = A_cpu[p,:] -b_cpu = b_cpu[p] - -# Transfer the linear system from the CPU to the GPU -A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu) -b_gpu = CuVector(b_cpu) - -# LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices -P = ilu02(A_gpu, 'O') - -# Solve Py = x -function ldiv!(y, P, x) - copyto!(y, x) # Variant for CuSparseMatrixCSR - sv2!('N', 'L', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'U', 1.0, P, y, 'O') - sv2!('N', 'U', 'U', 1.0, P, y, 'O') # sv2!('N', 'U', 'N', 1.0, P, y, 'O') - return y + # GPU Arrays + A_gpu = oneMatrix(A_cpu) + b_gpu = oneVector(b_cpu) + + # Solve a dense least-squares problem on an Intel GPU + x, stats = lsqr(A_gpu, b_gpu) end +``` -# Operator that model P⁻¹ -n = length(b_gpu) -T = eltype(b_gpu) -symmetric = hermitian = false -opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x)) +!!! note + The library `oneMKL` is interfaced in oneAPI.jl and accelerates linear algebra operations on Intel GPUs. Only dense linear systems are supported for the time being because sparse linear algebra routines are not interfaced yet. -# Solve an unsymmetric system with an incomplete LU preconditioner on GPU -(x, stats) = bicgstab(A_gpu, b_gpu, M=opM) +## Apple M1 GPUs + +All solvers in Krylov.jl can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations on Apple M1 GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`MtlMatrix` and `MtlVector`). 
+ +```julia +using Krylov, Metal + +T = Float32 # Metal.jl also works with ComplexF32 +n = 10 +m = 20 + +# CPU Arrays +A_cpu = rand(T, n, m) +b_cpu = rand(T, n) + +# GPU Arrays +A_gpu = MtlMatrix(A_cpu) +b_gpu = MtlVector(b_cpu) + +# Solve a dense least-norm problem on an Apple M1 GPU +x, stats = craig(A_gpu, b_gpu) ``` + +!!! warning + Metal.jl is under heavy development and is considered experimental for now. diff --git a/docs/src/graphics/arnoldi.png b/docs/src/graphics/arnoldi.png new file mode 100644 index 000000000..9ef8bd3a3 Binary files /dev/null and b/docs/src/graphics/arnoldi.png differ diff --git a/docs/src/graphics/golub_kahan.png b/docs/src/graphics/golub_kahan.png new file mode 100644 index 000000000..32fc3d7b8 Binary files /dev/null and b/docs/src/graphics/golub_kahan.png differ diff --git a/docs/src/graphics/hermitian_lanczos.png b/docs/src/graphics/hermitian_lanczos.png new file mode 100644 index 000000000..c70082e72 Binary files /dev/null and b/docs/src/graphics/hermitian_lanczos.png differ diff --git a/docs/src/graphics/montoison_orban.png b/docs/src/graphics/montoison_orban.png new file mode 100644 index 000000000..5a14eda04 Binary files /dev/null and b/docs/src/graphics/montoison_orban.png differ diff --git a/docs/src/graphics/nonhermitian_lanczos.png b/docs/src/graphics/nonhermitian_lanczos.png new file mode 100644 index 000000000..b8d83961c Binary files /dev/null and b/docs/src/graphics/nonhermitian_lanczos.png differ diff --git a/docs/src/graphics/saunders_simon_yip.png b/docs/src/graphics/saunders_simon_yip.png new file mode 100644 index 000000000..c3acfd181 Binary files /dev/null and b/docs/src/graphics/saunders_simon_yip.png differ diff --git a/docs/src/index.md b/docs/src/index.md index ce657436d..1cc2c3302 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -22,7 +22,7 @@ should be solved when **_b_** is not in the range of **_A_** (inconsistent syste * **_A_** is square and singular, * **_A_** is tall and thin. 
-Underdetermined sytems are less common but also occur. +Underdetermined systems are less common but also occur. If there are infinitely many such **_x_** (because **_A_** is column rank-deficient), one with minimum norm is identified @@ -36,36 +36,36 @@ If there are infinitely many such **_x_** (because **_A_** is column rank-defici \min \|x\| \quad \text{subject to} \quad Ax = b ``` -sould be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. +should be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. This situation mainly occurs when * **_A_** is square and singular, * **_A_** is short and wide. -Overdetermined sytems are less common but also occur. +Overdetermined systems are less common but also occur. 4 - Adjoint systems ```math - Ax = b \quad \text{and} \quad A^T y = c + Ax = b \quad \text{and} \quad A^H y = c ``` where **_A_** can have any shape. -5 - Saddle-point and symmetric quasi-definite (SQD) systems +5 - Saddle-point and Hermitian quasi-definite systems ```math - \begin{bmatrix} M & \phantom{-}A \\ A^T & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right) + \begin{bmatrix} M & \phantom{-}A \\ A^H & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right) ``` where **_A_** can have any shape. 
-6 - Generalized saddle-point and unsymmetric partitioned systems +6 - Generalized saddle-point and non-Hermitian partitioned systems ```math \begin{bmatrix} M & A \\ B & N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix} ``` -where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**. +where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**. **_A_**, **_B_**, **_b_** and **_c_** must be all nonzero. Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because: @@ -92,3 +92,10 @@ julia> ] pkg> add Krylov pkg> test Krylov ``` + +# Bug reports and discussions + +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues). +Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please. + +If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome. diff --git a/docs/src/inplace.md b/docs/src/inplace.md index 71a4e25de..9950575fe 100644 --- a/docs/src/inplace.md +++ b/docs/src/inplace.md @@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver` For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU. !!! note - `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). 
+ `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). The workspace is always the first argument of the in-place methods: diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md new file mode 100644 index 000000000..fd203dddb --- /dev/null +++ b/docs/src/preconditioners.md @@ -0,0 +1,237 @@ +# [Preconditioners](@id preconditioners) + +The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear system $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic. +Preconditioning can be used to reduce the condition number of the problem or cluster its eigenvalues or singular values for instance. + +The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account. +Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations. + +The construction of a preconditioner necessitates trade-offs because we need to apply it at least once per iteration within a Krylov method. +Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense. + +There exist three variants of preconditioning: + +| Left preconditioning | Two-sided preconditioning | Right preconditioning | +|:----------------------------------:|:----------------------------------------------------------------------:|:--------------------------------------------:| +| $P_{\ell}^{-1}Ax = P_{\ell}^{-1}b$ | $P_{\ell}^{-1}AP_r^{-1}y = P_{\ell}^{-1}b~~\text{with}~~x = P_r^{-1}y$ | $AP_r^{-1}y = b~~\text{with}~~x = P_r^{-1}y$ | + +where $P_{\ell}$ and $P_r$ are square and nonsingular. 
+
+In Krylov.jl, we call $P_{\ell}^{-1}$ and $P_r^{-1}$ the preconditioners and we assume that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available.
+Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov solvers.
+
+## How to use preconditioners in Krylov.jl?
+
+!!! info
+    - A preconditioner need only support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+    - The default value of a preconditioner in Krylov.jl is the identity operator `I`.
+
+### Square non-Hermitian linear systems
+
+Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`FGMRES`](@ref fgmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+
+A Krylov method dedicated to non-Hermitian linear systems allows the three variants of preconditioning.
+
+| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+### Hermitian linear systems
+
+Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+
+When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-H}y = L^{-1}b$ with $x = L^{-H}y$.
+Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^H$ that maintains hermiticity.
+However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its inverse directly.
+ +| Preconditioners | $P_c^{-1}$ | $P_c$ | +|:---------------:|:-------------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | + +!!! warning + The preconditioner `M` must be hermitian and positive definite. + +### Linear least-squares problems + +Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). + +| Formulation | Without preconditioning | With preconditioning | +|:---------------------:|:------------------------------------:|:-------------------------------------------:| +| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ | +| Normal equation | $A^HAx = A^Hb$ | $A^HE^{-1}Ax = A^HE^{-1}b$ | +| Augmented system | $\begin{bmatrix} I & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | + +[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems. 
+ +| Formulation | Without preconditioning | With preconditioning | +|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:| +| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ | +| Normal equation | $(A^HA + \lambda^2 I)x = A^Hb$ | $(A^HE^{-1}A + \lambda^2 F)x = A^HE^{-1}b$ | +| Augmented system | $\begin{bmatrix} I & A \\ A^H & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | + +| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ | +|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` | + +!!! warning + The preconditioners `M` and `N` must be hermitian and positive definite. + +### Linear least-norm problems + +Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr). 
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Hy = b~~\text{with}~~x = A^Hy$ | $AF^{-1}A^Hy = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+[`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr) also handle penalized minimum-norm problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| Normal equation | $(AA^H + \lambda^2 I)y = b~~\text{with}~~x = A^Hy$ | $(AF^{-1}A^H + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+    The preconditioners `M` and `N` must be hermitian and positive definite.
+
+### Saddle-point and Hermitian quasi-definite systems
+
+[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure
+```math
+  \begin{bmatrix} \tau E & \phantom{-}A \\ A^H & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+```
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+    The preconditioners `M` and `N` must be hermitian and positive definite.
+
+### Generalized saddle-point and non-Hermitian partitioned systems
+
+[`GPMR`](@ref gpmr) can take advantage of the structure of general square systems $Kz = d$ with the 2x2 block structure
+```math
+  \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+```
+| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
+|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
+| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` |
+
+!!! note
+    Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
+ +## Packages that provide preconditioners + +- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions. +- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in. +- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices. +- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners. +- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices. +- [BasicLU.jl](https://github.com/JuliaSmoothOptimizers/BasicLU.jl) uses a sparse LU factorization to compute a maximum volume basis that can be used as a preconditioner for least-norm and least-squares problems. + +## Examples + +```julia +using Krylov +n, m = size(A) +d = [A[i,i] ≠ 0 ? 1 / abs(A[i,i]) : 1 for i=1:n] # Jacobi preconditioner +P⁻¹ = diagm(d) +x, stats = symmlq(A, b, M=P⁻¹) +``` + +```julia +using Krylov +n, m = size(A) +d = [1 / norm(A[:,i]) for i=1:m] # diagonal preconditioner +P⁻¹ = diagm(d) +x, stats = minres(A, b, M=P⁻¹) +``` + +```julia +using IncompleteLU, Krylov +Pℓ = ilu(A) +x, stats = gmres(A, b, M=Pℓ, ldiv=true) # left preconditioning +``` + +```julia +using LimitedLDLFactorizations, Krylov +P = lldl(A) +P.D .= abs.(P.D) +x, stats = cg(A, b, M=P, ldiv=true) # centered preconditioning +``` + +```julia +using ILUZero, Krylov +Pᵣ = ilu0(A) +x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning +``` + +```julia +using LDLFactorizations, Krylov + +M = ldl(E) +N = ldl(F) + +# [E A] [x] = [b] +# [Aᴴ -F] [y] [c] +x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true) +``` + +```julia +using SuiteSparse, Krylov +import LinearAlgebra.ldiv! 
+ +M = cholesky(E) + +# ldiv! is not implemented for the sparse Cholesky factorization (SuiteSparse.CHOLMOD) +ldiv!(y::Vector{T}, F::SuiteSparse.CHOLMOD.Factor{T}, x::Vector{T}) where T = (y .= F \ x) + +# [E A] [x] = [b] +# [Aᴴ 0] [y] [c] +x, y, stats = trimr(A, b, c, M=M, sp=true, ldiv=true) +``` + +```julia +using Krylov + +C = lu(M) + +# [M A] [x] = [b] +# [B 0] [y] [c] +x, y, stats = gpmr(A, B, b, c, C=C, gsp=true, ldiv=true) +``` + +```julia +import BasicLU +using LinearOperators, Krylov + +# Least-squares problem +m, n = size(A) +Aᴴ = sparse(A') +basis, B = BasicLU.maxvolbasis(Aᴴ) +opA = LinearOperator(A) +B⁻ᴴ = LinearOperator(Float64, n, n, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N'))) + +d, stats = lsmr(opA * B⁻ᴴ, b) # min ‖AB⁻ᴴd - b‖₂ +x = B⁻ᴴ * d # recover the solution of min ‖Ax - b‖₂ + +# Least-norm problem +m, n = size(A) +basis, B = maxvolbasis(A) +opA = LinearOperator(A) +B⁻¹ = LinearOperator(Float64, m, m, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T'))) + +x, y, stats = craigmr(B⁻¹ * opA, B⁻¹ * b) # min ‖x‖₂ s.t. B⁻¹Ax = B⁻¹b +``` diff --git a/docs/src/processes.md b/docs/src/processes.md new file mode 100644 index 000000000..e9d4066d2 --- /dev/null +++ b/docs/src/processes.md @@ -0,0 +1,334 @@ +```@raw html + +``` + +# [Krylov processes](@id krylov-processes) + +Krylov processes are the foundation of Krylov methods, they generate bases of Krylov subspaces. +Depending on the Krylov subspaces generated, Krylov processes are more or less specialized for a subset of linear problems. +The following table summarizes the most relevant processes for each linear problem. 
+ +| Linear problems | Processes | +|:--------------------------------------------------------------:|:---------------------------------:| +| Hermitian linear systems | Hermitian Lanczos | +| Square Non-Hermitian linear systems | Non-Hermitian Lanczos -- Arnoldi | +| Least-squares problems | Golub-Kahan -- Saunders-Simon-Yip | +| Least-norm problems | Golub-Kahan -- Saunders-Simon-Yip | +| Saddle-point and Hermitian quasi-definite systems | Golub-Kahan -- Saunders-Simon-Yip | +| Generalized saddle-point and non-Hermitian partitioned systems | Montoison-Orban | + +### Notation + +For a matrix $A$, $A^H$ denotes the conjugate transpose of $A$. +It coincides with $A^T$, the transpose of $A$, for real matrices. +Define $V_k := \begin{bmatrix} v_1 & \ldots & v_k \end{bmatrix} \enspace$ and $\enspace U_k := \begin{bmatrix} u_1 & \ldots & u_k \end{bmatrix}$. + +For a matrix $C \in \mathbb{C}^{n \times n}$ and a vector $t \in \mathbb{C}^{n}$, the $k$-th Krylov subspace generated by $C$ and $t$ is +```math +\mathcal{K}_k(C, t) := +\left\{\sum_{i=0}^{k-1} \omega_i C^i t \, \middle \vert \, \omega_i \in \mathbb{C},~0 \le i \le k-1 \right\}. +``` + +For matrices $C \in \mathbb{C}^{n \times n} \enspace$ and $\enspace T \in \mathbb{C}^{n \times p}$, the $k$-th block Krylov subspace generated by $C$ and $T$ is +```math +\mathcal{K}_k^{\square}(C, T) := +\left\{\sum_{i=0}^{k-1} C^i T \, \Omega_i \, \middle \vert \, \Omega_i \in \mathbb{C}^{p \times p},~0 \le i \le k-1 \right\}. 
+```
+
+## Hermitian Lanczos
+
+![hermitian_lanczos](./graphics/hermitian_lanczos.png)
+
+After $k$ iterations of the Hermitian Lanczos process, the situation may be summarized as
+```math
+\begin{align*}
+  A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+  V_k^H V_k &= I_k,
+\end{align*}
+```
+where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$,
+```math
+T_k =
+\begin{bmatrix}
+  \alpha_1 & \beta_2 & & \\
+  \beta_2 & \alpha_2 & \ddots & \\
+  & \ddots & \ddots & \beta_k \\
+  & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+  T_{k} \\
+  \beta_{k+1} e_{k}^T
+\end{bmatrix}.
+```
+Note that $T_{k+1,k}$ is a real tridiagonal matrix even if $A$ is a complex matrix.
+
+The function [`hermitian_lanczos`](@ref hermitian_lanczos) returns $V_{k+1}$ and $T_{k+1,k}$.
+
+Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift).
+ +```@docs +hermitian_lanczos +``` + +## Non-Hermitian Lanczos + +![nonhermitian_lanczos](./graphics/nonhermitian_lanczos.png) + +After $k$ iterations of the non-Hermitian Lanczos process (also named the Lanczos biorthogonalization process), the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A^H U_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ + V_k^H U_k &= U_k^H V_k = I_k, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A,b)$ and $\mathcal{K}_k (A^H,c)$, respectively, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \gamma_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \gamma_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix} +, \qquad +T_{k,k+1} = +\begin{bmatrix} + T_{k} & \gamma_{k+1} e_k +\end{bmatrix}. +``` + +The function [`nonhermitian_lanczos`](@ref nonhermitian_lanczos) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$. + +Related methods: [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`CGS`](@ref cgs) and [`BICGSTAB`](@ref bicgstab). + +!!! note + The scaling factors used in our implementation are $\beta_k = |u_k^H v_k|^{\tfrac{1}{2}}$ and $\gamma_k = (u_k^H v_k) / \beta_k$. + With these scaling factors, the non-Hermitian Lanczos process coincides with the Hermitian Lanczos process when $A = A^H$ and $b = c$. 
+ +```@docs +nonhermitian_lanczos +``` + +## Arnoldi + +![arnoldi](./graphics/arnoldi.png) + +After $k$ iterations of the Arnoldi process, the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\ + V_k^H V_k &= I_k, +\end{align*} +``` +where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$, +```math +H_k = +\begin{bmatrix} + h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\ + h_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & h_{k-1,k} \\ + & & h_{k,k-1} & h_{k,k} +\end{bmatrix} +, \qquad +H_{k+1,k} = +\begin{bmatrix} + H_{k} \\ + h_{k+1,k} e_{k}^T +\end{bmatrix}. +``` + +The function [`arnoldi`](@ref arnoldi) returns $V_{k+1}$ and $H_{k+1,k}$. + +Related methods: [`DIOM`](@ref diom), [`FOM`](@ref fom), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres) and [`FGMRES`](@ref fgmres). + +!!! note + The Arnoldi process coincides with the Hermitian Lanczos process when $A$ is Hermitian. + +```@docs +arnoldi +``` + +## Golub-Kahan + +![golub_kahan](./graphics/golub_kahan.png) + +After $k$ iterations of the Golub-Kahan bidiagonalization process, the situation may be summarized as +```math +\begin{align*} + A V_k &= U_{k+1} B_k, \\ + A^H U_{k+1} &= V_k B_k^H + \alpha_{k+1} v_{k+1} e_{k+1}^T = V_{k+1} L_{k+1}^H, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A^HA,A^Hb)$ and $\mathcal{K}_k (AA^H,b)$, respectively, +```math +L_k = +\begin{bmatrix} + \alpha_1 & & & \\ + \beta_2 & \alpha_2 & & \\ + & \ddots & \ddots & \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +B_k = +\begin{bmatrix} + \alpha_1 & & & \\ + \beta_2 & \alpha_2 & & \\ + & \ddots & \ddots & \\ + & & \beta_k & \alpha_k \\ + & & & \beta_{k+1} \\ +\end{bmatrix} += +\begin{bmatrix} + L_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix}. +``` +Note that $L_k$ is a real bidiagonal matrix even if $A$ is a complex matrix. 
+ +The function [`golub_kahan`](@ref golub_kahan) returns $V_{k+1}$, $U_{k+1}$ and $L_{k+1}$. + +Related methods: [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig), [`CRAIGMR`](@ref craigmr), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). + +!!! note + The Golub-Kahan process coincides with the Hermitian Lanczos process applied to the normal equations $A^HA x = A^Hb$ and $AA^H x = b$. + It is also related to the Hermitian Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial vector $\begin{bmatrix} b \\ 0 \end{bmatrix}$. + +```@docs +golub_kahan +``` + +## Saunders-Simon-Yip + +![saunders_simon_yip](./graphics/saunders_simon_yip.png) + +After $k$ iterations of the Saunders-Simon-Yip process (also named the orthogonal tridiagonalization process), the situation may be summarized as +```math +\begin{align*} + A U_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A^H V_k &= U_k T_k^H + \gamma_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \gamma_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \gamma_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix} +, \qquad +T_{k,k+1} = +\begin{bmatrix} + T_{k} & \gamma_{k+1} e_{k} +\end{bmatrix}. +``` + +The function [`saunders_simon_yip`](@ref saunders_simon_yip) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$. + +Related methods: [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr), [`TriLQR`](@ref trilqr), [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr). + +```@docs +saunders_simon_yip +``` + +!!! 
note
+    The Saunders-Simon-Yip process is equivalent to the block-Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+
+## Montoison-Orban
+
+![montoison_orban](./graphics/montoison_orban.png)
+
+After $k$ iterations of the Montoison-Orban process (also named the orthogonal Hessenberg reduction process), the situation may be summarized as
+```math
+\begin{align*}
+  A U_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\
+  B V_k &= U_k F_k + f_{k+1,k} u_{k+1} e_k^T = U_{k+1} F_{k+1,k}, \\
+  V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$,
+```math
+H_k =
+\begin{bmatrix}
+  h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\
+  h_{2,1}~ & \ddots~ & \ddots & \vdots \\
+  & \ddots~ & \ddots & h_{k-1,k} \\
+  & & h_{k,k-1} & h_{k,k}
+\end{bmatrix}
+, \qquad
+F_k =
+\begin{bmatrix}
+  f_{1,1}~ & f_{1,2}~ & \ldots & f_{1,k} \\
+  f_{2,1}~ & \ddots~ & \ddots & \vdots \\
+  & \ddots~ & \ddots & f_{k-1,k} \\
+  & & f_{k,k-1} & f_{k,k}
+\end{bmatrix},
+```
+```math
+H_{k+1,k} =
+\begin{bmatrix}
+  H_{k} \\
+  h_{k+1,k} e_{k}^T
+\end{bmatrix}
+, \qquad
+F_{k+1,k} =
+\begin{bmatrix}
+  F_{k} \\
+  f_{k+1,k} e_{k}^T
+\end{bmatrix}.
+```
+
+The function [`montoison_orban`](@ref montoison_orban) returns $V_{k+1}$, $H_{k+1,k}$, $U_{k+1}$ and $F_{k+1,k}$.
+
+Related methods: [`GPMR`](@ref gpmr).
+
+!!! note
+    The Montoison-Orban process is equivalent to the block-Arnoldi process applied to $\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+    It also coincides with the Saunders-Simon-Yip process when $B = A^H$.
+ +```@docs +montoison_orban +``` diff --git a/docs/src/reference.md b/docs/src/reference.md index 0896e1639..be0ac5288 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -10,5 +10,7 @@ Krylov.FloatOrComplex Krylov.niterations Krylov.Aprod Krylov.Atprod +Krylov.kstdout +Krylov.extract_parameters Base.show ``` diff --git a/docs/src/solvers/gsp.md b/docs/src/solvers/gsp.md index 10aaccbe0..33c580b8a 100644 --- a/docs/src/solvers/gsp.md +++ b/docs/src/solvers/gsp.md @@ -1,5 +1,5 @@ ```@meta -# Generalized saddle-point and unsymmetric partitioned systems +# Generalized saddle-point and non-Hermitian partitioned systems ``` ## GPMR diff --git a/docs/src/solvers/ln.md b/docs/src/solvers/ln.md index c5396ffdd..b638b8247 100644 --- a/docs/src/solvers/ln.md +++ b/docs/src/solvers/ln.md @@ -36,3 +36,10 @@ craig! craigmr craigmr! ``` + +## USYMLQ + +```@docs +usymlq +usymlq! +``` diff --git a/docs/src/solvers/ls.md b/docs/src/solvers/ls.md index f77057d94..fecfbc417 100644 --- a/docs/src/solvers/ls.md +++ b/docs/src/solvers/ls.md @@ -36,3 +36,10 @@ lsqr! lsmr lsmr! ``` + +## USYMQR + +```@docs +usymqr +usymqr! 
+``` diff --git a/docs/src/solvers/sid.md b/docs/src/solvers/sid.md index 1bd459cd2..e911681be 100644 --- a/docs/src/solvers/sid.md +++ b/docs/src/solvers/sid.md @@ -1,5 +1,5 @@ ```@meta -# Symmetric indefinite linear systems +# Hermitian indefinite linear systems ``` ## SYMMLQ diff --git a/docs/src/solvers/sp_sqd.md b/docs/src/solvers/sp_sqd.md index 518684b5b..4ee4ab09b 100644 --- a/docs/src/solvers/sp_sqd.md +++ b/docs/src/solvers/sp_sqd.md @@ -1,5 +1,5 @@ ```@meta -# Saddle-point and symmetric quasi-definite systems +# Saddle-point and Hermitian quasi-definite systems ``` ## TriCG diff --git a/docs/src/solvers/spd.md b/docs/src/solvers/spd.md index 79bb6e9e8..aebda285b 100644 --- a/docs/src/solvers/spd.md +++ b/docs/src/solvers/spd.md @@ -1,5 +1,5 @@ ```@meta -# Symmetric positive definite linear systems +# Hermitian positive definite linear systems ``` ## CG diff --git a/docs/src/solvers/unsymmetric.md b/docs/src/solvers/unsymmetric.md index 280908ea5..c9e77f787 100644 --- a/docs/src/solvers/unsymmetric.md +++ b/docs/src/solvers/unsymmetric.md @@ -1,5 +1,5 @@ ```@meta -# Unsymmetric linear systems +# Non-Hermitian square linear systems ``` ## BiLQ @@ -16,20 +16,6 @@ qmr qmr! ``` -## USYMLQ - -```@docs -usymlq -usymlq! -``` - -## USYMQR - -```@docs -usymqr -usymqr! -``` - ## CGS ```@docs @@ -71,3 +57,10 @@ dqgmres! gmres gmres! ``` + +## FGMRES + +```@docs +fgmres +fgmres! +``` diff --git a/docs/src/storage.md b/docs/src/storage.md new file mode 100644 index 000000000..903cc0558 --- /dev/null +++ b/docs/src/storage.md @@ -0,0 +1,152 @@ +```@meta +# Thanks Morten Piibeleht for the hack with the tables! +``` + +```@raw html + +``` + +# [Storage requirements](@id storage-requirements) + +This section provides the storage requirements of all Krylov methods available in Krylov.jl. + +### Notation + +We denote by $m$ and $n$ the number of rows and columns of the linear problem. +The memory parameter of DIOM, FOM, DQGMRES, GMRES, FGMRES and GPMR is $k$. 
+The number of shifts of CG-LANCZOS-SHIFT is $p$.
+
+## Theoretical storage requirements
+
+The following tables provide the number of coefficients that must be allocated for each Krylov method.
+The coefficients have the same type as those that compose the linear problem we seek to solve.
+Each table summarizes the storage requirements of Krylov methods recommended for a specific linear problem.
+
+#### Hermitian positive definite linear systems
+
+| Methods | [`CG`](@ref cg) | [`CR`](@ref cr) | [`CG-LANCZOS`](@ref cg_lanczos) | [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift) |
+|:-------:|:---------------:|:---------------:|:-------------------------------:|:-------------------------------------------:|
+| Storage | $4n$ | $5n$ | $5n$ | $3n + 2np + 5p$ |
+
+#### Hermitian indefinite linear systems
+
+| Methods | [`SYMMLQ`](@ref symmlq) | [`MINRES`](@ref minres) | [`MINRES-QLP`](@ref minres_qlp) |
+|:-------:|:-----------------------:|:-----------------------:|:-------------------------------:|
+| Storage | $5n$ | $6n$ | $6n$ |
+
+#### Non-Hermitian square linear systems
+
+| Methods | [`CGS`](@ref cgs) | [`BICGSTAB`](@ref bicgstab) | [`BiLQ`](@ref bilq) | [`QMR`](@ref qmr) |
+|:-------:|:-----------------:|:---------------------------:|:-------------------:|:-----------------:|
+| Storage | $6n$ | $6n$ | $8n$ | $9n$ |
+
+| Methods | [`DIOM`](@ref diom) | [`DQGMRES`](@ref dqgmres) |
+|:-------:|:-------------------:|:-------------------------:|
+| Storage | $n(2k+1) + 2k - 1$ | $n(2k+2) + 3k + 1$ |
+
+| Methods | [`FOM`](@ref fom) | [`GMRES`](@ref gmres) | [`FGMRES`](@ref fgmres) |
+|:-------:|:--------------------------------------------------:|:---------------------------------------:|:----------------------------------------:|
+| Storage$\dfrac{}{}$ | $\!n(2+k) +2k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+k) + 3k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+2k) + 3k + \dfrac{k(k + 1)}{2}\!$ |
+
+#### Least-norm problems
+
+| Methods | [`USYMLQ`](@ref usymlq) | [`CGNE`](@ref cgne) |
[`CRMR`](@ref crmr) | [`LNLQ`](@ref lnlq) | [`CRAIG`](@ref craig) | [`CRAIGMR`](@ref craigmr) | +|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:---------------------:|:-------------------------:| +| Storage | $5n + 3m$ | $3n + 2m$ | $3n + 2m$ | $3n + 4m$ | $3n + 4m$ | $4n + 5m$ | + +#### Least-squares problems + +| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) | +|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:| +| Storage | $6n + 3m$ | $3n + 2m$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ | + +#### Adjoint systems + +| Methods | [`BiLQR`](@ref bilqr) | [`TriLQR`](@ref trilqr) | +|:-------:|:---------------------:|:-----------------------:| +| Storage | $11n$ | $6m + 5n$ | + +#### Saddle-point and Hermitian quasi-definite systems + +| Methods | [`TriCG`](@ref tricg) | [`TriMR`](@ref trimr) | +|:--------:|:---------------------:|:---------------------:| +| Storage | $6n + 6m$ | $8n + 8m$ | + +#### Generalized saddle-point and non-Hermitian partitioned systems + +| Method | [`GPMR`](@ref gpmr) | +|:-------:|:-------------------------:| +| Storage | $(2+k)(n+m) + 2k^2 + 11k$ | + +## Practical storage requirements + +Each method has its own `KrylovSolver` that contains all the storage needed by the method. +In the REPL, the size in bytes of each attribute and the total amount of memory allocated by the solver are displayed when we show a `KrylovSolver`. + +```@example storage +using Krylov + +m = 5000 +n = 12000 +A = rand(Float64, m, n) +b = rand(Float64, m) +solver = LsmrSolver(A, b) +show(stdout, solver, show_stats=false) +``` + +If we want the total number of bytes used by the solver, we can call `nbytes = sizeof(solver)`. 
+
+```@example storage
+nbytes = sizeof(solver)
+```
+
+Thereafter, we can use `Base.format_bytes(nbytes)` to recover what is displayed in the REPL.
+
+```@example storage
+Base.format_bytes(nbytes)
+```
+
+To verify that we match the theoretical results, we just need to multiply the storage requirement of a method by the number of bytes associated with the precision of the linear problem.
+For instance, we need 4 bytes for the precision `Float32`, 8 bytes for precisions `Float64` and `ComplexF32`, and 16 bytes for the precision `ComplexF64`.
+
+```@example storage
+FC = Float64 # precision of the least-squares problem
+ncoefs_lsmr = 5*n + 2*m # number of coefficients
+nbytes_lsmr = sizeof(FC) * ncoefs_lsmr # number of bytes
+```
+
+Therefore, you can check that you have enough memory in RAM to allocate a `KrylovSolver`.
+
+```@example storage
+free_nbytes = Sys.free_memory()
+Base.format_bytes(free_nbytes) # Total free memory in RAM in bytes.
+```
+
+!!! note
+    - Beyond having faster operations, using low precisions, such as single precision, allows one to store more coefficients in RAM and solve larger linear problems.
+    - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to test that we match the expected storage requirement of each method with a tolerance of 2%.
diff --git a/docs/src/tips.md b/docs/src/tips.md
index 604c0633d..e08567ae1 100644
--- a/docs/src/tips.md
+++ b/docs/src/tips.md
@@ -16,14 +16,14 @@ If you don't know the maximum number of threads available on your computer, you
 NMAX = Sys.CPU_THREADS
 ```
 
-and define the number of OpenBLAS/MKL threads at runtine with
+and define the number of OpenBLAS/MKL threads at runtime with
 
 ```julia
 BLAS.set_num_threads(N) # 1 ≤ N ≤ NMAX
 BLAS.get_num_threads()
 ```
 
-The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2`.
+The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2` if your CPU supports simultaneous multithreading (SMT). By default Julia ships with OpenBLAS but it's also possible to use Intel MKL BLAS and LAPACK with [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl). diff --git a/docs/src/warm_start.md b/docs/src/warm-start.md similarity index 59% rename from docs/src/warm_start.md rename to docs/src/warm-start.md index 030cad6c0..6b830bff3 100644 --- a/docs/src/warm_start.md +++ b/docs/src/warm-start.md @@ -1,9 +1,10 @@ -## Warm Start +# [Warm-start](@id warm-start) -Most Krylov methods in this module accept a starting point as argument. The starting point is used as initial approximation to a solution. +Most Krylov methods in this module accept a starting point as argument. +The starting point is used as initial approximation to a solution. ```julia -solver = CgSolver(n, n, S) +solver = CgSolver(A, b) cg!(solver, A, b, itmax=100) if !issolved(solver) cg!(solver, A, b, solver.x, itmax=100) # cg! uses the approximate solution `solver.x` as starting point @@ -28,7 +29,7 @@ If a Krylov method doesn't have the option to warm start, it can still be done e We provide an example with `cg_lanczos!`. ```julia -solver = CgLanczosSolver(n, n, S) +solver = CgLanczosSolver(A, b) cg_lanczos!(solver, A, b) x₀ = solver.x # Ax₀ ≈ b r = b - A * x₀ # r = b - Ax₀ @@ -41,33 +42,34 @@ Explicit restarts cannot be avoided in certain block methods, such as TriMR, due ```julia # [E A] [x] = [b] -# [Aᵀ F] [y] [c] +# [Aᴴ F] [y] [c] M = inv(E) N = inv(F) x₀, y₀, stats = trimr(A, b, c, M=M, N=N) # E and F are not available inside TriMR b₀ = b - Ex₀ - Ay -c₀ = c - Aᵀx₀ - Fy +c₀ = c - Aᴴx₀ - Fy Δx, Δy, stats = trimr(A, b₀, c₀, M=M, N=N) x = x₀ + Δx y = y₀ + Δy ``` - -## Restarted methods - -The storage requierements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses. 
-For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are prefered. -In this section, we show how to use warm starts to implement GMRES(k) and FOM(k). - -```julia -k = 50 -solver = GmresSolver(A, b, k) # FomSolver(A, b, k) -solver.x .= 0 # solver.x .= x₀ -nrestart = 0 -while !issolved(solver) || nrestart ≤ 10 - solve!(solver, A, b, solver.x, itmax=k) - nrestart += 1 -end +```@meta +# ## Restarted methods +# +# The storage requirements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses. +# For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are preferred. +# In this section, we show how to use warm starts to implement GMRES(k) and FOM(k). +# +# ```julia +# k = 50 +# solver = GmresSolver(A, b, k) # FomSolver(A, b, k) +# solver.x .= 0 # solver.x .= x₀ +# nrestart = 0 +# while !issolved(solver) || nrestart ≤ 10 +# solve!(solver, A, b, solver.x, itmax=k) +# nrestart += 1 +# end +# ``` ``` diff --git a/ext/KrylovComponentArraysExt.jl b/ext/KrylovComponentArraysExt.jl new file mode 100644 index 000000000..68cc3e7cf --- /dev/null +++ b/ext/KrylovComponentArraysExt.jl @@ -0,0 +1,13 @@ +module KrylovComponentArraysExt + +using Krylov: Krylov +using ComponentArrays: ComponentVector + +""" + Krylov.ktypeof(::ComponentVector{T,V}) where {T,V} + +Return the underlying `V` type. +""" +Krylov.ktypeof(::ComponentVector{T,V}) where {T,V} = V + +end diff --git a/ext/KrylovFillArraysExt.jl b/ext/KrylovFillArraysExt.jl new file mode 100644 index 000000000..636533942 --- /dev/null +++ b/ext/KrylovFillArraysExt.jl @@ -0,0 +1,13 @@ +module KrylovFillArraysExt + +using Krylov: Krylov +using FillArrays: AbstractFill + +""" + Krylov.ktypeof(::AbstractFill{T,1}) where {T} + +Return the corresponding `Vector{T}` type. 
+""" +Krylov.ktypeof(::AbstractFill{T,1}) where {T} = Vector{T} + +end diff --git a/ext/KrylovStaticArraysExt.jl b/ext/KrylovStaticArraysExt.jl new file mode 100644 index 000000000..f24bd34cc --- /dev/null +++ b/ext/KrylovStaticArraysExt.jl @@ -0,0 +1,13 @@ +module KrylovStaticArraysExt + +using Krylov: Krylov +using StaticArrays: StaticVector + +""" + Krylov.ktypeof(::StaticVector{S,T}) where {S,T} + +Return the corresponding `Vector{T}` type. +""" +Krylov.ktypeof(::StaticVector{S,T}) where {S,T} = Vector{T} + +end diff --git a/src/Krylov.jl b/src/Krylov.jl index b714ccd79..013ea3e65 100644 --- a/src/Krylov.jl +++ b/src/Krylov.jl @@ -1,10 +1,16 @@ module Krylov using LinearAlgebra, SparseArrays, Printf +using PackageExtensionCompat + +function __init__() + @require_extensions +end include("krylov_utils.jl") include("krylov_stats.jl") include("krylov_solvers.jl") +include("krylov_processes.jl") include("cg.jl") include("cr.jl") @@ -19,6 +25,7 @@ include("diom.jl") include("fom.jl") include("dqgmres.jl") include("gmres.jl") +include("fgmres.jl") include("gpmr.jl") @@ -49,6 +56,6 @@ include("lnlq.jl") include("craig.jl") include("craigmr.jl") -include("callback_utils.jl") +include("krylov_solve.jl") end diff --git a/src/bicgstab.jl b/src/bicgstab.jl index c3b914599..16a3ceae9 100644 --- a/src/bicgstab.jl +++ b/src/bicgstab.jl @@ -16,40 +16,60 @@ export bicgstab, bicgstab! """ - (x, stats) = bicgstab(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = bicgstab(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, M=I, N=I, + ldiv::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. 
-Solve the square linear system Ax = b using the BICGSTAB method. + (x, stats) = bicgstab(A, b, x0::AbstractVector; kwargs...) + +BICGSTAB can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using BICGSTAB. BICGSTAB requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. The Biconjugate Gradient Stabilized method is a variant of BiCG, like CGS, -but using different updates for the Aᵀ-sequence in order to obtain smoother +but using different updates for the Aᴴ-sequence in order to obtain smoother convergence than CGS. If BICGSTAB stagnates, we recommend DQGMRES and BiLQ as alternative methods for unsymmetric square systems. BICGSTAB stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖b‖ * rtol`. -`atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -This implementation allows a left preconditioner `M` and a right preconditioner `N`. +#### Optional argument -BICGSTAB can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = bicgstab(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. 
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -58,18 +78,6 @@ and `false` otherwise. """ function bicgstab end -function bicgstab(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = BicgstabSolver(A, b) - bicgstab!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function bicgstab(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = BicgstabSolver(A, b) - bicgstab!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = bicgstab!(solver::BicgstabSolver, A, b; kwargs...) 
solver = bicgstab!(solver::BicgstabSolver, A, b, x0; kwargs...) @@ -80,150 +88,201 @@ See [`BicgstabSolver`](@ref) for more details about the `solver`. """ function bicgstab! end -function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - bicgstab!(solver, A, b; kwargs...) - return solver -end - -function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("BICGSTAB: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :t , S, n) - allocate_if(!NisI, solver, :yz, S, n) - Δx, x, r, p, v, s, qd, stats = solver.Δx, solver.x, solver.r, solver.p, solver.v, solver.s, solver.qd, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - q = d = solver.qd - t = MisI ? d : solver.t - y = NisI ? p : solver.yz - z = NisI ? s : solver.yz - r₀ = MisI ? 
r : solver.qd - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - else - r₀ .= b +def_args_bicgstab = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_bicgstab = (:(x0::AbstractVector),) + +def_kwargs_bicgstab = (:(; c::AbstractVector{FC} = b ), + :(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_bicgstab = mapreduce(extract_parameters, vcat, def_kwargs_bicgstab) + +args_bicgstab = (:A, :b) +optargs_bicgstab = (:x0,) +kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function bicgstab($(def_args_bicgstab...), $(def_optargs_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BicgstabSolver(A, b) + warm_start!(solver, $(optargs_bicgstab...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bicgstab!(solver, $(args_bicgstab...); $(kwargs_bicgstab...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - x .= zero(FC) # x₀ - s .= zero(FC) # s₀ - v .= zero(FC) # v₀ - MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ - p .= r # p₁ - - α = one(FC) # α₀ - ω = one(FC) # ω₀ - ρ = one(FC) # ρ₀ - - # Compute residual norm ‖r₀‖₂. 
- rNorm = @knrm2(n, r) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function bicgstab($(def_args_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BicgstabSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bicgstab!(solver, $(args_bicgstab...); $(kwargs_bicgstab...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|") - kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) - - next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ - if next_ρ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = false, false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver - end - - # Stopping criterion. - solved = rNorm ≤ ε - tired = iter ≥ itmax - breakdown = false - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index and ρ. 
- iter = iter + 1 - ρ = next_ρ - - NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ - mul!(q, A, y) # qₖ = Ayₖ - mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ - α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩ - @kcopy!(n, r, s) # sₖ = rₖ₋₁ - @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ - @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ - NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ - mul!(d, A, z) # dₖ = Azₖ - MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ - ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩ - @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ - @kcopy!(n, s, r) # rₖ = sₖ - @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ - next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩ - β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ) - @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ - @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ - - # Compute residual norm ‖rₖ‖₂. + function bicgstab!(solver :: BicgstabSolver{T,FC,S}, $(def_args_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "BICGSTAB: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Set up workspace. 
+ allocate_if(!MisI, solver, :t , S, n) + allocate_if(!NisI, solver, :yz, S, n) + Δx, x, r, p, v, s, qd, stats = solver.Δx, solver.x, solver.r, solver.p, solver.v, solver.s, solver.qd, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = d = solver.qd + t = MisI ? d : solver.t + y = NisI ? p : solver.yz + z = NisI ? s : solver.yz + r₀ = MisI ? r : solver.qd + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + else + r₀ .= b + end + + x .= zero(FC) # x₀ + s .= zero(FC) # s₀ + v .= zero(FC) # v₀ + MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ + p .= r # p₁ + + α = one(FC) # α₀ + ω = one(FC) # ω₀ + ρ = one(FC) # ρ₀ + + # Compute residual norm ‖r₀‖₂. rNorm = @knrm2(n, r) history && push!(rNorms, rNorm) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %5s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time)) + + next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ + if next_ρ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = false, false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver + end + + # Stopping criterion. 
+ solved = rNorm ≤ ε tired = iter ≥ itmax - breakdown = (α == 0 || isnan(α)) - kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) + breakdown = false + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index and ρ. + iter = iter + 1 + ρ = next_ρ + + NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ + mul!(q, A, y) # qₖ = Ayₖ + mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ + α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩ + @kcopy!(n, r, s) # sₖ = rₖ₋₁ + @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ + @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ + NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ + mul!(d, A, z) # dₖ = Azₖ + MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ + ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩ + @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ + @kcopy!(n, s, r) # rₖ = sₖ + @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ + next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩ + β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ) + @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ + @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ + + # Compute residual norm ‖rₖ‖₂. + rNorm = @knrm2(n, r) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + breakdown = (α == 0 || isnan(α)) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "breakdown αₖ == 0") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "breakdown αₖ == 0") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/bilq.jl b/src/bilq.jl index 39725fbfe..2e8823e93 100644 --- a/src/bilq.jl +++ b/src/bilq.jl @@ -13,50 +13,58 @@ export bilq, bilq! 
""" - (x, stats) = bilq(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + (x, stats) = bilq(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, transfer_to_bicg::Bool=true, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the square linear system Ax = b using the BiLQ method. + (x, stats) = bilq(A, b, x0::AbstractVector; kwargs...) +BiLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using BiLQ. BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. -When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. +When `A` is Hermitian and `b = c`, BiLQ is equivalent to SYMMLQ. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -An option gives the possibility of transferring to the BiCG point, -when it exists. The transfer is based on the residual norm. +* `x0`: a vector of length n that represents an initial guess of the solution x. -BiLQ can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = bilq(A, b, x0; kwargs...) +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. 
The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. -#### Reference +#### References * A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. +* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, Springer, pp. 73--89, 1976. """ function bilq end -function bilq(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = BilqSolver(A, b) - bilq!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function bilq(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = BilqSolver(A, b) - bilq!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = bilq!(solver::BilqSolver, A, b; kwargs...) solver = bilq!(solver::BilqSolver, A, b, x0; kwargs...) @@ -67,263 +75,312 @@ See [`BilqSolver`](@ref) for more details about the `solver`. """ function bilq! end -function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - bilq!(solver, A, b; kwargs...) - return solver -end - -function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("BILQ: system of size %d\n", n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ - p, Δx, x, d̅, stats = solver.p, solver.Δx, solver.x, solver.d̅, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_bilq = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_bilq = (:(x0::AbstractVector),) + +def_kwargs_bilq = (:(; c::AbstractVector{FC} = b ), + :(; transfer_to_bicg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_bilq = mapreduce(extract_parameters, vcat, def_kwargs_bilq) + +args_bilq = (:A, :b) +optargs_bilq = (:x0,) +kwargs_bilq = (:c, :transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function bilq($(def_args_bilq...), $(def_optargs_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqSolver(A, b) + warm_start!(solver, $(optargs_bilq...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilq!(solver, $(args_bilq...); $(kwargs_bilq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ - - history && push!(rNorms, bNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function bilq($(def_args_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilq!(solver, $(args_bilq...); $(kwargs_bilq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * bNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - - # Initialize the Lanczos biorthogonalization process. - cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩ - if cᵗb == 0 - stats.niter = 0 - stats.solved = false - stats.inconsistent = false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver - end - - βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀) - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations - norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates - - # Stopping criterion. 
- solved_lq = bNorm ≤ ε - solved_cg = false - breakdown = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved_lq || solved_cg || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the Lanczos biorthogonalization process. - # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ - - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • • • 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function bilq!(solver :: BilqSolver{T,FC,S}, $(def_args_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = 
size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "BILQ: system of size %d\n", n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ + p, Δx, x, d̅, stats = solver.p, solver.Δx, solver.x, solver.d̅, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ + + history && push!(rNorms, bNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ. 
- # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - if iter ≥ 2 - # Compute solution xₖ. - # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * bNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) + + # Initialize the Lanczos biorthogonalization process. + cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + if cᴴb == 0 + stats.niter = 0 + stats.solved = false + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver end - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = v₁ - @. d̅ = vₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations + norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates + + # Stopping criterion. + solved_lq = bNorm ≤ ε + solved_cg = false + breakdown = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved_lq || solved_cg || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. 
+ iter = iter + 1 + + # Continue the Lanczos biorthogonalization process. + # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ + + @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + + αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + + @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • • • 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] + + if iter == 1 + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ + else + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + end + + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and 
d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = v₁ + @. d̅ = vₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + end + + # Compute vₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if pᴴq ≠ 0 + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + end + + # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ + vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) + norm_vₖ₊₁ = @knrm2(n, vₖ) + + # Compute BiLQ residual norm + # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁ + rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Compute BiCG residual norm + # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ + if transfer_to_bicg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ + end + + # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ, δbarₖ₋₁ and norm_vₖ. + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + δbarₖ₋₁ = δbarₖ + norm_vₖ = norm_vₖ₊₁ + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + solved_lq = rNorm_lq ≤ ε + solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) + tired = iter ≥ itmax + breakdown = !solved_lq && !solved_cg && (pᴴq == 0) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. 
uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if pᵗq ≠ 0 - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + # Compute BICG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) - norm_vₖ₊₁ = @knrm2(n, vₖ) - - # Compute BiLQ residual norm - # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁ - rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) - end - history && push!(rNorms, rNorm_lq) - - # Compute BiCG residual norm - # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ - if transfer_to_bicg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ - end + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") + solved_lq && (status = "solution xᴸ good enough given atol and rtol") + solved_cg && (status = "solution xᶜ good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ, δbarₖ₋₁ and norm_vₖ. - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - δbarₖ₋₁ = δbarₖ - norm_vₖ = norm_vₖ₊₁ - - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - solved_lq = rNorm_lq ≤ ε - solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) - tired = iter ≥ itmax - breakdown = !solved_lq && !solved_cg && (pᵗq == 0) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) - end - (verbose > 0) && @printf("\n") + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Compute BICG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + # Update stats + stats.niter = iter + stats.solved = solved_lq || solved_cg + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") - solved_lq && (status = "solution xᴸ good enough given atol and rtol") - solved_cg && (status = "solution xᶜ good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved_lq || solved_cg - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/bilqr.jl b/src/bilqr.jl index 09fef1f6c..486ccceec 100644 --- a/src/bilqr.jl +++ b/src/bilqr.jl @@ -1,5 +1,5 @@ # An implementation of BILQR for the solution of square -# consistent linear adjoint systems Ax = b and Aᵀy = c. +# consistent linear adjoint systems Ax = b and Aᴴy = c. # # This method is described in # @@ -14,33 +14,55 @@ export bilqr, bilqr! 
""" (x, y, stats) = bilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_bicg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, y, stats) = bilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +BiLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + Combine BiLQ and QMR to solve adjoint systems. [0 A] [y] = [b] - [Aᵀ 0] [x] [c] + [Aᴴ 0] [x] [c] + +The relation `bᴴc ≠ 0` must be satisfied. +BiLQ is used for solving primal system `Ax = b` of size n. +QMR is used for solving dual system `Aᴴy = c` of size n. + +#### Input arguments -The relation `bᵀc ≠ 0` must be satisfied. -BiLQ is used for solving primal system `Ax = b`. -QMR is used for solving dual system `Aᵀy = c`. +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n; +* `c`: a vector of length n. -An option gives the possibility of transferring from the BiLQ point to the -BiCG point, when it exists. The transfer is based on the residual norm. +#### Optional arguments -BiLQR can be warm-started from initial guesses `x0` and `y0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. - (x, y, stats) = bilqr(A, b, c, x0, y0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. 
The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure. #### Reference @@ -48,18 +70,6 @@ and `false` otherwise. """ function bilqr end -function bilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = BilqrSolver(A, b) - bilqr!(solver, A, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function bilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = BilqrSolver(A, b) - bilqr!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = bilqr!(solver::BilqrSolver, A, b, c; kwargs...) solver = bilqr!(solver::BilqrSolver, A, b, c, x0, y0; kwargs...) @@ -70,369 +80,417 @@ See [`BilqrSolver`](@ref) for more details about the `solver`. """ function bilqr! 
end -function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - bilqr!(solver, A, b, c; kwargs...) - return solver -end - -function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("Systems must be square") - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("BILQR: systems of size %d\n", n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ - p, Δx, Δy, x, t = solver.p, solver.Δx, solver.Δy, solver.x, solver.y - d̅, wₖ₋₃, wₖ₋₂, stats = solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂, solver.stats - warm_start = solver.warm_start - rNorms, sNorms = stats.residuals_primal, stats.residuals_dual - reset!(stats) - r₀ = warm_start ? q : b - s₀ = warm_start ? 
p : c - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - mul!(s₀, Aᵀ, Δy) - @kaxpby!(n, one(FC), c, -one(FC), s₀) +def_args_bilqr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_bilqr = (:(x0 :: AbstractVector), + :(y0 :: AbstractVector)) + +def_kwargs_bilqr = (:(; transfer_to_bicg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_bilqr = mapreduce(extract_parameters, vcat, def_kwargs_bilqr) + +args_bilqr = (:A, :b, :c) +optargs_bilqr = (:x0, :y0) +kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function bilqr($(def_args_bilqr...), $(def_optargs_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqrSolver(A, b) + warm_start!(solver, $(optargs_bilqr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilqr!(solver, $(args_bilqr...); $(kwargs_bilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖. - x .= zero(FC) # x₀ - bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖ - - # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀy₀‖. - t .= zero(FC) # t₀ - cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ - - iter = 0 - itmax == 0 && (itmax = 2*n) - - history && push!(rNorms, bNorm) - history && push!(sNorms, cNorm) - εL = atol + rtol * bNorm - εQ = atol + rtol * cNorm - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm) - - # Initialize the Lanczos biorthogonalization process. 
- cᵗb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᵀy₀,b - Ax₀⟩ - if cᵗb == 0 - stats.niter = 0 - stats.solved_primal = false - stats.solved_dual = false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver + function bilqr($(def_args_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilqr!(solver, $(args_bilqr...); $(kwargs_bilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Set up workspace. - βₖ = √(abs(cᵗb)) # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀) - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᵀy₀) / γ̄₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations - ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁ - norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates - ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ - τₖ = zero(T) # τₖ is used for the dual residual norm estimate - - # Stopping criterion. 
- solved_lq = bNorm == 0 - solved_lq_tol = solved_lq_mach = false - solved_cg = solved_cg_tol = solved_cg_mach = false - solved_primal = solved_lq || solved_cg - solved_qr_tol = solved_qr_mach = false - solved_dual = cNorm == 0 - tired = iter ≥ itmax - breakdown = false - status = "unknown" - user_requested_exit = false - - while !((solved_primal && solved_dual) || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the Lanczos biorthogonalization process. - # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ - - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. 
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function bilqr!(solver :: BilqrSolver{T,FC,S}, $(def_args_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("Systems must be square") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "BILQR: systems of size %d\n", n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ + p, Δx, Δy, x, t = solver.p, solver.Δx, solver.Δy, solver.x, solver.y + d̅, wₖ₋₃, wₖ₋₂, stats = solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂, solver.stats + warm_start = solver.warm_start + rNorms, sNorms = stats.residuals_primal, stats.residuals_dual + reset!(stats) + r₀ = warm_start ? q : b + s₀ = warm_start ? p : c + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + mul!(s₀, Aᴴ, Δy) + @kaxpby!(n, one(FC), c, -one(FC), s₀) end - if !solved_primal - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ - end + # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖. + x .= zero(FC) # x₀ + bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖ + + # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖. + t .= zero(FC) # t₀ + cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ + + iter = 0 + itmax == 0 && (itmax = 2*n) + + history && push!(rNorms, bNorm) + history && push!(sNorms, cNorm) + εL = atol + rtol * bNorm + εQ = atol + rtol * cNorm + (verbose > 0) && @printf(iostream, "%5s %7s %7s %5s\n", "k", "‖rₖ‖", "‖sₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time)) + + # Initialize the Lanczos biorthogonalization process. 
+ cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩ + if cᴴb == 0 + stats.niter = 0 + stats.solved_primal = false + stats.solved_dual = false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver + end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ. - # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - if iter ≥ 2 - # Compute solution xₖ. - # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) - end + # Set up workspace. + βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations + ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁ + norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates + ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ + wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ + τₖ = zero(T) # τₖ is used for the dual residual norm estimate + + # Stopping criterion. 
+ solved_lq = bNorm == 0 + solved_lq_tol = solved_lq_mach = false + solved_cg = solved_cg_tol = solved_cg_mach = false + solved_primal = solved_lq || solved_cg + solved_qr_tol = solved_qr_mach = false + solved_dual = cNorm == 0 + tired = iter ≥ itmax + breakdown = false + status = "unknown" + user_requested_exit = false + overtimed = false - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = v₁ - @. d̅ = vₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) - end + while !((solved_primal && solved_dual) || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 - # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᵀvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ - norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁ + # Continue the Lanczos biorthogonalization process. + # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ - # Compute BiLQ residual norm - # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁ - rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) - end - history && push!(rNorms, rNorm_lq) + mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ - # Update ‖vₖ‖ - norm_vₖ = norm_vₖ₊₁ + @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - # Compute BiCG residual norm - # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ - if transfer_to_bicg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ - end + αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - # Update primal stopping criterion - solved_lq_tol = rNorm_lq ≤ εL - solved_lq_mach = rNorm_lq + 1 ≤ 1 - solved_lq = solved_lq_tol || solved_lq_mach - solved_cg_tol = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) - solved_cg_mach = transfer_to_bicg && 
(abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1) - solved_cg = solved_cg_tol || solved_cg_mach - solved_primal = solved_lq || solved_cg - end + @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - if !solved_dual - # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ̄₁e₁. if iter == 1 - ψbarₖ = conj(γₖ) + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ else - # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] - # [sₖ -cₖ] [ 0 ] [ ψbarₖ] - ψₖ₋₁ = cₖ * ψbarₖ₋₁ - ψbarₖ = sₖ * ψbarₖ₋₁ + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ end - # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ. - # w₁ = u₁ / δ̄₁ - if iter == 2 - wₖ₋₁ = wₖ₋₂ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @. 
wₖ₋₁ = uₖ₋₁ / conj(δₖ₋₁) + if !solved_primal + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = v₁ + @. d̅ = vₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + end + + # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ + vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ + norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁ + + # Compute BiLQ residual norm + # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁ + rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Update ‖vₖ‖ + norm_vₖ = norm_vₖ₊₁ + + # Compute BiCG residual norm + # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ + if transfer_to_bicg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ + end + + # Update primal stopping criterion + solved_lq_tol = rNorm_lq ≤ εL + solved_lq_mach = rNorm_lq + 1 ≤ 1 + solved_lq = solved_lq_tol || solved_lq_mach + solved_cg_tol = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) + solved_cg_mach = transfer_to_bicg && (abs(δbarₖ) > eps(T)) 
&& (rNorm_cg + 1 ≤ 1) + solved_cg = solved_cg_tol || solved_cg_mach + solved_primal = solved_lq || solved_cg end - # w₂ = (u₂ - λ̄₁w₁) / δ̄₂ - if iter == 3 - wₖ₋₁ = wₖ₋₃ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + if !solved_dual + # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ̄₁e₁. + if iter == 1 + ψbarₖ = conj(γₖ) + else + # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] + # [sₖ -cₖ] [ 0 ] [ ψbarₖ] + ψₖ₋₁ = cₖ * ψbarₖ₋₁ + ψbarₖ = sₖ * ψbarₖ₋₁ + end + + # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ. + # w₁ = u₁ / δ̄₁ + if iter == 2 + wₖ₋₁ = wₖ₋₂ + @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + @. wₖ₋₁ = uₖ₋₁ / conj(δₖ₋₁) + end + # w₂ = (u₂ - λ̄₁w₁) / δ̄₂ + if iter == 3 + wₖ₋₁ = wₖ₋₃ + @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ + if iter ≥ 4 + @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃) + wₖ₋₁ = wₖ₋₃ + @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + + if iter ≥ 3 + # Swap pointers. + @kswap(wₖ₋₃, wₖ₋₂) + end + + if iter ≥ 2 + # Compute solution tₖ₋₁. + # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ + @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t) + end + + # Update ψbarₖ₋₁ + ψbarₖ₋₁ = ψbarₖ + + # Compute τₖ = τₖ₋₁ + ‖uₖ‖² + τₖ += @kdotr(n, uₖ, uₖ) + + # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ + sNorm = abs(ψbarₖ) * √τₖ + history && push!(sNorms, sNorm) + + # Update dual stopping criterion + solved_qr_tol = sNorm ≤ εQ + solved_qr_mach = sNorm + 1 ≤ 1 + solved_dual = solved_qr_tol || solved_qr_mach end - # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ - if iter ≥ 4 - @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃) - wₖ₋₁ = wₖ₋₃ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + # Compute vₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if pᴴq ≠ zero(FC) + @. 
vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p end + # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. if iter ≥ 3 - # Swap pointers. - @kswap(wₖ₋₃, wₖ₋₂) + ϵₖ₋₃ = ϵₖ₋₂ end - if iter ≥ 2 - # Compute solution tₖ₋₁. - # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ - @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t) + λₖ₋₂ = λₖ₋₁ end - - # Update ψbarₖ₋₁ - ψbarₖ₋₁ = ψbarₖ - - # Compute τₖ = τₖ₋₁ + ‖uₖ‖² - τₖ += @kdotr(n, uₖ, uₖ) - - # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ - sNorm = abs(ψbarₖ) * √τₖ - history && push!(sNorms, sNorm) - - # Update dual stopping criterion - solved_qr_tol = sNorm ≤ εQ - solved_qr_mach = sNorm + 1 ≤ 1 - solved_dual = solved_qr_tol || solved_qr_mach - end - - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if pᵗq ≠ zero(FC) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + δbarₖ₋₁ = δbarₖ + cₖ₋₁ = cₖ + sₖ₋₁ = sₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + breakdown = !solved_lq && !solved_cg && (pᴴq == 0) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e %.2fs\n", iter, "✗ ✗ ✗ ✗", sNorm, ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s %.2fs\n", iter, rNorm_lq, "✗ ✗ ✗ ✗", ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, rNorm_lq, sNorm, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. 
- if iter ≥ 3 - ϵₖ₋₃ = ϵₖ₋₂ - end - if iter ≥ 2 - λₖ₋₂ = λₖ₋₁ + # Compute BICG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - δbarₖ₋₁ = δbarₖ - cₖ₋₁ = cₖ - sₖ₋₁ = sₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - breakdown = !solved_lq && !solved_cg && (pᵗq == 0) - - kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm) - kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "") - kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) - end - (verbose > 0) && @printf("\n") + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") + solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol") + solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") + !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") + solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") + solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") + solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") + solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ") + !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") + solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") + solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") + 
solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") + solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") + solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") + solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && @kaxpy!(n, one(FC), Δy, t) + solver.warm_start = false - # Compute BICG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + # Update stats + stats.niter = iter + stats.solved_primal = solved_primal + stats.solved_dual = solved_dual + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") - solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol") - solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") - !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") - solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") - solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") - solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") - solved_cg_mach && !solved_dual && 
(status = "Only found approximate zero-residual primal solution xᶜ") - !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") - solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") - solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") - solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") - solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") - solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") - solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(n, one(FC), Δx, x) - warm_start && @kaxpy!(n, one(FC), Δy, t) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.status = status - stats.solved_primal = solved_primal - stats.solved_dual = solved_dual - return solver end diff --git a/src/callback_utils.jl b/src/callback_utils.jl deleted file mode 100644 index eac362e5d..000000000 --- a/src/callback_utils.jl +++ /dev/null @@ -1,50 +0,0 @@ -export StorageGetxRestartedGmres - -export get_x_restarted_gmres! - -mutable struct StorageGetxRestartedGmres{S} - x::S - y::S - p::S -end -StorageGetxRestartedGmres(solver::GmresSolver; N = I) = - StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? 
similar(solver.p) : similar(solver.x)) - -function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, - stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S} - NisI = (N === I) - x2, y2, p2 = stor.x, stor.y, stor.p - n = size(A, 2) - # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. - nr = sum(1:solver.inner_iter) - y = solver.z # yᵢ = zᵢ - y2 .= y - R = solver.R - V = solver.V - x2 .= solver.Δx - for i = solver.inner_iter : -1 : 1 - pos = nr + i - solver.inner_iter # position of rᵢ.ₖ - for j = solver.inner_iter : -1 : i+1 - y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ - end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ eps(T)^(3/4) - y2[i] = zero(FC) - inconsistent = true - else - y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ - end - end - - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : solver.inner_iter - @kaxpy!(n, y2[i], V[i], x2) - end - if !NisI - p2 .= solver.p - p2 .= x2 - mul!(x2, N, p2) - end - x2 .+= solver.x -end diff --git a/src/cg.jl b/src/cg.jl index 8a974accc..1345a6232 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -15,36 +15,54 @@ export cg, cg! - """ (x, stats) = cg(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, radius::T=zero(T), linesearch::Bool=false, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + linesearch::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -The conjugate gradient method to solve the symmetric linear system Ax=b. + (x, stats) = cg(A, b, x0::AbstractVector; kwargs...) -The method does _not_ abort if A is not definite. +CG can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. 
-A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +The conjugate gradient method to solve the Hermitian linear system Ax = b of size n. + +The method does _not_ abort if A is not definite. M also indicates the weighted norm in which residuals are measured. -If `itmax=0`, the default number of iterations is set to `2 * n`, -with `n = length(b)`. +#### Input arguments -CG can be warm-started from an initial guess `x0` with the method +* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n; +* `b`: a vector of length n. - (x, stats) = cg(A, b, x0; kwargs...) +#### Optional argument -where `kwargs` are the same keyword arguments as above. +* `x0`: a vector of length n that represents an initial guess of the solution x. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -52,18 +70,6 @@ and `false` otherwise. """ function cg end -function cg(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CgSolver(A, b) - cg!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cg(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgSolver(A, b) - cg!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cg!(solver::CgSolver, A, b; kwargs...) solver = cg!(solver::CgSolver, A, b, x0; kwargs...) @@ -74,152 +80,200 @@ See [`CgSolver`](@ref) for more details about the `solver`. """ function cg! end -function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cg!(solver, A, b; kwargs...) 
- return solver -end - -function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0") - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CG: system of %d equations in %d variables\n", n, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :z, S, n) - Δx, x, r, p, Ap, stats = solver.Δx, solver.x, solver.r, solver.p, solver.Ap, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - z = MisI ? 
r : solver.z - - x .= zero(FC) - if warm_start - mul!(r, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r) - else - r .= b +def_args_cg = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cg = (:(x0::AbstractVector),) + +def_kwargs_cg = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; linesearch::Bool = false ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cg = mapreduce(extract_parameters, vcat, def_kwargs_cg) + +args_cg = (:A, :b) +optargs_cg = (:x0,) +kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cg($(def_args_cg...), $(def_optargs_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgSolver(A, b) + warm_start!(solver, $(optargs_cg...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg!(solver, $(args_cg...); $(kwargs_cg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(z, M, r, ldiv) - p .= z - γ = @kdotr(n, r, z) - rNorm = sqrt(γ) - history && push!(rNorms, rNorm) - if γ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + + function cg($(def_args_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg!(solver, $(args_cg...); $(kwargs_cg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2 * n) - - pAp = zero(T) - pNorm² = γ - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s 
%8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ") - kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm) - - solved = rNorm ≤ ε - tired = iter ≥ itmax - inconsistent = false - on_boundary = false - zero_curvature = false - user_requested_exit = false - - status = "unknown" - - while !(solved || tired || zero_curvature || user_requested_exit) - mul!(Ap, A, p) - pAp = @kdotr(n, p, Ap) - if (pAp ≤ eps(T) * pNorm²) && (radius == 0) - if abs(pAp) ≤ eps(T) * pNorm² - zero_curvature = true - inconsistent = !linesearch - end - if linesearch - iter == 0 && (x .= b) - solved = true - end + function cg!(solver :: CgSolver{T,FC,S}, $(def_args_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0") + (verbose > 0) && @printf(iostream, "CG: system of %d equations in %d variables\n", n, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :z, S, n) + Δx, x, r, p, Ap, stats = solver.Δx, solver.x, solver.r, solver.p, solver.Ap, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + z = MisI ? 
r : solver.z + + x .= zero(FC) + if warm_start + mul!(r, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r) + else + r .= b + end + MisI || mulorldiv!(z, M, r, ldiv) + p .= z + γ = @kdotr(n, r, z) + rNorm = sqrt(γ) + history && push!(rNorms, rNorm) + if γ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - (zero_curvature || solved) && continue - α = γ / pAp + iter = 0 + itmax == 0 && (itmax = 2 * n) - # Compute step size to boundary if applicable. - σ = radius > 0 ? maximum(to_boundary(x, p, radius, dNorm2=pNorm²)) : α + pAp = zero(T) + pNorm² = γ + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %8s %5s\n", "k", "‖r‖", "pAp", "α", "σ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e", iter, rNorm) - kdisplay(iter, verbose) && @printf("%8.1e %8.1e %8.1e\n", pAp, α, σ) + solved = rNorm ≤ ε + tired = iter ≥ itmax + inconsistent = false + on_boundary = false + zero_curvature = false + user_requested_exit = false + overtimed = false + + status = "unknown" + + while !(solved || tired || zero_curvature || user_requested_exit || overtimed) + mul!(Ap, A, p) + pAp = @kdotr(n, p, Ap) + if (pAp ≤ eps(T) * pNorm²) && (radius == 0) + if abs(pAp) ≤ eps(T) * pNorm² + zero_curvature = true + inconsistent = !linesearch + end + if linesearch + iter == 0 && (x .= b) + solved = true + end + end + (zero_curvature || solved) && continue - # Move along p from x to the boundary if either - # the next step leads outside the trust region or - # we have nonpositive curvature. - if (radius > 0) && ((pAp ≤ 0) || (α > σ)) - α = σ - on_boundary = true - end + α = γ / pAp - @kaxpy!(n, α, p, x) - @kaxpy!(n, -α, Ap, r) - MisI || mulorldiv!(z, M, r, ldiv) - γ_next = @kdotr(n, r, z) - rNorm = sqrt(γ_next) - history && push!(rNorms, rNorm) + # Compute step size to boundary if applicable. 
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius, dNorm2=pNorm²)) : α - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + kdisplay(iter, verbose) && @printf(iostream, " %8.1e %8.1e %8.1e %.2fs\n", pAp, α, σ, ktimer(start_time)) + + # Move along p from x to the boundary if either + # the next step leads outside the trust region or + # we have nonpositive curvature. + if (radius > 0) && ((pAp ≤ 0) || (α > σ)) + α = σ + on_boundary = true + end - resid_decrease_lim = rNorm ≤ ε - resid_decrease = resid_decrease_lim || resid_decrease_mach - solved = resid_decrease || on_boundary + @kaxpy!(n, α, p, x) + @kaxpy!(n, -α, Ap, r) + MisI || mulorldiv!(z, M, r, ldiv) + γ_next = @kdotr(n, r, z) + rNorm = sqrt(γ_next) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + resid_decrease_lim = rNorm ≤ ε + resid_decrease = resid_decrease_lim || resid_decrease_mach + solved = resid_decrease || on_boundary + + if !solved + β = γ_next / γ + pNorm² = γ_next + β^2 * pNorm² + γ = γ_next + @kaxpby!(n, one(FC), z, β, p) + end - if !solved - β = γ_next / γ - pNorm² = γ_next + β^2 * pNorm² - γ = γ_next - @kaxpby!(n, one(FC), z, β, p) + iter = iter + 1 + tired = iter ≥ itmax + user_requested_exit = callback(solver) :: Bool + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e", iter, rNorm) end + (verbose > 0) && @printf(iostream, "\n\n") + + # Termination status + solved && on_boundary && (status = "on trust-region boundary") + solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected") + solved && (status == "unknown") && (status = "solution good enough given atol and rtol") + zero_curvature && (status = "zero curvature detected") + tired && (status = "maximum number of iterations exceeded") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - iter = iter + 1 - tired = iter ≥ itmax - user_requested_exit = callback(solver) :: Bool - kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - solved && on_boundary && (status = "on trust-region boundary") - solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected") - solved && (status == "unknown") && (status = "solution good enough given atol and rtol") - zero_curvature && (status = "zero curvature detected") - tired && (status = "maximum number of iterations exceeded") - 
user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl index a8e24f02f..2c5d72a64 100644 --- a/src/cg_lanczos.jl +++ b/src/cg_lanczos.jl @@ -12,34 +12,53 @@ export cg_lanczos, cg_lanczos! - """ (x, stats) = cg_lanczos(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, - check_curvature::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, + check_curvature::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -The Lanczos version of the conjugate gradient method to solve the -symmetric linear system + (x, stats) = cg_lanczos(A, b, x0::AbstractVector; kwargs...) + +CG-LANCZOS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. - Ax = b +The Lanczos version of the conjugate gradient method to solve the +Hermitian linear system Ax = b of size n. The method does _not_ abort if A is not definite. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be hermitian and positive definite. +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -CG-LANCZOS can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = cg_lanczos(A, b, x0; kwargs...) 
+#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LanczosStats`](@ref) structure. #### References @@ -48,18 +67,6 @@ and `false` otherwise. """ function cg_lanczos end -function cg_lanczos(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CgLanczosSolver(A, b) - cg_lanczos!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cg_lanczos(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgLanczosSolver(A, b) - cg_lanczos!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = cg_lanczos!(solver::CgLanczosSolver, A, b; kwargs...) solver = cg_lanczos!(solver::CgLanczosSolver, A, b, x0; kwargs...) @@ -70,150 +77,199 @@ See [`CgLanczosSolver`](@ref) for more details about the `solver`. """ function cg_lanczos! end -function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cg_lanczos!(solver, A, b; kwargs...) - return solver -end - -function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $T") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :v, S, n) - Δx, x, Mv, Mv_prev = solver.Δx, solver.x, solver.Mv, solver.Mv_prev - p, Mv_next, stats = solver.p, solver.Mv_next, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - v = MisI ? Mv : solver.v - - # Initial state. 
- x .= zero(FC) - if warm_start - mul!(Mv, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), Mv) - else - Mv .= b +def_args_cg_lanczos = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cg_lanczos = (:(x0::AbstractVector),) + +def_kwargs_cg_lanczos = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; check_curvature::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_cg_lanczos = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos) + +args_cg_lanczos = (:A, :b) +optargs_cg_lanczos = (:x0,) +kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cg_lanczos($(def_args_cg_lanczos...), $(def_optargs_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgLanczosSolver(A, b) + warm_start!(solver, $(optargs_cg_lanczos...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg_lanczos!(solver, $(args_cg_lanczos...); $(kwargs_cg_lanczos...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁ - σ = β - rNorm = σ - history && push!(rNorms, rNorm) - if β == 0 - stats.niter = 0 - stats.solved = true - stats.Anorm = zero(T) - stats.indefinite = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + + function cg_lanczos($(def_args_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgLanczosSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg_lanczos!(solver, $(args_cg_lanczos...); $(kwargs_cg_lanczos...)) + 
solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - p .= v - - # Initialize Lanczos process. - # β₁Mv₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ - Mv_prev .= Mv - - iter = 0 - itmax == 0 && (itmax = 2 * n) - - # Initialize some constants used in recursions below. - ω = zero(T) - γ = one(T) - Anorm2 = zero(T) - β_prev = zero(T) - - # Define stopping tolerance. - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - indefinite = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - # Main loop. - while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit) - # Form next Lanczos vector. - # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ - mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ - - # Check curvature. Exit fast if requested. - # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ A pₖ. - γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁) - indefinite |= (γ ≤ 0) - (check_curvature & indefinite) && continue - - @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ - if iter > 0 - @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ - @. 
Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ + + function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, $(def_args_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CG-LANCZOS: system of %d equations in %d variables\n", n, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + Δx, x, Mv, Mv_prev = solver.Δx, solver.x, solver.Mv, solver.Mv_prev + p, Mv_next, stats = solver.p, solver.Mv_next, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + v = MisI ? Mv : solver.v + + # Initial state. + x .= zero(FC) + if warm_start + mul!(Mv, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), Mv) + else + Mv .= b end - @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ - MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ - Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. - β_prev = β - - # Compute next CG iterate. 
- @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ - ω = β * γ - σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ - ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)² - @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ - rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1 + MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀ + β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ + σ = β + rNorm = σ history && push!(rNorms, rNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach + if β == 0 + stats.niter = 0 + stats.solved = true + stats.Anorm = zero(T) + stats.indefinite = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + p .= v + + # Initialize Lanczos process. + # β₁Mv₁ = b + @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + Mv_prev .= Mv + + iter = 0 + itmax == 0 && (itmax = 2 * n) + + # Initialize some constants used in recursions below. + ω = zero(T) + γ = one(T) + Anorm2 = zero(T) + β_prev = zero(T) + + # Define stopping tolerance. + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + indefinite = false + solved = rNorm ≤ ε tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + # Main loop. + while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit || overtimed) + # Form next Lanczos vector. 
+ # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ + mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ + δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ + + # Check curvature. Exit fast if requested. + # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ. + γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁) + indefinite |= (γ ≤ 0) + (check_curvature & indefinite) && continue + + @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ + if iter > 0 + @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ + @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ + end + @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ + MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ + β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ + @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ + Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. + β_prev = β + + # Compute next CG iterate. + @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ + ω = β * γ + σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ + ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)² + @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ + rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1 + history && push!(rNorms, rNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + (check_curvature & indefinite) && (status = "negative curvature") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats. TODO: Estimate Acond. + stats.niter = iter + stats.solved = solved + stats.Anorm = sqrt(Anorm2) + stats.indefinite = indefinite + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - (check_curvature & indefinite) && (status = "negative curvature") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats. TODO: Estimate Acond. - stats.niter = iter - stats.solved = solved - stats.Anorm = sqrt(Anorm2) - stats.indefinite = indefinite - stats.status = status - return solver end diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl index 01f11e41f..b523e5cc3 100644 --- a/src/cg_lanczos_shift.jl +++ b/src/cg_lanczos_shift.jl @@ -13,13 +13,13 @@ export cg_lanczos_shift, cg_lanczos_shift! 
- """ (x, stats) = cg_lanczos_shift(A, b::AbstractVector{FC}, shifts::AbstractVector{T}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, check_curvature::Bool=false, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, + check_curvature::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -27,25 +27,42 @@ export cg_lanczos_shift, cg_lanczos_shift! The Lanczos version of the conjugate gradient method to solve a family of shifted systems - (A + αI) x = b (α = α₁, ..., αₙ) + (A + αI) x = b (α = α₁, ..., αₚ) + +of size n. The method does _not_ abort if A + αI is not definite. + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `shifts`: a vector of length p. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments -The method does _not_ abort if A + αI is not definite. +* `x`: a vector of p dense vectors, each one of length n; +* `stats`: statistics collected on the run in a [`LanczosShiftStats`](@ref) structure. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be hermitian and positive definite. +#### References -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* A. Frommer and P. Maass, [*Fast CG-Based Methods for Tikhonov-Phillips Regularization*](https://doi.org/10.1137/S1064827596313310), SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999. +* C. C. Paige and M. A. Saunders, [*Solution of Sparse Indefinite Systems of Linear Equations*](https://doi.org/10.1137/0712047), SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975. """ function cg_lanczos_shift end -function cg_lanczos_shift(A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} - nshifts = length(shifts) - solver = CgLanczosShiftSolver(A, b, nshifts) - cg_lanczos_shift!(solver, A, b, shifts; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cg_lanczos!(solver::CgLanczosShiftSolver, A, b, shifts; kwargs...) @@ -55,174 +72,213 @@ See [`CgLanczosShiftSolver`](@ref) for more details about the `solver`. """ function cg_lanczos_shift! 
end -function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, check_curvature :: Bool=false, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - - nshifts = length(shifts) - (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :v, S, n) - Mv, Mv_prev, Mv_next = solver.Mv, solver.Mv_prev, solver.Mv_next - x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat - ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged - not_cv, stats = solver.not_cv, solver.stats - rNorms_history, indefinite = stats.residuals, stats.indefinite - reset!(stats) - v = MisI ? Mv : solver.v - - # Initial state. - ## Distribute x similarly to shifts. 
- for i = 1 : nshifts - x[i] .= zero(FC) # x₀ - end - Mv .= b # Mv₁ ← b - MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁ - rNorms .= β - if history - for i = 1 : nshifts - push!(rNorms_history[i], rNorms[i]) - end +def_args_cg_lanczos_shift = (:(A ), + :(b::AbstractVector{FC} ), + :(shifts::AbstractVector{T})) + +def_kwargs_cg_lanczos_shift = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; check_curvature::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_cg_lanczos_shift = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos_shift) + +args_cg_lanczos_shift = (:A, :b, :shifts) +kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cg_lanczos_shift($(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + nshifts = length(shifts) + solver = CgLanczosShiftSolver(A, b, nshifts) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg_lanczos_shift!(solver, $(args_cg_lanczos_shift...); $(kwargs_cg_lanczos_shift...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Keep track of shifted systems with negative curvature if required. - indefinite .= false + function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, $(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - if β == 0 - stats.niter = 0 - stats.solved = true - stats.status = "x = 0 is a zero-residual solution" - return solver - end + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax - # Initialize each p to v. 
- for i = 1 : nshifts - p[i] .= v - end + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") - # Initialize Lanczos process. - # β₁Mv₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ - Mv_prev .= Mv - - # Initialize some constants used in recursions below. - ρ = one(T) - σ .= β - δhat .= zero(T) - ω .= zero(T) - γ .= one(T) - - # Define stopping tolerance. - ε = atol + rtol * β - - # Keep track of shifted systems that have converged. - for i = 1 : nshifts - converged[i] = rNorms[i] ≤ ε - not_cv[i] = !converged[i] - end - iter = 0 - itmax == 0 && (itmax = 2 * n) - - # Build format strings for printing. - if kdisplay(iter, verbose) - fmt = "%5d" * repeat(" %8.1e", nshifts) * "\n" - # precompile printf for our particular format - local_printf(data...) = Core.eval(Main, :(@printf($fmt, $(data)...))) - local_printf(iter, rNorms...) - end + nshifts = length(shifts) + nshifts == solver.nshifts || error("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $nshifts") + (verbose > 0) && @printf(iostream, "CG-LANCZOS-SHIFT: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) - solved = sum(not_cv) == 0 - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - # Main loop. - while ! (solved || tired || user_requested_exit) - # Form next Lanczos vector. - # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ - mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ - @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ - if iter > 0 - @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ - @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ - end - @. 
Mv = Mv_next # Mvₖ ← Mvₖ₊₁ - MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ - - # Check curvature: vₖᵀ(A + sᵢI)vₖ = vₖᵀAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². - # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ (A + sᵢ I) pₖ. - MisI || (ρ = @kdotr(n, v, v)) + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + Mv, Mv_prev, Mv_next = solver.Mv, solver.Mv_prev, solver.Mv_next + x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat + ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged + not_cv, stats = solver.not_cv, solver.stats + rNorms_history, indefinite = stats.residuals, stats.indefinite + reset!(stats) + v = MisI ? Mv : solver.v + + # Initial state. + ## Distribute x similarly to shifts. for i = 1 : nshifts - δhat[i] = δ + ρ * shifts[i] - γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) + x[i] .= zero(FC) # x₀ end - for i = 1 : nshifts - indefinite[i] |= γ[i] ≤ 0 + Mv .= b # Mv₁ ← b + MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁ + β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ + rNorms .= β + if history + for i = 1 : nshifts + push!(rNorms_history[i], rNorms[i]) + end end - # Compute next CG iterate for each shifted system that has not yet converged. - # Stop iterating on indefinite problems if requested. - for i = 1 : nshifts - not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] - if not_cv[i] - @kaxpy!(n, γ[i], p[i], x[i]) - ω[i] = β * γ[i] - σ[i] *= -ω[i] - ω[i] *= ω[i] - @kaxpby!(n, σ[i], v, ω[i], p[i]) - - # Update list of systems that have not converged. 
- rNorms[i] = abs(σ[i]) - converged[i] = rNorms[i] ≤ ε - end + # Keep track of shifted systems with negative curvature if required. + indefinite .= false + + if β == 0 + stats.niter = 0 + stats.solved = true + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver end - if length(not_cv) > 0 && history - for i = 1 : nshifts - not_cv[i] && push!(rNorms_history[i], rNorms[i]) - end + # Initialize each p to v. + for i = 1 : nshifts + p[i] .= v end - # Is there a better way than to update this array twice per iteration? + # Initialize Lanczos process. + # β₁Mv₁ = b + @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + Mv_prev .= Mv + + # Initialize some constants used in recursions below. + ρ = one(T) + σ .= β + δhat .= zero(T) + ω .= zero(T) + γ .= one(T) + + # Define stopping tolerance. + ε = atol + rtol * β + + # Keep track of shifted systems that have converged. for i = 1 : nshifts - not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] + converged[i] = rNorms[i] ≤ ε + not_cv[i] = !converged[i] end - iter = iter + 1 - kdisplay(iter, verbose) && local_printf(iter, rNorms...) + iter = 0 + itmax == 0 && (itmax = 2 * n) + + # Build format strings for printing. + (verbose > 0) && (fmt = Printf.Format("%5d" * repeat(" %8.1e", nshifts) * " %.2fs\n")) + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time)) - user_requested_exit = callback(solver) :: Bool - solved = sum(not_cv) == 0 + solved = !reduce(|, not_cv) tired = iter ≥ itmax - end - (verbose > 0) && @printf("\n") + status = "unknown" + user_requested_exit = false + overtimed = false + + # Main loop. + while ! (solved || tired || user_requested_exit || overtimed) + # Form next Lanczos vector. 
+ # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ + mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ + δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ + @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ + if iter > 0 + @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ + @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ + end + @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ + MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ + β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ + @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ + + # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². + # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ. + MisI || (ρ = @kdotr(n, v, v)) + for i = 1 : nshifts + δhat[i] = δ + ρ * shifts[i] + γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) + end + for i = 1 : nshifts + indefinite[i] |= γ[i] ≤ 0 + end + + # Compute next CG iterate for each shifted system that has not yet converged. + # Stop iterating on indefinite problems if requested. + for i = 1 : nshifts + not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] + if not_cv[i] + @kaxpy!(n, γ[i], p[i], x[i]) + ω[i] = β * γ[i] + σ[i] *= -ω[i] + ω[i] *= ω[i] + @kaxpby!(n, σ[i], v, ω[i], p[i]) + + # Update list of systems that have not converged. + rNorms[i] = abs(σ[i]) + converged[i] = rNorms[i] ≤ ε + end + end - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") + if length(not_cv) > 0 && history + for i = 1 : nshifts + not_cv[i] && push!(rNorms_history[i], rNorms[i]) + end + end - # Update stats. TODO: Estimate Anorm and Acond. - stats.niter = iter - stats.solved = solved - stats.status = status - return solver + # Is there a better way than to update this array twice per iteration? + for i = 1 : nshifts + not_cv[i] = check_curvature ? 
!(converged[i] || indefinite[i]) : !converged[i] + end + iter = iter + 1 + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time)) + + user_requested_exit = callback(solver) :: Bool + solved = !reduce(|, not_cv) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats. TODO: Estimate Anorm and Acond. + stats.niter = iter + stats.solved = solved + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/cgls.jl b/src/cgls.jl index f5529fbfb..e36d5acbd 100644 --- a/src/cgls.jl +++ b/src/cgls.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # CGLS is formally equivalent to applying the conjugate gradient method # to the normal equations but should be more stable. It is also formally @@ -28,12 +28,13 @@ export cgls, cgls! - """ (x, stats) = cgls(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + itmax::Int=0, timemax::Float64=Inf, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. 
@@ -42,19 +43,41 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ‖x‖₂² -using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CG to the normal equations - (AᵀA + λI) x = Aᵀb + (AᴴA + λI) x = Aᴴb but is more stable. -CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂. It is formally equivalent to LSQR, though can be slightly less accurate, but simpler to implement. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -63,12 +86,6 @@ and `false` otherwise. """ function cgls end -function cgls(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CglsSolver(A, b) - cgls!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cgls!(solver::CglsSolver, A, b; kwargs...) @@ -78,110 +95,151 @@ See [`CglsSolver`](@ref) for more details about the `solver`. """ function cgls! end -function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGLS: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :Mr, S, m) - x, p, s, r, q, stats = solver.x, solver.p, solver.s, solver.r, solver.q, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Mr = MisI ? r : solver.Mr - Mq = MisI ? 
q : solver.Mr - - x .= zero(FC) - r .= b - bNorm = @knrm2(m, r) # Marginally faster than norm(b) - if bNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - return solver +def_args_cgls = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_cgls = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cgls = mapreduce(extract_parameters, vcat, def_kwargs_cgls) + +args_cgls = (:A, :b) +kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cgls($(def_args_cgls...); $(def_kwargs_cgls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CglsSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgls!(solver, $(args_cgls...); $(kwargs_cgls...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(s, Aᵀ, Mr) - p .= s - γ = @kdotr(n, s, s) # γ = sᵀs - iter = 0 - itmax == 0 && (itmax = m + n) - - rNorm = bNorm - ArNorm = sqrt(γ) - history && push!(rNorms, rNorm) - history && push!(ArNorms, ArNorm) - ε = atol + rtol * ArNorm - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - - status = "unknown" - on_boundary = false - solved = ArNorm ≤ ε - tired = iter ≥ itmax - user_requested_exit = false - - while ! 
(solved || tired || user_requested_exit) - mul!(q, A, p) - MisI || mulorldiv!(Mq, M, q, ldiv) - δ = @kdotr(m, q, Mq) # δ = qᵀMq - λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᵀp - α = γ / δ - - # if a trust-region constraint is give, compute step to the boundary - σ = radius > 0 ? maximum(to_boundary(x, p, radius)) : α - if (radius > 0) & (α > σ) - α = σ - on_boundary = true - end - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + function cgls!(solver :: CglsSolver{T,FC,S}, $(def_args_cgls...); $(def_kwargs_cgls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CGLS: system of %d equations in %d variables\n", m, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :Mr, S, m) + x, p, s, r, q, stats = solver.x, solver.p, solver.s, solver.r, solver.q, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Mr = MisI ? r : solver.Mr + Mq = MisI ? 
q : solver.Mr + + x .= zero(FC) + r .= b + bNorm = @knrm2(m, r) # Marginally faster than norm(b) + if bNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + return solver + end MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(s, Aᵀ, Mr) - λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x - γ_next = @kdotr(n, s, s) # γ_next = sᵀs - β = γ_next / γ - @kaxpby!(n, one(FC), s, β, p) # p = s + βp - γ = γ_next - rNorm = @knrm2(m, r) # Marginally faster than norm(r) + mul!(s, Aᴴ, Mr) + p .= s + γ = @kdotr(n, s, s) # γ = sᴴs + iter = 0 + itmax == 0 && (itmax = m + n) + + rNorm = bNorm ArNorm = sqrt(γ) history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - user_requested_exit = callback(solver) :: Bool - solved = (ArNorm ≤ ε) | on_boundary + ε = atol + rtol * ArNorm + (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + + status = "unknown" + on_boundary = false + solved = ArNorm ≤ ε tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! (solved || tired || user_requested_exit || overtimed) + mul!(q, A, p) + MisI || mulorldiv!(Mq, M, q, ldiv) + δ = @kdotr(m, q, Mq) # δ = qᴴMq + λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᴴp + α = γ / δ + + # if a trust-region constraint is give, compute step to the boundary + σ = radius > 0 ? 
maximum(to_boundary(n, x, p, radius)) : α + if (radius > 0) & (α > σ) + α = σ + on_boundary = true + end + + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + MisI || mulorldiv!(Mr, M, r, ldiv) + mul!(s, Aᴴ, Mr) + λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x + γ_next = @kdotr(n, s, s) # γ_next = sᴴs + β = γ_next / γ + @kaxpby!(n, one(FC), s, β, p) # p = s + βp + γ = γ_next + rNorm = @knrm2(m, r) # Marginally faster than norm(r) + ArNorm = sqrt(γ) + history && push!(rNorms, rNorm) + history && push!(ArNorms, ArNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + user_requested_exit = callback(solver) :: Bool + solved = (ArNorm ≤ ε) || on_boundary + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/cgne.jl b/src/cgne.jl index 2859414e1..8a4e6dddb 100644 --- a/src/cgne.jl +++ b/src/cgne.jl @@ -10,7 +10,7 @@ # and is equivalent to applying 
the conjugate gradient method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method is also known as Craig's method, CGME, and other # names, and is described in @@ -28,12 +28,13 @@ export cgne, cgne! - """ (x, stats) = cgne(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + N=I, ldiv::Bool=false, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,11 +43,11 @@ Solve the consistent linear system Ax + √λs = b -using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CG to the normal equations of the second kind - (AAᵀ + λI) y = b + (AAᴴ + λI) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -60,10 +61,29 @@ CGNE produces monotonic errors ‖x-x*‖₂ but not residuals ‖r‖₂. It is formally equivalent to CRAIG, though can be slightly less accurate, but simpler to implement. Only the x-part of the solution is returned. -A preconditioner M may be provided in the form of a linear operator. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -72,12 +92,6 @@ and `false` otherwise. """ function cgne end -function cgne(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgneSolver(A, b) - cgne!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cgne!(solver::CgneSolver, A, b; kwargs...) @@ -87,113 +101,154 @@ See [`CgneSolver`](@ref) for more details about the `solver`. """ function cgne! 
end -function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGNE: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :z, S, m) - allocate_if(λ > 0, solver, :s, S, m) - x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats - rNorms = stats.residuals - reset!(stats) - z = MisI ? r : solver.z - - x .= zero(FC) - r .= b - MisI || mulorldiv!(z, M, r, ldiv) - rNorm = @knrm2(m, r) # Marginally faster than norm(r) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_cgne = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_cgne = (:(; N = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cgne = mapreduce(extract_parameters, vcat, def_kwargs_cgne) + +args_cgne = (:A, :b) +kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cgne($(def_args_cgne...); $(def_kwargs_cgne...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = 
CgneSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgne!(solver, $(args_cgne...); $(kwargs_cgne...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - λ > 0 && (s .= r) - mul!(p, Aᵀ, z) - - # Use ‖p‖ to detect inconsistent system. - # An inconsistent system will necessarily have AA' singular. - # Because CGNE is equivalent to CG applied to AA'y = b, there will be a - # conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this - # implementation, p is a substitute for A'u. - pNorm = @knrm2(n, p) - - γ = @kdotr(m, r, z) # Faster than γ = dot(r, z) - iter = 0 - itmax == 0 && (itmax = m + n) - - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems. - (verbose > 0) && @printf("%5s %8s\n", "k", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm) - - status = "unknown" - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) - tired = iter ≥ itmax - user_requested_exit = false - - while ! 
(solved || inconsistent || tired || user_requested_exit) - mul!(q, A, p) - λ > 0 && @kaxpy!(m, λ, s, q) - δ = @kdotr(n, p, p) # Faster than dot(p, p) - λ > 0 && (δ += λ * @kdotr(m, s, s)) - α = γ / δ - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, q, r) # Faster than r = r - α * q - MisI || mulorldiv!(z, M, r, ldiv) - γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z) - β = γ_next / γ - mul!(Aᵀz, Aᵀ, z) - @kaxpby!(n, one(FC), Aᵀz, β, p) # Faster than p = Aᵀz + β * p - pNorm = @knrm2(n, p) - if λ > 0 - @kaxpby!(m, one(FC), r, β, s) # s = r + β * s - end - γ = γ_next - rNorm = sqrt(γ_next) + + function cgne!(solver :: CgneSolver{T,FC,S}, $(def_args_cgne...); $(def_kwargs_cgne...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CGNE: system of %d equations in %d variables\n", m, n) + + # Tests N = Iₙ + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!NisI, solver, :z, S, m) + allocate_if(λ > 0, solver, :s, S, m) + x, p, Aᴴz, r, q, s, stats = solver.x, solver.p, solver.Aᴴz, solver.r, solver.q, solver.s, solver.stats + rNorms = stats.residuals + reset!(stats) + z = NisI ? 
r : solver.z + + x .= zero(FC) + r .= b + NisI || mulorldiv!(z, N, r, ldiv) + rNorm = @knrm2(m, r) # Marginally faster than norm(r) history && push!(rNorms, rNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver + end + λ > 0 && (s .= r) + mul!(p, Aᴴ, z) + + # Use ‖p‖ to detect inconsistent system. + # An inconsistent system will necessarily have AA' singular. + # Because CGNE is equivalent to CG applied to AA'y = b, there will be a + # conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this + # implementation, p is a substitute for A'u. + pNorm = @knrm2(n, p) + + γ = @kdotr(m, r, z) # Faster than γ = dot(r, z) + iter = 0 + itmax == 0 && (itmax = m + n) - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems. + (verbose > 0) && @printf(iostream, "%5s %8s %5s\n", "k", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %.2fs\n", iter, rNorm, ktimer(start_time)) - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ɛ_c - solved = resid_decrease_lim || resid_decrease_mach + status = "unknown" + solved = rNorm ≤ ɛ_c inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! 
(solved || inconsistent || tired || user_requested_exit || overtimed) + mul!(q, A, p) + λ > 0 && @kaxpy!(m, λ, s, q) + δ = @kdotr(n, p, p) # Faster than dot(p, p) + λ > 0 && (δ += λ * @kdotr(m, s, s)) + α = γ / δ + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + NisI || mulorldiv!(z, N, r, ldiv) + γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z) + β = γ_next / γ + mul!(Aᴴz, Aᴴ, z) + @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p + pNorm = @knrm2(n, p) + if λ > 0 + @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + end + γ = γ_next + rNorm = sqrt(γ_next) + history && push!(rNorms, rNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ɛ_c + solved = resid_decrease_lim || resid_decrease_mach + inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + inconsistent && (status = "system probably inconsistent") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - inconsistent && (status = "system probably inconsistent") - solved && (status = "solution good enough 
given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/cgs.jl b/src/cgs.jl index c1eb1056e..e95e74d17 100644 --- a/src/cgs.jl +++ b/src/cgs.jl @@ -11,17 +11,23 @@ export cgs, cgs! """ - (x, stats) = cgs(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = cgs(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, M=I, N=I, + ldiv::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using conjugate gradient squared algorithm. + (x, stats) = cgs(A, b, x0::AbstractVector; kwargs...) + +CGS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using CGS. CGS requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. From "Iterative Methods for Sparse Linear Systems (Y. Saad)" : @@ -38,16 +44,34 @@ to become inaccurate. TFQMR and BICGSTAB were developed to remedy this difficulty.» -This implementation allows a left preconditioner M and a right preconditioner N. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -CGS can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = cgs(A, b, x0; kwargs...) 
+* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -55,18 +79,6 @@ and `false` otherwise. """ function cgs end -function cgs(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CgsSolver(A, b) - cgs!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cgs(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgsSolver(A, b) - cgs!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = cgs!(solver::CgsSolver, A, b; kwargs...) solver = cgs!(solver::CgsSolver, A, b, x0; kwargs...) @@ -77,153 +89,204 @@ See [`CgsSolver`](@ref) for more details about the `solver`. """ function cgs! end -function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cgs!(solver, A, b; kwargs...) - return solver -end - -function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGS: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :vw, S, n) - allocate_if(!NisI, solver, :yz, S, n) - Δx, x, r, u, p, q, ts, stats = solver.Δx, solver.x, solver.r, solver.u, solver.p, solver.q, solver.ts, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - t = s = solver.ts - v = MisI ? t : solver.vw - w = MisI ? s : solver.vw - y = NisI ? p : solver.yz - z = NisI ? u : solver.yz - r₀ = MisI ? 
r : solver.ts - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - else - r₀ .= b +def_args_cgs = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cgs = (:(x0::AbstractVector),) + +def_kwargs_cgs = (:(; c::AbstractVector{FC} = b ), + :(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cgs = mapreduce(extract_parameters, vcat, def_kwargs_cgs) + +args_cgs = (:A, :b) +optargs_cgs = (:x0,) +kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cgs($(def_args_cgs...), $(def_optargs_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgsSolver(A, b) + warm_start!(solver, $(optargs_cgs...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgs!(solver, $(args_cgs...); $(kwargs_cgs...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - x .= zero(FC) # x₀ - MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ - - # Compute residual norm ‖r₀‖₂. 
- rNorm = @knrm2(n, r) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver - end - - # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩ - ρ = @kdot(n, c, r) - if ρ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = false, false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start =false - return solver + function cgs($(def_args_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgsSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgs!(solver, $(args_cgs...); $(kwargs_cgs...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - u .= r # u₀ - p .= r # p₀ - q .= zero(FC) # q₋₁ - - # Stopping criterion. 
- solved = rNorm ≤ ε - tired = iter ≥ itmax - breakdown = false - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - - NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ - mul!(t, A, y) # tₖ = Ayₖ - MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ - σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩ - α = ρ / σ # αₖ = ρₖ / σₖ - @kcopy!(n, u, q) # qₖ = uₖ - @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ - @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ - NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½ - @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ) - mul!(s, A, z) # sₖ = Azₖ - MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ - @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ) - ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩ - β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ - @kcopy!(n, r, u) # uₖ₊₁ = rₖ₊₁ - @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ - @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ - @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ - - # Update ρ. - ρ = ρ_next # ρₖ ← ρₖ₊₁ - - # Update iteration index. - iter = iter + 1 - - # Compute residual norm ‖rₖ‖₂. + function cgs!(solver :: CgsSolver{T,FC,S}, $(def_args_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CGS: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :vw, S, n) + allocate_if(!NisI, solver, :yz, S, n) + Δx, x, r, u, p, q, ts, stats = solver.Δx, solver.x, solver.r, solver.u, solver.p, solver.q, solver.ts, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + t = s = solver.ts + v = MisI ? t : solver.vw + w = MisI ? s : solver.vw + y = NisI ? p : solver.yz + z = NisI ? u : solver.yz + r₀ = MisI ? r : solver.ts + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + else + r₀ .= b + end + + x .= zero(FC) # x₀ + MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ + + # Compute residual norm ‖r₀‖₂. rNorm = @knrm2(n, r) history && push!(rNorms, rNorm) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩ + ρ = @kdot(n, c, r) + if ρ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = false, false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start =false + return solver + end + + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + u .= r # u₀ + p .= r # p₀ + q .= zero(FC) # q₋₁ + + # Stopping criterion. 
+ solved = rNorm ≤ ε tired = iter ≥ itmax - breakdown = (α == 0 || isnan(α)) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + breakdown = false + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ + mul!(t, A, y) # tₖ = Ayₖ + MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ + σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩ + α = ρ / σ # αₖ = ρₖ / σₖ + @kcopy!(n, u, q) # qₖ = uₖ + @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ + @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ + NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½ + @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ) + mul!(s, A, z) # sₖ = Azₖ + MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ + @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ) + ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩ + β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ + @kcopy!(n, r, u) # uₖ₊₁ = rₖ₊₁ + @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ + @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ + @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ + + # Update ρ. + ρ = ρ_next # ρₖ ← ρₖ₊₁ + + # Update iteration index. + iter = iter + 1 + + # Compute residual norm ‖rₖ‖₂. + rNorm = @knrm2(n, r) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + breakdown = (α == 0 || isnan(α)) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "breakdown αₖ == 0") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "breakdown αₖ == 0") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/cr.jl b/src/cr.jl index c678c7d29..96194f459 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -6,6 +6,9 @@ # E. Stiefel, Relaxationsmethoden bester Strategie zur Losung linearer Gleichungssysteme. # Commentarii Mathematici Helvetici, 29(1), pp. 157--179, 1955. # +# D. G. Luenberger, The conjugate residual method for constrained minimization problems. +# SIAM Journal on Numerical Analysis, 7(3), pp. 390--398, 1970. +# # M-A. Dahito and D. Orban, The Conjugate Residual Method in Linesearch and Trust-Region Methods. 
# SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019. # @@ -16,53 +19,63 @@ export cr, cr! """ (x, stats) = cr(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), γ::T=√eps(T), itmax::Int=0, - radius::T=zero(T), verbose::Int=0, linesearch::Bool=false, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + linesearch::Bool=false, γ::T=√eps(T), + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -A truncated version of Stiefel’s Conjugate Residual method to solve the symmetric linear system Ax = b or the least-squares problem min ‖b - Ax‖. -The matrix A must be positive semi-definite. + (x, stats) = cr(A, b, x0::AbstractVector; kwargs...) + +CR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. -A preconditioner M may be provided in the form of a linear operator and is assumed to be symmetric and positive definite. +A truncated version of Stiefel’s Conjugate Residual method to solve the Hermitian linear system Ax = b +of size n or the least-squares problem min ‖b - Ax‖ if A is singular. +The matrix A must be Hermitian semi-definite. M also indicates the weighted norm in which residuals are measured. -In a linesearch context, 'linesearch' must be set to 'true'. +#### Input arguments + +* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n; +* `b`: a vector of length n. -If `itmax=0`, the default number of iterations is set to `2 * n`, -with `n = length(b)`. +#### Optional argument -CR can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = cr(A, b, x0; kwargs...) 
+#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References * M. R. Hestenes and E. 
Stiefel, [*Methods of conjugate gradients for solving linear systems*](https://doi.org/10.6028/jres.049.044), Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952. * E. Stiefel, [*Relaxationsmethoden bester Strategie zur Losung linearer Gleichungssysteme*](https://doi.org/10.1007/BF02564277), Commentarii Mathematici Helvetici, 29(1), pp. 157--179, 1955. +* D. G. Luenberger, [*The conjugate residual method for constrained minimization problems*](https://doi.org/10.1137/0707032), SIAM Journal on Numerical Analysis, 7(3), pp. 390--398, 1970. * M-A. Dahito and D. Orban, [*The Conjugate Residual Method in Linesearch and Trust-Region Methods*](https://doi.org/10.1137/18M1204255), SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019. """ function cr end -function cr(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CrSolver(A, b) - cr!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CrSolver(A, b) - cr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cr!(solver::CrSolver, A, b; kwargs...) solver = cr!(solver::CrSolver, A, b, x0; kwargs...) @@ -73,286 +86,339 @@ See [`CrSolver`](@ref) for more details about the `solver`. """ function cr! end -function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cr!(solver, A, b; kwargs...) 
- return solver -end - -function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0, - radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CR: system of %d equations in %d variables\n", n, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace - allocate_if(!MisI, solver, :Mq, S, n) - Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats - warm_start = solver.warm_start - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Mq = MisI ? q : solver.Mq - - # Initial state. 
- x .= zero(FC) - if warm_start - mul!(p, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), p) - else - p .= b +def_args_cr = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cr = (:(x0::AbstractVector),) + +def_kwargs_cr = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; linesearch::Bool = false ), + :(; γ::T = √eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cr = mapreduce(extract_parameters, vcat, def_kwargs_cr) + +args_cr = (:A, :b) +optargs_cr = (:x0,) +kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cr($(def_args_cr...), $(def_optargs_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrSolver(A, b) + warm_start!(solver, $(optargs_cr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cr!(solver, $(args_cr...); $(kwargs_cr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - mulorldiv!(r, M, p, ldiv) - mul!(Ar, A, r) - ρ = @kdotr(n, r, Ar) - - rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖ - history && push!(rNorms, rNorm) # Values of ‖r‖ - - if ρ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(ArNorms, zero(T)) - solver.warm_start = false - return solver + + function cr($(def_args_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cr!(solver, $(args_cr...); $(kwargs_cr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - p .= r - q .= Ar - (verbose > 0) && (m = 
zero(T)) # quadratic model - - iter = 0 - itmax == 0 && (itmax = 2 * n) - - rNorm² = rNorm * rNorm - pNorm = rNorm - pNorm² = rNorm² - pr = rNorm² - abspr = pr - pAp = ρ - abspAp = abs(pAp) - xNorm = zero(T) - ArNorm = @knrm2(n, Ar) # ‖Ar‖ - history && push!(ArNorms, ArNorm) - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad") - kdisplay(iter, verbose) && @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) - - descent = pr > 0 # pᵀr > 0 means p is a descent direction - solved = rNorm ≤ ε - tired = iter ≥ itmax - on_boundary = false - npcurv = false - status = "unknown" - user_requested_exit = false - - while ! (solved || tired || user_requested_exit) - if linesearch - if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) - npcurv = true - (verbose > 0) && @printf("nonpositive curvature detected: pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) - stats.solved = solved - stats.inconsistent = false - stats.status = "nonpositive curvature" - return solver - end - elseif pAp ≤ 0 && radius == 0 - error("Indefinite system and no trust region") + + function cr!(solver :: CrSolver{T,FC,S}, $(def_args_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") + (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace + allocate_if(!MisI, solver, :Mq, S, n) + Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats + warm_start = solver.warm_start + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Mq = MisI ? q : solver.Mq + + # Initial state. + x .= zero(FC) + if warm_start + mul!(p, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), p) + else + p .= b end - MisI || mulorldiv!(Mq, M, q, ldiv) - - if radius > 0 - (verbose > 0) && @printf("radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm) - # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2) - xNorm² = xNorm * xNorm - t = to_boundary(x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²) - t1 = maximum(t) # > 0 - t2 = minimum(t) # < 0 - tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) - (verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) - - if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᵀAp ≃ 0 - npcurv = true # nonpositive curvature - (verbose > 0) && @printf("pᵀAp = %8.1e ≃ 0\n", pAp) - if abspr ≤ γ * pNorm * rNorm # pᵀr ≃ 0 - (verbose > 0) && @printf("pᵀr = %8.1e ≃ 0, redefining p := r\n", pr) - p = r # - ∇q(x) - q = Ar - # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᵀAr - # 1) if rᵀAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᵀAr - # 2) if rᵀAr ≤ 0, the quadratic decreases to -∞ in the direction r - if ρ > 0 # case 1 - (verbose > 0) && @printf("quadratic is convex in direction r, curv = %8.1e\n", ρ) - α = min(tr, rNorm² / ρ) - else # case 2 - (verbose > 0) && @printf("r is a direction of nonpositive curvature: %8.1e\n", ρ) + mulorldiv!(r, M, p, ldiv) + mul!(Ar, A, r) + ρ = @kdotr(n, r, Ar) + + rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖ + history && push!(rNorms, rNorm) # Values of ‖r‖ + + if ρ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) 
+ stats.status = "x = 0 is a zero-residual solution" + history && push!(ArNorms, zero(T)) + solver.warm_start = false + return solver + end + p .= r + q .= Ar + (verbose > 0) && (m = zero(T)) # quadratic model + + iter = 0 + itmax == 0 && (itmax = 2 * n) + + rNorm² = rNorm * rNorm + pNorm = rNorm + pNorm² = rNorm² + pr = rNorm² + abspr = pr + pAp = ρ + abspAp = abs(pAp) + xNorm = zero(T) + ArNorm = @knrm2(n, Ar) # ‖Ar‖ + history && push!(ArNorms, ArNorm) + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %5s\n", "k", "‖x‖", "‖r‖", "quad", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.1e %8.1e %8.1e %.2fs\n", iter, xNorm, rNorm, m, ktimer(start_time)) + + descent = pr > 0 # pᴴr > 0 means p is a descent direction + solved = rNorm ≤ ε + tired = iter ≥ itmax + on_boundary = false + npcurv = false + status = "unknown" + user_requested_exit = false + overtimed = false + + while ! (solved || tired || user_requested_exit || overtimed) + if linesearch + if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) + npcurv = true + (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "nonpositive curvature" + return solver + end + elseif pAp ≤ 0 && radius == 0 + error("Indefinite system and no trust region") + end + MisI || mulorldiv!(Mq, M, q, ldiv) + + if radius > 0 + (verbose > 0) && @printf(iostream, "radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm) + # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2) + xNorm² = xNorm * xNorm + t = to_boundary(n, x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²) + t1 = maximum(t) # > 0 + t2 = minimum(t) # < 0 + tr = maximum(to_boundary(n, x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) + (verbose > 0) && @printf(iostream, "t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) + + if abspAp ≤ γ * pNorm 
* @knrm2(n, q) # pᴴAp ≃ 0 + npcurv = true # nonpositive curvature + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp) + if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0 + (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr) + p = r # - ∇q(x) + q = Ar + # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr + # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr + # 2) if rᴴAr ≤ 0, the quadratic decreases to -∞ in the direction r + if ρ > 0 # case 1 + (verbose > 0) && @printf(iostream, "quadratic is convex in direction r, curv = %8.1e\n", ρ) + α = min(tr, rNorm² / ρ) + else # case 2 + (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ) + α = tr + end + else + # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp + # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᴴAr + # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed + α = descent ? t1 : t2 + ρ > 0 && (tr = min(tr, rNorm² / ρ)) + Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0 + if Δ > 0 # direction r engenders a better decrease + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") + p = r + q = Ar + α = tr + else + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + end + end + + elseif pAp > 0 && ρ > 0 # no negative curvature + (verbose > 0) && @printf(iostream, "positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) + α = ρ / @kdotr(n, q, Mq) + if α ≥ t1 + α = t1 + on_boundary = true + end + + elseif pAp > 0 && ρ < 0 + npcurv = true + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ) + # q_p is minimal for α_p = rᴴp / pᴴAp + α = descent ? 
min(t1, pr / pAp) : max(t2, pr / pAp) + Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 + if Δ > 0 + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") + p = r + q = Ar α = tr + else + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end - else - # q_p = q(x + α_p * p) - q(x) = -α_p * rᵀp + ½ (α_p)² * pᵀAp - # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᵀAr - # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed + + elseif pAp < 0 && ρ > 0 + npcurv = true + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ) α = descent ? t1 : t2 - ρ > 0 && (tr = min(tr, rNorm² / ρ)) - Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᵀAp = 0 - if Δ > 0 # direction r engenders a better decrease - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") + tr = min(tr, rNorm² / ρ) + Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 + if Δ > 0 + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r q = Ar α = tr else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end - end - - elseif pAp > 0 && ρ > 0 # no negative curvature - (verbose > 0) && @printf("positive curvatures along p and r. 
pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) - α = ρ / @kdotr(n, q, Mq) - if α ≥ t1 - α = t1 - on_boundary = true - end - - elseif pAp > 0 && ρ < 0 - npcurv = true - (verbose > 0) && @printf("pᵀAp = %8.1e > 0 and rᵀAr = %8.1e < 0\n", pAp, ρ) - # q_p is minimal for α_p = rᵀp / pᵀAp - α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp) - Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 - if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") - p = r - q = Ar - α = tr - else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) - end - elseif pAp < 0 && ρ > 0 - npcurv = true - (verbose > 0) && @printf("pᵀAp = %8.1e < 0 and rᵀAr = %8.1e > 0\n", pAp, ρ) - α = descent ? t1 : t2 - tr = min(tr, rNorm² / ρ) - Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 - if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") - p = r - q = Ar - α = tr - else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + elseif pAp < 0 && ρ < 0 + npcurv = true + (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) + α = descent ? t1 : t2 + Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 + if Δ > 0 + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") + p = r + q = Ar + α = tr + else + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + end end - elseif pAp < 0 && ρ < 0 - npcurv = true - (verbose > 0) && @printf("negative curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) - α = descent ? 
t1 : t2 - Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 - if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") - p = r - q = Ar - α = tr - else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) - end + elseif radius == 0 + α = ρ / @kdotr(n, q, Mq) # step end - elseif radius == 0 - α = ρ / @kdotr(n, q, Mq) # step - end - - @kaxpy!(n, α, p, x) - xNorm = @knrm2(n, x) - xNorm ≈ radius && (on_boundary = true) - @kaxpy!(n, -α, Mq, r) # residual - if MisI - rNorm² = @kdotr(n, r, r) - rNorm = sqrt(rNorm²) - else - ω = sqrt(α) * sqrt(ρ) - rNorm = sqrt(abs(rNorm + ω)) * sqrt(abs(rNorm - ω)) - rNorm² = rNorm * rNorm # rNorm² = rNorm² - α * ρ - end - history && push!(rNorms, rNorm) - mul!(Ar, A, r) - ArNorm = @knrm2(n, Ar) - history && push!(ArNorms, ArNorm) - - iter = iter + 1 - if kdisplay(iter, verbose) - m = m - α * pr + α^2 * pAp / 2 - @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) - end - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + @kaxpy!(n, α, p, x) + xNorm = @knrm2(n, x) + xNorm ≈ radius && (on_boundary = true) + @kaxpy!(n, -α, Mq, r) # residual + if MisI + rNorm² = @kdotr(n, r, r) + rNorm = sqrt(rNorm²) + else + ω = sqrt(α) * sqrt(ρ) + rNorm = sqrt(abs(rNorm + ω)) * sqrt(abs(rNorm - ω)) + rNorm² = rNorm * rNorm # rNorm² = rNorm² - α * ρ + end + history && push!(rNorms, rNorm) + mul!(Ar, A, r) + ArNorm = @knrm2(n, Ar) + history && push!(ArNorms, ArNorm) + + iter = iter + 1 + if kdisplay(iter, verbose) + m = m - α * pr + α^2 * pAp / 2 + @printf(iostream, "%5d %8.1e %8.1e %8.1e %.2fs\n", iter, xNorm, rNorm, m, ktimer(start_time)) + end - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - resid_decrease = resid_decrease_lim || resid_decrease_mach - solved = resid_decrease || npcurv || on_boundary - tired = iter ≥ itmax + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + resid_decrease = resid_decrease_lim || resid_decrease_mach + solved = resid_decrease || npcurv || on_boundary + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + (solved || tired || user_requested_exit || overtimed) && continue + ρbar = ρ + ρ = @kdotr(n, r, Ar) + β = ρ / ρbar # step for the direction computation + @kaxpby!(n, one(FC), r, β, p) + @kaxpby!(n, one(FC), Ar, β, q) + + pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm² + if pNorm² > sqrt(eps(T)) + pNorm = sqrt(pNorm²) + elseif abs(pNorm²) ≤ sqrt(eps(T)) + pNorm = zero(T) + else + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "solver encountered numerical issues" + solver.warm_start = false + return solver + end + pr = rNorm² + β * pr - β * α * pAp # pᴴr + abspr = abs(pr) + pAp = ρ + β^2 * pAp # pᴴq + abspAp = abs(pAp) + descent = pr > 0 - (solved || tired || user_requested_exit) && continue - ρbar = ρ - ρ = @kdotr(n, r, Ar) - β = ρ / ρbar # step for the direction computation - @kaxpby!(n, one(FC), r, β, p) - @kaxpby!(n, one(FC), Ar, β, q) - - pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm² - if pNorm² > sqrt(eps(T)) - pNorm = sqrt(pNorm²) - elseif abs(pNorm²) ≤ sqrt(eps(T)) - pNorm = zero(T) - else - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = "solver encountered numerical issues" - solver.warm_start = false - return solver end - pr = rNorm² + β * pr - β * α * pAp # pᵀr - abspr = abs(pr) - pAp = ρ + β^2 * pAp # pᵀq - abspAp = abs(pAp) - descent = pr > 0 + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + on_boundary && (status = "on trust-region boundary") + npcurv && (status = "nonpositive curvature") + 
solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - on_boundary && (status = "on trust-region boundary") - npcurv && (status = "nonpositive curvature") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/craig.jl b/src/craig.jl index 20597ea02..46e8f93e5 100644 --- a/src/craig.jl +++ b/src/craig.jl @@ -11,7 +11,7 @@ # and is equivalent to applying the conjugate gradient method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method, sometimes known under the name CRAIG, is the # Golub-Kahan implementation of CGNE, and is described in @@ -32,13 +32,15 @@ export craig, craig! 
- """ (x, y, stats) = craig(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T), - btol::T=√eps(T), rtol::T=√eps(T), conlim::T=1/√eps(T), itmax::Int=0, - verbose::Int=0, transfer_to_lsqr::Bool=false, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + transfer_to_lsqr::Bool=false, sqd::Bool=false, + λ::T=zero(T), btol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -47,19 +49,19 @@ Find the least-norm solution of the consistent linear system Ax + λ²y = b -using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a +of size m × n using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a regularization parameter. This method is equivalent to CGNE but is more stable. For a system in the form Ax = b, Craig's method is equivalent to applying -CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange +CG to AAᴴy = b and recovering x = Aᴴy. Note that y are the Lagrange multipliers of the least-norm problem minimize ‖x‖ s.t. Ax = b. If `λ > 0`, CRAIG solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -70,12 +72,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -CRAIG is then equivalent to applying CG to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +CRAIG is then equivalent to applying CG to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. 
If `λ = 0`, CRAIG solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -86,8 +88,35 @@ In this case, `M` can still be specified and indicates the weighted norm in whic In this implementation, both the x and y-parts of the solution are returned. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -96,12 +125,6 @@ and `false` otherwise. """ function craig end -function craig(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CraigSolver(A, b) - craig!(solver, A, b; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = craig!(solver::CraigSolver, A, b; kwargs...) @@ -111,192 +134,130 @@ See [`CraigSolver`](@ref) for more details about the `solver`. """ function craig! end -function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0, - verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRAIG: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. 
- allocate_if(!MisI, solver, :u , S, m) - allocate_if(!NisI, solver, :v , S, n) - allocate_if(λ > 0, solver, :w2, S, n) - x, Nv, Aᵀu, y, w = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w - Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats - rNorms = stats.residuals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - x .= zero(FC) - y .= zero(FC) - - Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - rNorm = β₁ - history && push!(rNorms, rNorm) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_craig = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_craig = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_lsqr::Bool = false), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_craig = mapreduce(extract_parameters, vcat, def_kwargs_craig) + +args_craig = (:A, :b) +kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function craig($(def_args_craig...); $(def_kwargs_craig...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CraigSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + craig!(solver, $(args_craig...); $(kwargs_craig...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - β₁² = β₁^2 - β = β₁ - θ = β₁ # θ will differ from β when there is regularization (λ > 0). - ξ = -one(T) # Most recent component of x in Range(V). 
- δ = λ - ρ_prev = one(T) - - # Initialize Golub-Kahan process. - # β₁Mu₁ = b. - @kscal!(m, one(FC) / β₁, u) - MisI || @kscal!(m, one(FC) / β₁, Mu) - - Nv .= zero(FC) - w .= zero(FC) # Used to update y. - - λ > 0 && (w2 .= zero(FC)) - - Anorm² = zero(T) # Estimate of ‖A‖²_F. - Anorm = zero(T) - Dnorm² = zero(T) # Estimate of ‖(AᵀA)⁻¹‖². - Acond = zero(T) # Estimate of cond(A). - xNorm² = zero(T) # Estimate of ‖x‖². - xNorm = zero(T) - - iter = 0 - itmax == 0 && (itmax = m + n) - - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol # Stopping tolerance for inconsistent systems. - ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators. - (verbose > 0) && @printf("%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond) - - bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²) - - status = "unknown" - - solved_lim = bkwerr ≤ btol - solved_mach = one(T) + bkwerr ≤ one(T) - solved_resid_tol = rNorm ≤ ɛ_c - solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁ - solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim - - ill_cond = ill_cond_mach = ill_cond_lim = false - - inconsistent = false - tired = iter ≥ itmax - user_requested_exit = false - - while ! (solved || inconsistent || ill_cond || tired || user_requested_exit) - # Generate the next Golub-Kahan vectors - # 1. 
αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α == 0 - inconsistent = true - continue - end - @kscal!(n, one(FC) / α, v) - NisI || @kscal!(n, one(FC) / α, Nv) - - Anorm² += α * α + λ * λ - - if λ > 0 - # Givens rotation to zero out the δ in position (k, 2k): - # k-1 k 2k k 2k k-1 k 2k - # k [ θ α δ ] [ c₁ s₁ ] = [ θ ρ ] - # k+1 [ β ] [ s₁ -c₁ ] [ θ+ γ ] - (c₁, s₁, ρ) = sym_givens(α, δ) - else - ρ = α - end - ξ = -θ / ρ * ξ - - if λ > 0 - # w1 = c₁ * v + s₁ * w2 - # w2 = s₁ * v - c₁ * w2 - # x = x + ξ * w1 - @kaxpy!(n, ξ * c₁, v, x) - @kaxpy!(n, ξ * s₁, w2, x) - @kaxpby!(n, s₁, v, -c₁, w2) - else - @kaxpy!(n, ξ, v, x) # x = x + ξ * v - end + function craig!(solver :: CraigSolver{T,FC,S}, $(def_args_craig...); $(def_kwargs_craig...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRAIG: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) - # Recur y. - @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w - @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w + # Tests M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) - Dnorm² += @knrm2(m, w) + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") - # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ allocate_if(!MisI, solver, :u , S, m) + allocate_if(!NisI, solver, :v , S, n) + allocate_if(λ > 0, solver, :w2, S, n) + x, Nv, Aᴴu, y, w = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w + Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats + rNorms = stats.residuals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + x .= zero(FC) + y .= zero(FC) + + Mu .= b MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC) / β, u) - MisI || @kscal!(m, one(FC) / β, Mu) + β₁ = sqrt(@kdotr(m, u, Mu)) + rNorm = β₁ + history && push!(rNorms, rNorm) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver end + β₁² = β₁^2 + β = β₁ + θ = β₁ # θ will differ from β when there is regularization (λ > 0). + ξ = -one(T) # Most recent component of x in Range(V). + δ = λ + ρ_prev = one(T) - # Finish updates from the first Givens rotation. - if λ > 0 - θ = β * c₁ - γ = β * s₁ - else - θ = β - end + # Initialize Golub-Kahan process. + # β₁Mu₁ = b. + @kscal!(m, one(FC) / β₁, u) + MisI || @kscal!(m, one(FC) / β₁, Mu) - if λ > 0 - # Givens rotation to zero out the γ in position (k+1, 2k) - # 2k 2k+1 2k 2k+1 2k 2k+1 - # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ] - # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ] - c₂, s₂, δ = sym_givens(λ, γ) - @kscal!(n, s₂, w2) - end + Nv .= zero(FC) + w .= zero(FC) # Used to update y. - Anorm² += β * β - Anorm = sqrt(Anorm²) - Acond = Anorm * sqrt(Dnorm²) - xNorm² += ξ * ξ - xNorm = sqrt(xNorm²) - rNorm = β * abs(ξ) # r = - β * ξ * u - λ > 0 && (rNorm *= abs(c₁)) # r = -c₁ * β * ξ * u when λ > 0. - history && push!(rNorms, rNorm) - iter = iter + 1 + λ > 0 && (w2 .= zero(FC)) + + Anorm² = zero(T) # Estimate of ‖A‖²_F. + Anorm = zero(T) + Dnorm² = zero(T) # Estimate of ‖(AᴴA)⁻¹‖². + Acond = zero(T) # Estimate of cond(A). + xNorm² = zero(T) # Estimate of ‖x‖². 
+ xNorm = zero(T) - bkwerr = rNorm / sqrt(β₁² + Anorm² * xNorm²) + iter = 0 + itmax == 0 && (itmax = m + n) - ρ_prev = ρ # Only differs from α if λ > 0. + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol # Stopping tolerance for inconsistent systems. + ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators. + (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %8s %8s %7s %5s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8s %7s %.2fs\n", iter, rNorm, xNorm, Anorm, Acond, " ✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time)) - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β) + bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²) + + status = "unknown" solved_lim = bkwerr ≤ btol solved_mach = one(T) + bkwerr ≤ one(T) @@ -304,34 +265,141 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁ solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim - ill_cond_mach = one(T) + one(T) / Acond ≤ one(T) - ill_cond_lim = 1 / Acond ≤ ctol - ill_cond = ill_cond_mach | ill_cond_lim + ill_cond = ill_cond_mach = ill_cond_lim = false - user_requested_exit = callback(solver) :: Bool inconsistent = false tired = iter ≥ itmax - end - (verbose > 0) && @printf("\n") + user_requested_exit = false + overtimed = false + + while ! (solved || inconsistent || ill_cond || tired || user_requested_exit || overtimed) + # Generate the next Golub-Kahan vectors + # 1. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α == 0 + inconsistent = true + continue + end + @kscal!(n, one(FC) / α, v) + NisI || @kscal!(n, one(FC) / α, Nv) + + Anorm² += α * α + λ * λ + + if λ > 0 + # Givens rotation to zero out the δ in position (k, 2k): + # k-1 k 2k k 2k k-1 k 2k + # k [ θ α δ ] [ c₁ s₁ ] = [ θ ρ ] + # k+1 [ β ] [ s₁ -c₁ ] [ θ+ γ ] + (c₁, s₁, ρ) = sym_givens(α, δ) + else + ρ = α + end + + ξ = -θ / ρ * ξ + + if λ > 0 + # w1 = c₁ * v + s₁ * w2 + # w2 = s₁ * v - c₁ * w2 + # x = x + ξ * w1 + @kaxpy!(n, ξ * c₁, v, x) + @kaxpy!(n, ξ * s₁, w2, x) + @kaxpby!(n, s₁, v, -c₁, w2) + else + @kaxpy!(n, ξ, v, x) # x = x + ξ * v + end + + # Recur y. + @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w + @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w + + Dnorm² += @knrm2(m, w) + + # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC) / β, u) + MisI || @kscal!(m, one(FC) / β, Mu) + end + + # Finish updates from the first Givens rotation. + if λ > 0 + θ = β * c₁ + γ = β * s₁ + else + θ = β + end + + if λ > 0 + # Givens rotation to zero out the γ in position (k+1, 2k) + # 2k 2k+1 2k 2k+1 2k 2k+1 + # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ] + # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ] + c₂, s₂, δ = sym_givens(λ, γ) + @kscal!(n, s₂, w2) + end + + Anorm² += β * β + Anorm = sqrt(Anorm²) + Acond = Anorm * sqrt(Dnorm²) + xNorm² += ξ * ξ + xNorm = sqrt(xNorm²) + rNorm = β * abs(ξ) # r = - β * ξ * u + λ > 0 && (rNorm *= abs(c₁)) # r = -c₁ * β * ξ * u when λ > 0. + history && push!(rNorms, rNorm) + iter = iter + 1 + + bkwerr = rNorm / sqrt(β₁² + Anorm² * xNorm²) + + ρ_prev = ρ # Only differs from α if λ > 0. 
+ + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e %.2fs\n", iter, rNorm, xNorm, Anorm, Acond, α, β, ktimer(start_time)) + + solved_lim = bkwerr ≤ btol + solved_mach = one(T) + bkwerr ≤ one(T) + solved_resid_tol = rNorm ≤ ɛ_c + solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁ + solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim + + ill_cond_mach = one(T) + one(T) / Acond ≤ one(T) + ill_cond_lim = 1 / Acond ≤ ctol + ill_cond = ill_cond_mach | ill_cond_lim + + user_requested_exit = callback(solver) :: Bool + inconsistent = false + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") - # transfer to LSQR point if requested - if λ > 0 && transfer_to_lsqr - ξ *= -θ / δ - @kaxpy!(n, ξ, w2, x) - # TODO: update y - end + # transfer to LSQR point if requested + if λ > 0 && transfer_to_lsqr + ξ *= -θ / δ + @kaxpy!(n, ξ, w2, x) + # TODO: update y + end - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough for the tolerances given") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - inconsistent && (status = "system may be inconsistent") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough for the tolerances given") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + inconsistent && (status = "system may be inconsistent") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time 
limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/craigmr.jl b/src/craigmr.jl index e08bb9c36..5f05aa2ae 100644 --- a/src/craigmr.jl +++ b/src/craigmr.jl @@ -10,7 +10,7 @@ # and is equivalent to applying the conjugate residual method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method is equivalent to CRMR, and is described in # @@ -26,12 +26,13 @@ export craigmr, craigmr! - """ (x, y, stats) = craigmr(A, b::AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -40,11 +41,11 @@ Solve the consistent linear system Ax + λ²y = b -using the CRAIGMR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the CRAIGMR method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying the Conjugate Residuals method to the normal equations of the second kind - (AAᵀ + λ²I) y = b + (AAᴴ + λ²I) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -52,7 +53,7 @@ but is more stable. When λ = 0, this method solves the minimum-norm problem If `λ > 0`, CRAIGMR solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -63,12 +64,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. 
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, CRAIGMR solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -82,8 +83,32 @@ It is formally equivalent to CRMR, though can be slightly more accurate, and intricate to implement. Both the x- and y-parts of the solution are returned. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -92,12 +117,6 @@ and `false` otherwise. """ function craigmr end -function craigmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CraigmrSolver(A, b) - craigmr!(solver, A, b; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = craigmr!(solver::CraigmrSolver, A, b; kwargs...) @@ -107,230 +126,274 @@ See [`CraigmrSolver`](@ref) for more details about the `solver`. """ function craigmr! end -function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRAIGMR: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. 
- allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - allocate_if(λ > 0, solver, :q, S, n) - x, Nv, Aᵀu, d, y, Mu = solver.x, solver.Nv, solver.Aᵀu, solver.d, solver.y, solver.Mu - w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - # Compute y such that AAᵀy = b. Then recover x = Aᵀy. - x .= zero(FC) - y .= zero(FC) - Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - history && push!(rNorms, β) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_craigmr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_craigmr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_craigmr = mapreduce(extract_parameters, vcat, def_kwargs_craigmr) + +args_craigmr = (:A, :b) +kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function craigmr($(def_args_craigmr...); $(def_kwargs_craigmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CraigmrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + craigmr!(solver, $(args_craigmr...); $(kwargs_craigmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initialize Golub-Kahan process. - # β₁Mu₁ = b. - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - # α₁Nv₁ = Aᵀu₁. 
- mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - Anorm² = α * α - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²) - - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - history && push!(rNorms, β) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a minimum least-squares solution" - return solver - end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - - # Regularization. - λₖ = λ # λ₁ = λ - cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ - cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁ - λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 - - if λ > 0 - (cpₖ, spₖ, αhat) = sym_givens(α, λₖ) - @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁ - else - αhat = α - end - - # Initialize other constants. - ζbar = β - ρbar = αhat - θ = zero(T) - rNorm = ζbar - history && push!(rNorms, rNorm) - ArNorm = α - history && push!(ArNorms, ArNorm) - - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - - wbar .= u - @kscal!(m, one(FC)/αhat, wbar) - w .= zero(FC) - d .= zero(FC) - - status = "unknown" - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) - tired = iter ≥ itmax - user_requested_exit = false - - while ! (solved || inconsistent || tired || user_requested_exit) - iter = iter + 1 - - # Generate next Golub-Kahan vectors. - # 1. 
βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + function craigmr!(solver :: CraigmrSolver{T,FC,S}, $(def_args_craigmr...); $(def_kwargs_craigmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRAIGMR: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + allocate_if(λ > 0, solver, :q, S, n) + x, Nv, Aᴴu, d, y, Mu = solver.x, solver.Nv, solver.Aᴴu, solver.d, solver.y, solver.Mu + w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + # Compute y such that AAᴴy = b. Then recover x = Aᴴy. 
+ x .= zero(FC) + y .= zero(FC) + Mu .= b MisI || mulorldiv!(u, M, Mu, ldiv) β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + history && push!(rNorms, β) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver + end + + # Initialize Golub-Kahan process. + # β₁Mu₁ = b. + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + # α₁Nv₁ = Aᴴu₁. + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + Anorm² = α * α + + iter = 0 + itmax == 0 && (itmax = m + n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, β, α, β, α, 0, 1, Anorm², ktimer(start_time)) + + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + history && push!(rNorms, β) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) - Anorm² = Anorm² + β * β # = ‖B_{k-1}‖² + # Regularization. 
+ λₖ = λ # λ₁ = λ + cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ + cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁ + λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 if λ > 0 - βhat = cpₖ * β - λₐᵤₓ = spₖ * β + (cpₖ, spₖ, αhat) = sym_givens(α, λₖ) + @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁ else - βhat = β + αhat = α end - # Continue QR factorization - # - # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : - # [ β 0 ] [ 0 ζbar ] - # - # k k+1 k k+1 k k+1 - # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] - # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] - # - # so that we obtain - # - # [ c s ] [ ζbar ] = [ ζ ] - # [ s -c ] [ 0 ] [ ζbar⁺ ] - (c, s, ρ) = sym_givens(ρbar, βhat) - ζ = c * ζbar - ζbar = s * ζbar - rNorm = abs(ζbar) + # Initialize other constants. + ζbar = β + ρbar = αhat + θ = zero(T) + rNorm = ζbar history && push!(rNorms, rNorm) + ArNorm = α + history && push!(ArNorms, ArNorm) - @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ - @kaxpy!(m, ζ, w, y) # y = y + ζ * w + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - if λ > 0 - # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁ - if iter == 1 - @kaxpy!(n, one(FC)/ρ, cpₖ * v, d) + wbar .= u + @kscal!(m, one(FC)/αhat, wbar) + w .= zero(FC) + d .= zero(FC) + + status = "unknown" + solved = rNorm ≤ ɛ_c + inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) + tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! (solved || inconsistent || tired || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Golub-Kahan vectors. + # 1. 
βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + end + + Anorm² = Anorm² + β * β # = ‖B_{k-1}‖² + + if λ > 0 + βhat = cpₖ * β + λₐᵤₓ = spₖ * β else - @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d) - @kaxpy!(n, one(FC)/ρ, spₖ * q, d) - @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + βhat = β end - else - # DₖRₖ = Vₖ - if iter == 1 - @kaxpy!(n, one(FC)/ρ, v, d) + + # Continue QR factorization + # + # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : + # [ β 0 ] [ 0 ζbar ] + # + # k k+1 k k+1 k k+1 + # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] + # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] + # + # so that we obtain + # + # [ c s ] [ ζbar ] = [ ζ ] + # [ s -c ] [ 0 ] [ ζbar⁺ ] + (c, s, ρ) = sym_givens(ρbar, βhat) + ζ = c * ζbar + ζbar = s * ζbar + rNorm = abs(ζbar) + history && push!(rNorms, rNorm) + + @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ + @kaxpy!(m, ζ, w, y) # y = y + ζ * w + + if λ > 0 + # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁ + if iter == 1 + @kaxpy!(n, one(FC)/ρ, cpₖ * v, d) + else + @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d) + @kaxpy!(n, one(FC)/ρ, spₖ * q, d) + @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + end else - @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d) + # DₖRₖ = Vₖ + if iter == 1 + @kaxpy!(n, one(FC)/ρ, v, d) + else + @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d) + end end - end - # xₖ = Dₖzₖ - @kaxpy!(n, ζ, d, x) + # xₖ = Dₖzₖ + @kaxpy!(n, ζ, d, x) - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - Anorm² = Anorm² + α * α # = ‖Lₖ‖ - ArNorm = α * β * abs(ζ/ρ) - history && push!(ArNorms, ArNorm) + # 2. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + Anorm² = Anorm² + α * α # = ‖Lₖ‖ + ArNorm = α * β * abs(ζ/ρ) + history && push!(ArNorms, ArNorm) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², ktimer(start_time)) - if λ > 0 - (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ) - @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ - (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁) - else - αhat = α - end + if λ > 0 + (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ) + @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ + (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁) + else + αhat = α + end - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha + end + θ = s * αhat + ρbar = -c * αhat + + user_requested_exit = callback(solver) :: Bool + solved = rNorm ≤ ɛ_c + inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns end - θ = s * αhat - ρbar = -c * αhat - - user_requested_exit = callback(solver) :: Bool - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) - tired = iter ≥ itmax + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "found approximate minimum-norm solution") + !tired && !solved && (status = "found approximate minimum least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit 
exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "found approximate minimum-norm solution") - !tired && !solved && (status = "found approximate minimum least-squares solution") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/crls.jl b/src/crls.jl index 6410fb836..bf43fa79b 100644 --- a/src/crls.jl +++ b/src/crls.jl @@ -5,7 +5,7 @@ # # equivalently, of the linear system # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # This implementation follows the formulation given in # @@ -20,12 +20,13 @@ export crls, crls! - """ (x, stats) = crls(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + itmax::Int=0, timemax::Float64=Inf, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -34,19 +35,41 @@ Solve the linear least-squares problem minimize ‖b - Ax‖₂² + λ‖x‖₂² -using the Conjugate Residuals (CR) method. This method is equivalent to -applying MINRES to the normal equations +of size m × n using the Conjugate Residuals (CR) method. +This method is equivalent to applying MINRES to the normal equations - (AᵀA + λI) x = Aᵀb. + (AᴴA + λI) x = Aᴴb. This implementation recurs the residual r := b - Ax. -CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. 
+CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. It is formally equivalent to LSMR, though can be substantially less accurate, but simpler to implement. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -54,12 +77,6 @@ and `false` otherwise. """ function crls end -function crls(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CrlsSolver(A, b) - crls!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = crls!(solver::CrlsSolver, A, b; kwargs...) @@ -69,143 +86,185 @@ See [`CrlsSolver`](@ref) for more details about the `solver`. """ function crls! end -function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRLS: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :Ms, S, m) - x, p, Ar, q = solver.x, solver.p, solver.Ar, solver.q - r, Ap, s, stats = solver.r, solver.Ap, solver.s, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Ms = MisI ? s : solver.Ms - Mr = MisI ? r : solver.Ms - MAp = MisI ? Ap : solver.Ms - - x .= zero(FC) - r .= b - bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. - rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. 
- history && push!(rNorms, rNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(ArNorms, zero(T)) - return solver +def_args_crls = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_crls = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_crls = mapreduce(extract_parameters, vcat, def_kwargs_crls) + +args_crls = (:A, :b) +kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function crls($(def_args_crls...); $(def_kwargs_crls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrlsSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + crls!(solver, $(args_crls...); $(kwargs_crls...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(Ar, Aᵀ, Mr) # - λ * x0 if x0 ≠ 0. - mul!(s, A, Ar) - MisI || mulorldiv!(Ms, M, s, ldiv) - - p .= Ar - Ap .= s - mul!(q, Aᵀ, Ms) # Ap - λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p - γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) - iter = 0 - itmax == 0 && (itmax = m + n) - - ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar) - λ > 0 && (γ += λ * ArNorm * ArNorm) - history && push!(ArNorms, ArNorm) - ε = atol + rtol * ArNorm - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - - status = "unknown" - on_boundary = false - solved = ArNorm ≤ ε - tired = iter ≥ itmax - psd = false - user_requested_exit = false - - while ! 
(solved || tired || user_requested_exit) - qNorm² = @kdotr(n, q, q) # dot(q, q) - α = γ / qNorm² - - # if a trust-region constraint is give, compute step to the boundary - # (note that α > 0 in CRLS) - if radius > 0 - pNorm = @knrm2(n, p) - if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p - psd = true # det(AᵀA) = 0 - p = Ar # p = Aᵀr - pNorm² = ArNorm * ArNorm - mul!(q, Aᵀ, s) - α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᵀr for α = ‖Ar‖²/γ - else - pNorm² = pNorm * pNorm - σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²)) - if α ≥ σ - α = σ - on_boundary = true - end - end + function crls!(solver :: CrlsSolver{T,FC,S}, $(def_args_crls...); $(def_kwargs_crls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRLS: system of %d equations in %d variables\n", m, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :Ms, S, m) + x, p, Ar, q = solver.x, solver.p, solver.Ar, solver.q + r, Ap, s, stats = solver.r, solver.Ap, solver.s, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Ms = MisI ? s : solver.Ms + Mr = MisI ? r : solver.Ms + MAp = MisI ? Ap : solver.Ms + + x .= zero(FC) + r .= b + bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. 
+ rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. + history && push!(rNorms, rNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(ArNorms, zero(T)) + return solver end - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q - ArNorm = @knrm2(n, Ar) - solved = psd || on_boundary - solved && continue - @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap + MisI || mulorldiv!(Mr, M, r, ldiv) + mul!(Ar, Aᴴ, Mr) # - λ * x0 if x0 ≠ 0. mul!(s, A, Ar) MisI || mulorldiv!(Ms, M, s, ldiv) - γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, s) - λ > 0 && (γ_next += λ * ArNorm * ArNorm) - β = γ_next / γ - - @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p - @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap - MisI || mulorldiv!(MAp, M, Ap, ldiv) - mul!(q, Aᵀ, MAp) + + p .= Ar + Ap .= s + mul!(q, Aᴴ, Ms) # Ap λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p + γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) + iter = 0 + itmax == 0 && (itmax = m + n) - γ = γ_next - if λ > 0 - rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x)) - else - rNorm = @knrm2(m, r) # norm(r) - end - history && push!(rNorms, rNorm) + ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar) + λ > 0 && (γ += λ * ArNorm * ArNorm) history && push!(ArNorms, ArNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - user_requested_exit = callback(solver) :: Bool - solved = (ArNorm ≤ ε) || on_boundary + ε = atol + rtol * ArNorm + (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + + status = "unknown" + on_boundary = false + solved = ArNorm ≤ ε tired = iter ≥ itmax + psd = false + user_requested_exit = false 
+ overtimed = false + + while ! (solved || tired || user_requested_exit || overtimed) + qNorm² = @kdotr(n, q, q) # dot(q, q) + α = γ / qNorm² + + # if a trust-region constraint is give, compute step to the boundary + # (note that α > 0 in CRLS) + if radius > 0 + pNorm = @knrm2(n, p) + if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p + psd = true # det(AᴴA) = 0 + p = Ar # p = Aᴴr + pNorm² = ArNorm * ArNorm + mul!(q, Aᴴ, s) + α = min(ArNorm^2 / γ, maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ + else + pNorm² = pNorm * pNorm + σ = maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²)) + if α ≥ σ + α = σ + on_boundary = true + end + end + end + + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q + ArNorm = @knrm2(n, Ar) + solved = psd || on_boundary + solved && continue + @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap + mul!(s, A, Ar) + MisI || mulorldiv!(Ms, M, s, ldiv) + γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, s) + λ > 0 && (γ_next += λ * ArNorm * ArNorm) + β = γ_next / γ + + @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p + @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap + MisI || mulorldiv!(MAp, M, Ap, ldiv) + mul!(q, Aᴴ, MAp) + λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p + + γ = γ_next + if λ > 0 + rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x)) + else + rNorm = @knrm2(m, r) # norm(r) + end + history && push!(rNorms, rNorm) + history && push!(ArNorms, ArNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + user_requested_exit = callback(solver) :: Bool + solved = (ArNorm ≤ ε) || on_boundary + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # 
Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + psd && (status = "zero-curvature encountered") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - psd && (status = "zero-curvature encountered") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/crmr.jl b/src/crmr.jl index deb5cf79f..db333856c 100644 --- a/src/crmr.jl +++ b/src/crmr.jl @@ -10,9 +10,9 @@ # and is equivalent to applying the conjugate residual method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # -# This method is equivalent to Craig-MR, described in +# This method is equivalent to CRAIGMR, described in # # D. Orban and M. Arioli. Iterative Solution of Symmetric Quasi-Definite Linear Systems, # Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017. @@ -26,12 +26,13 @@ export crmr, crmr! - """ (x, stats) = crmr(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), - rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + N=I, ldiv::Bool=false, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. 
`FC` is `T` or `Complex{T}`. @@ -40,11 +41,11 @@ Solve the consistent linear system Ax + √λs = b -using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CR to the normal equations of the second kind - (AAᵀ + λI) y = b + (AAᴴ + λI) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -58,10 +59,29 @@ CRMR produces monotonic residuals ‖r‖₂. It is formally equivalent to CRAIG-MR, though can be slightly less accurate, but simpler to implement. Only the x-part of the solution is returned. -A preconditioner M may be provided. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
+ +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -70,12 +90,6 @@ and `false` otherwise. """ function crmr end -function crmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CrmrSolver(A, b) - crmr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = crmr!(solver::CrmrSolver, A, b; kwargs...) @@ -85,107 +99,148 @@ See [`CrmrSolver`](@ref) for more details about the `solver`. """ function crmr! end -function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRMR: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :Mq, S, m) - allocate_if(λ > 0, solver, :s , S, m) - x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r - q, s, stats = solver.q, solver.s, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Mq = MisI ? q : solver.Mq - - x .= zero(FC) # initial estimation x = 0 - mulorldiv!(r, M, b, ldiv) # initial residual r = M * (b - Ax) = M * b - bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. - rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. 
- history && push!(rNorms, rNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(ArNorms, zero(T)) - return solver +def_args_crmr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_crmr = (:(; N = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_crmr = mapreduce(extract_parameters, vcat, def_kwargs_crmr) + +args_crmr = (:A, :b) +kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function crmr($(def_args_crmr...); $(def_kwargs_crmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrmrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + crmr!(solver, $(args_crmr...); $(kwargs_crmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - λ > 0 && (s .= r) - mul!(Aᵀr, Aᵀ, r) # - λ * x0 if x0 ≠ 0. - p .= Aᵀr - γ = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr) - λ > 0 && (γ += λ * rNorm * rNorm) - iter = 0 - itmax == 0 && (itmax = m + n) - - ArNorm = sqrt(γ) - history && push!(ArNorms, ArNorm) - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - - status = "unknown" - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) - tired = iter ≥ itmax - user_requested_exit = false - - while ! 
(solved || inconsistent || tired || user_requested_exit) - mul!(q, A, p) - λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s - MisI || mulorldiv!(Mq, M, q, ldiv) - α = γ / @kdotr(m, q, Mq) # Compute qᵗ * M * q - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, Mq, r) # Faster than r = r - α * Mq - rNorm = @knrm2(m, r) # norm(r) - mul!(Aᵀr, Aᵀ, r) - γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr) - λ > 0 && (γ_next += λ * rNorm * rNorm) - β = γ_next / γ - - @kaxpby!(n, one(FC), Aᵀr, β, p) # Faster than p = Aᵀr + β * p - if λ > 0 - @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + + function crmr!(solver :: CrmrSolver{T,FC,S}, $(def_args_crmr...); $(def_kwargs_crmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRMR: system of %d equations in %d variables\n", m, n) + + # Tests N = Iₙ + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!NisI, solver, :Nq, S, m) + allocate_if(λ > 0, solver, :s , S, m) + x, p, Aᴴr, r = solver.x, solver.p, solver.Aᴴr, solver.r + q, s, stats = solver.q, solver.s, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Nq = NisI ? q : solver.Nq + + x .= zero(FC) # initial estimation x = 0 + mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b + bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. + rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. 
+ history && push!(rNorms, rNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(ArNorms, zero(T)) + return solver end + λ > 0 && (s .= r) + mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0. + p .= Aᴴr + γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr) + λ > 0 && (γ += λ * rNorm * rNorm) + iter = 0 + itmax == 0 && (itmax = m + n) - γ = γ_next ArNorm = sqrt(γ) - history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - user_requested_exit = callback(solver) :: Bool + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. + (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + + status = "unknown" solved = rNorm ≤ ɛ_c inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! 
(solved || inconsistent || tired || user_requested_exit || overtimed) + mul!(q, A, p) + λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s + NisI || mulorldiv!(Nq, N, q, ldiv) + α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq + rNorm = @knrm2(m, r) # norm(r) + mul!(Aᴴr, Aᴴ, r) + γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr) + λ > 0 && (γ_next += λ * rNorm * rNorm) + β = γ_next / γ + + @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p + if λ > 0 + @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + end + + γ = γ_next + ArNorm = sqrt(γ) + history && push!(rNorms, rNorm) + history && push!(ArNorms, ArNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + user_requested_exit = callback(solver) :: Bool + solved = rNorm ≤ ɛ_c + inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "system probably inconsistent but least squares/norm solution found") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - inconsistent && (status = "system probably inconsistent but least squares/norm solution found") - user_requested_exit && (status = "user-requested exit") - - # Update 
stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/diom.jl b/src/diom.jl index 9c6b9767b..72ce462f6 100644 --- a/src/diom.jl +++ b/src/diom.jl @@ -11,40 +11,59 @@ export diom, diom! """ - (x, stats) = diom(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = diom(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using direct incomplete orthogonalization method. + (x, stats) = diom(A, b, x0::AbstractVector; kwargs...) + +DIOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using DIOM. DIOM only orthogonalizes the new vectors of the Krylov basis against the `memory` most recent vectors. If CG is well defined on `Ax = b` and `memory = 2`, DIOM is theoretically equivalent to CG. If `k ≤ memory` where `k` is the number of iterations, DIOM is theoretically equivalent to FOM. Otherwise, DIOM interpolates between CG and FOM and is similar to CG with partial reorthogonalization. -Partial reorthogonalization is available with the `reorthogonalization` option. - -An advantage of DIOM is that nonsymmetric or symmetric indefinite or both nonsymmetric +An advantage of DIOM is that non-Hermitian or Hermitian indefinite or both non-Hermitian and indefinite systems of linear equations can be handled by this single algorithm. 
-This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -DIOM can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = diom(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -52,18 +71,6 @@ and `false` otherwise. """ function diom end -function diom(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DiomSolver(A, b, memory) - diom!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function diom(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DiomSolver(A, b, memory) - diom!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = diom!(solver::DiomSolver, A, b; kwargs...) solver = diom!(solver::DiomSolver, A, b, x0; kwargs...) @@ -77,198 +84,256 @@ See [`DiomSolver`](@ref) for more details about the `solver`. """ function diom! end -function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - diom!(solver, A, b; kwargs...) 
- return solver -end - -function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("DIOM: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :w, S, n) - allocate_if(!NisI, solver, :z, S, n) - Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V - L, H, stats = solver.L, solver.H, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - w = MisI ? t : solver.w - r₀ = MisI ? t : solver.w - - # Initial solution x₀ and residual r₀. 
- x .= zero(FC) # x₀ - if warm_start - mul!(t, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), t) - else - t .= b +def_args_diom = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_diom = (:(x0::AbstractVector),) + +def_kwargs_diom = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_diom = mapreduce(extract_parameters, vcat, def_kwargs_diom) + +args_diom = (:A, :b) +optargs_diom = (:x0,) +kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function diom($(def_args_diom...), $(def_optargs_diom...); memory :: Int=20, $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DiomSolver(A, b, memory) + warm_start!(solver, $(optargs_diom...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + diom!(solver, $(args_diom...); $(kwargs_diom...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀) - rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + + function diom($(def_args_diom...); memory :: Int=20, $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DiomSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + diom!(solver, $(args_diom...); $(kwargs_diom...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) + 
function diom!(solver :: DiomSolver{T,FC,S}, $(def_args_diom...); $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "DIOM: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :w, S, n) + allocate_if(!NisI, solver, :z, S, n) + Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V + L, H, stats = solver.L, solver.H, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + w = MisI ? t : solver.w + r₀ = MisI ? t : solver.w + + # Initial solution x₀ and residual r₀. + x .= zero(FC) # x₀ + if warm_start + mul!(t, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), t) + else + t .= b + end + MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) + rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + iter = 0 + itmax == 0 && (itmax = 2*n) - mem = length(L) # Memory - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b). 
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Uₘ)⁻¹. - end - H .= zero(FC) # Last column of the band hessenberg matrix Hₘ = LₘUₘ. - # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. - # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located. - # In addition of that, the last column of Uₘ is stored in H. - L .= zero(FC) # Last mem pivots of Lₘ. - - # Initial ξ₁ and V₁. - ξ = rNorm - @. V[1] = r₀ / rNorm - - # Stopping criterion. - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || user_requested_exit) - - # Update iteration index. - iter = iter + 1 - - # Set position in circulars stacks. - pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V. - next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V. - - # Incomplete Arnoldi procedure. - z = NisI ? V[pos] : solver.z - NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ - mul!(t, A, z) # AN⁻¹vₘ - MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁ - for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. - diag = iter - i + 2 - H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩ - @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + mem = length(V) # Memory + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + end + for i = 1 : mem-1 + P[i] .= zero(FC) # Directions Pₖ = NVₖ(Uₖ)⁻¹. end + H .= zero(FC) # Last column of the band hessenberg matrix Hₖ = LₖUₖ. + # Each column has at most mem + 1 nonzero elements. + # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H. 
+ # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located. + # In addition of that, the last column of Uₖ is stored in H. + L .= zero(FC) # Last mem-1 pivots of Lₖ. + + # Initial ξ₁ and V₁. + ξ = rNorm + V[1] .= r₀ ./ rNorm + + # Stopping criterion. + solved = rNorm ≤ ε + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || user_requested_exit || overtimed) + + # Update iteration index. + iter = iter + 1 - # Partial reorthogonalization of the Krylov basis. - if reorthogonalization + # Set position in circulars stacks. + pos = mod(iter-1, mem) + 1 # Position corresponding to vₖ in the circular stack V. + next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V. + + # Incomplete Arnoldi procedure. + z = NisI ? V[pos] : solver.z + NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ + mul!(t, A, z) # ANvₖ + MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁ for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 - diag = iter - i + 2 - Htmp = @kdot(n, w, V[ipos]) - H[diag] += Htmp - @kaxpy!(n, -Htmp, V[ipos], w) + ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. + diag = iter - i + 1 + H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ + @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ end - end - # Compute hₘ₊₁.ₘ and vₘ₊₁. - H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂ - if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown" - @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ - end - # It's possible that uₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1 - if iter ≥ mem + 2 - H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0 - end + # Partial reorthogonalization of the Krylov basis. + if reorthogonalization + for i = max(1, iter-mem+1) : iter + ipos = mod(i-1, mem) + 1 + diag = iter - i + 1 + Htmp = @kdot(n, w, V[ipos]) + H[diag] += Htmp + @kaxpy!(n, -Htmp, V[ipos], w) + end + end - # Update the LU factorization with partial pivoting of H. 
- # Compute the last column of Uₘ. - if iter ≥ 2 - for i = max(2,iter-mem+1) : iter - lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L. - diag = iter - i + 2 - next_diag = diag + 1 - # uᵢ.ₘ ← hᵢ.ₘ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₘ - H[diag] = H[diag] - L[lpos] * H[next_diag] + # Compute hₖ₊₁.ₖ and vₖ₊₁. + Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" + V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ end - # Compute ξₘ the last component of zₘ = β(Lₘ)⁻¹e₁. - # ξₘ = -lₘ.ₘ₋₁ * ξₘ₋₁ - ξ = - L[pos] * ξ - end - # Compute next pivot lₘ₊₁.ₘ = hₘ₊₁.ₘ / uₘ.ₘ - L[next_pos] = H[1] / H[2] - - # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Uₘ)⁻¹. - for i = max(1,iter-mem) : iter-1 - ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. - diag = iter - i + 2 - if ipos == pos - # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ - @kscal!(n, -H[diag], P[pos]) - else - # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[pos]) + + # Update the LU factorization of Hₖ. + # Compute the last column of Uₖ. + if iter ≥ 2 + # u₁.ₖ ← h₁.ₖ if iter ≤ mem + # uₖ₋ₘₑₘ₊₁.ₖ ← hₖ₋ₘₑₘ₊₁.ₖ if iter ≥ mem + 1 + for i = max(2,iter-mem+2) : iter + lpos = mod(i-1, mem-1) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L. + diag = iter - i + 1 + next_diag = diag + 1 + # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ + H[diag] = H[diag] - L[lpos] * H[next_diag] + if i == iter + # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁. + # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁ + ξ = - L[lpos] * ξ + end + end end + # Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ + next_lpos = mod(iter, mem-1) + 1 + L[next_lpos] = Haux / H[1] + + ppos = mod(iter-1, mem-1) + 1 # Position corresponding to pₖ in the circular stack P. + + # Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹. + # u₁.ₖp₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≤ mem + # uₖ₋ₘₑₘ₊₁.ₖpₖ₋ₘₑₘ₊₁ + ... 
+ uₖ.ₖpₖ = Nvₖ if k ≥ mem + 1 + for i = max(1,iter-mem+1) : iter-1 + ipos = mod(i-1, mem-1) + 1 # Position corresponding to pᵢ in the circular stack P. + diag = iter - i + 1 + if ipos == ppos + # pₖ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁ + @kscal!(n, -H[diag], P[ppos]) + else + # pₖ ← pₖ - uᵢ.ₖ * pᵢ + @kaxpy!(n, -H[diag], P[ipos], P[ppos]) + end + end + # pₐᵤₓ ← pₐᵤₓ + Nvₖ + @kaxpy!(n, one(FC), z, P[ppos]) + # pₖ = pₐᵤₓ / uₖ.ₖ + P[ppos] .= P[ppos] ./ H[1] + + # Update solution xₖ. + # xₖ = xₖ₋₁ + ξₖ * pₖ + @kaxpy!(n, ξ, P[ppos], x) + + # Compute residual norm. + # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ| + rNorm = Haux * abs(ξ / H[1]) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) end - # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ - @kaxpy!(n, one(FC), z, P[pos]) - # pₘ = pₐᵤₓ / uₘ.ₘ - @. P[pos] = P[pos] / H[2] - - # Update solution xₘ. - # xₘ = xₘ₋₁ + ξₘ * pₘ - @kaxpy!(n, ξ, P[pos], x) - - # Compute residual norm. - # ‖ M⁻¹(b - Axₘ) ‖₂ = hₘ₊₁.ₘ * |ξₘ / uₘ.ₘ| - rNorm = real(H[1]) * abs(ξ / H[2]) - history && push!(rNorms, rNorm) + (verbose > 0) && @printf(iostream, "\n") - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/dqgmres.jl b/src/dqgmres.jl index ab7c490a6..4c1e52b37 100644 --- a/src/dqgmres.jl +++ b/src/dqgmres.jl @@ -11,16 +11,21 @@ export dqgmres, dqgmres! 
""" - (x, stats) = dqgmres(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = dqgmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using DQGMRES method. + (x, stats) = dqgmres(A, b, x0::AbstractVector; kwargs...) + +DQGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using DQGMRES. DQGMRES algorithm is based on the incomplete Arnoldi orthogonalization process and computes a sequence of approximate solutions with the quasi-minimal residual property. @@ -30,21 +35,35 @@ If MINRES is well defined on `Ax = b` and `memory = 2`, DQGMRES is theoretically If `k ≤ memory` where `k` is the number of iterations, DQGMRES is theoretically equivalent to GMRES. Otherwise, DQGMRES interpolates between MINRES and GMRES and is similar to MINRES with partial reorthogonalization. -Partial reorthogonalization is available with the `reorthogonalization` option. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -This implementation allows a left preconditioner M and a right preconditioner N. 
-- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Optional argument -DQGMRES can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = dqgmres(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. 
#### Reference @@ -52,18 +71,6 @@ and `false` otherwise. """ function dqgmres end -function dqgmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DqgmresSolver(A, b, memory) - dqgmres!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function dqgmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DqgmresSolver(A, b, memory) - dqgmres!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = dqgmres!(solver::DqgmresSolver, A, b; kwargs...) solver = dqgmres!(solver::DqgmresSolver, A, b, x0; kwargs...) @@ -77,206 +84,258 @@ See [`DqgmresSolver`](@ref) for more details about the `solver`. """ function dqgmres! end -function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - dqgmres!(solver, A, b; kwargs...) - return solver -end - -function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("DQGMRES: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. 
- allocate_if(!MisI, solver, :w, S, n) - allocate_if(!NisI, solver, :z, S, n) - Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V - c, s, H, stats = solver.c, solver.s, solver.H, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - w = MisI ? t : solver.w - r₀ = MisI ? t : solver.w - - # Initial solution x₀ and residual r₀. - x .= zero(FC) # x₀ - if warm_start - mul!(t, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), t) - else - t .= b +def_args_dqgmres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_dqgmres = (:(x0::AbstractVector),) + +def_kwargs_dqgmres = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_dqgmres = mapreduce(extract_parameters, vcat, def_kwargs_dqgmres) + +args_dqgmres = (:A, :b) +optargs_dqgmres = (:x0,) +kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function dqgmres($(def_args_dqgmres...), $(def_optargs_dqgmres...); memory :: Int=20, $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DqgmresSolver(A, b, memory) + warm_start!(solver, $(optargs_dqgmres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + dqgmres!(solver, $(args_dqgmres...); $(kwargs_dqgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀) - rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return 
solver + + function dqgmres($(def_args_dqgmres...); memory :: Int=20, $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DqgmresSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + dqgmres!(solver, $(args_dqgmres...); $(kwargs_dqgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) + function dqgmres!(solver :: DqgmresSolver{T,FC,S}, $(def_args_dqgmres...); $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "DQGMRES: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :w, S, n) + allocate_if(!NisI, solver, :z, S, n) + Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V + c, s, H, stats = solver.c, solver.s, solver.H, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + w = MisI ? t : solver.w + r₀ = MisI ? t : solver.w + + # Initial solution x₀ and residual r₀. 
+ x .= zero(FC) # x₀ + if warm_start + mul!(t, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), t) + else + t .= b + end + MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) + rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + iter = 0 + itmax == 0 && (itmax = 2*n) - # Set up workspace. - mem = length(c) # Memory. - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b). - P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Rₘ)⁻¹. - end - c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ. - s .= zero(FC) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ. - H .= zero(FC) # Last column of the band hessenberg matrix Hₘ. - # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. - # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located. - # In addition of that, the last column of Rₘ is also stored in H. - - # Initial γ₁ and V₁. - γₘ = rNorm # γₘ and γₘ₊₁ are the last components of gₘ, right-hand of the least squares problem min ‖ Hₘyₘ - gₘ ‖₂. - @. V[1] = r₀ / rNorm - - # The following stopping criterion compensates for the lag in the - # residual, but usually increases the number of iterations. - # solved = sqrt(max(1, iter-mem+1)) * |γₘ₊₁| ≤ ε - solved = rNorm ≤ ε # less accurate, but acceptable. - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || user_requested_exit) - - # Update iteration index. - iter = iter + 1 - - # Set position in circulars stacks. - pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V. 
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V. - - # Incomplete Arnoldi procedure. - z = NisI ? V[pos] : solver.z - NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ - mul!(t, A, z) # AN⁻¹vₘ - MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁ - for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. - diag = iter - i + 2 - H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩ - @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Set up workspace. + mem = length(V) # Memory. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹. end + c .= zero(T) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ. + s .= zero(FC) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ. + H .= zero(FC) # Last column of the band hessenberg matrix Hₖ. + # Each column has at most mem + 1 nonzero elements. + # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H. + # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located. + # In addition of that, the last column of Rₖ is also stored in H. + + # Initial γ₁ and V₁. + γₖ = rNorm # γₖ and γₖ₊₁ are the last components of gₖ, right-hand of the least squares problem min ‖ Hₖyₖ - gₖ ‖₂. + V[1] .= r₀ ./ rNorm + + # The following stopping criterion compensates for the lag in the + # residual, but usually increases the number of iterations. + # solved = sqrt(max(1, iter-mem+1)) * |γₖ₊₁| ≤ ε + solved = rNorm ≤ ε # less accurate, but acceptable. 
+ tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || user_requested_exit || overtimed) + + # Update iteration index. + iter = iter + 1 - # Partial reorthogonalization of the Krylov basis. - if reorthogonalization + # Set position in circulars stacks. + pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V. + next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V. + + # Incomplete Arnoldi procedure. + z = NisI ? V[pos] : solver.z + NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ + mul!(t, A, z) # ANvₖ + MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁ for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 - diag = iter - i + 2 - Htmp = @kdot(n, w, V[ipos]) - H[diag] += Htmp - @kaxpy!(n, -Htmp, V[ipos], w) + ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. + diag = iter - i + 1 + H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ + @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ end - end - # Compute hₘ₊₁.ₘ and vₘ₊₁. - H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂ - if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown" - @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ - end - # rₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1 - if iter ≥ mem + 2 - H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0 - end + # Partial reorthogonalization of the Krylov basis. + if reorthogonalization + for i = max(1, iter-mem+1) : iter + ipos = mod(i-1, mem) + 1 + diag = iter - i + 1 + Htmp = @kdot(n, w, V[ipos]) + H[diag] += Htmp + @kaxpy!(n, -Htmp, V[ipos], w) + end + end - # Update the QR factorization of H. - # Apply mem previous Givens reflections Ωᵢ. - for i = max(1,iter-mem) : iter-1 - irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s. 
- diag = iter - i + 1 - next_diag = diag + 1 - H_aux = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag] - H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag] - H[next_diag] = H_aux - end + # Compute hₖ₊₁.ₖ and vₖ₊₁. + Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" + V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ + end + # rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1 + # We don't want to use rₖ₋₁₋ₘₑₘ.ₖ₋₁ when we compute rₖ₋ₘₑₘ.ₖ + if iter ≥ mem + 2 + H[mem+1] = zero(FC) # rₖ₋ₘₑₘ.ₖ = 0 + end + + # Update the QR factorization of Hₖ. + # Apply mem previous Givens reflections Ωᵢ. + for i = max(1,iter-mem) : iter-1 + irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s. + diag = iter - i + next_diag = diag + 1 + Htmp = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag] + H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag] + H[next_diag] = Htmp + end - # Compute and apply current Givens reflection Ωₘ. - # [cₘ sₘ] [ hₘ.ₘ ] = [ρₘ] - # [sₘ -cₘ] [hₘ₊₁.ₘ] [0 ] - (c[pos], s[pos], H[2]) = sym_givens(H[2], H[1]) - γₘ₊₁ = conj(s[pos]) * γₘ - γₘ = c[pos] * γₘ - - # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Rₘ)⁻¹. - for i = max(1,iter-mem) : iter-1 - ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. - diag = iter - i + 2 - if ipos == pos - # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ - @kscal!(n, -H[diag], P[pos]) - else - # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[pos]) + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ] + # [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ] + (c[pos], s[pos], H[1]) = sym_givens(H[1], Haux) + γₖ₊₁ = conj(s[pos]) * γₖ + γₖ = c[pos] * γₖ + + # Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹. + for i = max(1,iter-mem) : iter-1 + ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. 
+ diag = iter - i + 1 + if ipos == pos + # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ + @kscal!(n, -H[diag], P[pos]) + else + # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ + @kaxpy!(n, -H[diag], P[ipos], P[pos]) + end end + # pₐᵤₓ ← pₐᵤₓ + Nvₖ + @kaxpy!(n, one(FC), z, P[pos]) + # pₖ = pₐᵤₓ / hₖ.ₖ + P[pos] .= P[pos] ./ H[1] + + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + γₖ * pₖ + @kaxpy!(n, γₖ, P[pos], x) + + # Update residual norm estimate. + # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁| + rNorm = abs(γₖ₊₁) + history && push!(rNorms, rNorm) + + # Update γₖ. + γₖ = γₖ₊₁ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) end - # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ - @kaxpy!(n, one(FC), z, P[pos]) - # pₘ = pₐᵤₓ / hₘ.ₘ - @. P[pos] = P[pos] / H[2] - - # Compute solution xₘ. - # xₘ ← xₘ₋₁ + γₘ * pₘ - @kaxpy!(n, γₘ, P[pos], x) - - # Update residual norm estimate. - # ‖ M⁻¹(b - Axₘ) ‖₂ ≈ |γₘ₊₁| - rNorm = abs(γₘ₊₁) - history && push!(rNorms, rNorm) + (verbose > 0) && @printf(iostream, "\n") - # Update γₘ. - γₘ = γₘ₊₁ + # Termination status + solved && (status = "solution good enough given atol and rtol") + tired && (status = "maximum number of iterations exceeded") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - solved && (status = "solution good enough given atol and rtol") - tired && (status = "maximum number of iterations exceeded") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/fgmres.jl b/src/fgmres.jl new file mode 100644 index 000000000..1a68aac6c --- /dev/null +++ b/src/fgmres.jl @@ -0,0 +1,391 @@ +# An implementation of FGMRES for the solution of the square linear system Ax = b. +# +# This method is described in +# +# Y. Saad, A Flexible Inner-Outer Preconditioned GMRES Algorithms. +# SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993. +# +# Alexis Montoison, +# Montreal, September 2022. + +export fgmres, fgmres! + +""" + (x, stats) = fgmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) + +`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. +`FC` is `T` or `Complex{T}`. + + (x, stats) = fgmres(A, b, x0::AbstractVector; kwargs...) + +FGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the linear system Ax = b of size n using FGMRES. 
+ +FGMRES computes a sequence of approximate solutions with minimum residual. +FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration. + +This implementation allows a left preconditioner M and a flexible right preconditioner N. +A situation in which the preconditioner is "not constant" is when a relaxation-type method, +a Chebyshev iteration or another Krylov subspace method is used as a preconditioner. +Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles. +Thus, GMRES is recommended if the right preconditioner N is constant. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. + +#### Keyword arguments + +* `memory`: if `restart = true`, the restarted version FGMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. + +#### Reference + +* Y. Saad, [*A Flexible Inner-Outer Preconditioned GMRES Algorithm*](https://doi.org/10.1137/0914028), SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993. +""" +function fgmres end + +""" + solver = fgmres!(solver::FgmresSolver, A, b; kwargs...) + solver = fgmres!(solver::FgmresSolver, A, b, x0; kwargs...) + +where `kwargs` are keyword arguments of [`fgmres`](@ref). + +Note that the `memory` keyword argument is the only exception. +It's required to create a `FgmresSolver` and can't be changed later. + +See [`FgmresSolver`](@ref) for more details about the `solver`. +""" +function fgmres! 
end + +def_args_fgmres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_fgmres = (:(x0::AbstractVector),) + +def_kwargs_fgmres = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; restart::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_fgmres = mapreduce(extract_parameters, vcat, def_kwargs_fgmres) + +args_fgmres = (:A, :b) +optargs_fgmres = (:x0,) +kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function fgmres($(def_args_fgmres...), $(def_optargs_fgmres...); memory :: Int=20, $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FgmresSolver(A, b, memory) + warm_start!(solver, $(optargs_fgmres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fgmres!(solver, $(args_fgmres...); $(kwargs_fgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) + end + + function fgmres($(def_args_fgmres...); memory :: Int=20, $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FgmresSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fgmres!(solver, $(args_fgmres...); $(kwargs_fgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) + end + + function fgmres!(solver :: FgmresSolver{T,FC,S}, $(def_args_fgmres...); $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), 
$(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "FGMRES: system of size %d\n", n) + + # Check M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, Z = solver.Δx, solver.x, solver.w, solver.V, solver.Z + z, c, s, R, stats = solver.z, solver.c, solver.s, solver.R, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. + if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + mem = length(c) # Memory + npass = 0 # Number of pass + + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass + + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax + + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time)) + + # Tolerance for breakdown detection. 
+ btol = eps(T)^(3/4) + + # Stopping criterion + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}. + Z[i] .= zero(FC) # Zₖ = [N₁v₁, ..., Nₖvₖ] + end + s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end + end + + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. V[1] = r₀ / rNorm + + npass = npass + 1 + solver.inner_iter = 0 + inner_tired = false + + while !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + + # Update iteration index + solver.inner_iter = solver.inner_iter + 1 + inner_iter = solver.inner_iter + + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(R, zero(FC)) + end + push!(s, zero(FC)) + push!(c, zero(T)) + push!(Z, S(undef, n)) + end + + # Continue the process. + # MAZₖ = Vₖ₊₁Hₖ₊₁.ₖ + mulorldiv!(Z[inner_iter], N, V[inner_iter], ldiv) # zₖ ← Nₖvₖ + mul!(w, A, Z[inner_iter]) # w ← Azₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ + for i = 1 : inner_iter + R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ + end + + # Reorthogonalization of the basis. 
+ if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + R[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end + + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the QR factorization of Hₖ₊₁.ₖ. + # Apply previous Givens reflections Ωᵢ. + # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] + # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] + for i = 1 : inner_iter-1 + Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] + R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] + R[nr+i] = Rtmp + end + + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] + # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] + (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) + + # Update zₖ = (Qₖ)ᴴβe₁ + ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] + z[inner_iter] = c[inner_iter] * z[inner_iter] + + # Update residual norm estimate. + # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁| + rNorm = abs(ζₖ₊₁) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time)) + + # Compute vₖ₊₁ + if !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + push!(z, zero(FC)) + end + @. 
V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + z[inner_iter+1] = ζₖ₊₁ + end + end + + # Compute y by solving Ry = z with backward substitution. + y = z # yᵢ = ζᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + y[i] = zero(FC) + inconsistent = true + else + y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + end + end + + # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], Z[i], xr) + end + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter and tired variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver + end +end diff --git a/src/fom.jl b/src/fom.jl index fcae5cf62..351fb246f 100644 --- a/src/fom.jl +++ b/src/fom.jl @@ -11,38 +11,54 @@ export fom, fom! 
""" - (x, stats) = fom(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - restart::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = fom(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using FOM method. + (x, stats) = fom(A, b, x0::AbstractVector; kwargs...) + +FOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the linear system Ax = b of size n using FOM. FOM algorithm is based on the Arnoldi process and a Galerkin condition. -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -Full reorthogonalization is available with the `reorthogonalization` option. +#### Optional argument -If `restart = true`, the restarted version FOM(k) is used with `k = memory`. -If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. -More storage will be allocated only if the number of iterations exceed `memory`. +* `x0`: a vector of length n that represents an initial guess of the solution x. -FOM can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = fom(A, b, x0; kwargs...) 
+* `memory`: if `restart = true`, the restarted version FOM(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -50,18 +66,6 @@ and `false` otherwise. """ function fom end -function fom(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) 
where FC <: FloatOrComplex - solver = FomSolver(A, b, memory) - fom!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function fom(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = FomSolver(A, b, memory) - fom!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = fom!(solver::FomSolver, A, b; kwargs...) solver = fom!(solver::FomSolver, A, b, x0; kwargs...) @@ -75,241 +79,293 @@ See [`FomSolver`](@ref) for more details about the `solver`. """ function fom! end -function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - fom!(solver, A, b; kwargs...) - return solver -end - -function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - restart :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("FOM: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI , solver, :q , S, n) - allocate_if(!NisI , solver, :p , S, n) - allocate_if(restart, solver, :Δx, S, n) - Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z - l, U, stats = solver.l, solver.U, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - q = MisI ? w : solver.q - r₀ = MisI ? w : solver.q - xr = restart ? Δx : x - - # Initial solution x₀. 
- x .= zero(FC) - - # Initial residual r₀. - if warm_start - mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) - else - w .= b +def_args_fom = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_fom = (:(x0::AbstractVector),) + +def_kwargs_fom = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; restart::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_fom = mapreduce(extract_parameters, vcat, def_kwargs_fom) + +args_fom = (:A, :b) +optargs_fom = (:x0,) +kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function fom($(def_args_fom...), $(def_optargs_fom...); memory :: Int=20, $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FomSolver(A, b, memory) + warm_start!(solver, $(optargs_fom...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fom!(solver, $(args_fom...); $(kwargs_fom...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ - - rNorm = β - history && push!(rNorms, β) - ε = atol + rtol * rNorm - if β == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function fom($(def_args_fom...); memory :: Int=20, $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FomSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fom!(solver, $(args_fom...); $(kwargs_fom...)) + 
solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - mem = length(l) # Memory - npass = 0 # Number of pass + function fom!(solver :: FomSolver{T,FC,S}, $(def_args_fom...); $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "FOM: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(!NisI , solver, :p , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z + l, U, stats = solver.l, solver.U, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. 
+ if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - iter = 0 # Cumulative number of iterations - inner_iter = 0 # Number of iterations in a pass + mem = length(l) # Memory + npass = 0 # Number of pass - itmax == 0 && (itmax = 2*n) - inner_itmax = itmax + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass - (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time)) - # Stopping criterion - breakdown = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - inner_tired = inner_iter ≥ inner_itmax - status = "unknown" - user_requested_exit = false + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - while !(solved || tired || breakdown || user_requested_exit) + # Stopping criterion + breakdown = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + overtimed = false - # Initialize workspace. - nr = 0 # Number of coefficients stored in Uₖ. - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀). 
- end - l .= zero(FC) # Lower unit triangular matrix Lₖ. - U .= zero(FC) # Upper triangular matrix Uₖ. - z .= zero(FC) # Solution of Lₖzₖ = βe₁. - - if restart - xr .= zero(FC) # xr === Δx when restart is set to true - if npass ≥ 1 - mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) - MisI || mulorldiv!(r₀, M, w, ldiv) + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Uₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + end + l .= zero(FC) # Lower unit triangular matrix Lₖ. + U .= zero(FC) # Upper triangular matrix Uₖ. + z .= zero(FC) # Solution of Lₖzₖ = βe₁. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end end - end - # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) - z[1] = β - @. V[1] = r₀ / rNorm + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. V[1] = r₀ / rNorm - npass = npass + 1 - inner_iter = 0 - inner_tired = false + npass = npass + 1 + inner_iter = 0 + inner_tired = false - while !(solved || inner_tired || breakdown) + while !(solved || inner_tired || breakdown) - # Update iteration index - inner_iter = inner_iter + 1 + # Update iteration index + inner_iter = inner_iter + 1 - # Update workspace if more storage is required and restart is set to false - if !restart && (inner_iter > mem) - for i = 1 : inner_iter - push!(U, zero(FC)) + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(U, zero(FC)) + end + push!(l, zero(FC)) + push!(z, zero(FC)) end - push!(l, zero(FC)) - push!(z, zero(FC)) - end - - # Continue the Arnoldi process. - p = NisI ? 
V[inner_iter] : solver.p - NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ - mul!(w, A, p) # w ← AN⁻¹vₖ - MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ - for i = 1 : inner_iter - U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ - @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ - end - # Reorthogonalization of the Krylov basis. - if reorthogonalization + # Continue the Arnoldi process. + p = NisI ? V[inner_iter] : solver.p + NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ + mul!(w, A, p) # w ← ANvₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) - U[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end - end - # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + # Reorthogonalization of the Krylov basis. + if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + U[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end - # Update the LU factorization of Hₖ. - if inner_iter ≥ 2 - for i = 2 : inner_iter - # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ - U[nr+i] = U[nr+i] - l[i-1] * U[nr+i-1] + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the LU factorization of Hₖ. + if inner_iter ≥ 2 + for i = 2 : inner_iter + # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ + U[nr+i] = U[nr+i] - l[i-1] * U[nr+i-1] + end + # ζₖ = -lₖ.ₖ₋₁ * ζₖ₋₁ + z[inner_iter] = - l[inner_iter-1] * z[inner_iter-1] + end + # lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ + l[inner_iter] = Hbis / U[nr+inner_iter] + + # Update residual norm estimate. + # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ| + rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter]) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Uₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time)) + + # Compute vₖ₊₁. + if !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + end + @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q end - # ζₖ = -lₖ.ₖ₋₁ * ζₖ₋₁ - z[inner_iter] = - l[inner_iter-1] * z[inner_iter-1] end - # lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ - l[inner_iter] = Hbis / U[nr+inner_iter] - - # Update residual norm estimate. - # ‖ M⁻¹(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ| - rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter]) - history && push!(rNorms, rNorm) - - # Update the number of coefficients in Uₖ - nr = nr + inner_iter - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = Hbis ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax - kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) - - # Compute vₖ₊₁. - if !(solved || inner_tired || breakdown) - if !restart && (inner_iter ≥ mem) - push!(V, S(undef, n)) + + # Hₖyₖ = βe₁ ⟺ LₖUₖyₖ = βe₁ ⟺ Uₖyₖ = zₖ. + # Compute yₖ by solving Uₖyₖ = zₖ with backward substitution. 
+ y = z # yᵢ = zᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - U[pos] * y[j] # yᵢ ← yᵢ - uᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ end - @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ end - end - # Hₖyₖ = βe₁ ⟺ LₖUₖyₖ = βe₁ ⟺ Uₖyₖ = zₖ. - # Compute yₖ by solving Uₖyₖ = zₖ with backward substitution. - y = z # yᵢ = zᵢ - for i = inner_iter : -1 : 1 - pos = nr + i - inner_iter # position of rᵢ.ₖ - for j = inner_iter : -1 : i+1 - y[i] = y[i] - U[pos] * y[j] # yᵢ ← yᵢ - uᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + # Form xₖ = NVₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], V[i], xr) + end + if !NisI + solver.p .= xr + mulorldiv!(xr, N, solver.p, ldiv) end - y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter, tired and overtimed variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns end + (verbose > 0) && @printf(iostream, "\n") - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : inner_iter - @kaxpy!(n, y[i], V[i], xr) - end - if !NisI - solver.p .= xr - mulorldiv!(xr, N, solver.p, ldiv) - end - restart && @kaxpy!(n, one(FC), xr, x) + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "inconsistent linear system") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update inner_itmax, iter and tired variables. 
- inner_itmax = inner_itmax - inner_iter - iter = iter + inner_iter - tired = iter ≥ itmax + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !solved && breakdown + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "inconsistent linear system") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !solved && breakdown - stats.status = status - return solver end diff --git a/src/gmres.jl b/src/gmres.jl index 388a4ab96..7ee6e2341 100644 --- a/src/gmres.jl +++ b/src/gmres.jl @@ -11,38 +11,54 @@ export gmres, gmres! """ - (x, stats) = gmres(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - restart::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = gmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using GMRES method. + (x, stats) = gmres(A, b, x0::AbstractVector; kwargs...) -GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimal residual property. 
+GMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +Solve the linear system Ax = b of size n using GMRES. -Full reorthogonalization is available with the `reorthogonalization` option. +GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimum residual. -If `restart = true`, the restarted version GMRES(k) is used with `k = memory`. -If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. -More storage will be allocated only if the number of iterations exceed `memory`. +#### Input arguments -GMRES can be warm-started from an initial guess `x0` with the method +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. - (x, stats) = gmres(A, b, x0; kwargs...) +#### Optional argument -where `kwargs` are the same keyword arguments as above. +* `x0`: a vector of length n that represents an initial guess of the solution x. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Keyword arguments + +* `memory`: if `restart = true`, the restarted version GMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. 
Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -50,18 +66,6 @@ and `false` otherwise. """ function gmres end -function gmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GmresSolver(A, b, memory) - gmres!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function gmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GmresSolver(A, b, memory) - gmres!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = gmres!(solver::GmresSolver, A, b; kwargs...) 
solver = gmres!(solver::GmresSolver, A, b, x0; kwargs...) @@ -75,260 +79,310 @@ See [`GmresSolver`](@ref) for more details about the `solver`. """ function gmres! end -function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - gmres!(solver, A, b; kwargs...) - return solver -end - -function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - restart :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("GMRES: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI , solver, :q , S, n) - allocate_if(!NisI , solver, :p , S, n) - allocate_if(restart, solver, :Δx, S, n) - Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z - c, s, R, stats = solver.c, solver.s, solver.R, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - q = MisI ? w : solver.q - r₀ = MisI ? w : solver.q - xr = restart ? Δx : x - - # Initial solution x₀. - x .= zero(FC) - - # Initial residual r₀. 
- if warm_start - mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) - else - w .= b +def_args_gmres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_gmres = (:(x0::AbstractVector),) + +def_kwargs_gmres = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; restart::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_gmres = mapreduce(extract_parameters, vcat, def_kwargs_gmres) + +args_gmres = (:A, :b) +optargs_gmres = (:x0,) +kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function gmres($(def_args_gmres...), $(def_optargs_gmres...); memory :: Int=20, $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GmresSolver(A, b, memory) + warm_start!(solver, $(optargs_gmres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gmres!(solver, $(args_gmres...); $(kwargs_gmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ - - rNorm = β - history && push!(rNorms, β) - ε = atol + rtol * rNorm - if β == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function gmres($(def_args_gmres...); memory :: Int=20, $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GmresSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gmres!(solver, $(args_gmres...); $(kwargs_gmres...)) + 
solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - mem = length(c) # Memory - npass = 0 # Number of pass + function gmres!(solver :: GmresSolver{T,FC,S}, $(def_args_gmres...); $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "GMRES: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(!NisI , solver, :p , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z + c, s, R, stats = solver.c, solver.s, solver.R, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. 
+ if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + mem = length(c) # Memory + npass = 0 # Number of pass - iter = 0 # Cumulative number of iterations - inner_iter = 0 # Number of iterations in a pass + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass - itmax == 0 && (itmax = 2*n) - inner_itmax = itmax + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax - (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time)) - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - # Stopping criterion - breakdown = false - inconsistent = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - inner_tired = inner_iter ≥ inner_itmax - status = "unknown" - user_requested_exit = false + # Stopping criterion + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + overtimed = false - while !(solved || tired || breakdown || user_requested_exit) + while !(solved || tired || breakdown || user_requested_exit || overtimed) - # Initialize workspace. 
- nr = 0 # Number of coefficients stored in Rₖ. - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀). - end - s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - R .= zero(FC) # Upper triangular matrix Rₖ. - z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. - - if restart - xr .= zero(FC) # xr === Δx when restart is set to true - if npass ≥ 1 - mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) - MisI || mulorldiv!(r₀, M, w, ldiv) + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + end + s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end end - end - # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) - z[1] = β - @. V[1] = r₀ / rNorm + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. 
V[1] = r₀ / rNorm - npass = npass + 1 - solver.inner_iter = 0 - inner_tired = false + npass = npass + 1 + solver.inner_iter = 0 + inner_tired = false - while !(solved || inner_tired || breakdown || user_requested_exit) + while !(solved || inner_tired || breakdown || user_requested_exit || overtimed) - # Update iteration index - solver.inner_iter = solver.inner_iter + 1 - inner_iter = solver.inner_iter + # Update iteration index + solver.inner_iter = solver.inner_iter + 1 + inner_iter = solver.inner_iter - # Update workspace if more storage is required and restart is set to false - if !restart && (inner_iter > mem) + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(R, zero(FC)) + end + push!(s, zero(FC)) + push!(c, zero(T)) + end + + # Continue the Arnoldi process. + p = NisI ? V[inner_iter] : solver.p + NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ + mul!(w, A, p) # w ← ANvₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - push!(R, zero(FC)) + R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end - push!(s, zero(FC)) - push!(c, zero(T)) - end - # Continue the Arnoldi process. - p = NisI ? V[inner_iter] : solver.p - NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ - mul!(w, A, p) # w ← AN⁻¹vₖ - MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ - for i = 1 : inner_iter - R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ - @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ - end + # Reorthogonalization of the Krylov basis. + if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + R[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end - # Reorthogonalization of the Krylov basis. 
- if reorthogonalization - for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) - R[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the QR factorization of Hₖ₊₁.ₖ. + # Apply previous Givens reflections Ωᵢ. + # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] + # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] + for i = 1 : inner_iter-1 + Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] + R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] + R[nr+i] = Rtmp end - end - # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ - - # Update the QR factorization of Hₖ₊₁.ₖ. - # Apply previous Givens reflections Ωᵢ. - # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] - # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] - for i = 1 : inner_iter-1 - Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] - R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] - R[nr+i] = Rtmp + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] + # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] + (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) + + # Update zₖ = (Qₖ)ᴴβe₁ + ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] + z[inner_iter] = c[inner_iter] * z[inner_iter] + + # Update residual norm estimate. + # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁| + rNorm = abs(ζₖ₊₁) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? 
inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time)) + + # Compute vₖ₊₁. + if !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + push!(z, zero(FC)) + end + @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + z[inner_iter+1] = ζₖ₊₁ + end end - # Compute and apply current Givens reflection Ωₖ. - # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] - # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] - (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) - - # Update zₖ = (Qₖ)ᵀβe₁ - ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] - z[inner_iter] = c[inner_iter] * z[inner_iter] - - # Update residual norm estimate. - # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁| - rNorm = abs(ζₖ₊₁) - history && push!(rNorms, rNorm) - - # Update the number of coefficients in Rₖ - nr = nr + inner_iter - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - resid_decrease_lim = rNorm ≤ ε - breakdown = Hbis ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax - solver.inner_iter = inner_iter - kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) - - # Compute vₖ₊₁ - if !(solved || inner_tired || breakdown) - if !restart && (inner_iter ≥ mem) - push!(V, S(undef, n)) - push!(z, zero(FC)) + # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. 
+ y = z # yᵢ = zᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + y[i] = zero(FC) + inconsistent = true + else + y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ end - @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q - z[inner_iter+1] = ζₖ₊₁ end - user_requested_exit = callback(solver) :: Bool - end - - # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. - y = z # yᵢ = zᵢ - for i = inner_iter : -1 : 1 - pos = nr + i - inner_iter # position of rᵢ.ₖ - for j = inner_iter : -1 : i+1 - y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + # Form xₖ = NVₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], V[i], xr) end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ btol - y[i] = zero(FC) - inconsistent = true - else - y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + if !NisI + solver.p .= xr + mulorldiv!(xr, N, solver.p, ldiv) end + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter, tired and overtimed variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns end + (verbose > 0) && @printf(iostream, "\n") - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : inner_iter - @kaxpy!(n, y[i], V[i], xr) - end - if !NisI - solver.p .= xr - mulorldiv!(xr, N, solver.p, ldiv) - end - restart && @kaxpy!(n, one(FC), xr, x) + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update inner_itmax, iter and tired variables. 
- inner_itmax = inner_itmax - inner_iter - iter = iter + inner_iter - tired = iter ≥ itmax + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - inconsistent && (status = "found approximate least-squares solution") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/gpmr.jl b/src/gpmr.jl index b10942995..1049c3b50 100644 --- a/src/gpmr.jl +++ b/src/gpmr.jl @@ -3,8 +3,8 @@ # This method is described in # # A. Montoison and D. Orban -# GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems -# Cahier du GERAD G-2021-62. +# GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems. +# SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023. # # Alexis Montoison, # Montréal, August 2021. @@ -12,23 +12,30 @@ export gpmr, gpmr! 
""" - (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; memory::Int=20, - C=I, D=I, E=I, F=I, atol::T=√eps(T), rtol::T=√eps(T), - gsp::Bool=false, reorthogonalization::Bool=false, - itmax::Int=0, λ::FC=one(FC), μ::FC=one(FC), - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; + memory::Int=20, C=I, D=I, E=I, F=I, + ldiv::Bool=false, gsp::Bool=false, + λ::FC=one(FC), μ::FC=one(FC), + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -GPMR solves the unsymmetric partitioned linear system + (x, y, stats) = gpmr(A, B, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) - [ λI A ] [ x ] = [ b ] - [ B μI ] [ y ] [ c ], +GPMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. -where λ and μ are real or complex numbers. -`A` can have any shape and `B` has the shape of `Aᵀ`. +Given matrices `A` of dimension m × n and `B` of dimension n × m, +GPMR solves the non-Hermitian partitioned linear system + + [ λIₘ A ] [ x ] = [ b ] + [ B μIₙ ] [ y ] [ c ], + +of size (n+m) × (n+m) where λ and μ are real or complex numbers. +`A` can have any shape and `B` has the shape of `Aᴴ`. `A`, `B`, `b` and `c` must be all nonzero. This implementation allows left and right block diagonal preconditioners @@ -44,8 +51,6 @@ and can solve when `CE = M⁻¹` and `DF = N⁻¹`. By default, GPMR solves unsymmetric linear systems with `λ = 1` and `μ = 1`. -If `gsp = true`, `λ = 1`, `μ = 0` and the associated generalized saddle point system is solved. -`λ` and `μ` are also keyword arguments that can be directly modified for more specific problems. 
GPMR is based on the orthogonal Hessenberg reduction process and its relations with the block-Arnoldi process. The residual norm ‖rₖ‖ is monotonically decreasing in GPMR. @@ -53,38 +58,50 @@ The residual norm ‖rₖ‖ is monotonically decreasing in GPMR. GPMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Full reorthogonalization is available with the `reorthogonalization` option. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `b`: a vector of length m; +* `c`: a vector of length n. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Optional arguments -GPMR can be warm-started from initial guesses `x0` and `y0` with the method +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. - (x, y, stats) = gpmr(A, B, b, c, x0, y0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `memory`: if `restart = true`, the restarted version GPMR(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. 
Additional storage will be allocated if the number of iterations exceeds `memory`; +* `C`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal left preconditioner; +* `D`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal left preconditioner; +* `E`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal right preconditioner; +* `F`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal right preconditioner; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `gsp`: if `true`, set `λ = 1` and `μ = 0` for generalized saddle-point systems; +* `λ` and `μ`: diagonal scaling factors of the partitioned linear system; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+#### Output arguments + +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference -* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://dx.doi.org/10.13140/RG.2.2.24069.68326), Cahier du GERAD G-2021-62, GERAD, Montréal, 2021. +* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://doi.org/10.1137/21M1459265), SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023. """ function gpmr end -function gpmr(A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GpmrSolver(A, b, memory) - gpmr!(solver, A, B, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function gpmr(A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GpmrSolver(A, b, memory) - gpmr!(solver, A, B, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = gpmr!(solver::GpmrSolver, A, B, b, c; kwargs...) solver = gpmr!(solver::GpmrSolver, A, B, b, c, x0, y0; kwargs...) @@ -98,382 +115,436 @@ See [`GpmrSolver`](@ref) for more details about the `solver`. """ function gpmr! end -function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - gpmr!(solver, A, B, b, c; kwargs...) 
- return solver -end +def_args_gpmr = (:(A ), + :(B ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_gpmr = (:(x0 :: AbstractVector), + :(y0 :: AbstractVector)) + +def_kwargs_gpmr = (:(; C = I ), + :(; D = I ), + :(; E = I ), + :(; F = I ), + :(; ldiv::Bool = false ), + :(; gsp::Bool = false ), + :(; λ::FC = one(FC) ), + :(; μ::FC = one(FC) ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_gpmr = mapreduce(extract_parameters, vcat, def_kwargs_gpmr) + +args_gpmr = (:A, :B, :b, :c) +optargs_gpmr = (:x0, :y0) +kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function gpmr($(def_args_gpmr...), $(def_optargs_gpmr...); memory :: Int=20, $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GpmrSolver(A, b, memory) + warm_start!(solver, $(optargs_gpmr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gpmr!(solver, $(args_gpmr...); $(kwargs_gpmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) + end -function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - C=I, D=I, E=I, F=I, atol :: T=√eps(T), rtol :: T=√eps(T), - gsp :: Bool=false, reorthogonalization :: Bool=false, - itmax :: Int=0, λ :: FC=one(FC), μ :: FC=one(FC), - verbose :: Int=0, history::Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - s, t = size(B) - m == t || error("Inconsistent problem size") - s == n || error("Inconsistent problem size") - length(b) == m || 
error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("GPMR: system of %d equations in %d variables\n", m+n, m+n) - - # Check C = E = Iₘ and D = F = Iₙ - CisI = (C === I) - DisI = (D === I) - EisI = (E === I) - FisI = (F === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - eltype(B) == FC || error("eltype(B) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Determine λ and μ associated to generalized saddle point systems. - gsp && (λ = one(FC) ; μ = zero(FC)) - - warm_start = solver.warm_start - warm_start && (λ ≠ 0) && !EisI && error("Warm-start with right preconditioners is not supported.") - warm_start && (μ ≠ 0) && !FisI && error("Warm-start with right preconditioners is not supported.") - - # Set up workspace. - allocate_if(!CisI, solver, :q , S, m) - allocate_if(!DisI, solver, :p , S, n) - allocate_if(!EisI, solver, :wB, S, m) - allocate_if(!FisI, solver, :wA, S, n) - wA, wB, dA, dB, Δx, Δy = solver.wA, solver.wB, solver.dA, solver.dB, solver.Δx, solver.Δy - x, y, V, U, gs, gc = solver.x, solver.y, solver.V, solver.U, solver.gs, solver.gc - zt, R, stats = solver.zt, solver.R, solver.stats - rNorms = stats.residuals - reset!(stats) - b₀ = warm_start ? dA : b - c₀ = warm_start ? dB : c - q = CisI ? dA : solver.q - p = DisI ? dB : solver.p - - # Initial solutions x₀ and y₀. - x .= zero(FC) - y .= zero(FC) - - iter = 0 - itmax == 0 && (itmax = m+n) - - # Initialize workspace. 
- nr = 0 # Number of coefficients stored in Rₖ - mem = length(V) # Memory - ωₖ = zero(FC) # Auxiliary variable to store fₖₖ - for i = 1 : mem - V[i] .= zero(FC) - U[i] .= zero(FC) + function gpmr($(def_args_gpmr...); memory :: Int=20, $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GpmrSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gpmr!(solver, $(args_gpmr...); $(kwargs_gpmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. - gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. - R .= zero(FC) # Upper triangular matrix Rₖ. - zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᵀ(βe₁ + γe₂). - - # Warm-start - # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ - # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ - # Compute C(b - AΔy) - λΔx - warm_start && mul!(b₀, A, Δy) - warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀) - !CisI && mulorldiv!(q, C, b₀, ldiv) - !CisI && (b₀ = q) - warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀) - - # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ - # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₘ - # Compute D(c - BΔx) - μΔy - warm_start && mul!(c₀, B, Δx) - warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀) - !DisI && mulorldiv!(p, D, c₀, ldiv) - !DisI && (c₀ = p) - warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀) - - # Initialize the orthogonal Hessenberg reduction process. - # βv₁ = Cb - β = @knrm2(m, b₀) - β ≠ 0 || error("b must be nonzero") - @. V[1] = b₀ / β - - # γu₁ = Dc - γ = @knrm2(n, c₀) - γ ≠ 0 || error("c must be nonzero") - @. 
U[1] = c₀ / γ - - # Compute ‖r₀‖² = γ² + β² - rNorm = sqrt(γ^2 + β^2) - history && push!(rNorms, rNorm) - ε = atol + rtol * rNorm - - # Initialize t̄₀ - zt[1] = β - zt[2] = γ - - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗") - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - inconsistent = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - - # Update iteration index. - iter = iter + 1 - k = iter - nr₂ₖ₋₁ = nr # Position of the column 2k-1 in Rₖ. - nr₂ₖ = nr + 2k-1 # Position of the column 2k in Rₖ. - - # Update workspace if more storage is required - if iter > mem - for i = 1 : 4k-1 - push!(R, zero(FC)) - end - for i = 1 : 4 - push!(gs, zero(FC)) - push!(gc, zero(T)) - end + + function gpmr!(solver :: GpmrSolver{T,FC,S}, $(def_args_gpmr...); $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + s, t = size(B) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == t || error("Inconsistent problem size") + s == n || error("Inconsistent problem size") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "GPMR: system of %d equations in %d variables\n", m+n, m+n) + + # Check C = E = Iₘ and D = F = Iₙ + CisI = (C === I) + DisI = (D === I) + EisI = (E === I) + FisI = (F === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ eltype(B) == FC || @warn "eltype(B) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Determine λ and μ associated to generalized saddle point systems. + gsp && (λ = one(FC) ; μ = zero(FC)) + + warm_start = solver.warm_start + warm_start && (λ ≠ 0) && !EisI && error("Warm-start with right preconditioners is not supported.") + warm_start && (μ ≠ 0) && !FisI && error("Warm-start with right preconditioners is not supported.") + + # Set up workspace. + allocate_if(!CisI, solver, :q , S, m) + allocate_if(!DisI, solver, :p , S, n) + allocate_if(!EisI, solver, :wB, S, m) + allocate_if(!FisI, solver, :wA, S, n) + wA, wB, dA, dB, Δx, Δy = solver.wA, solver.wB, solver.dA, solver.dB, solver.Δx, solver.Δy + x, y, V, U, gs, gc = solver.x, solver.y, solver.V, solver.U, solver.gs, solver.gc + zt, R, stats = solver.zt, solver.R, solver.stats + rNorms = stats.residuals + reset!(stats) + b₀ = warm_start ? dA : b + c₀ = warm_start ? dB : c + q = CisI ? dA : solver.q + p = DisI ? dB : solver.p + + # Initial solutions x₀ and y₀. + x .= zero(FC) + y .= zero(FC) + + iter = 0 + itmax == 0 && (itmax = m+n) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ + mem = length(V) # Memory + ωₖ = zero(FC) # Auxiliary variable to store fₖₖ + for i = 1 : mem + V[i] .= zero(FC) + U[i] .= zero(FC) end + gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. + gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂). 
+ + # Warm-start + # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ + # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ + # Compute C(b - AΔy) - λΔx + warm_start && mul!(b₀, A, Δy) + warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀) + !CisI && mulorldiv!(q, C, b₀, ldiv) + !CisI && (b₀ = q) + warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀) + + # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ + # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₘ + # Compute D(c - BΔx) - μΔy + warm_start && mul!(c₀, B, Δx) + warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀) + !DisI && mulorldiv!(p, D, c₀, ldiv) + !DisI && (c₀ = p) + warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀) + + # Initialize the orthogonal Hessenberg reduction process. + # βv₁ = Cb + β = @knrm2(m, b₀) + β ≠ 0 || error("b must be nonzero") + @. V[1] = b₀ / β + + # γu₁ = Dc + γ = @knrm2(n, c₀) + γ ≠ 0 || error("c must be nonzero") + @. U[1] = c₀ / γ + + # Compute ‖r₀‖² = γ² + β² + rNorm = sqrt(γ^2 + β^2) + history && push!(rNorms, rNorm) + ε = atol + rtol * rNorm - # Continue the orthogonal Hessenberg reduction process. - # CAFUₖ = VₖHₖ + hₖ₊₁.ₖ * vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Hₖ₊₁.ₖ - # DBEVₖ = UₖFₖ + fₖ₊₁.ₖ * uₖ₊₁(eₖ)ᵀ = Uₖ₊₁Fₖ₊₁.ₖ - wA = FisI ? U[iter] : solver.wA - wB = EisI ? V[iter] : solver.wB - FisI || mulorldiv!(wA, F, U[iter], ldiv) # wA = Fuₖ - EisI || mulorldiv!(wB, E, V[iter], ldiv) # wB = Evₖ - mul!(dA, A, wA) # dA = AFuₖ - mul!(dB, B, wB) # dB = BEvₖ - CisI || mulorldiv!(q, C, dA, ldiv) # q = CAFuₖ - DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ + # Initialize t̄₀ + zt[1] = β + zt[2] = γ - for i = 1 : iter - hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = vᵢAuₖ - fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = uᵢBvₖ - @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ - @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ - R[nr₂ₖ + 2i-1] = hᵢₖ - (i < iter) ? 
R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ - end + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time)) - # Reorthogonalization of the Krylov basis. - if reorthogonalization - for i = 1 : iter - Htmp = @kdot(m, V[i], q) # hₜₘₚ = qᵀvᵢ - Ftmp = @kdot(n, U[i], p) # fₜₘₚ = pᵀuᵢ - @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ - @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ - R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ - (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ - end - end + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂ - Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂ - - # Add regularization terms. - R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ - R[nr₂ₖ + 2k] = μ # S₂ₖ.₂ₖ = μ - - # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] - # [0 u₁ ••• 0 uₖ] - # - # rₖ = [ b ] - [ λI A ] [ xₖ ] = [ b ] - [ λI A ] Wₖzₖ - # [ c ] [ B μI ] [ yₖ ] [ c ] [ B μI ] - # - # block-Arnoldi formulation : [ λI A ] Wₖ = Wₖ₊₁Sₖ₊₁.ₖ - # [ B μI ] - # - # GPMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - βe₁ - γe₂ ‖ - # - # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # - # Apply previous givens reflections when k ≥ 2 - # [ 1 ][ 1 ][ c₂.ᵢ s₂.ᵢ ][ c₁.ᵢ s₁.ᵢ ] [ r̄₂ᵢ₋₁.₂ₖ₋₁ r̄₂ᵢ₋₁.₂ₖ ] [ r₂ᵢ₋₁.₂ₖ₋₁ r₂ᵢ₋₁.₂ₖ ] - # [ c₄.ᵢ s₄.ᵢ ][ c₃.ᵢ s₃.ᵢ ][ s̄₂.ᵢ -c₂.ᵢ ][ 1 ] [ r̄₂ᵢ.₂ₖ₋₁ r̄₂ᵢ.₂ₖ ] = [ r₂ᵢ.₂ₖ₋₁ r₂ᵢ.₂ₖ ] - # [ s̄₄.ᵢ -c₄.ᵢ ][ 1 ][ 1 ][ 1 ] [ ρ hᵢ₊₁.ₖ ] [ r̄₂ᵢ₊₁.₂ₖ₋₁ r̄₂ᵢ₊₁.₂ₖ ] - # [ 1 ][ s̄₃.ᵢ -c₃.ᵢ ][ 1 ][ s̄₁.ᵢ -c₁.ᵢ ] [ fᵢ₊₁.ₖ δ ] [ r̄₂ᵢ₊₂.₂ₖ₋₁ r̄₂ᵢ₊₂.₂ₖ ] - # - # r̄₁.₂ₖ₋₁ = 0, r̄₁.₂ₖ = h₁.ₖ, r̄₂.₂ₖ₋₁ = f₁.ₖ and r̄₂.₂ₖ = 0. - # (ρ, δ) = (λ, μ) if i == k-1, (ρ, δ) = (0, 0) otherwise. - for i = 1 : iter-1 - for nrcol ∈ (nr₂ₖ₋₁, nr₂ₖ) - flag = (i == iter-1 && nrcol == nr₂ₖ₋₁) - αₖ = flag ? 
ωₖ : R[nrcol + 2i+2] - - c₁ᵢ = gc[4i-3] - s₁ᵢ = gs[4i-3] - rtmp = c₁ᵢ * R[nrcol + 2i-1] + s₁ᵢ * αₖ - αₖ = conj(s₁ᵢ) * R[nrcol + 2i-1] - c₁ᵢ * αₖ - R[nrcol + 2i-1] = rtmp - - c₂ᵢ = gc[4i-2] - s₂ᵢ = gs[4i-2] - rtmp = c₂ᵢ * R[nrcol + 2i-1] + s₂ᵢ * R[nrcol + 2i] - R[nrcol + 2i] = conj(s₂ᵢ) * R[nrcol + 2i-1] - c₂ᵢ * R[nrcol + 2i] - R[nrcol + 2i-1] = rtmp - - c₃ᵢ = gc[4i-1] - s₃ᵢ = gs[4i-1] - rtmp = c₃ᵢ * R[nrcol + 2i] + s₃ᵢ * αₖ - αₖ = conj(s₃ᵢ) * R[nrcol + 2i] - c₃ᵢ * αₖ - R[nrcol + 2i] = rtmp - - c₄ᵢ = gc[4i] - s₄ᵢ = gs[4i] - rtmp = c₄ᵢ * R[nrcol + 2i] + s₄ᵢ * R[nrcol + 2i+1] - R[nrcol + 2i+1] = conj(s₄ᵢ) * R[nrcol + 2i] - c₄ᵢ * R[nrcol + 2i+1] - R[nrcol + 2i] = rtmp - - flag ? ωₖ = αₖ : R[nrcol + 2i+2] = αₖ + # Stopping criterion. + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + # Update iteration index. + iter = iter + 1 + k = iter + nr₂ₖ₋₁ = nr # Position of the column 2k-1 in Rₖ. + nr₂ₖ = nr + 2k-1 # Position of the column 2k in Rₖ. 
+ + # Update workspace if more storage is required + if iter > mem + for i = 1 : 4k-1 + push!(R, zero(FC)) + end + for i = 1 : 4 + push!(gs, zero(FC)) + push!(gc, zero(T)) + end end - end - # Compute and apply current givens reflections - # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ r̄₂ₖ₋₁.₂ₖ₋₁ r̄₂ₖ₋₁.₂ₖ ] [ r₂ₖ₋₁.₂ₖ₋₁ r₂ₖ₋₁.₂ₖ ] - # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ r̄₂ₖ.₂ₖ₋₁ r̄₂ₖ.₂ₖ ] = [ r₂ₖ.₂ₖ ] - # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ hₖ₊₁.ₖ ] [ ] - # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ fₖ₊₁.ₖ ] [ ] - (c₁ₖ, s₁ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], Faux) # annihilate fₖ₊₁.ₖ - θₖ = conj(s₁ₖ) * R[nr₂ₖ + 2k-1] - R[nr₂ₖ + 2k-1] = c₁ₖ * R[nr₂ₖ + 2k-1] - - (c₂ₖ, s₂ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], ωₖ) # annihilate ωₖ = r̄₂ₖ.₂ₖ₋₁ - rtmp = c₂ₖ * R[nr₂ₖ + 2k-1] + s₂ₖ * R[nr₂ₖ + 2k] - R[nr₂ₖ + 2k] = conj(s₂ₖ) * R[nr₂ₖ + 2k-1] - c₂ₖ * R[nr₂ₖ + 2k] - R[nr₂ₖ + 2k-1] = rtmp - - (c₃ₖ, s₃ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], θₖ) # annihilate Θₖ = r̄₂ₖ₊₂.₂ₖ - - (c₄ₖ, s₄ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], Haux) # annihilate hₖ₊₁.ₖ - - # Update t̄ₖ = (τ₁, ..., τ₂ₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂). 
- # - # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ τbar₂ₖ₋₁ ] [ τ₂ₖ₋₁ ] - # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ τbar₂ₖ ] = [ τ₂ₖ ] - # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ ] [ τbar₂ₖ₊₁ ] - # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ ] [ τbar₂ₖ₊₂ ] - τbar₂ₖ₊₂ = conj(s₁ₖ) * zt[2k-1] - zt[2k-1] = c₁ₖ * zt[2k-1] - - τtmp = c₂ₖ * zt[2k-1] + s₂ₖ * zt[2k] - zt[2k] = conj(s₂ₖ) * zt[2k-1] - c₂ₖ * zt[2k] - zt[2k-1] = τtmp - - τtmp = c₃ₖ * zt[2k] + s₃ₖ * τbar₂ₖ₊₂ - τbar₂ₖ₊₂ = conj(s₃ₖ) * zt[2k] - c₃ₖ * τbar₂ₖ₊₂ - zt[2k] = τtmp - - τbar₂ₖ₊₁ = conj(s₄ₖ) * zt[2k] - zt[2k] = c₄ₖ * zt[2k] - - # Update gc and gs vectors - gc[4k-3], gc[4k-2], gc[4k-1], gc[4k] = c₁ₖ, c₂ₖ, c₃ₖ, c₄ₖ - gs[4k-3], gs[4k-2], gs[4k-1], gs[4k] = s₁ₖ, s₂ₖ, s₃ₖ, s₄ₖ - - # Compute ‖rₖ‖² = |τbar₂ₖ₊₁|² + |τbar₂ₖ₊₂|² - rNorm = sqrt(abs2(τbar₂ₖ₊₁) + abs2(τbar₂ₖ₊₂)) - history && push!(rNorms, rNorm) + # Continue the orthogonal Hessenberg reduction process. + # CAFUₖ = VₖHₖ + hₖ₊₁.ₖ * vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Hₖ₊₁.ₖ + # DBEVₖ = UₖFₖ + fₖ₊₁.ₖ * uₖ₊₁(eₖ)ᵀ = Uₖ₊₁Fₖ₊₁.ₖ + wA = FisI ? U[iter] : solver.wA + wB = EisI ? V[iter] : solver.wB + FisI || mulorldiv!(wA, F, U[iter], ldiv) # wA = Fuₖ + EisI || mulorldiv!(wB, E, V[iter], ldiv) # wB = Evₖ + mul!(dA, A, wA) # dA = AFuₖ + mul!(dB, B, wB) # dB = BEvₖ + CisI || mulorldiv!(q, C, dA, ldiv) # q = CAFuₖ + DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ - # Update the number of coefficients in Rₖ. - nr = nr + 4k-1 - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + for i = 1 : iter + hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq + fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp + @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ + @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ + R[nr₂ₖ + 2i-1] = hᵢₖ + (i < iter) ? R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ + end - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = Faux ≤ btol && Haux ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux) - - # Compute vₖ₊₁ and uₖ₊₁ - if !(solved || tired || breakdown || user_requested_exit) - if iter ≥ mem - push!(V, S(undef, m)) - push!(U, S(undef, n)) - push!(zt, zero(FC), zero(FC)) + # Reorthogonalization of the Krylov basis. + if reorthogonalization + for i = 1 : iter + Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq + Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp + @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ + @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ + R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ + (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ + end end - # hₖ₊₁.ₖ ≠ 0 - if Haux > btol - @. V[k+1] = q / Haux # hₖ₊₁.ₖvₖ₊₁ = q - else - # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ} - V[k+1] .= zero(FC) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ} + Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂ + Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂ + + # Add regularization terms. + R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ + R[nr₂ₖ + 2k] = μ # S₂ₖ.₂ₖ = μ + + # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] + # [0 u₁ ••• 0 uₖ] + # + # rₖ = [ b ] - [ λI A ] [ xₖ ] = [ b ] - [ λI A ] Wₖzₖ + # [ c ] [ B μI ] [ yₖ ] [ c ] [ B μI ] + # + # block-Arnoldi formulation : [ λI A ] Wₖ = Wₖ₊₁Sₖ₊₁.ₖ + # [ B μI ] + # + # GPMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - βe₁ - γe₂ ‖ + # + # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. 
+ # [ Oᵀ ] + # + # Apply previous givens reflections when k ≥ 2 + # [ 1 ][ 1 ][ c₂.ᵢ s₂.ᵢ ][ c₁.ᵢ s₁.ᵢ ] [ r̄₂ᵢ₋₁.₂ₖ₋₁ r̄₂ᵢ₋₁.₂ₖ ] [ r₂ᵢ₋₁.₂ₖ₋₁ r₂ᵢ₋₁.₂ₖ ] + # [ c₄.ᵢ s₄.ᵢ ][ c₃.ᵢ s₃.ᵢ ][ s̄₂.ᵢ -c₂.ᵢ ][ 1 ] [ r̄₂ᵢ.₂ₖ₋₁ r̄₂ᵢ.₂ₖ ] = [ r₂ᵢ.₂ₖ₋₁ r₂ᵢ.₂ₖ ] + # [ s̄₄.ᵢ -c₄.ᵢ ][ 1 ][ 1 ][ 1 ] [ ρ hᵢ₊₁.ₖ ] [ r̄₂ᵢ₊₁.₂ₖ₋₁ r̄₂ᵢ₊₁.₂ₖ ] + # [ 1 ][ s̄₃.ᵢ -c₃.ᵢ ][ 1 ][ s̄₁.ᵢ -c₁.ᵢ ] [ fᵢ₊₁.ₖ δ ] [ r̄₂ᵢ₊₂.₂ₖ₋₁ r̄₂ᵢ₊₂.₂ₖ ] + # + # r̄₁.₂ₖ₋₁ = 0, r̄₁.₂ₖ = h₁.ₖ, r̄₂.₂ₖ₋₁ = f₁.ₖ and r̄₂.₂ₖ = 0. + # (ρ, δ) = (λ, μ) if i == k-1, (ρ, δ) = (0, 0) otherwise. + for i = 1 : iter-1 + for nrcol ∈ (nr₂ₖ₋₁, nr₂ₖ) + flag = (i == iter-1 && nrcol == nr₂ₖ₋₁) + αₖ = flag ? ωₖ : R[nrcol + 2i+2] + + c₁ᵢ = gc[4i-3] + s₁ᵢ = gs[4i-3] + rtmp = c₁ᵢ * R[nrcol + 2i-1] + s₁ᵢ * αₖ + αₖ = conj(s₁ᵢ) * R[nrcol + 2i-1] - c₁ᵢ * αₖ + R[nrcol + 2i-1] = rtmp + + c₂ᵢ = gc[4i-2] + s₂ᵢ = gs[4i-2] + rtmp = c₂ᵢ * R[nrcol + 2i-1] + s₂ᵢ * R[nrcol + 2i] + R[nrcol + 2i] = conj(s₂ᵢ) * R[nrcol + 2i-1] - c₂ᵢ * R[nrcol + 2i] + R[nrcol + 2i-1] = rtmp + + c₃ᵢ = gc[4i-1] + s₃ᵢ = gs[4i-1] + rtmp = c₃ᵢ * R[nrcol + 2i] + s₃ᵢ * αₖ + αₖ = conj(s₃ᵢ) * R[nrcol + 2i] - c₃ᵢ * αₖ + R[nrcol + 2i] = rtmp + + c₄ᵢ = gc[4i] + s₄ᵢ = gs[4i] + rtmp = c₄ᵢ * R[nrcol + 2i] + s₄ᵢ * R[nrcol + 2i+1] + R[nrcol + 2i+1] = conj(s₄ᵢ) * R[nrcol + 2i] - c₄ᵢ * R[nrcol + 2i+1] + R[nrcol + 2i] = rtmp + + flag ? ωₖ = αₖ : R[nrcol + 2i+2] = αₖ + end end - # fₖ₊₁.ₖ ≠ 0 - if Faux > btol - @. 
U[k+1] = p / Faux # fₖ₊₁.ₖuₖ₊₁ = p + # Compute and apply current givens reflections + # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ r̄₂ₖ₋₁.₂ₖ₋₁ r̄₂ₖ₋₁.₂ₖ ] [ r₂ₖ₋₁.₂ₖ₋₁ r₂ₖ₋₁.₂ₖ ] + # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ r̄₂ₖ.₂ₖ₋₁ r̄₂ₖ.₂ₖ ] = [ r₂ₖ.₂ₖ ] + # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ hₖ₊₁.ₖ ] [ ] + # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ fₖ₊₁.ₖ ] [ ] + (c₁ₖ, s₁ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], Faux) # annihilate fₖ₊₁.ₖ + θₖ = conj(s₁ₖ) * R[nr₂ₖ + 2k-1] + R[nr₂ₖ + 2k-1] = c₁ₖ * R[nr₂ₖ + 2k-1] + + (c₂ₖ, s₂ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], ωₖ) # annihilate ωₖ = r̄₂ₖ.₂ₖ₋₁ + rtmp = c₂ₖ * R[nr₂ₖ + 2k-1] + s₂ₖ * R[nr₂ₖ + 2k] + R[nr₂ₖ + 2k] = conj(s₂ₖ) * R[nr₂ₖ + 2k-1] - c₂ₖ * R[nr₂ₖ + 2k] + R[nr₂ₖ + 2k-1] = rtmp + + (c₃ₖ, s₃ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], θₖ) # annihilate Θₖ = r̄₂ₖ₊₂.₂ₖ + + (c₄ₖ, s₄ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], Haux) # annihilate hₖ₊₁.ₖ + + # Update t̄ₖ = (τ₁, ..., τ₂ₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂). + # + # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ τbar₂ₖ₋₁ ] [ τ₂ₖ₋₁ ] + # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ τbar₂ₖ ] = [ τ₂ₖ ] + # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ ] [ τbar₂ₖ₊₁ ] + # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ ] [ τbar₂ₖ₊₂ ] + τbar₂ₖ₊₂ = conj(s₁ₖ) * zt[2k-1] + zt[2k-1] = c₁ₖ * zt[2k-1] + + τtmp = c₂ₖ * zt[2k-1] + s₂ₖ * zt[2k] + zt[2k] = conj(s₂ₖ) * zt[2k-1] - c₂ₖ * zt[2k] + zt[2k-1] = τtmp + + τtmp = c₃ₖ * zt[2k] + s₃ₖ * τbar₂ₖ₊₂ + τbar₂ₖ₊₂ = conj(s₃ₖ) * zt[2k] - c₃ₖ * τbar₂ₖ₊₂ + zt[2k] = τtmp + + τbar₂ₖ₊₁ = conj(s₄ₖ) * zt[2k] + zt[2k] = c₄ₖ * zt[2k] + + # Update gc and gs vectors + gc[4k-3], gc[4k-2], gc[4k-1], gc[4k] = c₁ₖ, c₂ₖ, c₃ₖ, c₄ₖ + gs[4k-3], gs[4k-2], gs[4k-1], gs[4k] = s₁ₖ, s₂ₖ, s₃ₖ, s₄ₖ + + # Compute ‖rₖ‖² = |τbar₂ₖ₊₁|² + |τbar₂ₖ₊₂|² + rNorm = sqrt(abs2(τbar₂ₖ₊₁) + abs2(τbar₂ₖ₊₂)) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ. + nr = nr + 4k-1 + + # Stopping conditions that do not depend on user input. 
+ # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Faux ≤ btol && Haux ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, Haux, Faux, ktimer(start_time)) + + # Compute vₖ₊₁ and uₖ₊₁ + if !(solved || tired || breakdown || user_requested_exit || overtimed) + if iter ≥ mem + push!(V, S(undef, m)) + push!(U, S(undef, n)) + push!(zt, zero(FC), zero(FC)) + end + + # hₖ₊₁.ₖ ≠ 0 + if Haux > btol + @. V[k+1] = q / Haux # hₖ₊₁.ₖvₖ₊₁ = q + else + # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ} + V[k+1] .= zero(FC) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ} + end + + # fₖ₊₁.ₖ ≠ 0 + if Faux > btol + @. U[k+1] = p / Faux # fₖ₊₁.ₖuₖ₊₁ = p + else + # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ} + U[k+1] .= zero(FC) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ} + end + + zt[2k+1] = τbar₂ₖ₊₁ + zt[2k+2] = τbar₂ₖ₊₂ + end + end + (verbose > 0) && @printf(iostream, "\n") + + # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution. 
+ for i = 2iter : -1 : 1 + pos = nr + i - 2iter # position of rᵢ.ₖ + for j = 2iter : -1 : i+1 + zt[i] = zt[i] - R[pos] * zt[j] # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + zt[i] = zero(FC) + inconsistent = true else - # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ} - U[k+1] .= zero(FC) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ} + zt[i] = zt[i] / R[pos] # ζᵢ ← ζᵢ / rᵢ.ᵢ end + end - zt[2k+1] = τbar₂ₖ₊₁ - zt[2k+2] = τbar₂ₖ₊₂ + # Compute xₖ and yₖ + for i = 1 : iter + @kaxpy!(m, zt[2i-1], V[i], x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ + @kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ end - end - (verbose > 0) && @printf("\n") - - # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution. - for i = 2iter : -1 : 1 - pos = nr + i - 2iter # position of rᵢ.ₖ - for j = 2iter : -1 : i+1 - zt[i] = zt[i] - R[pos] * zt[j] # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + if !EisI + wB .= x + mulorldiv!(x, E, wB, ldiv) end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ btol - zt[i] = zero(FC) - inconsistent = true - else - zt[i] = zt[i] / R[pos] # ζᵢ ← ζᵢ / rᵢ.ᵢ + if !FisI + wA .= y + mulorldiv!(y, F, wA, ldiv) end + warm_start && @kaxpy!(m, one(FC), Δx, x) + warm_start && @kaxpy!(n, one(FC), Δy, y) + solver.warm_start = false + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - # Compute xₖ and yₖ - for i = 1 : iter - @kaxpy!(m, zt[2i-1], V[i], 
x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ - @kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ - end - if !EisI - wB .= x - mulorldiv!(x, E, wB, ldiv) - end - if !FisI - wA .= y - mulorldiv!(y, F, wA, ldiv) - end - warm_start && @kaxpy!(m, one(FC), Δx, x) - warm_start && @kaxpy!(n, one(FC), Δy, y) - solver.warm_start = false - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - inconsistent && (status = "found approximate least-squares solution") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl new file mode 100644 index 000000000..5c9cad24d --- /dev/null +++ b/src/krylov_processes.jl @@ -0,0 +1,439 @@ +export hermitian_lanczos, nonhermitian_lanczos, arnoldi, golub_kahan, saunders_simon_yip, montoison_orban + +""" + V, T = hermitian_lanczos(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `k`: the number of iterations of the Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 225--280, 1950. 
+""" +function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + R = real(FC) + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval = zeros(R, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval) + + pαᵢ = 1 # Position of αᵢ in the vector `nzval` + for i = 1:k + vᵢ = view(V,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + if i == 1 + βᵢ = @knrm2(n, b) + vᵢ .= b ./ βᵢ + end + mul!(q, A, vᵢ) + αᵢ = @kdotr(n, vᵢ, q) + nzval[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + @kaxpy!(n, -αᵢ, vᵢ, q) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + βᵢ = nzval[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + nzval[pαᵢ-1] = βᵢ # Tᵢ₋₁.ᵢ = βᵢ + @kaxpy!(n, -βᵢ, vᵢ₋₁, q) + end + βᵢ₊₁ = @knrm2(n, q) + nzval[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + vᵢ₊₁ .= q ./ βᵢ₊₁ + pαᵢ = pαᵢ + 3 + end + return V, T +end + +""" + V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `b`: a vector of length n; +* `c`: a vector of length n; +* `k`: the number of iterations of the non-Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix; +* `U`: a dense n × (k+1) matrix; +* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 225--280, 1950. 
+""" +function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval_T = zeros(FC, 3k-1) + nzval_Tᴴ = zeros(FC, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, n, k+1) + U = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ) + + pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ` + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + cᴴb = @kdot(n, c, b) + βᵢ = √(abs(cᴴb)) + γᵢ = cᴴb / βᵢ + vᵢ .= b ./ βᵢ + uᵢ .= c ./ conj(γᵢ) + end + mul!(q, A , vᵢ) + mul!(p, Aᴴ, uᵢ) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + uᵢ₋₁ = view(U,:,i-1) + βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ + @kaxpy!(n, - γᵢ , vᵢ₋₁, q) + @kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p) + end + αᵢ = @kdot(n, uᵢ, q) + nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ + @kaxpy!(m, - αᵢ , vᵢ, q) + @kaxpy!(n, -conj(αᵢ), uᵢ, p) + pᴴq = @kdot(n, p, q) + βᵢ₊₁ = √(abs(pᴴq)) + γᵢ₊₁ = pᴴq / βᵢ₊₁ + vᵢ₊₁ .= q ./ βᵢ₊₁ + uᵢ₊₁ .= p ./ conj(γᵢ₊₁) + nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + nzval_Tᴴ[pαᵢ+1] = conj(γᵢ₊₁) # Tᴴᵢ₊₁.ᵢ = γ̄ᵢ₊₁ + if i ≤ k-1 + nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁ + nzval_Tᴴ[pαᵢ+2] = conj(βᵢ₊₁) # Tᴴᵢ.ᵢ₊₁ = β̄ᵢ₊₁ + end + pαᵢ = pαᵢ + 3 + end + return V, T, U, Tᴴ +end + +""" + V, H = arnoldi(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `b`: a vector of length n; +* `k`: the number of iterations of the Arnoldi process. 
+ +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `H`: a dense (k+1) × k upper Hessenberg matrix. + +#### Reference + +* W. E. Arnoldi, [*The principle of minimized iterations in the solution of the matrix eigenvalue problem*](https://doi.org/10.1090/qam/42792), Quarterly of Applied Mathematics, 9, pp. 17--29, 1951. +""" +function arnoldi(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + S = ktypeof(b) + M = vector_to_matrix(S) + + V = M(undef, n, k+1) + H = zeros(FC, k+1, k) + + for i = 1:k + vᵢ = view(V,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + if i == 1 + β = @knrm2(n, b) + vᵢ .= b ./ β + end + mul!(q, A, vᵢ) + for j = 1:i + vⱼ = view(V,:,j) + H[j,i] = @kdot(n, vⱼ, q) + @kaxpy!(n, -H[j,i], vⱼ, q) + end + H[i+1,i] = @knrm2(n, q) + vᵢ₊₁ .= q ./ H[i+1,i] + end + return V, H +end + +""" + V, U, L = golub_kahan(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `k`: the number of iterations of the Golub-Kahan process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `U`: a dense m × (k+1) matrix; +* `L`: a sparse (k+1) × (k+1) lower bidiagonal matrix. + +#### References + +* G. H. Golub and W. Kahan, [*Calculating the Singular Values and Pseudo-Inverse of a Matrix*](https://doi.org/10.1137/0702016), SIAM Journal on Numerical Analysis, 2(2), pp. 225--224, 1965. +* C. C. Paige, [*Bidiagonalization of Matrices and Solution of Linear Equations*](https://doi.org/10.1137/0711019), SIAM Journal on Numerical Analysis, 11(1), pp. 197--209, 1974. 
+""" +function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + R = real(FC) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+2) + rowval = zeros(Int, 2k+1) + nzval = zeros(R, 2k+1) + + colptr[1] = 1 + for i = 1:k + pos = colptr[i] + colptr[i+1] = pos+2 + rowval[pos] = i + rowval[pos+1] = i+1 + end + rowval[2k+1] = k+1 + colptr[k+2] = 2k+2 + + V = M(undef, n, k+1) + U = M(undef, m, k+1) + L = SparseMatrixCSC(k+1, k+1, colptr, rowval, nzval) + + pαᵢ = 1 # Position of αᵢ in the vector `nzval` + for i = 1:k + uᵢ = view(U,:,i) + vᵢ = view(V,:,i) + uᵢ₊₁ = q = view(U,:,i+1) + vᵢ₊₁ = p = view(V,:,i+1) + if i == 1 + wᵢ = vᵢ + βᵢ = @knrm2(m, b) + uᵢ .= b ./ βᵢ + mul!(wᵢ, Aᴴ, uᵢ) + αᵢ = @knrm2(n, wᵢ) + nzval[pαᵢ] = αᵢ # Lᵢ.ᵢ = αᵢ + vᵢ .= wᵢ ./ αᵢ + end + mul!(q, A, vᵢ) + αᵢ = nzval[pαᵢ] # αᵢ = Lᵢ.ᵢ + @kaxpy!(m, -αᵢ, uᵢ, q) + βᵢ₊₁ = @knrm2(m, q) + uᵢ₊₁ .= q ./ βᵢ₊₁ + mul!(p, Aᴴ, uᵢ₊₁) + @kaxpy!(n, -βᵢ₊₁, vᵢ, p) + αᵢ₊₁ = @knrm2(n, p) + vᵢ₊₁ .= p ./ αᵢ₊₁ + nzval[pαᵢ+1] = βᵢ₊₁ # Lᵢ₊₁.ᵢ = βᵢ₊₁ + nzval[pαᵢ+2] = αᵢ₊₁ # Lᵢ₊₁.ᵢ₊₁ = αᵢ₊₁ + pαᵢ = pαᵢ + 2 + end + return V, U, L +end + +""" + V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n; +* `k`: the number of iterations of the Saunders-Simon-Yip process. + +#### Output arguments + +* `V`: a dense m × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix; +* `U`: a dense n × (k+1) matrix; +* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* M. A. Saunders, H. D. Simon, and E. L. Yip, [*Two Conjugate-Gradient-Type Methods for Unsymmetric Linear Equations*](https://doi.org/10.1137/0725052), SIAM Journal on Numerical Analysis, 25(4), pp. 927--940, 1988. 
+""" +function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval_T = zeros(FC, 3k-1) + nzval_Tᴴ = zeros(FC, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, m, k+1) + U = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ) + + pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ` + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + β = @knrm2(m, b) + γ = @knrm2(n, c) + vᵢ .= b ./ β + uᵢ .= c ./ γ + end + mul!(q, A , uᵢ) + mul!(p, Aᴴ, vᵢ) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + uᵢ₋₁ = view(U,:,i-1) + βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ + @kaxpy!(m, -γᵢ, vᵢ₋₁, q) + @kaxpy!(n, -βᵢ, uᵢ₋₁, p) + end + αᵢ = @kdot(m, vᵢ, q) + nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ + @kaxpy!(m, - αᵢ , vᵢ, q) + @kaxpy!(n, -conj(αᵢ), uᵢ, p) + βᵢ₊₁ = @knrm2(m, q) + γᵢ₊₁ = @knrm2(n, p) + vᵢ₊₁ .= q ./ βᵢ₊₁ + uᵢ₊₁ .= p ./ γᵢ₊₁ + nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + nzval_Tᴴ[pαᵢ+1] = γᵢ₊₁ # Tᴴᵢ₊₁.ᵢ = γᵢ₊₁ + if i ≤ k-1 + nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁ + nzval_Tᴴ[pαᵢ+2] = βᵢ₊₁ # Tᴴᵢ.ᵢ₊₁ = βᵢ₊₁ + end + pαᵢ = pαᵢ + 3 + end + return V, T, U, Tᴴ +end + +""" + V, H, U, F = montoison_orban(A, B, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `b`: a vector of length m; +* `c`: a vector of length n; +* `k`: the number of iterations of the Montoison-Orban process. 
+ +#### Output arguments + +* `V`: a dense m × (k+1) matrix; +* `H`: a dense (k+1) × k upper Hessenberg matrix; +* `U`: a dense n × (k+1) matrix; +* `F`: a dense (k+1) × k upper Hessenberg matrix. + +#### Reference + +* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://doi.org/10.1137/21M1459265), SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023. +""" +function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + S = ktypeof(b) + M = vector_to_matrix(S) + + V = M(undef, m, k+1) + U = M(undef, n, k+1) + H = zeros(FC, k+1, k) + F = zeros(FC, k+1, k) + + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + β = @knrm2(m, b) + γ = @knrm2(n, c) + vᵢ .= b ./ β + uᵢ .= c ./ γ + end + mul!(q, A, uᵢ) + mul!(p, B, vᵢ) + for j = 1:i + vⱼ = view(V,:,j) + uⱼ = view(U,:,j) + H[j,i] = @kdot(m, vⱼ, q) + @kaxpy!(n, -H[j,i], vⱼ, q) + F[j,i] = @kdot(n, uⱼ, p) + @kaxpy!(m, -F[j,i], uⱼ, p) + end + H[i+1,i] = @knrm2(m, q) + vᵢ₊₁ .= q ./ H[i+1,i] + F[i+1,i] = @knrm2(n, p) + uᵢ₊₁ .= p ./ F[i+1,i] + end + return V, H, U, F +end diff --git a/src/krylov_solve.jl b/src/krylov_solve.jl new file mode 100644 index 000000000..30a463dfa --- /dev/null +++ b/src/krylov_solve.jl @@ -0,0 +1,60 @@ +""" + solve!(solver, args...; kwargs...) + +Use the in-place Krylov method associated to `solver`. +""" +function solve! end + +for (KS, fun, args, def_args, optargs, def_optargs, kwargs, def_kwargs) in [ + (:LsmrSolver , :lsmr! , args_lsmr , def_args_lsmr , () , () , kwargs_lsmr , def_kwargs_lsmr ) + (:CgsSolver , :cgs! , args_cgs , def_args_cgs , optargs_cgs , def_optargs_cgs , kwargs_cgs , def_kwargs_cgs ) + (:UsymlqSolver , :usymlq! , args_usymlq , def_args_usymlq , optargs_usymlq , def_optargs_usymlq , kwargs_usymlq , def_kwargs_usymlq ) + (:LnlqSolver , :lnlq! 
, args_lnlq , def_args_lnlq , () , () , kwargs_lnlq , def_kwargs_lnlq ) + (:BicgstabSolver , :bicgstab! , args_bicgstab , def_args_bicgstab , optargs_bicgstab , def_optargs_bicgstab , kwargs_bicgstab , def_kwargs_bicgstab ) + (:CrlsSolver , :crls! , args_crls , def_args_crls , () , () , kwargs_crls , def_kwargs_crls ) + (:LsqrSolver , :lsqr! , args_lsqr , def_args_lsqr , () , () , kwargs_lsqr , def_kwargs_lsqr ) + (:MinresSolver , :minres! , args_minres , def_args_minres , optargs_minres , def_optargs_minres , kwargs_minres , def_kwargs_minres ) + (:CgneSolver , :cgne! , args_cgne , def_args_cgne , () , () , kwargs_cgne , def_kwargs_cgne ) + (:DqgmresSolver , :dqgmres! , args_dqgmres , def_args_dqgmres , optargs_dqgmres , def_optargs_dqgmres , kwargs_dqgmres , def_kwargs_dqgmres ) + (:SymmlqSolver , :symmlq! , args_symmlq , def_args_symmlq , optargs_symmlq , def_optargs_symmlq , kwargs_symmlq , def_kwargs_symmlq ) + (:TrimrSolver , :trimr! , args_trimr , def_args_trimr , optargs_trimr , def_optargs_trimr , kwargs_trimr , def_kwargs_trimr ) + (:UsymqrSolver , :usymqr! , args_usymqr , def_args_usymqr , optargs_usymqr , def_optargs_usymqr , kwargs_usymqr , def_kwargs_usymqr ) + (:BilqrSolver , :bilqr! , args_bilqr , def_args_bilqr , optargs_bilqr , def_optargs_bilqr , kwargs_bilqr , def_kwargs_bilqr ) + (:CrSolver , :cr! , args_cr , def_args_cr , optargs_cr , def_optargs_cr , kwargs_cr , def_kwargs_cr ) + (:CraigmrSolver , :craigmr! , args_craigmr , def_args_craigmr , () , () , kwargs_craigmr , def_kwargs_craigmr ) + (:TricgSolver , :tricg! , args_tricg , def_args_tricg , optargs_tricg , def_optargs_tricg , kwargs_tricg , def_kwargs_tricg ) + (:CraigSolver , :craig! , args_craig , def_args_craig , () , () , kwargs_craig , def_kwargs_craig ) + (:DiomSolver , :diom! , args_diom , def_args_diom , optargs_diom , def_optargs_diom , kwargs_diom , def_kwargs_diom ) + (:LslqSolver , :lslq! 
, args_lslq , def_args_lslq , () , () , kwargs_lslq , def_kwargs_lslq ) + (:TrilqrSolver , :trilqr! , args_trilqr , def_args_trilqr , optargs_trilqr , def_optargs_trilqr , kwargs_trilqr , def_kwargs_trilqr ) + (:CrmrSolver , :crmr! , args_crmr , def_args_crmr , () , () , kwargs_crmr , def_kwargs_crmr ) + (:CgSolver , :cg! , args_cg , def_args_cg , optargs_cg , def_optargs_cg , kwargs_cg , def_kwargs_cg ) + (:CgLanczosShiftSolver, :cg_lanczos_shift!, args_cg_lanczos_shift, def_args_cg_lanczos_shift, () , () , kwargs_cg_lanczos_shift, def_kwargs_cg_lanczos_shift) + (:CglsSolver , :cgls! , args_cgls , def_args_cgls , () , () , kwargs_cgls , def_kwargs_cgls ) + (:CgLanczosSolver , :cg_lanczos! , args_cg_lanczos , def_args_cg_lanczos , optargs_cg_lanczos, def_optargs_cg_lanczos, kwargs_cg_lanczos , def_kwargs_cg_lanczos ) + (:BilqSolver , :bilq! , args_bilq , def_args_bilq , optargs_bilq , def_optargs_bilq , kwargs_bilq , def_kwargs_bilq ) + (:MinresQlpSolver , :minres_qlp! , args_minres_qlp , def_args_minres_qlp , optargs_minres_qlp, def_optargs_minres_qlp, kwargs_minres_qlp , def_kwargs_minres_qlp ) + (:QmrSolver , :qmr! , args_qmr , def_args_qmr , optargs_qmr , def_optargs_qmr , kwargs_qmr , def_kwargs_qmr ) + (:GmresSolver , :gmres! , args_gmres , def_args_gmres , optargs_gmres , def_optargs_gmres , kwargs_gmres , def_kwargs_gmres ) + (:FgmresSolver , :fgmres! , args_fgmres , def_args_fgmres , optargs_fgmres , def_optargs_fgmres , kwargs_fgmres , def_kwargs_fgmres ) + (:FomSolver , :fom! , args_fom , def_args_fom , optargs_fom , def_optargs_fom , kwargs_fom , def_kwargs_fom ) + (:GpmrSolver , :gpmr! 
, args_gpmr , def_args_gpmr , optargs_gpmr , def_optargs_gpmr , kwargs_gpmr , def_kwargs_gpmr ) +] + @eval begin + solve!(solver :: $KS{T,FC,S}, $(def_args...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} = $(fun)(solver, $(args...); $(kwargs...)) + + if !isempty($optargs) + function $(fun)(solver :: $KS{T,FC,S}, $(def_args...), $(def_optargs...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + start_time = time_ns() + warm_start!(solver, $(optargs...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + $(fun)(solver, $(args...); $(kwargs...)) + solver.stats.timer += elapsed_time + return solver + end + + solve!(solver :: $KS{T,FC,S}, $(def_args...), $(def_optargs...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} = $(fun)(solver, $(args...), $(optargs...); $(kwargs...)) + end + end +end diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl index 8a109a2be..0e905e807 100644 --- a/src/krylov_solvers.jl +++ b/src/krylov_solvers.jl @@ -3,11 +3,13 @@ CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver, UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver, BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver, LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver, -GmresSolver, FomSolver, GpmrSolver +GmresSolver, FomSolver, GpmrSolver, FgmresSolver export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual, niterations, Aprod, Atprod, Bprod, warm_start! 
+import Base.size, Base.sizeof, Base.format_bytes + const KRYLOV_SOLVERS = Dict( :cg => :CgSolver , :cr => :CrSolver , @@ -20,6 +22,7 @@ const KRYLOV_SOLVERS = Dict( :fom => :FomSolver , :dqgmres => :DqgmresSolver , :gmres => :GmresSolver , + :fgmres => :FgmresSolver , :gpmr => :GpmrSolver , :usymlq => :UsymlqSolver , :usymqr => :UsymqrSolver , @@ -51,12 +54,14 @@ Type for storing the vectors required by the in-place version of MINRES. The outer constructors - solver = MinresSolver(n, m, S; window :: Int=5) + solver = MinresSolver(m, n, S; window :: Int=5) solver = MinresSolver(A, b; window :: Int=5) may be used in order to create these vectors. """ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r1 :: S @@ -68,29 +73,29 @@ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S} err_vec :: Vector{T} warm_start :: Bool stats :: SimpleStats{T} +end - function MinresSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r1 = S(undef, n) - r2 = S(undef, n) - w1 = S(undef, n) - w2 = S(undef, n) - y = S(undef, n) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats) - return solver - end +function MinresSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r1 = S(undef, n) + r2 = S(undef, n) + w1 = S(undef, n) + w2 = S(undef, n) + y = S(undef, n) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = MinresSolver{T,FC,S}(m, n, Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats) + return solver +end - function MinresSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - MinresSolver(n, m, S, window=window) - end +function MinresSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + MinresSolver(m, 
n, S; window) end """ @@ -98,12 +103,14 @@ Type for storing the vectors required by the in-place version of CG. The outer constructors - solver = CgSolver(n, m, S) + solver = CgSolver(m, n, S) solver = CgSolver(A, b) may be used in order to create these vectors. """ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -112,26 +119,26 @@ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S} z :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CgSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - Ap = S(undef, n) - z = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, Ap, z, false, stats) - return solver - end +function CgSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + Ap = S(undef, n) + z = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CgSolver{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats) + return solver +end - function CgSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgSolver(n, m, S) - end +function CgSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgSolver(m, n, S) end """ @@ -139,12 +146,14 @@ Type for storing the vectors required by the in-place version of CR. The outer constructors - solver = CrSolver(n, m, S) + solver = CrSolver(m, n, S) solver = CrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -154,27 +163,27 @@ mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S} Mq :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - q = S(undef, n) - Ar = S(undef, n) - Mq = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats) - return solver - end +function CrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + q = S(undef, n) + Ar = S(undef, n) + Mq = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CrSolver{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats) + return solver +end - function CrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrSolver(n, m, S) - end +function CrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrSolver(m, n, S) end """ @@ -182,12 +191,14 @@ Type for storing the vectors required by the in-place version of SYMMLQ. The outer constructors - solver = SymmlqSolver(n, m, S) + solver = SymmlqSolver(m, n, S) solver = SymmlqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S Mvold :: S @@ -200,30 +211,30 @@ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} sprod :: Vector{T} warm_start :: Bool stats :: SymmlqStats{T} +end - function SymmlqSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - Mvold = S(undef, n) - Mv = S(undef, n) - Mv_next = S(undef, n) - w̅ = S(undef, n) - v = S(undef, 0) - clist = zeros(T, window) - zlist = zeros(T, window) - sprod = ones(T, window) - stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats) - return solver - end +function SymmlqSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + Mvold = S(undef, n) + Mv = S(undef, n) + Mv_next = S(undef, n) + w̅ = S(undef, n) + v = S(undef, 0) + clist = zeros(T, window) + zlist = zeros(T, window) + sprod = ones(T, window) + stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), 0.0, "unknown") + solver = SymmlqSolver{T,FC,S}(m, n, Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats) + return solver +end - function SymmlqSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - SymmlqSolver(n, m, S, window=window) - end +function SymmlqSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + SymmlqSolver(m, n, S; window) end """ @@ -231,12 +242,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS. The outer constructors - solver = CgLanczosSolver(n, m, S) + solver = CgLanczosSolver(m, n, S) solver = CgLanczosSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S Mv :: S @@ -246,27 +259,27 @@ mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S warm_start :: Bool stats :: LanczosStats{T} +end - function CgLanczosSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - Mv = S(undef, n) - Mv_prev = S(undef, n) - p = S(undef, n) - Mv_next = S(undef, n) - v = S(undef, 0) - stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats) - return solver - end +function CgLanczosSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + Mv = S(undef, n) + Mv_prev = S(undef, n) + p = S(undef, n) + Mv_next = S(undef, n) + v = S(undef, 0) + stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), 0.0, "unknown") + solver = CgLanczosSolver{T,FC,S}(m, n, Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats) + return solver +end - function CgLanczosSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgLanczosSolver(n, m, S) - end +function CgLanczosSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgLanczosSolver(m, n, S) end """ @@ -274,12 +287,15 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIF The outer constructors - solver = CgLanczosShiftSolver(n, m, nshifts, S) + solver = CgLanczosShiftSolver(m, n, nshifts, S) solver = CgLanczosShiftSolver(A, b, nshifts) may be used in order to create these vectors. 
""" mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int + nshifts :: Int Mv :: S Mv_prev :: S Mv_next :: S @@ -294,34 +310,34 @@ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} converged :: BitVector not_cv :: BitVector stats :: LanczosShiftStats{T} +end - function CgLanczosShiftSolver(n, m, nshifts, S) - FC = eltype(S) - T = real(FC) - Mv = S(undef, n) - Mv_prev = S(undef, n) - Mv_next = S(undef, n) - v = S(undef, 0) - x = [S(undef, n) for i = 1 : nshifts] - p = [S(undef, n) for i = 1 : nshifts] - σ = Vector{T}(undef, nshifts) - δhat = Vector{T}(undef, nshifts) - ω = Vector{T}(undef, nshifts) - γ = Vector{T}(undef, nshifts) - rNorms = Vector{T}(undef, nshifts) - indefinite = BitVector(undef, nshifts) - converged = BitVector(undef, nshifts) - not_cv = BitVector(undef, nshifts) - stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) - return solver - end +function CgLanczosShiftSolver(m, n, nshifts, S) + FC = eltype(S) + T = real(FC) + Mv = S(undef, n) + Mv_prev = S(undef, n) + Mv_next = S(undef, n) + v = S(undef, 0) + x = S[S(undef, n) for i = 1 : nshifts] + p = S[S(undef, n) for i = 1 : nshifts] + σ = Vector{T}(undef, nshifts) + δhat = Vector{T}(undef, nshifts) + ω = Vector{T}(undef, nshifts) + γ = Vector{T}(undef, nshifts) + rNorms = Vector{T}(undef, nshifts) + indefinite = BitVector(undef, nshifts) + converged = BitVector(undef, nshifts) + not_cv = BitVector(undef, nshifts) + stats = LanczosShiftStats(0, false, Vector{T}[T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), 0.0, "unknown") + solver = CgLanczosShiftSolver{T,FC,S}(m, n, nshifts, Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) + return solver +end - function CgLanczosShiftSolver(A, b, nshifts) - n, m = size(A) - S = ktypeof(b) - CgLanczosShiftSolver(n, m, 
nshifts, S) - end +function CgLanczosShiftSolver(A, b, nshifts) + m, n = size(A) + S = ktypeof(b) + CgLanczosShiftSolver(m, n, nshifts, S) end """ @@ -329,12 +345,14 @@ Type for storing the vectors required by the in-place version of MINRES-QLP. The outer constructors - solver = MinresQlpSolver(n, m, S) + solver = MinresQlpSolver(m, n, S) solver = MinresQlpSolver(A, b) may be used in order to create these vectors. """ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S wₖ₋₁ :: S wₖ :: S @@ -345,28 +363,28 @@ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function MinresQlpSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - wₖ₋₁ = S(undef, n) - wₖ = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - x = S(undef, n) - p = S(undef, n) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) - return solver - end +function MinresQlpSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + wₖ₋₁ = S(undef, n) + wₖ = S(undef, n) + M⁻¹vₖ₋₁ = S(undef, n) + M⁻¹vₖ = S(undef, n) + x = S(undef, n) + p = S(undef, n) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = MinresQlpSolver{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) + return solver +end - function MinresQlpSolver(A, b) - n, m = size(A) - S = ktypeof(b) - MinresQlpSolver(n, m, S) - end +function MinresQlpSolver(A, b) + m, n = size(A) + S = ktypeof(b) + MinresQlpSolver(m, n, S) end """ @@ -374,13 +392,15 @@ Type for storing the vectors required by the in-place version of DQGMRES. The outer constructors - solver = DqgmresSolver(n, m, memory, S) + solver = DqgmresSolver(m, n, memory, S) solver = DqgmresSolver(A, b, memory = 20) may be used in order to create these vectors. 
`memory` is set to `n` if the value given is larger than `n`. """ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S t :: S @@ -393,31 +413,31 @@ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} H :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function DqgmresSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - t = S(undef, n) - z = S(undef, 0) - w = S(undef, 0) - P = [S(undef, n) for i = 1 : memory] - V = [S(undef, n) for i = 1 : memory] - c = Vector{T}(undef, memory) - s = Vector{FC}(undef, memory) - H = Vector{FC}(undef, memory+2) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats) - return solver - end +function DqgmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + t = S(undef, n) + z = S(undef, 0) + w = S(undef, 0) + P = S[S(undef, n) for i = 1 : memory] + V = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + H = Vector{FC}(undef, memory+1) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = DqgmresSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats) + return solver +end - function DqgmresSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - DqgmresSolver(n, m, memory, S) - end +function DqgmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + DqgmresSolver(m, n, memory, S) end """ @@ -425,13 +445,15 @@ Type for storing the vectors required by the in-place version of DIOM. The outer constructors - solver = DiomSolver(n, m, memory, S) + solver = DiomSolver(m, n, memory, S) solver = DiomSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S t :: S @@ -443,30 +465,30 @@ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S} H :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function DiomSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - t = S(undef, n) - z = S(undef, 0) - w = S(undef, 0) - P = [S(undef, n) for i = 1 : memory] - V = [S(undef, n) for i = 1 : memory] - L = Vector{FC}(undef, memory) - H = Vector{FC}(undef, memory+2) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats) - return solver - end +function DiomSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + t = S(undef, n) + z = S(undef, 0) + w = S(undef, 0) + P = S[S(undef, n) for i = 1 : memory-1] + V = S[S(undef, n) for i = 1 : memory] + L = Vector{FC}(undef, memory-1) + H = Vector{FC}(undef, memory) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = DiomSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats) + return solver +end - function DiomSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - DiomSolver(n, m, memory, S) - end +function DiomSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + DiomSolver(m, n, memory, S) end """ @@ -474,12 +496,14 @@ Type for storing the vectors required by the in-place version of USYMLQ. The outer constructors - solver = UsymlqSolver(n, m, S) + solver = UsymlqSolver(m, n, S) solver = UsymlqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S p :: S @@ -491,29 +515,29 @@ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} q :: S warm_start :: Bool stats :: SimpleStats{T} +end - function UsymlqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - Δx = S(undef, 0) - x = S(undef, m) - d̅ = S(undef, m) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) - return solver - end +function UsymlqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + d̅ = S(undef, n) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = UsymlqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) + return solver +end - function UsymlqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - UsymlqSolver(n, m, S) - end +function UsymlqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + UsymlqSolver(m, n, S) end """ @@ -521,12 +545,14 @@ Type for storing the vectors required by the in-place version of USYMQR. The outer constructors - solver = UsymqrSolver(n, m, S) + solver = UsymqrSolver(m, n, S) solver = UsymqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int vₖ₋₁ :: S vₖ :: S q :: S @@ -539,30 +565,30 @@ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} p :: S warm_start :: Bool stats :: SimpleStats{T} +end - function UsymqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - Δx = S(undef, 0) - x = S(undef, m) - wₖ₋₂ = S(undef, m) - wₖ₋₁ = S(undef, m) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) - return solver - end +function UsymqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + Δx = S(undef, 0) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = UsymqrSolver{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) + return solver +end - function UsymqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - UsymqrSolver(n, m, S) - end +function UsymqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + UsymqrSolver(m, n, S) end """ @@ -570,12 +596,14 @@ Type for storing the vectors required by the in-place version of TRICG. The outer constructors - solver = TricgSolver(n, m, S) + solver = TricgSolver(m, n, S) solver = TricgSolver(A, b) may be used in order to create these vectors. 
""" mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int y :: S N⁻¹uₖ₋₁ :: S N⁻¹uₖ :: S @@ -594,36 +622,36 @@ mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function TricgSolver(n, m, S) - FC = eltype(S) - T = real(FC) - y = S(undef, m) - N⁻¹uₖ₋₁ = S(undef, m) - N⁻¹uₖ = S(undef, m) - p = S(undef, m) - gy₂ₖ₋₁ = S(undef, m) - gy₂ₖ = S(undef, m) - x = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - q = S(undef, n) - gx₂ₖ₋₁ = S(undef, n) - gx₂ₖ = S(undef, n) - Δx = S(undef, 0) - Δy = S(undef, 0) - uₖ = S(undef, 0) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) - return solver - end +function TricgSolver(m, n, S) + FC = eltype(S) + T = real(FC) + y = S(undef, n) + N⁻¹uₖ₋₁ = S(undef, n) + N⁻¹uₖ = S(undef, n) + p = S(undef, n) + gy₂ₖ₋₁ = S(undef, n) + gy₂ₖ = S(undef, n) + x = S(undef, m) + M⁻¹vₖ₋₁ = S(undef, m) + M⁻¹vₖ = S(undef, m) + q = S(undef, m) + gx₂ₖ₋₁ = S(undef, m) + gx₂ₖ = S(undef, m) + Δx = S(undef, 0) + Δy = S(undef, 0) + uₖ = S(undef, 0) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = TricgSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) + return solver +end - function TricgSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TricgSolver(n, m, S) - end +function TricgSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TricgSolver(m, n, S) end """ @@ -631,12 +659,14 @@ Type for storing the vectors required by the in-place version of TRIMR. The outer constructors - solver = TrimrSolver(n, m, S) + solver = TrimrSolver(m, n, S) solver = TrimrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int y :: S N⁻¹uₖ₋₁ :: S N⁻¹uₖ :: S @@ -659,40 +689,40 @@ mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function TrimrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - y = S(undef, m) - N⁻¹uₖ₋₁ = S(undef, m) - N⁻¹uₖ = S(undef, m) - p = S(undef, m) - gy₂ₖ₋₃ = S(undef, m) - gy₂ₖ₋₂ = S(undef, m) - gy₂ₖ₋₁ = S(undef, m) - gy₂ₖ = S(undef, m) - x = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - q = S(undef, n) - gx₂ₖ₋₃ = S(undef, n) - gx₂ₖ₋₂ = S(undef, n) - gx₂ₖ₋₁ = S(undef, n) - gx₂ₖ = S(undef, n) - Δx = S(undef, 0) - Δy = S(undef, 0) - uₖ = S(undef, 0) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) - return solver - end +function TrimrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + y = S(undef, n) + N⁻¹uₖ₋₁ = S(undef, n) + N⁻¹uₖ = S(undef, n) + p = S(undef, n) + gy₂ₖ₋₃ = S(undef, n) + gy₂ₖ₋₂ = S(undef, n) + gy₂ₖ₋₁ = S(undef, n) + gy₂ₖ = S(undef, n) + x = S(undef, m) + M⁻¹vₖ₋₁ = S(undef, m) + M⁻¹vₖ = S(undef, m) + q = S(undef, m) + gx₂ₖ₋₃ = S(undef, m) + gx₂ₖ₋₂ = S(undef, m) + gx₂ₖ₋₁ = S(undef, m) + gx₂ₖ = S(undef, m) + Δx = S(undef, 0) + Δy = S(undef, 0) + uₖ = S(undef, 0) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = TrimrSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) + return solver +end - function TrimrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TrimrSolver(n, m, S) - end +function TrimrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TrimrSolver(m, n, S) end """ @@ -700,12 +730,14 @@ Type for storing the vectors required by the in-place version of 
TRILQR. The outer constructors - solver = TrilqrSolver(n, m, S) + solver = TrilqrSolver(m, n, S) solver = TrilqrSolver(A, b) may be used in order to create these vectors. """ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S p :: S @@ -721,33 +753,33 @@ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₂ :: S warm_start :: Bool stats :: AdjointStats{T} +end - function TrilqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - d̅ = S(undef, m) - Δx = S(undef, 0) - x = S(undef, m) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - Δy = S(undef, 0) - y = S(undef, n) - wₖ₋₃ = S(undef, n) - wₖ₋₂ = S(undef, n) - stats = AdjointStats(0, false, false, T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats) - return solver - end +function TrilqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + d̅ = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + Δy = S(undef, 0) + y = S(undef, m) + wₖ₋₃ = S(undef, m) + wₖ₋₂ = S(undef, m) + stats = AdjointStats(0, false, false, T[], T[], 0.0, "unknown") + solver = TrilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats) + return solver +end - function TrilqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TrilqrSolver(n, m, S) - end +function TrilqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TrilqrSolver(m, n, S) end """ @@ -755,12 +787,14 @@ Type for storing the vectors required by the in-place version of CGS. The outer constructorss - solver = CgsSolver(n, m, S) + solver = CgsSolver(m, n, S) solver = CgsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -772,29 +806,29 @@ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S} vw :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CgsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - u = S(undef, n) - p = S(undef, n) - q = S(undef, n) - ts = S(undef, n) - yz = S(undef, 0) - vw = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats) - return solver - end +function CgsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + u = S(undef, n) + p = S(undef, n) + q = S(undef, n) + ts = S(undef, n) + yz = S(undef, 0) + vw = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CgsSolver{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats) + return solver +end - function CgsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgsSolver(n, m, S) - end +function CgsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgsSolver(m, n, S) end """ @@ -802,12 +836,14 @@ Type for storing the vectors required by the in-place version of BICGSTAB. The outer constructors - solver = BicgstabSolver(n, m, S) + solver = BicgstabSolver(m, n, S) solver = BicgstabSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -819,29 +855,29 @@ mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S} t :: S warm_start :: Bool stats :: SimpleStats{T} +end - function BicgstabSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - v = S(undef, n) - s = S(undef, n) - qd = S(undef, n) - yz = S(undef, 0) - t = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats) - return solver - end +function BicgstabSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + v = S(undef, n) + s = S(undef, n) + qd = S(undef, n) + yz = S(undef, 0) + t = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = BicgstabSolver{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats) + return solver +end - function BicgstabSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BicgstabSolver(n, m, S) - end +function BicgstabSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BicgstabSolver(m, n, S) end """ @@ -849,12 +885,14 @@ Type for storing the vectors required by the in-place version of BILQ. The outer constructors - solver = BilqSolver(n, m, S) + solver = BilqSolver(m, n, S) solver = BilqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -866,29 +904,29 @@ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S} d̅ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function BilqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - d̅ = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats) - return solver - end +function BilqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + d̅ = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = BilqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats) + return solver +end - function BilqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BilqSolver(n, m, S) - end +function BilqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BilqSolver(m, n, S) end """ @@ -896,12 +934,14 @@ Type for storing the vectors required by the in-place version of QMR. The outer constructors - solver = QmrSolver(n, m, S) + solver = QmrSolver(m, n, S) solver = QmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -914,30 +954,30 @@ mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₁ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function QmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - wₖ₋₂ = S(undef, n) - wₖ₋₁ = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats) - return solver - end +function QmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = QmrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats) + return solver +end - function QmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - QmrSolver(n, m, S) - end +function QmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + QmrSolver(m, n, S) end """ @@ -945,12 +985,14 @@ Type for storing the vectors required by the in-place version of BILQR. The outer constructors - solver = BilqrSolver(n, m, S) + solver = BilqrSolver(m, n, S) solver = BilqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -966,33 +1008,33 @@ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₂ :: S warm_start :: Bool stats :: AdjointStats{T} +end - function BilqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - Δy = S(undef, 0) - y = S(undef, n) - d̅ = S(undef, n) - wₖ₋₃ = S(undef, n) - wₖ₋₂ = S(undef, n) - stats = AdjointStats(0, false, false, T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats) - return solver - end +function BilqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + Δy = S(undef, 0) + y = S(undef, n) + d̅ = S(undef, n) + wₖ₋₃ = S(undef, n) + wₖ₋₂ = S(undef, n) + stats = AdjointStats(0, false, false, T[], T[], 0.0, "unknown") + solver = BilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats) + return solver +end - function BilqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BilqrSolver(n, m, S) - end +function BilqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BilqrSolver(m, n, S) end """ @@ -1000,12 +1042,14 @@ Type for storing the vectors required by the in-place version of CGLS. The outer constructors - solver = CglsSolver(n, m, S) + solver = CglsSolver(m, n, S) solver = CglsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S s :: S @@ -1013,26 +1057,26 @@ mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S} q :: S Mr :: S stats :: SimpleStats{T} +end - function CglsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - s = S(undef, m) - r = S(undef, n) - q = S(undef, n) - Mr = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, s, r, q, Mr, stats) - return solver - end +function CglsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + s = S(undef, n) + r = S(undef, m) + q = S(undef, m) + Mr = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CglsSolver{T,FC,S}(m, n, x, p, s, r, q, Mr, stats) + return solver +end - function CglsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CglsSolver(n, m, S) - end +function CglsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CglsSolver(m, n, S) end """ @@ -1040,12 +1084,14 @@ Type for storing the vectors required by the in-place version of CRLS. The outer constructors - solver = CrlsSolver(n, m, S) + solver = CrlsSolver(m, n, S) solver = CrlsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S Ar :: S @@ -1055,28 +1101,28 @@ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S} s :: S Ms :: S stats :: SimpleStats{T} +end - function CrlsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Ar = S(undef, m) - q = S(undef, m) - r = S(undef, n) - Ap = S(undef, n) - s = S(undef, n) - Ms = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats) - return solver - end +function CrlsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Ar = S(undef, n) + q = S(undef, n) + r = S(undef, m) + Ap = S(undef, m) + s = S(undef, m) + Ms = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CrlsSolver{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats) + return solver +end - function CrlsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrlsSolver(n, m, S) - end +function CrlsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrlsSolver(m, n, S) end """ @@ -1084,41 +1130,43 @@ Type for storing the vectors required by the in-place version of CGNE. The outer constructors - solver = CgneSolver(n, m, S) + solver = CgneSolver(m, n, S) solver = CgneSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S - Aᵀz :: S + Aᴴz :: S r :: S q :: S s :: S z :: S stats :: SimpleStats{T} +end - function CgneSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Aᵀz = S(undef, m) - r = S(undef, n) - q = S(undef, n) - s = S(undef, 0) - z = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats) - return solver - end +function CgneSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Aᴴz = S(undef, n) + r = S(undef, m) + q = S(undef, m) + s = S(undef, 0) + z = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CgneSolver{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats) + return solver +end - function CgneSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgneSolver(n, m, S) - end +function CgneSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgneSolver(m, n, S) end """ @@ -1126,41 +1174,43 @@ Type for storing the vectors required by the in-place version of CRMR. The outer constructors - solver = CrmrSolver(n, m, S) + solver = CrmrSolver(m, n, S) solver = CrmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S - Aᵀr :: S + Aᴴr :: S r :: S q :: S - Mq :: S + Nq :: S s :: S stats :: SimpleStats{T} +end - function CrmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Aᵀr = S(undef, m) - r = S(undef, n) - q = S(undef, n) - Mq = S(undef, 0) - s = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats) - return solver - end +function CrmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Aᴴr = S(undef, n) + r = S(undef, m) + q = S(undef, m) + Nq = S(undef, 0) + s = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CrmrSolver{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats) + return solver +end - function CrmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrmrSolver(n, m, S) - end +function CrmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrmrSolver(m, n, S) end """ @@ -1168,15 +1218,17 @@ Type for storing the vectors required by the in-place version of LSLQ. The outer constructors - solver = LslqSolver(n, m, S) + solver = LslqSolver(m, n, S) solver = LslqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S w̄ :: S Mu :: S Av :: S @@ -1184,29 +1236,29 @@ mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: LSLQStats{T} +end - function LslqSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - w̄ = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats) - return solver - end +function LslqSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + w̄ = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], 0.0, "unknown") + solver = LslqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w̄, Mu, Av, u, v, err_vec, stats) + return solver +end - function LslqSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LslqSolver(n, m, S, window=window) - end +function LslqSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LslqSolver(m, n, S; window) end """ @@ -1214,15 +1266,17 @@ Type for storing the vectors required by the in-place version of LSQR. The outer constructors - solver = LsqrSolver(n, m, S) + solver = LsqrSolver(m, n, S) solver = LsqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S w :: S Mu :: S Av :: S @@ -1230,29 +1284,29 @@ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: SimpleStats{T} +end - function LsqrSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - w = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats) - return solver - end +function LsqrSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + w = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = LsqrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats) + return solver +end - function LsqrSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LsqrSolver(n, m, S, window=window) - end +function LsqrSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LsqrSolver(m, n, S; window) end """ @@ -1260,15 +1314,17 @@ Type for storing the vectors required by the in-place version of LSMR. The outer constructors - solver = LsmrSolver(n, m, S) + solver = LsmrSolver(m, n, S) solver = LsmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S h :: S hbar :: S Mu :: S @@ -1277,30 +1333,30 @@ mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: LsmrStats{T} +end - function LsmrSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - h = S(undef, m) - hbar = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats) - return solver - end +function LsmrSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + h = S(undef, n) + hbar = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), 0.0, "unknown") + solver = LsmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, h, hbar, Mu, Av, u, v, err_vec, stats) + return solver +end - function LsmrSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LsmrSolver(n, m, S, window=window) - end +function LsmrSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LsmrSolver(m, n, S; window) end """ @@ -1308,15 +1364,17 @@ Type for storing the vectors required by the in-place version of LNLQ. The outer constructors - solver = LnlqSolver(n, m, S) + solver = LnlqSolver(m, n, S) solver = LnlqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S y :: S w̄ :: S Mu :: S @@ -1325,30 +1383,30 @@ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S q :: S stats :: LNLQStats{T} +end - function LnlqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - y = S(undef, n) - w̄ = S(undef, n) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - q = S(undef, 0) - stats = LNLQStats(0, false, T[], false, T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats) - return solver - end +function LnlqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + y = S(undef, m) + w̄ = S(undef, m) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + q = S(undef, 0) + stats = LNLQStats(0, false, T[], false, T[], T[], 0.0, "unknown") + solver = LnlqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w̄, Mu, Av, u, v, q, stats) + return solver +end - function LnlqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - LnlqSolver(n, m, S) - end +function LnlqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + LnlqSolver(m, n, S) end """ @@ -1356,15 +1414,17 @@ Type for storing the vectors required by the in-place version of CRAIG. The outer constructors - solver = CraigSolver(n, m, S) + solver = CraigSolver(m, n, S) solver = CraigSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S y :: S w :: S Mu :: S @@ -1373,30 +1433,30 @@ mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S w2 :: S stats :: SimpleStats{T} +end - function CraigSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - y = S(undef, n) - w = S(undef, n) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - w2 = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats) - return solver - end +function CraigSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + y = S(undef, m) + w = S(undef, m) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + w2 = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CraigSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats) + return solver +end - function CraigSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CraigSolver(n, m, S) - end +function CraigSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CraigSolver(m, n, S) end """ @@ -1404,15 +1464,17 @@ Type for storing the vectors required by the in-place version of CRAIGMR. The outer constructors - solver = CraigmrSolver(n, m, S) + solver = CraigmrSolver(m, n, S) solver = CraigmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S d :: S y :: S Mu :: S @@ -1423,32 +1485,32 @@ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S q :: S stats :: SimpleStats{T} +end - function CraigmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - d = S(undef, m) - y = S(undef, n) - Mu = S(undef, n) - w = S(undef, n) - wbar = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - q = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats) - return solver - end +function CraigmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + d = S(undef, n) + y = S(undef, m) + Mu = S(undef, m) + w = S(undef, m) + wbar = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + q = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CraigmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats) + return solver +end - function CraigmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CraigmrSolver(n, m, S) - end +function CraigmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CraigmrSolver(m, n, S) end """ @@ -1456,13 +1518,15 @@ Type for storing the vectors required by the in-place version of GMRES. The outer constructors - solver = GmresSolver(n, m, memory, S) + solver = GmresSolver(m, n, memory, S) solver = GmresSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S w :: S @@ -1476,31 +1540,85 @@ mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} warm_start :: Bool inner_iter :: Int stats :: SimpleStats{T} +end - function GmresSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - w = S(undef, n) - p = S(undef, 0) - q = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - c = Vector{T}(undef, memory) - s = Vector{FC}(undef, memory) - z = Vector{FC}(undef, memory) - R = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) - return solver - end +function GmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + p = S(undef, 0) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + R = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = GmresSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) + return solver +end - function GmresSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - GmresSolver(n, m, memory, S) - end +function GmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + GmresSolver(m, n, memory, S) +end + +""" +Type for storing the vectors required by the in-place version of FGMRES. + +The outer constructors + + solver = FgmresSolver(m, n, memory, S) + solver = FgmresSolver(A, b, memory = 20) + +may be used in order to create these vectors. +`memory` is set to `n` if the value given is larger than `n`. 
+""" +mutable struct FgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int + Δx :: S + x :: S + w :: S + q :: S + V :: Vector{S} + Z :: Vector{S} + c :: Vector{T} + s :: Vector{FC} + z :: Vector{FC} + R :: Vector{FC} + warm_start :: Bool + inner_iter :: Int + stats :: SimpleStats{T} +end + +function FgmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + Z = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + R = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = FgmresSolver{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats) + return solver +end + +function FgmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + FgmresSolver(m, n, memory, S) end """ @@ -1508,13 +1626,15 @@ Type for storing the vectors required by the in-place version of FOM. The outer constructors - solver = FomSolver(n, m, memory, S) + solver = FomSolver(m, n, memory, S) solver = FomSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S w :: S @@ -1526,30 +1646,30 @@ mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S} U :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function FomSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - w = S(undef, n) - p = S(undef, 0) - q = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - l = Vector{FC}(undef, memory) - z = Vector{FC}(undef, memory) - U = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats) - return solver - end +function FomSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + p = S(undef, 0) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + l = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + U = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = FomSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats) + return solver +end - function FomSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - FomSolver(n, m, memory, S) - end +function FomSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + FomSolver(m, n, memory, S) end """ @@ -1557,13 +1677,15 @@ Type for storing the vectors required by the in-place version of GPMR. The outer constructors - solver = GpmrSolver(n, m, memory, S) + solver = GpmrSolver(m, n, memory, S) solver = GpmrSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n + m` if the value given is larger than `n + m`. 
""" mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int wA :: S wB :: S dA :: S @@ -1582,45 +1704,38 @@ mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} R :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} - - function GpmrSolver(n, m, memory, S) - memory = min(n + m, memory) - FC = eltype(S) - T = real(FC) - wA = S(undef, 0) - wB = S(undef, 0) - dA = S(undef, n) - dB = S(undef, m) - Δx = S(undef, 0) - Δy = S(undef, 0) - x = S(undef, n) - y = S(undef, m) - q = S(undef, 0) - p = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - U = [S(undef, m) for i = 1 : memory] - gs = Vector{FC}(undef, 4 * memory) - gc = Vector{T}(undef, 4 * memory) - zt = Vector{FC}(undef, 2 * memory) - R = Vector{FC}(undef, memory * (2memory + 1)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) - return solver - end - - function GpmrSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - GpmrSolver(n, m, memory, S) - end end -""" - solve!(solver, args...; kwargs...) +function GpmrSolver(m, n, memory, S) + memory = min(n + m, memory) + FC = eltype(S) + T = real(FC) + wA = S(undef, 0) + wB = S(undef, 0) + dA = S(undef, m) + dB = S(undef, n) + Δx = S(undef, 0) + Δy = S(undef, 0) + x = S(undef, m) + y = S(undef, n) + q = S(undef, 0) + p = S(undef, 0) + V = S[S(undef, m) for i = 1 : memory] + U = S[S(undef, n) for i = 1 : memory] + gs = Vector{FC}(undef, 4 * memory) + gc = Vector{T}(undef, 4 * memory) + zt = Vector{FC}(undef, 2 * memory) + R = Vector{FC}(undef, memory * (2 * memory + 1)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = GpmrSolver{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) + return solver +end -Use the in-place Krylov method associated to `solver`. -""" -function solve! 
end +function GpmrSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + GpmrSolver(m, n, memory, S) +end """ solution(solver) @@ -1674,59 +1789,64 @@ Return the number of operator-vector products with `A'` performed by the Krylov function Atprod end for (KS, fun, nsol, nA, nAt, warm_start) in [ - (LsmrSolver , :lsmr! , 1, 1, 1, false) - (CgsSolver , :cgs! , 1, 2, 0, true ) - (UsymlqSolver , :usymlq! , 1, 1, 1, true ) - (LnlqSolver , :lnlq! , 2, 1, 1, false) - (BicgstabSolver , :bicgstab! , 1, 2, 0, true ) - (CrlsSolver , :crls! , 1, 1, 1, false) - (LsqrSolver , :lsqr! , 1, 1, 1, false) - (MinresSolver , :minres! , 1, 1, 0, true ) - (CgneSolver , :cgne! , 1, 1, 1, false) - (DqgmresSolver , :dqgmres! , 1, 1, 0, true ) - (SymmlqSolver , :symmlq! , 1, 1, 0, true ) - (TrimrSolver , :trimr! , 2, 1, 1, true ) - (UsymqrSolver , :usymqr! , 1, 1, 1, true ) - (BilqrSolver , :bilqr! , 2, 1, 1, true ) - (CrSolver , :cr! , 1, 1, 0, true ) - (CraigmrSolver , :craigmr! , 2, 1, 1, false) - (TricgSolver , :tricg! , 2, 1, 1, true ) - (CraigSolver , :craig! , 2, 1, 1, false) - (DiomSolver , :diom! , 1, 1, 0, true ) - (LslqSolver , :lslq! , 1, 1, 1, false) - (TrilqrSolver , :trilqr! , 2, 1, 1, true ) - (CrmrSolver , :crmr! , 1, 1, 1, false) - (CgSolver , :cg! , 1, 1, 0, true ) - (CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false) - (CglsSolver , :cgls! , 1, 1, 1, false) - (CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true ) - (BilqSolver , :bilq! , 1, 1, 1, true ) - (MinresQlpSolver , :minres_qlp! , 1, 1, 0, true ) - (QmrSolver , :qmr! , 1, 1, 1, true ) - (GmresSolver , :gmres! , 1, 1, 0, true ) - (FomSolver , :fom! , 1, 1, 0, true ) - (GpmrSolver , :gpmr! , 2, 1, 0, true ) + (:LsmrSolver , :lsmr! , 1, 1, 1, false) + (:CgsSolver , :cgs! , 1, 2, 0, true ) + (:UsymlqSolver , :usymlq! , 1, 1, 1, true ) + (:LnlqSolver , :lnlq! , 2, 1, 1, false) + (:BicgstabSolver , :bicgstab! , 1, 2, 0, true ) + (:CrlsSolver , :crls! , 1, 1, 1, false) + (:LsqrSolver , :lsqr! 
, 1, 1, 1, false) + (:MinresSolver , :minres! , 1, 1, 0, true ) + (:CgneSolver , :cgne! , 1, 1, 1, false) + (:DqgmresSolver , :dqgmres! , 1, 1, 0, true ) + (:SymmlqSolver , :symmlq! , 1, 1, 0, true ) + (:TrimrSolver , :trimr! , 2, 1, 1, true ) + (:UsymqrSolver , :usymqr! , 1, 1, 1, true ) + (:BilqrSolver , :bilqr! , 2, 1, 1, true ) + (:CrSolver , :cr! , 1, 1, 0, true ) + (:CraigmrSolver , :craigmr! , 2, 1, 1, false) + (:TricgSolver , :tricg! , 2, 1, 1, true ) + (:CraigSolver , :craig! , 2, 1, 1, false) + (:DiomSolver , :diom! , 1, 1, 0, true ) + (:LslqSolver , :lslq! , 1, 1, 1, false) + (:TrilqrSolver , :trilqr! , 2, 1, 1, true ) + (:CrmrSolver , :crmr! , 1, 1, 1, false) + (:CgSolver , :cg! , 1, 1, 0, true ) + (:CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false) + (:CglsSolver , :cgls! , 1, 1, 1, false) + (:CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true ) + (:BilqSolver , :bilq! , 1, 1, 1, true ) + (:MinresQlpSolver , :minres_qlp! , 1, 1, 0, true ) + (:QmrSolver , :qmr! , 1, 1, 1, true ) + (:GmresSolver , :gmres! , 1, 1, 0, true ) + (:FgmresSolver , :fgmres! , 1, 1, 0, true ) + (:FomSolver , :fom! , 1, 1, 0, true ) + (:GpmrSolver , :gpmr! , 2, 1, 0, true ) ] @eval begin - @inline solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...) 
- @inline statistics(solver :: $KS) = solver.stats - @inline niterations(solver :: $KS) = solver.stats.niter - @inline Aprod(solver :: $KS) = $nA * solver.stats.niter - @inline Atprod(solver :: $KS) = $nAt * solver.stats.niter + size(solver :: $KS) = solver.m, solver.n + statistics(solver :: $KS) = solver.stats + niterations(solver :: $KS) = solver.stats.niter + Aprod(solver :: $KS) = $nA * solver.stats.niter + Atprod(solver :: $KS) = $nAt * solver.stats.niter if $KS == GpmrSolver - @inline Bprod(solver :: $KS) = solver.stats.niter + Bprod(solver :: $KS) = solver.stats.niter + end + nsolution(solver :: $KS) = $nsol + if $nsol == 1 + solution(solver :: $KS) = solver.x + solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.") + end + if $nsol == 2 + solution(solver :: $KS) = solver.x, solver.y + solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.") end - @inline nsolution(solver :: $KS) = $nsol - ($nsol == 1) && @inline solution(solver :: $KS) = solver.x - ($nsol == 2) && @inline solution(solver :: $KS) = solver.x, solver.y - ($nsol == 1) && @inline solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.") - ($nsol == 2) && @inline solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? 
solution(solver)[p] : error("solution(solver) has only two outputs.") if $KS ∈ (BilqrSolver, TrilqrSolver) - @inline issolved_primal(solver :: $KS) = solver.stats.solved_primal - @inline issolved_dual(solver :: $KS) = solver.stats.solved_dual - @inline issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver) + issolved_primal(solver :: $KS) = solver.stats.solved_primal + issolved_dual(solver :: $KS) = solver.stats.solved_dual + issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver) else - @inline issolved(solver :: $KS) = solver.stats.solved + issolved(solver :: $KS) = solver.stats.solved end if $warm_start if $KS in (BilqrSolver, TrilqrSolver, TricgSolver, TrimrSolver, GpmrSolver) @@ -1758,45 +1878,70 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [ end end +function ksizeof(attribute) + if isa(attribute, Vector{<:AbstractVector}) && !isempty(attribute) + # A vector of vectors is a vector of pointers in Julia. + # All vectors inside a vector have the same size in Krylov.jl + size_attribute = sizeof(attribute) + length(attribute) * ksizeof(attribute[1]) + else + size_attribute = sizeof(attribute) + end + return size_attribute +end + +function sizeof(stats_solver :: Union{KrylovStats, KrylovSolver}) + type = typeof(stats_solver) + nfields = fieldcount(type) + storage = 0 + for i = 1:nfields + field_i = getfield(stats_solver, i) + size_i = ksizeof(field_i) + storage += size_i + end + return storage +end + """ show(io, solver; show_stats=true) Statistics of `solver` are displayed if `show_stats` is set to true. 
""" -function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} +function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} workspace = typeof(solver) - name_solver = workspace.name.wrapper - l1 = max(length(string(name_solver)), 10) # length("warm_start") = 10 - l2 = length(string(S)) + 8 # length("Vector{}") = 8 + name_solver = string(workspace.name.name) + name_stats = string(typeof(solver.stats).name.name) + nbytes = sizeof(solver) + storage = format_bytes(nbytes) architecture = S <: Vector ? "CPU" : "GPU" - format = Printf.Format("│%$(l1)s│%$(l2)s│%18s│\n") - format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%18s│\n") - @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^18) - Printf.format(io, format, name_solver, "Precision: $FC", "Architecture: $architecture") - @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18) + l1 = max(length(name_solver), length(string(FC)) + 11) # length("Precision: ") = 11 + nchar = workspace <: Union{CgLanczosShiftSolver, FomSolver, DiomSolver, DqgmresSolver, GmresSolver, FgmresSolver, GpmrSolver} ? 
8 : 0 # length("Vector{}") = 8 + l2 = max(ndigits(solver.m) + 7, length(architecture) + 14, length(string(S)) + nchar) # length("nrows: ") = 7 and length("Architecture: ") = 14 + l2 = max(l2, length(name_stats) + 2 + length(string(T))) # length("{}") = 2 + l3 = max(ndigits(solver.n) + 7, length(storage) + 9) # length("Storage: ") = 9 and length("cols: ") = 7 + format = Printf.Format("│%$(l1)s│%$(l2)s│%$(l3)s│\n") + format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%$(l3)s│\n") + @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^l3) + Printf.format(io, format, "$(name_solver)", "nrows: $(solver.m)", "ncols: $(solver.n)") + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) + Printf.format(io, format, "Precision: $FC", "Architecture: $architecture","Storage: $storage") + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) Printf.format(io, format, "Attribute", "Type", "Size") - @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18) - for i=1:fieldcount(workspace)-1 # show stats seperately - type_i = fieldtype(workspace, i) + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) + for i=1:fieldcount(workspace) name_i = fieldname(workspace, i) - len = if type_i <: AbstractVector - field_i = getfield(solver, name_i) - ni = length(field_i) - if eltype(type_i) <: AbstractVector - "$(ni) x $(length(field_i[1]))" - else - length(field_i) - end - else - 0 - end - if (name_i in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV") - Printf.format(io, format2, string(name_i), type_i, len) + type_i = fieldtype(workspace, i) + field_i = getfield(solver, name_i) + size_i = ksizeof(field_i) + if (name_i::Symbol in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV") + (size_i ≠ 0) && Printf.format(io, format2, string(name_i), type_i, format_bytes(size_i)) else - Printf.format(io, format, string(name_i), type_i, len) + (size_i ≠ 0) && Printf.format(io, format, string(name_i), type_i, format_bytes(size_i)) end end - @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^18) - show_stats && show(io, solver.stats) + 
@printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^l3) + if show_stats + @printf(io, "\n") + show(io, solver.stats) + end return nothing end diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index a662fa0a0..ba217a597 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -1,3 +1,6 @@ +export KrylovStats, SimpleStats, LsmrStats, LanczosStats, LanczosShiftStats, +SymmlqStats, AdjointStats, LNLQStats, LSLQStats + "Abstract type for statistics returned by a solver" abstract type KrylovStats{T} end @@ -9,6 +12,7 @@ Type for statistics returned by the majority of Krylov solvers, the attributes a - residuals - Aresiduals - Acond +- timer - status """ mutable struct SimpleStats{T} <: KrylovStats{T} @@ -18,9 +22,16 @@ mutable struct SimpleStats{T} <: KrylovStats{T} residuals :: Vector{T} Aresiduals :: Vector{T} Acond :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: SimpleStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) + empty!(stats.Acond) +end + """ Type for statistics returned by LSMR. The attributes are: - niter @@ -31,6 +42,7 @@ Type for statistics returned by LSMR. 
The attributes are: - Acond - Anorm - xNorm +- timer - status """ mutable struct LsmrStats{T} <: KrylovStats{T} @@ -44,9 +56,15 @@ mutable struct LsmrStats{T} <: KrylovStats{T} Acond :: T Anorm :: T xNorm :: T + timer :: Float64 status :: String end +function reset!(stats :: LsmrStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) +end + """ Type for statistics returned by CG-LANCZOS, the attributes are: - niter @@ -55,6 +73,7 @@ Type for statistics returned by CG-LANCZOS, the attributes are: - indefinite - Anorm - Acond +- timer - status """ mutable struct LanczosStats{T} <: KrylovStats{T} @@ -64,9 +83,14 @@ mutable struct LanczosStats{T} <: KrylovStats{T} indefinite :: Bool Anorm :: T Acond :: T + timer :: Float64 status :: String end +function reset!(stats :: LanczosStats) + empty!(stats.residuals) +end + """ Type for statistics returned by CG-LANCZOS with shifts, the attributes are: - niter @@ -75,6 +99,7 @@ Type for statistics returned by CG-LANCZOS with shifts, the attributes are: - indefinite - Anorm - Acond +- timer - status """ mutable struct LanczosShiftStats{T} <: KrylovStats{T} @@ -84,6 +109,7 @@ mutable struct LanczosShiftStats{T} <: KrylovStats{T} indefinite :: BitVector Anorm :: T Acond :: T + timer :: Float64 status :: String end @@ -103,6 +129,7 @@ Type for statistics returned by SYMMLQ, the attributes are: - errorscg - Anorm - Acond +- timer - status """ mutable struct SymmlqStats{T} <: KrylovStats{T} @@ -114,9 +141,17 @@ mutable struct SymmlqStats{T} <: KrylovStats{T} errorscg :: Vector{Union{T, Missing}} Anorm :: T Acond :: T + timer :: Float64 status :: String end +function reset!(stats :: SymmlqStats) + empty!(stats.residuals) + empty!(stats.residualscg) + empty!(stats.errors) + empty!(stats.errorscg) +end + """ Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the attributes are: - niter @@ -124,6 +159,7 @@ Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the at - solved_dual - 
residuals_primal - residuals_dual +- timer - status """ mutable struct AdjointStats{T} <: KrylovStats{T} @@ -132,9 +168,15 @@ mutable struct AdjointStats{T} <: KrylovStats{T} solved_dual :: Bool residuals_primal :: Vector{T} residuals_dual :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: AdjointStats) + empty!(stats.residuals_primal) + empty!(stats.residuals_dual) +end + """ Type for statistics returned by the LNLQ method, the attributes are: - niter @@ -143,6 +185,7 @@ Type for statistics returned by the LNLQ method, the attributes are: - error_with_bnd - error_bnd_x - error_bnd_y +- timer - status """ mutable struct LNLQStats{T} <: KrylovStats{T} @@ -152,9 +195,16 @@ mutable struct LNLQStats{T} <: KrylovStats{T} error_with_bnd :: Bool error_bnd_x :: Vector{T} error_bnd_y :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: LNLQStats) + empty!(stats.residuals) + empty!(stats.error_bnd_x) + empty!(stats.error_bnd_y) +end + """ Type for statistics returned by the LSLQ method, the attributes are: - niter @@ -166,6 +216,7 @@ Type for statistics returned by the LSLQ method, the attributes are: - error_with_bnd - err_ubnds_lq - err_ubnds_cg +- timer - status """ mutable struct LSLQStats{T} <: KrylovStats{T} @@ -178,9 +229,18 @@ mutable struct LSLQStats{T} <: KrylovStats{T} error_with_bnd :: Bool err_ubnds_lq :: Vector{T} err_ubnds_cg :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: LSLQStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) + empty!(stats.err_lbnds) + empty!(stats.err_ubnds_lq) + empty!(stats.err_ubnds_cg) +end + import Base.show special_fields = Dict( @@ -192,45 +252,28 @@ special_fields = Dict( :err_ubnds_cg => "error bound CG", ) -for f in ["Simple", "Lsmr", "Adjoint", "LNLQ", "LSLQ", "Lanczos", "Symmlq"] - T = Meta.parse("Krylov." 
* f * "Stats{S}") - - @eval function empty_field!(stats :: $T, i, ::Type{Vector{Si}}) where {S, Si} - statfield = getfield(stats, i) - empty!(statfield) - end - @eval empty_field!(stats :: $T, i, type) where S = stats - - @eval function reset!(stats :: $T) where S - nfield = length($T.types) - for i = 1 : nfield - type = fieldtype($T, i) - empty_field!(stats, i, type) +function show(io :: IO, stats :: KrylovStats) + kst = typeof(stats) + s = string(kst.name.name) * "\n" + nfield = fieldcount(kst) + for i = 1 : nfield + field = fieldname(kst, i) + field_name = if field ∈ keys(special_fields) + special_fields[field] + else + replace(string(field), "_" => " ") end - end -end - -for f in ["Simple", "Lsmr", "Lanczos", "LanczosShift", "Symmlq", "Adjoint", "LNLQ", "LSLQ"] - T = Meta.parse("Krylov." * f * "Stats{S}") - - @eval function show(io :: IO, stats :: $T) where S - s = $f * " stats\n" - nfield = length($T.types) - for i = 1 : nfield - field = fieldname($T, i) - field_name = if field ∈ keys(special_fields) - special_fields[field] - else - replace(string(field), "_" => " ") - end - s *= " " * field_name * ":" - statfield = getfield(stats, field) - if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat} - s *= @sprintf " %s\n" vec2str(statfield) - else - s *= @sprintf " %s\n" statfield - end + s *= " " * field_name * ":" + statfield = getfield(stats, field) + if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat} + s *= @sprintf " %s\n" vec2str(statfield) + elseif field_name == "timer" + (statfield < 1e-3) && (s *= @sprintf " %.2fμs\n" 1e6*statfield) + (1e-3 ≤ statfield < 1.00) && (s *= @sprintf " %.2fms\n" 1e3*statfield) + (statfield ≥ 1.00) && (s *= @sprintf " %.2fs\n" statfield) + else + s *= @sprintf " %s\n" statfield end - print(io, s) end + print(io, s) end diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl index 6f0c1c382..fb554395e 100644 --- a/src/krylov_utils.jl +++ b/src/krylov_utils.jl @@ 
-1,3 +1,8 @@ +export kstdout + +"Default I/O stream for all Krylov methods." +const kstdout = Core.stdout + """ FloatOrComplex{T} Union type of `T` and `Complex{T}` where T is an `AbstractFloat`. @@ -92,8 +97,8 @@ function sym_givens(a :: Complex{T}, b :: Complex{T}) where T <: AbstractFloat return (c, s, ρ) end -@inline sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b)) -@inline sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b) +sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b)) +sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b) """ roots = roots_quadratic(q₂, q₁, q₀; nitref) @@ -111,79 +116,97 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T; # Case where q(x) is linear. if q₂ == zero(T) if q₁ == zero(T) - root = [zero(T)] - q₀ == zero(T) || (root = T[]) + q₀ == zero(T) || error("The quadratic `q` doesn't have real roots.") + root = zero(T) else - root = [-q₀ / q₁] + root = -q₀ / q₁ end - return root + return (root, root) end # Case where q(x) is indeed quadratic. rhs = √eps(T) * q₁ * q₁ if abs(q₀ * q₂) > rhs ρ = q₁ * q₁ - 4 * q₂ * q₀ - ρ < 0 && return T[] + ρ < 0 && return error("The quadratic `q` doesn't have real roots.") d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2 - roots = [d / q₂, q₀ / d] + root1 = d / q₂ + root2 = q₀ / d else # Ill-conditioned quadratic. - roots = [-q₁ / q₂, zero(T)] + root1 = -q₁ / q₂ + root2 = zero(T) end # Perform a few Newton iterations to improve accuracy. 
- for k = 1 : 2 - root = roots[k] - for it = 1 : nitref - q = (q₂ * root + q₁) * root + q₀ - dq = 2 * q₂ * root + q₁ - dq == zero(T) && continue - root = root - q / dq - end - roots[k] = root + for it = 1 : nitref + q = (q₂ * root1 + q₁) * root1 + q₀ + dq = 2 * q₂ * root1 + q₁ + dq == zero(T) && continue + root1 = root1 - q / dq end - return roots -end + for it = 1 : nitref + q = (q₂ * root2 + q₁) * root2 + q₀ + dq = 2 * q₂ * root2 + q₁ + dq == zero(T) && continue + root2 = root2 - q / dq + end + return (root1, root2) +end """ - roots = to_boundary(x, d, radius; flip, xNorm2, dNorm2) - -Given a trust-region radius `radius`, a vector `x` lying inside the -trust-region and a direction `d`, return `σ1` and `σ2` such that - - ‖x + σi d‖ = radius, i = 1, 2 + s = vec2str(x; ndisp) -in the Euclidean norm. If known, ‖x‖² may be supplied in `xNorm2`. +Display an array in the form -If `flip` is set to `true`, `σ1` and `σ2` are computed such that + [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ] - ‖x - σi d‖ = radius, i = 1, 2. +with (ndisp - 1)/2 elements on each side. """ -function to_boundary(x :: Vector{T}, d :: Vector{T}, - radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number - radius > 0 || error("radius must be positive") - - # ‖d‖² σ² + 2 xᵀd σ + (‖x‖² - radius²). 
- xd = dot(x, d) - flip && (xd = -xd) - dNorm2 == zero(T) && (dNorm2 = dot(d, d)) - dNorm2 == zero(T) && error("zero direction") - xNorm2 == zero(T) && (xNorm2 = dot(x, x)) - (xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius)) - roots = roots_quadratic(dNorm2, 2 * xd, xNorm2 - radius * radius) - return roots # `σ1` and `σ2` +function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing} + n = length(x) + if n ≤ ndisp + ndisp = n + nside = n + else + nside = max(1, div(ndisp - 1, 2)) + end + s = "[" + i = 1 + while i ≤ nside + if x[i] !== missing + s *= @sprintf("%8.1e ", x[i]) + else + s *= " ✗✗✗✗ " + end + i += 1 + end + if i ≤ div(n, 2) + s *= "... " + end + i = max(i, n - nside + 1) + while i ≤ n + if x[i] !== missing + s *= @sprintf("%8.1e ", x[i]) + else + s *= " ✗✗✗✗ " + end + i += 1 + end + s *= "]" + return s end """ S = ktypeof(v) -Return a dense storage type `S` based on the type of `v`. +Return the most relevant storage type `S` based on the type of `v`. """ function ktypeof end -function ktypeof(v::S) where S <: DenseVector - return S +function ktypeof(v::S) where S <: AbstractVector + return S end function ktypeof(v::S) where S <: SparseVector @@ -195,90 +218,128 @@ function ktypeof(v::S) where S <: AbstractSparseVector return S.types[2] # return `CuVector` for a `CuSparseVector` end -function ktypeof(v::S) where S <: AbstractVector - T = eltype(S) - return Vector{T} # BlockArrays, FillArrays, etc... +function ktypeof(v::S) where S <: SubArray + vp = v.parent + if isa(vp, DenseMatrix) + M = typeof(vp) + return matrix_to_vector(M) # view of a row or a column of a matrix + else + return ktypeof(vp) # view of a vector + end end -function ktypeof(v::S) where S <: SubArray - return ktypeof(v.parent) +""" + M = vector_to_matrix(S) + +Return the dense matrix storage type `M` related to the dense vector storage type `S`. 
+""" +function vector_to_matrix(::Type{S}) where S <: DenseVector + T = hasproperty(S, :body) ? S.body : S + par = T.parameters + npar = length(par) + (2 ≤ npar ≤ 3) || error("Type $S is not supported.") + if npar == 2 + M = T.name.wrapper{par[1], 2} + else + M = T.name.wrapper{par[1], 2, par[3]} + end + return M +end + +""" + S = matrix_to_vector(M) + +Return the dense vector storage type `S` related to the dense matrix storage type `M`. +""" +function matrix_to_vector(::Type{M}) where M <: DenseMatrix + T = hasproperty(M, :body) ? M.body : M + par = T.parameters + npar = length(par) + (2 ≤ npar ≤ 3) || error("Type $M is not supported.") + if npar == 2 + S = T.name.wrapper{par[1], 1} + else + S = T.name.wrapper{par[1], 1, par[3]} + end + return S end """ v = kzeros(S, n) -Create an AbstractVector of storage type `S` of length `n` only composed of zero. +Create a vector of storage type `S` of length `n` only composed of zero. """ -@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) +kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) """ v = kones(S, n) -Create an AbstractVector of storage type `S` of length `n` only composed of one. +Create a vector of storage type `S` of length `n` only composed of one. """ -@inline kones(S, n) = fill!(S(undef, n), one(eltype(S))) +kones(S, n) = fill!(S(undef, n), one(eltype(S))) -@inline allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n)) +allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)::S) && (solver.:($v)::S = S(undef, n)) -@inline kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) +kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) -@inline mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? 
ldiv!(y, P, x) : mul!(y, P, x) +ktimer(start_time::UInt64) = (time_ns() - start_time) / 1e9 -@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) -@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) -@inline krylov_dot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = dot(x, y) +mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x) -@inline krylov_dotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = krylov_dot(n, x, dx, y, dy) -@inline krylov_dotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(krylov_dot(n, x, dx, y, dy)) +kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) +kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) +kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y) -@inline krylov_norm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) -@inline krylov_norm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: Number = norm(x) +kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy) +kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy)) -@inline krylov_scal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T 
<: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) -@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: Number = (x .*= s) -@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = krylov_scal!(n, Complex{T}(s), x, dx) +knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) +knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x) -@inline krylov_axpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) -@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpy!(s, x, y) -@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpy!(n, Complex{T}(s), x, dx, y, dy) +kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) +kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s) +kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx) -@inline krylov_axpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpby!(s, x, t, y) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, t, y, dy) -@inline 
krylov_axpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, s, x, dx, Complex{T}(t), y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) +kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy) -@inline krylov_copy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) -@inline krylov_copy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = copyto!(y, x) +kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy) +kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, 
Complex{T}(t), y, dy) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) -# the macros are just for readability, so we don't have to write the increments (always equal to 1) +kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) +kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x) +# the macros are just for readability, so we don't have to write the increments (always equal to 1) macro kdot(n, x, y) - return esc(:(krylov_dot($n, $x, 1, $y, 1))) + return esc(:(Krylov.kdot($n, $x, 1, $y, 1))) end macro kdotr(n, x, y) - return esc(:(krylov_dotr($n, $x, 1, $y, 1))) + return esc(:(Krylov.kdotr($n, $x, 1, $y, 1))) end macro knrm2(n, x) - return esc(:(krylov_norm2($n, $x, 1))) + return esc(:(Krylov.knrm2($n, $x, 1))) end macro kscal!(n, s, x) - return esc(:(krylov_scal!($n, $s, $x, 1))) + return esc(:(Krylov.kscal!($n, $s, $x, 1))) end macro kaxpy!(n, s, x, y) - return esc(:(krylov_axpy!($n, $s, $x, 1, $y, 1))) + return esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1))) end macro kaxpby!(n, s, x, t, y) - return esc(:(krylov_axpby!($n, $s, $x, 1, $t, $y, 1))) + return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1))) end macro kcopy!(n, x, y) - return esc(:(krylov_copy!($n, $x, 1, $y, 1))) + return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1))) end macro kswap(x, y) @@ -294,44 +355,48 @@ macro kref!(n, x, y, c, s) end """ - s = vec2str(x; ndisp) + roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2) -Display an array in the form +Given a trust-region radius `radius`, a vector `x` lying inside the +trust-region and a direction `d`, return `σ1` and `σ2` such that - [ -3.0e-01 -5.1e-01 1.9e-01 ... 
-2.3e-01 -4.4e-01 2.4e-01 ] + ‖x + σi d‖ = radius, i = 1, 2 -with (ndisp - 1)/2 elements on each side. +in the Euclidean norm. +`n` is the length of vectors `x` and `d`. +If known, ‖x‖² and ‖d‖² may be supplied with `xNorm2` and `dNorm2`. + +If `flip` is set to `true`, `σ1` and `σ2` are computed such that + + ‖x - σi d‖ = radius, i = 1, 2. """ -function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing} - n = length(x) - if n ≤ ndisp - ndisp = n - nside = n - else - nside = max(1, div(ndisp - 1, 2)) - end - s = "[" - i = 1 - while i ≤ nside - if x[i] !== missing - s *= @sprintf("%8.1e ", x[i]) - else - s *= " ✗✗✗✗ " - end - i += 1 - end - if i ≤ div(n, 2) - s *= "... " - end - i = max(i, n - nside + 1) - while i ≤ n - if x[i] !== missing - s *= @sprintf("%8.1e ", x[i]) - else - s *= " ✗✗✗✗ " - end - i += 1 - end - s *= "]" - return s +function to_boundary(n :: Int, x :: AbstractVector{FC}, d :: AbstractVector{FC}, radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + radius > 0 || error("radius must be positive") + + # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²). + rxd = @kdotr(n, x, d) + flip && (rxd = -rxd) + dNorm2 == zero(T) && (dNorm2 = @kdotr(n, d, d)) + dNorm2 == zero(T) && error("zero direction") + xNorm2 == zero(T) && (xNorm2 = @kdotr(n, x, x)) + radius2 = radius * radius + (xNorm2 ≤ radius2) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius2)) + + # q₂ = ‖d‖², q₁ = xᴴd + dᴴx, q₀ = ‖x‖² - Δ² + # ‖x‖² ≤ Δ² ⟹ (q₁)² - 4 * q₂ * q₀ ≥ 0 + roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius2) + return roots # `σ1` and `σ2` +end + +""" + arguments = extract_parameters(ex::Expr) + +Extract the arguments of an expression that is keyword parameter tuple. +Implementation suggested by Mitchell J. O'Sullivan (@mosullivan93). 
+""" +function extract_parameters(ex::Expr) + Meta.isexpr(ex, :tuple, 1) && + Meta.isexpr((@inbounds p = ex.args[1]), :parameters) && + all(Base.Docs.validcall, p.args) || throw(ArgumentError("Given expression is not a kw parameter tuple [e.g. :(; x)]: $ex")) + return p.args end diff --git a/src/lnlq.jl b/src/lnlq.jl index a1f890de2..f59f5daf4 100644 --- a/src/lnlq.jl +++ b/src/lnlq.jl @@ -9,9 +9,9 @@ # and is equivalent to applying the SYMMLQ method # to the linear system # -# AAᵀy = b with x = Aᵀy and can be reformulated as +# AAᴴy = b with x = Aᴴy and can be reformulated as # -# [ -I Aᵀ ][ x ] = [ 0 ] +# [ -I Aᴴ ][ x ] = [ 0 ] # [ A ][ y ] [ b ]. # # This method is based on the Golub-Kahan bidiagonalization process and is described in @@ -26,10 +26,14 @@ export lnlq, lnlq! """ (x, y, stats) = lnlq(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), σ::T=zero(T), - atol::T=√eps(T), rtol::T=√eps(T), etolx::T=√eps(T), etoly::T=√eps(T), itmax::Int=0, - transfer_to_craig::Bool=true, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + transfer_to_craig::Bool=true, + sqd::Bool=false, λ::T=zero(T), + σ::T=zero(T), utolx::T=√eps(T), + utoly::T=√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -38,17 +42,17 @@ Find the least-norm solution of the consistent linear system Ax + λ²y = b -using the LNLQ method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LNLQ method, where λ ≥ 0 is a regularization parameter. For a system in the form Ax = b, LNLQ method is equivalent to applying -SYMMLQ to AAᵀy = b and recovering x = Aᵀy but is more stable. +SYMMLQ to AAᴴy = b and recovering x = Aᴴy but is more stable. 
Note that y are the Lagrange multipliers of the least-norm problem minimize ‖x‖ s.t. Ax = b. If `λ > 0`, LNLQ solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -59,12 +63,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, LNLQ solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -75,12 +79,40 @@ In this case, `M` can still be specified and indicates the weighted norm in whic In this implementation, both the x and y-parts of the solution are returned. -`etolx` and `etoly` are tolerances on the upper bound of the distance to the solution ‖x-xₛ‖ and ‖y-yₛ‖, respectively. +`utolx` and `utoly` are tolerances on the upper bound of the distance to the solution ‖x-x*‖ and ‖y-y*‖, respectively. The bound is valid if λ>0 or σ>0 where σ should be strictly smaller than the smallest positive singular value. For instance σ:=(1-1e-7)σₘᵢₙ . -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `transfer_to_craig`: transfer from the LNLQ point to the CRAIG point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`; +* `utolx`: tolerance on the upper bound on the distance to the solution `‖x-x*‖`; +* `utoly`: tolerance on the upper bound on the distance to the solution `‖y-y*‖`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`LNLQStats`](@ref) structure. #### Reference @@ -88,12 +120,6 @@ and `false` otherwise. """ function lnlq end -function lnlq(A, b :: AbstractVector{FC}; kwargs...) 
where FC <: FloatOrComplex - solver = LnlqSolver(A, b) - lnlq!(solver, A, b; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = lnlq!(solver::LnlqSolver, A, b; kwargs...) @@ -103,389 +129,432 @@ See [`LnlqSolver`](@ref) for more details about the `solver`. """ function lnlq! end -function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), σ :: T=zero(T), - atol :: T=√eps(T), rtol :: T=√eps(T), etolx :: T=√eps(T), etoly :: T=√eps(T), itmax :: Int=0, - transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LNLQ: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - allocate_if(λ > 0, solver, :q, S, n) - x, Nv, Aᵀu, y, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w̄ - Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats - rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - # Set up parameter σₑₛₜ for the error estimate on x and y - σₑₛₜ = √(σ^2 + λ^2) - complex_error_bnd = false - - # Initial solutions (x₀, y₀) and residual norm ‖r₀‖. 
- x .= zero(FC) - y .= zero(FC) - - bNorm = @knrm2(m, b) - if bNorm == 0 - stats.niter = 0 - stats.solved = true - stats.error_with_bnd = false - history && push!(rNorms, bNorm) - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_lnlq = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lnlq = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_craig::Bool = true), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; σ::T = zero(T) ), + :(; utolx::T = √eps(T) ), + :(; utoly::T = √eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_lnlq = mapreduce(extract_parameters, vcat, def_kwargs_lnlq) + +args_lnlq = (:A, :b) +kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lnlq($(def_args_lnlq...); $(def_kwargs_lnlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LnlqSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lnlq!(solver, $(args_lnlq...); $(kwargs_lnlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - history && push!(rNorms, bNorm) - ε = atol + rtol * bNorm - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - - # Update iteration index - iter = iter + 1 - - # Initialize generalized Golub-Kahan bidiagonalization. - # β₁Mu₁ = b. 
- Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁ - βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M - if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, u) - MisI || @kscal!(m, one(FC) / βₖ, Mu) - end + function lnlq!(solver :: LnlqSolver{T,FC,S}, $(def_args_lnlq...); $(def_kwargs_lnlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LNLQ: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + allocate_if(λ > 0, solver, :q, S, n) + x, Nv, Aᴴu, y, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w̄ + Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats + rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + # Set up parameter σₑₛₜ for the error estimate on x and y + σₑₛₜ = √(σ^2 + λ^2) + complex_error_bnd = false + + # Initial solutions (x₀, y₀) and residual norm ‖r₀‖. 
+ x .= zero(FC) + y .= zero(FC) + + bNorm = @knrm2(m, b) + if bNorm == 0 + stats.niter = 0 + stats.solved = true + stats.error_with_bnd = false + history && push!(rNorms, bNorm) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver + end - # α₁Nv₁ = Aᵀu₁. - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁ - αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N - if αₖ ≠ 0 - @kscal!(n, one(FC) / αₖ, v) - NisI || @kscal!(n, one(FC) / αₖ, Nv) - end + history && push!(rNorms, bNorm) + ε = atol + rtol * bNorm - w̄ .= u # Direction w̄₁ - cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᵀ - sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᵀ - ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ - ηₖ = zero(FC) # Coefficient of M̅ₖ - - # Variable used for the regularization. - λₖ = λ # λ₁ = λ - cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ - cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁ - λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 - - # Initialize the regularization. - if λ > 0 - # k 2k k 2k k 2k - # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ] - # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ] - (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ) - - # q̄₁ = sp₁ * v₁ - @kscal!(n, spₖ, q) - else - αhatₖ = αₖ - end + iter = 0 + itmax == 0 && (itmax = m + n) - # Begin the LQ factorization of (Lₖ)ᵀ = M̅ₖQₖ. 
- # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ] - # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ] - # [ • • • • • • ] [ 0 • • • • ] - # [ • • • • • • ] = [ • • • • • • ] Qₖ - # [ • • • • 0 ] [ • • • • • • ] - # [ • • • βₖ] [ • • • • 0 ] - # [ 0 • • • • 0 αₖ] [ 0 • • • 0 ηₖ ϵbarₖ] - - ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁ - - # Hₖ = Bₖ(Lₖ)ᵀ = [ Lₖ(Lₖ)ᵀ ] ⟹ (Hₖ₋₁)ᵀ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ - # [ αₖβₖ₊₁(eₖ)ᵀ ] - # - # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ - # tₖ = (τ₁, •••, τₖ) - # z̅ₖ = (zₖ₋₁, ζbarₖ) = (ζ₁, •••, ζₖ₋₁, ζbarₖ) - - τₖ = βₖ / αhatₖ # τ₁ = β₁ / αhat₁ - ζbarₖ = τₖ / ϵbarₖ # ζbar₁ = τ₁ / ϵbar₁ - - # Stopping criterion. - solved_lq = solved_cg = false - tired = false - status = "unknown" - user_requested_exit = false - - if σₑₛₜ > 0 - τtildeₖ = βₖ / σₑₛₜ - ζtildeₖ = τtildeₖ / σₑₛₜ - err_x = τtildeₖ - err_y = ζtildeₖ - - solved_lq = err_x ≤ etolx || err_y ≤ etoly - history && push!(xNorms, err_x) - history && push!(yNorms, err_y) - - ρbar = -σₑₛₜ - csig = -one(T) - end + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) - while !(solved_lq || solved_cg || tired || user_requested_exit) + # Update iteration index + iter = iter + 1 - # Update of (xᵃᵘˣ)ₖ = Vₖtₖ - if λ > 0 - # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, τₖ * cpₖ, v, x) - if iter ≥ 2 - @kaxpy!(n, τₖ * spₖ, q, x) - # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ - @kaxpby!(n, spₖ, v, -cpₖ, q) - end - else - # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ - @kaxpy!(n, τₖ, v, x) + # Initialize generalized Golub-Kahan bidiagonalization. + # β₁Mu₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁ + βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M + if βₖ ≠ 0 + @kscal!(m, one(FC) / βₖ, u) + MisI || @kscal!(m, one(FC) / βₖ, Mu) end - # Continue the generalized Golub-Kahan bidiagonalization. 
- # AVₖ = MUₖ₊₁Bₖ - # AᵀUₖ₊₁ = NVₖ(Bₖ)ᵀ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᵀ = NVₖ₊₁(Lₖ₊₁)ᵀ - # - # [ α₁ 0 • • • • 0 ] - # [ β₂ α₂ • • ] - # [ 0 • • • • ] - # Lₖ = [ • • • • • • ] - # [ • • • • • • ] - # [ • • • • 0 ] - # [ 0 • • • 0 βₖ αₖ] - # - # Bₖ = [ Lₖ ] - # [ βₖ₊₁(eₖ)ᵀ ] - - # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -αₖ, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁ - βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M - if βₖ₊₁ ≠ 0 - @kscal!(m, one(FC) / βₖ₊₁, u) - MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu) + # α₁Nv₁ = Aᴴu₁. + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁ + αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N + if αₖ ≠ 0 + @kscal!(n, one(FC) / αₖ, v) + NisI || @kscal!(n, one(FC) / αₖ, Nv) end - # αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -βₖ₊₁, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁ - αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N - if αₖ₊₁ ≠ 0 - @kscal!(n, one(FC) / αₖ₊₁, v) - NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv) - end + w̄ .= u # Direction w̄₁ + cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ + sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ + ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ + ηₖ = zero(FC) # Coefficient of M̅ₖ + + # Variable used for the regularization. + λₖ = λ # λ₁ = λ + cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ + cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁ + λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 - # Continue the regularization. + # Initialize the regularization. 
if λ > 0 # k 2k k 2k k 2k # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ] # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ] - βhatₖ₊₁ = cpₖ * βₖ₊₁ - θₖ₊₁ = spₖ * βₖ₊₁ + (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ) - # 2k 2k+1 2k 2k+1 2k 2k+1 - # k [ 0 0 ] [ -cdₖ sdₖ ] = [ 0 0 ] - # k+1 [ θₖ₊₁ λ ] [ sdₖ cdₖ ] [ 0 λₖ₊₁ ] - (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁) - - # qₖ ← sdₖ * q̄ₖ - @kscal!(n, sdₖ, q) - - # k+1 2k+1 k+1 2k+1 k+1 2k+1 - # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ] - # k+2 [ βₖ₊₂ 0 ] [ spₖ₊₁ -cpₖ₊₁ ] [ γₖ₊₂ θₖ₊₂ ] - (cpₖ₊₁, spₖ₊₁, αhatₖ₊₁) = sym_givens(αₖ₊₁, λₖ₊₁) + # q̄₁ = sp₁ * v₁ + @kscal!(n, spₖ, q) else - βhatₖ₊₁ = βₖ₊₁ - αhatₖ₊₁ = αₖ₊₁ + αhatₖ = αₖ end - if σₑₛₜ > 0 && !complex_error_bnd - μbar = -csig * αhatₖ - ρ = √(ρbar^2 + αhatₖ^2) - csig = ρbar / ρ - ssig = αhatₖ / ρ - ρbar = ssig * μbar + csig * σₑₛₜ - μbar = -csig * βhatₖ₊₁ - θ = βhatₖ₊₁ * csig / ρbar - ωdisc = σₑₛₜ^2 - σₑₛₜ * βhatₖ₊₁ * θ - if ωdisc < 0 - complex_error_bnd = true - else - ω = √ωdisc - τtildeₖ = - τₖ * βhatₖ₊₁ / ω - end + # Begin the LQ factorization of (Lₖ)ᴴ = M̅ₖQₖ. + # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ] + # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ] + # [ • • • • • • ] [ 0 • • • • ] + # [ • • • • • • ] = [ • • • • • • ] Qₖ + # [ • • • • 0 ] [ • • • • • • ] + # [ • • • βₖ] [ • • • • 0 ] + # [ 0 • • • • 0 αₖ] [ 0 • • • 0 ηₖ ϵbarₖ] - ρ = √(ρbar^2 + βhatₖ₊₁^2) - csig = ρbar / ρ - ssig = βhatₖ₊₁ / ρ - ρbar = ssig * μbar + csig * σₑₛₜ - end + ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁ - # Continue the LQ factorization of (Lₖ₊₁)ᵀ. 
- # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ] - # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁] - # [0 sₖ₊₁ -cₖ₊₁] + # Hₖ = Bₖ(Lₖ)ᴴ = [ Lₖ(Lₖ)ᴴ ] ⟹ (Hₖ₋₁)ᴴ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ + # [ αₖβₖ₊₁(eₖ)ᵀ ] + # + # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ + # tₖ = (τ₁, •••, τₖ) + # z̅ₖ = (zₖ₋₁, ζbarₖ) = (ζ₁, •••, ζₖ₋₁, ζbarₖ) - (cₖ₊₁, sₖ₊₁, ϵₖ) = sym_givens(ϵbarₖ, βhatₖ₊₁) - ηₖ₊₁ = αhatₖ₊₁ * sₖ₊₁ - ϵbarₖ₊₁ = - αhatₖ₊₁ * cₖ₊₁ + τₖ = βₖ / αhatₖ # τ₁ = β₁ / αhat₁ + ζbarₖ = τₖ / ϵbarₖ # ζbar₁ = τ₁ / ϵbar₁ - # Update solutions of Lₖ₊₁tₖ₊₁ = β₁e₁ and M̅ₖ₊₁z̅ₖ₊₁ = tₖ₊₁. - τₖ₊₁ = - βhatₖ₊₁ * τₖ / αhatₖ₊₁ - ζₖ = cₖ₊₁ * ζbarₖ - ζbarₖ₊₁ = (τₖ₊₁ - ηₖ₊₁ * ζₖ) / ϵbarₖ₊₁ + # Stopping criterion. + solved_lq = solved_cg = false + tired = false + status = "unknown" + user_requested_exit = false + overtimed = false - # Relations for the directions wₖ and w̄ₖ₊₁ - # [w̄ₖ uₖ₊₁] [cₖ₊₁ sₖ₊₁] = [wₖ w̄ₖ₊₁] → wₖ = cₖ₊₁ * w̄ₖ + sₖ₊₁ * uₖ₊₁ - # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁ + if σₑₛₜ > 0 + τtildeₖ = βₖ / σₑₛₜ + ζtildeₖ = τtildeₖ / σₑₛₜ + err_x = τtildeₖ + err_y = ζtildeₖ - # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ - @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y) - @kaxpy!(m, ζₖ * sₖ₊₁, u, y) + solved_lq = err_x ≤ utolx || err_y ≤ utoly + history && push!(xNorms, err_x) + history && push!(yNorms, err_y) - # Compute w̄ₖ₊₁ - @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄) + ρbar = -σₑₛₜ + csig = -one(T) + end - if σₑₛₜ > 0 && !complex_error_bnd - if transfer_to_craig - disc_x = τtildeₖ^2 - τₖ₊₁^2 - disc_x < 0 ? complex_error_bnd = true : err_x = √disc_x + while !(solved_lq || solved_cg || tired || user_requested_exit || overtimed) + + # Update of (xᵃᵘˣ)ₖ = Vₖtₖ + if λ > 0 + # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) + @kaxpy!(n, τₖ * cpₖ, v, x) + if iter ≥ 2 + @kaxpy!(n, τₖ * spₖ, q, x) + # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + @kaxpby!(n, spₖ, v, -cpₖ, q) + end else - disc_xL = τtildeₖ^2 - τₖ₊₁^2 + (τₖ₊₁ - ηₖ₊₁ * ζₖ)^2 - disc_xL < 0 ? 
complex_error_bnd = true : err_x = √disc_xL + # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ + @kaxpy!(n, τₖ, v, x) end - ηtildeₖ = ω * sₖ₊₁ - ϵtildeₖ = -ω * cₖ₊₁ - ζtildeₖ = (τtildeₖ - ηtildeₖ * ζₖ) / ϵtildeₖ - - if transfer_to_craig - disc_y = ζtildeₖ^2 - ζbarₖ₊₁^2 - disc_y < 0 ? complex_error_bnd = true : err_y = √disc_y - else - err_y = abs(ζtildeₖ) + + # Continue the generalized Golub-Kahan bidiagonalization. + # AVₖ = MUₖ₊₁Bₖ + # AᴴUₖ₊₁ = NVₖ(Bₖ)ᴴ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᴴ = NVₖ₊₁(Lₖ₊₁)ᴴ + # + # [ α₁ 0 • • • • 0 ] + # [ β₂ α₂ • • ] + # [ 0 • • • • ] + # Lₖ = [ • • • • • • ] + # [ • • • • • • ] + # [ • • • • 0 ] + # [ 0 • • • 0 βₖ αₖ] + # + # Bₖ = [ Lₖ ] + # [ βₖ₊₁(eₖ)ᵀ ] + + # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -αₖ, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁ + βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M + if βₖ₊₁ ≠ 0 + @kscal!(m, one(FC) / βₖ₊₁, u) + MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu) end - history && push!(xNorms, err_x) - history && push!(yNorms, err_y) - end + # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁ + αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N + if αₖ₊₁ ≠ 0 + @kscal!(n, one(FC) / αₖ₊₁, v) + NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv) + end - # Compute residual norm ‖(rᴸ)ₖ‖ = |αₖ| * √(|ϵbarₖζbarₖ|² + |βₖ₊₁sₖζₖ₋₁|²) - if iter == 1 - rNorm_lq = bNorm - else - rNorm_lq = abs(αhatₖ) * √(abs2(ϵbarₖ * ζbarₖ) + abs2(βhatₖ₊₁ * sₖ * ζₖ₋₁)) - end - history && push!(rNorms, rNorm_lq) + # Continue the regularization. 
+ if λ > 0 + # k 2k k 2k k 2k + # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ] + # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ] + βhatₖ₊₁ = cpₖ * βₖ₊₁ + θₖ₊₁ = spₖ * βₖ₊₁ + + # 2k 2k+1 2k 2k+1 2k 2k+1 + # k [ 0 0 ] [ -cdₖ sdₖ ] = [ 0 0 ] + # k+1 [ θₖ₊₁ λ ] [ sdₖ cdₖ ] [ 0 λₖ₊₁ ] + (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁) + + # qₖ ← sdₖ * q̄ₖ + @kscal!(n, sdₖ, q) + + # k+1 2k+1 k+1 2k+1 k+1 2k+1 + # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ] + # k+2 [ βₖ₊₂ 0 ] [ spₖ₊₁ -cpₖ₊₁ ] [ γₖ₊₂ θₖ₊₂ ] + (cpₖ₊₁, spₖ₊₁, αhatₖ₊₁) = sym_givens(αₖ₊₁, λₖ₊₁) + else + βhatₖ₊₁ = βₖ₊₁ + αhatₖ₊₁ = αₖ₊₁ + end - # Compute residual norm ‖(rᶜ)ₖ‖ = |βₖ₊₁ * τₖ| - if transfer_to_craig - rNorm_cg = abs(βhatₖ₊₁ * τₖ) - end + if σₑₛₜ > 0 && !complex_error_bnd + μbar = -csig * αhatₖ + ρ = √(ρbar^2 + αhatₖ^2) + csig = ρbar / ρ + ssig = αhatₖ / ρ + ρbar = ssig * μbar + csig * σₑₛₜ + μbar = -csig * βhatₖ₊₁ + θ = βhatₖ₊₁ * csig / ρbar + ωdisc = σₑₛₜ^2 - σₑₛₜ * βhatₖ₊₁ * θ + if ωdisc < 0 + complex_error_bnd = true + else + ω = √ωdisc + τtildeₖ = - τₖ * βhatₖ₊₁ / ω + end + + ρ = √(ρbar^2 + βhatₖ₊₁^2) + csig = ρbar / ρ + ssig = βhatₖ₊₁ / ρ + ρbar = ssig * μbar + csig * σₑₛₜ + end - # Update sₖ, cₖ, αₖ, βₖ, ηₖ, ϵbarₖ, τₖ, ζₖ₋₁ and ζbarₖ. - cₖ = cₖ₊₁ - sₖ = sₖ₊₁ - αₖ = αₖ₊₁ - αhatₖ = αhatₖ₊₁ - βₖ = βₖ₊₁ - ηₖ = ηₖ₊₁ - ϵbarₖ = ϵbarₖ₊₁ - τₖ = τₖ₊₁ - ζₖ₋₁ = ζₖ - ζbarₖ = ζbarₖ₊₁ - - # Update regularization variables. - if λ > 0 - cpₖ = cpₖ₊₁ - spₖ = spₖ₊₁ - end + # Continue the LQ factorization of (Lₖ₊₁)ᴴ. + # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ] + # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁] + # [0 sₖ₊₁ -cₖ₊₁] + + (cₖ₊₁, sₖ₊₁, ϵₖ) = sym_givens(ϵbarₖ, βhatₖ₊₁) + ηₖ₊₁ = αhatₖ₊₁ * sₖ₊₁ + ϵbarₖ₊₁ = - αhatₖ₊₁ * cₖ₊₁ + + # Update solutions of Lₖ₊₁tₖ₊₁ = β₁e₁ and M̅ₖ₊₁z̅ₖ₊₁ = tₖ₊₁. 
+ τₖ₊₁ = - βhatₖ₊₁ * τₖ / αhatₖ₊₁ + ζₖ = cₖ₊₁ * ζbarₖ + ζbarₖ₊₁ = (τₖ₊₁ - ηₖ₊₁ * ζₖ) / ϵbarₖ₊₁ + + # Relations for the directions wₖ and w̄ₖ₊₁ + # [w̄ₖ uₖ₊₁] [cₖ₊₁ sₖ₊₁] = [wₖ w̄ₖ₊₁] → wₖ = cₖ₊₁ * w̄ₖ + sₖ₊₁ * uₖ₊₁ + # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁ + + # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ + @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y) + @kaxpy!(m, ζₖ * sₖ₊₁, u, y) + + # Compute w̄ₖ₊₁ + @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄) + + if σₑₛₜ > 0 && !complex_error_bnd + if transfer_to_craig + disc_x = τtildeₖ^2 - τₖ₊₁^2 + disc_x < 0 ? complex_error_bnd = true : err_x = √disc_x + else + disc_xL = τtildeₖ^2 - τₖ₊₁^2 + (τₖ₊₁ - ηₖ₊₁ * ζₖ)^2 + disc_xL < 0 ? complex_error_bnd = true : err_x = √disc_xL + end + ηtildeₖ = ω * sₖ₊₁ + ϵtildeₖ = -ω * cₖ₊₁ + ζtildeₖ = (τtildeₖ - ηtildeₖ * ζₖ) / ϵtildeₖ + + if transfer_to_craig + disc_y = ζtildeₖ^2 - ζbarₖ₊₁^2 + disc_y < 0 ? complex_error_bnd = true : err_y = √disc_y + else + err_y = abs(ζtildeₖ) + end + + history && push!(xNorms, err_x) + history && push!(yNorms, err_y) + end - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - solved_lq = rNorm_lq ≤ ε - solved_cg = transfer_to_craig && rNorm_cg ≤ ε - if σₑₛₜ > 0 - if transfer_to_craig - solved_cg = solved_cg || err_x ≤ etolx || err_y ≤ etoly + # Compute residual norm ‖(rᴸ)ₖ‖ = |αₖ| * √(|ϵbarₖζbarₖ|² + |βₖ₊₁sₖζₖ₋₁|²) + if iter == 1 + rNorm_lq = bNorm else - solved_lq = solved_lq || err_x ≤ etolx || err_y ≤ etoly + rNorm_lq = abs(αhatₖ) * √(abs2(ϵbarₖ * ζbarₖ) + abs2(βhatₖ₊₁ * sₖ * ζₖ₋₁)) end - end - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) + history && push!(rNorms, rNorm_lq) - # Update iteration index. 
- iter = iter + 1 - end - (verbose > 0) && @printf("\n") + # Compute residual norm ‖(rᶜ)ₖ‖ = |βₖ₊₁ * τₖ| + if transfer_to_craig + rNorm_cg = abs(βhatₖ₊₁ * τₖ) + end - if solved_cg - if λ > 0 - # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, τₖ * cpₖ, v, x) - if iter ≥ 2 - @kaxpy!(n, τₖ * spₖ, q, x) + # Update sₖ, cₖ, αₖ, βₖ, ηₖ, ϵbarₖ, τₖ, ζₖ₋₁ and ζbarₖ. + cₖ = cₖ₊₁ + sₖ = sₖ₊₁ + αₖ = αₖ₊₁ + αhatₖ = αhatₖ₊₁ + βₖ = βₖ₊₁ + ηₖ = ηₖ₊₁ + ϵbarₖ = ϵbarₖ₊₁ + τₖ = τₖ₊₁ + ζₖ₋₁ = ζₖ + ζbarₖ = ζbarₖ₊₁ + + # Update regularization variables. + if λ > 0 + cpₖ = cpₖ₊₁ + spₖ = spₖ₊₁ end - else - # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ - @kaxpy!(n, τₖ, v, x) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + solved_lq = rNorm_lq ≤ ε + solved_cg = transfer_to_craig && rNorm_cg ≤ ε + if σₑₛₜ > 0 + solved_lq = solved_lq || err_x ≤ utolx || err_y ≤ utoly + solved_cg = transfer_to_craig && (solved_cg || err_x ≤ utolx || err_y ≤ utoly) + end + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time)) + + # Update iteration index. 
+ iter = iter + 1 end - # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ - @kaxpy!(m, ζbarₖ, w̄, y) - else - if λ > 0 - # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x) - if iter ≥ 2 - @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x) + (verbose > 0) && @printf(iostream, "\n") + + if solved_cg + if λ > 0 + # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) + @kaxpy!(n, τₖ * cpₖ, v, x) + if iter ≥ 2 + @kaxpy!(n, τₖ * spₖ, q, x) + end + else + # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ + @kaxpy!(n, τₖ, v, x) end + # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ + @kaxpy!(m, ζbarₖ, w̄, y) else - # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ - @kaxpy!(n, ηₖ * ζₖ₋₁, v, x) + if λ > 0 + # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁) + @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x) + if iter ≥ 2 + @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x) + end + else + # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ + @kaxpy!(n, ηₖ * ζₖ₋₁, v, x) + end end - end - tired && (status = "maximum number of iterations exceeded") - solved_lq && (status = "solutions (xᴸ, yᴸ) good enough for the tolerances given") - solved_cg && (status = "solutions (xᶜ, yᶜ) good enough for the tolerances given") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved_lq || solved_cg - stats.error_with_bnd = complex_error_bnd - stats.status = status - return solver + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved_lq && (status = "solutions (xᴸ, yᴸ) good enough for the tolerances given") + solved_cg && (status = "solutions (xᶜ, yᶜ) good enough for the tolerances given") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved_lq || solved_cg + stats.error_with_bnd = complex_error_bnd + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/lslq.jl b/src/lslq.jl index 908de19c5..3a549207e 100644 --- a/src/lslq.jl +++ 
b/src/lslq.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSLQ is formally equivalent to applying SYMMLQ to the normal equations # but should be more stable. @@ -21,15 +21,17 @@ export lslq, lslq! - """ (x, stats) = lslq(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), - atol::T=√eps(T), btol::T=√eps(T), etol::T=√eps(T), - window::Int=5, utol::T=√eps(T), itmax::Int=0, - σ::T=zero(T), transfer_to_lsqr::Bool=false, - conlim::T=1/√eps(T), verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + window::Int=5, transfer_to_lsqr::Bool=false, + sqd::Bool=false, λ::T=zero(T), + σ::T=zero(T), etol::T=√eps(T), + utol::T=√eps(T), btol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -38,31 +40,17 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSLQ method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSLQ method, where λ ≥ 0 is a regularization parameter. LSLQ is formally equivalent to applying SYMMLQ to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb but is more stable. 
-#### Main features - -* the solution estimate is updated along orthogonal directions -* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing -* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing -* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error) -* if `A` is rank deficient, identify the minimum least-squares solution - -#### Optional arguments - -* `M`: a symmetric and positive definite dual preconditioner -* `N`: a symmetric and positive definite primal preconditioner -* `sqd` indicates that we are solving a symmetric and quasi-definite system with `λ=1` - If `λ > 0`, we solve the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -72,39 +60,61 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSLQ is then equivalent to applying SYMMLQ to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSLQ is then equivalent to applying SYMMLQ to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. 
-* `λ` is a regularization parameter (see the problem statement above) -* `σ` is an underestimate of the smallest nonzero singular value of `A`---setting `σ` too large will result in an error in the course of the iterations -* `atol` is a stopping tolerance based on the residual -* `btol` is a stopping tolerance used to detect zero-residual problems -* `etol` is a stopping tolerance based on the lower bound on the error -* `window` is the number of iterations used to accumulate a lower bound on the error -* `utol` is a stopping tolerance based on the upper bound on the error -* `transfer_to_lsqr` return the CG solution estimate (i.e., the LSQR point) instead of the LQ estimate -* `itmax` is the maximum number of iterations (0 means no imposed limit) -* `conlim` is the limit on the estimated condition number of `A` beyond which the solution will be abandoned -* `verbose` determines verbosity. - -#### Return values +#### Main features -`lslq` returns the tuple `(x, stats)` where +* the solution estimate is updated along orthogonal directions +* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing +* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing +* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error) +* if `A` is rank deficient, identify the minimum least-squares solution -* `x` is the LQ solution estimate -* `stats` collects other statistics on the run in a LSLQStats +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`; +* `etol`: stopping tolerance based on the lower bound on the error; +* `utol`: stopping tolerance based on the upper bound on the error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LSLQStats`](@ref) structure. 
* `stats.err_lbnds` is a vector of lower bounds on the LQ error---the vector is empty if `window` is set to zero * `stats.err_ubnds_lq` is a vector of upper bounds on the LQ error---the vector is empty if `σ == 0` is left at zero @@ -116,8 +126,8 @@ In this case, `N` can still be specified and indicates the weighted norm in whic The iterations stop as soon as one of the following conditions holds true: * the optimality residual is sufficiently small (`stats.status = "found approximate minimum least-squares solution"`) in the sense that either - * ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ atol, or - * 1 + ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ 1 + * ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ atol, or + * 1 + ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ 1 * an approximate zero-residual solution has been found (`stats.status = "found approximate zero-residual solution"`) in the sense that either * ‖r‖ / ‖b‖ ≤ btol + atol ‖A‖ * ‖xᴸ‖ / ‖b‖, or * 1 + ‖r‖ / ‖b‖ ≤ 1 @@ -127,9 +137,6 @@ The iterations stop as soon as one of the following conditions holds true: * the lower bound on the LQ forward error is less than etol * ‖xᴸ‖ * the upper bound on the CG forward error is less than utol * ‖xᶜ‖ -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. - #### References * R. Estrin, D. Orban and M. A. Saunders, [*Euclidean-norm error bounds for SYMMLQ and CG*](https://doi.org/10.1137/16M1094816), SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019. @@ -137,12 +144,6 @@ and `false` otherwise. """ function lslq end -function lslq(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = LslqSolver(A, b, window=window) - lslq!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = lslq!(solver::LslqSolver, A, b; kwargs...) @@ -152,315 +153,363 @@ See [`LslqSolver`](@ref) for more details about the `solver`. """ function lslq! 
end -function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T), - utol :: T=√eps(T), itmax :: Int=0, σ :: T=zero(T), - transfer_to_lsqr :: Bool=false, conlim :: T=1/√eps(T), - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSLQ: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₙ and N = Iₘ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.w̄ - Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats - rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds - err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - λ² = λ * λ - ctol = conlim > 0 ? 1/conlim : zero(T) - - x .= zero(FC) # LSLQ point - - # Initialize Golub-Kahan process. - # β₁ M u₁ = b. 
- Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.error_with_bnd = false - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a zero-residual solution" - return solver - end - β = β₁ - - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) # = α₁ - - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.error_with_bnd = false - history && push!(rNorms, β₁) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a minimum least-squares solution" - return solver +def_args_lslq = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lslq = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_lsqr::Bool = false), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; σ::T = zero(T) ), + :(; etol::T = √eps(T) ), + :(; utol::T = √eps(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_lslq = mapreduce(extract_parameters, vcat, def_kwargs_lslq) + +args_lslq = (:A, :b) +kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lslq($(def_args_lslq...); window :: Int=5, $(def_kwargs_lslq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LslqSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lslq!(solver, $(args_lslq...); 
$(kwargs_lslq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - - Anorm = α - Anorm² = α * α - - # condition number estimate - σmax = zero(T) - σmin = Inf - Acond = zero(T) - - xlqNorm = zero(T) - xlqNorm² = zero(T) - xcgNorm = zero(T) - xcgNorm² = zero(T) - - w̄ .= v # w̄₁ = v₁ - - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - complex_error_bnd = false - - # Initialize other constants. - αL = α - βL = β - ρ̄ = -σ - γ̄ = α - ψ = β₁ - c = -one(T) - s = zero(T) - δ = -one(T) - τ = α * β₁ - ζ = zero(T) - ζ̄ = zero(T) - ζ̃ = zero(T) - csig = -one(T) - - rNorm = β₁ - history && push!(rNorms, rNorm) - ArNorm = α * β - history && push!(ArNorms, ArNorm) - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm) - - status = "unknown" - solved = solved_mach = solved_lim = (rNorm ≤ atol) - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = false - fwd_err_lbnd = false - fwd_err_ubnd = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - - # Generate next Golub-Kahan vectors. - # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - - # 2. 
αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - end - # rotate out regularization term if present - αL = α - βL = β - if λ ≠ 0 - (cL, sL, βL) = sym_givens(β, λ) - αL = cL * α + function lslq!(solver :: LslqSolver{T,FC,S}, $(def_args_lslq...); $(def_kwargs_lslq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - # the rotation updates the next regularization parameter - λ = sqrt(λ² + (sL * α)^2) - end - Anorm² = Anorm² + αL * αL + βL * βL # = ‖Lₖ‖² - Anorm = sqrt(Anorm²) - end + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LSLQ: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₙ and N = Iₘ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + x, Nv, Aᴴu, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.w̄ + Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats + rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds + err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? 
Nv : solver.v - # Continue QR factorization of Bₖ - # - # k k+1 k k+1 k k+1 - # k [ c' s' ] [ γ̄ ] = [ γ δ ] - # k+1 [ s' -c' ] [ β α⁺ ] [ γ̄ ] - (cp, sp, γ) = sym_givens(γ̄, βL) - τ = -τ * δ / γ # forward substitution for t - δ = sp * αL - γ̄ = -cp * αL - - if σ > 0 && !complex_error_bnd - # Continue QR factorization for error estimate - μ̄ = -csig * γ - (csig, ssig, ρ) = sym_givens(ρ̄, γ) - ρ̄ = ssig * μ̄ + csig * σ - μ̄ = -csig * δ - - # determine component of eigenvector and Gauss-Radau parameter - h = δ * csig / ρ̄ - disc = σ * (σ - δ * h) - disc < 0 ? complex_error_bnd = true : ω = sqrt(disc) - (csig, ssig, ρ) = sym_givens(ρ̄, δ) - ρ̄ = ssig * μ̄ + csig * σ + λ² = λ * λ + ctol = conlim > 0 ? 1/conlim : zero(T) + + x .= zero(FC) # LSLQ point + + # Initialize Golub-Kahan process. + # β₁ M u₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) + β₁ = sqrt(@kdotr(m, u, Mu)) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.error_with_bnd = false + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver end + β = β₁ + + @kscal!(m, one(FC)/β₁, u) + MisI || @kscal!(m, one(FC)/β₁, Mu) + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) # = α₁ + + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.error_with_bnd = false + history && push!(rNorms, β₁) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver + end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) - # Continue LQ factorization of Rₖ - ϵ̄ = -γ * c - η = γ * s - (c, s, ϵ) = sym_givens(ϵ̄, δ) + Anorm = α + Anorm² = α * α # condition number estimate - # the QLP factorization suggests that the diagonal of M̄ 
approximates - # the singular values of B. - σmax = max(σmax, ϵ, abs(ϵ̄)) - σmin = min(σmin, ϵ, abs(ϵ̄)) - Acond = σmax / σmin - - # forward substitution for z, ζ̄ - ζold = ζ - ζ = (τ - ζ * η) / ϵ - ζ̄ = ζ / c - - # residual norm estimate - rNorm = sqrt((ψ * cp - ζold * η)^2 + (ψ * sp)^2) + σmax = zero(T) + σmin = Inf + Acond = zero(T) + + xlqNorm = zero(T) + xlqNorm² = zero(T) + xcgNorm = zero(T) + xcgNorm² = zero(T) + + w̄ .= v # w̄₁ = v₁ + + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) + complex_error_bnd = false + + # Initialize other constants. + αL = α + βL = β + ρ̄ = -σ + γ̄ = α + ψ = β₁ + c = -one(T) + s = zero(T) + δ = -one(T) + τ = α * β₁ + ζ = zero(T) + ζ̄ = zero(T) + ζ̃ = zero(T) + csig = -one(T) + + rNorm = β₁ history && push!(rNorms, rNorm) - - ArNorm = sqrt((γ * ϵ * ζ)^2 + (δ * η * ζold)^2) + ArNorm = α * β history && push!(ArNorms, ArNorm) - # Compute ψₖ - ψ = ψ * sp + iter = 0 + itmax == 0 && (itmax = m + n) - # Compute ‖x_cg‖₂ - xcgNorm² = xlqNorm² + ζ̄ * ζ̄ + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm, ktimer(start_time)) - if σ > 0 && iter > 0 && !complex_error_bnd - disc = ζ̃ * ζ̃ - ζ̄ * ζ̄ - if disc < 0 - complex_error_bnd = true - else - err_ubnd_cg = sqrt(disc) - history && push!(err_ubnds_cg, err_ubnd_cg) - fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²) + status = "unknown" + ε = atol + rtol * β₁ + solved = solved_mach = solved_lim = (rNorm ≤ ε) + tired = iter ≥ itmax + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid = zero_resid_mach = zero_resid_lim = false + fwd_err_lbnd = false + fwd_err_ubnd = false + user_requested_exit = false + overtimed = false + + while ! 
(solved || tired || ill_cond || user_requested_exit || overtimed) + + # Generate next Golub-Kahan vectors. + # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + end + + # rotate out regularization term if present + αL = α + βL = β + if λ ≠ 0 + (cL, sL, βL) = sym_givens(β, λ) + αL = cL * α + + # the rotation updates the next regularization parameter + λ = sqrt(λ² + (sL * α)^2) + end + Anorm² = Anorm² + αL * αL + βL * βL # = ‖Lₖ‖² + Anorm = sqrt(Anorm²) end - end - test1 = rNorm / β₁ - test2 = ArNorm / (Anorm * rNorm) - test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xlqNorm / β₁) - rtol = btol + atol * Anorm * xlqNorm / β₁ + # Continue QR factorization of Bₖ + # + # k k+1 k k+1 k k+1 + # k [ c' s' ] [ γ̄ ] = [ γ δ ] + # k+1 [ s' -c' ] [ β α⁺ ] [ γ̄ ] + (cp, sp, γ) = sym_givens(γ̄, βL) + τ = -τ * δ / γ # forward substitution for t + δ = sp * αL + γ̄ = -cp * αL + + if σ > 0 && !complex_error_bnd + # Continue QR factorization for error estimate + μ̄ = -csig * γ + (csig, ssig, ρ) = sym_givens(ρ̄, γ) + ρ̄ = ssig * μ̄ + csig * σ + μ̄ = -csig * δ + + # determine component of eigenvector and Gauss-Radau parameter + h = δ * csig / ρ̄ + disc = σ * (σ - δ * h) + disc < 0 ? 
complex_error_bnd = true : ω = sqrt(disc) + (csig, ssig, ρ) = sym_givens(ρ̄, δ) + ρ̄ = ssig * μ̄ + csig * σ + end - # update LSLQ point for next iteration - @kaxpy!(n, c * ζ, w̄, x) - @kaxpy!(n, s * ζ, v, x) + # Continue LQ factorization of Rₖ + ϵ̄ = -γ * c + η = γ * s + (c, s, ϵ) = sym_givens(ϵ̄, δ) + + # condition number estimate + # the QLP factorization suggests that the diagonal of M̄ approximates + # the singular values of B. + σmax = max(σmax, ϵ, abs(ϵ̄)) + σmin = min(σmin, ϵ, abs(ϵ̄)) + Acond = σmax / σmin + + # forward substitution for z, ζ̄ + ζold = ζ + ζ = (τ - ζ * η) / ϵ + ζ̄ = ζ / c + + # residual norm estimate + rNorm = sqrt((ψ * cp - ζold * η)^2 + (ψ * sp)^2) + history && push!(rNorms, rNorm) + + ArNorm = sqrt((γ * ϵ * ζ)^2 + (δ * η * ζold)^2) + history && push!(ArNorms, ArNorm) + + # Compute ψₖ + ψ = ψ * sp + + # Compute ‖x_cg‖₂ + xcgNorm² = xlqNorm² + ζ̄ * ζ̄ + + if σ > 0 && iter > 0 && !complex_error_bnd + disc = ζ̃ * ζ̃ - ζ̄ * ζ̄ + if disc < 0 + complex_error_bnd = true + else + err_ubnd_cg = sqrt(disc) + history && push!(err_ubnds_cg, err_ubnd_cg) + fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²) + end + end - # compute w̄ - @kaxpby!(n, -c, v, s, w̄) + test1 = rNorm + test2 = ArNorm / (Anorm * rNorm) + test3 = 1 / Acond + t1 = test1 / (one(T) + Anorm * xlqNorm) + tol = btol + atol * Anorm * xlqNorm / β₁ - xlqNorm² += ζ * ζ - xlqNorm = sqrt(xlqNorm²) + # update LSLQ point for next iteration + @kaxpy!(n, c * ζ, w̄, x) + @kaxpy!(n, s * ζ, v, x) - # check stopping condition based on forward error lower bound - err_vec[mod(iter, window) + 1] = ζ - if iter ≥ window - err_lbnd = norm(err_vec) - history && push!(err_lbnds, err_lbnd) - fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm - end + # compute w̄ + @kaxpby!(n, -c, v, s, w̄) - # compute LQ forward error upper bound - if σ > 0 && !complex_error_bnd - η̃ = ω * s - ϵ̃ = -ω * c - τ̃ = -τ * δ / ω - ζ̃ = (τ̃ - ζ * η̃) / ϵ̃ - history && push!(err_ubnds_lq, abs(ζ̃ )) - end + xlqNorm² += ζ * ζ + xlqNorm = 
sqrt(xlqNorm²) - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + test3 ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + t1 ≤ one(T)) + # check stopping condition based on forward error lower bound + err_vec[mod(iter, window) + 1] = ζ + if iter ≥ window + err_lbnd = @knrm2(window, err_vec) + history && push!(err_lbnds, err_lbnd) + fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm + end - # Stopping conditions based on user-provided tolerances. - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - ill_cond_lim = (test3 ≤ ctol) - solved_lim = (test2 ≤ atol) - zero_resid_lim = (test1 ≤ rtol) + # compute LQ forward error upper bound + if σ > 0 && !complex_error_bnd + η̃ = ω * s + ϵ̃ = -ω * c + τ̃ = -τ * δ / ω + ζ̃ = (τ̃ - ζ * η̃) / ϵ̃ + history && push!(err_ubnds_lq, abs(ζ̃ )) + end - ill_cond = ill_cond_mach || ill_cond_lim - zero_resid = zero_resid_mach || zero_resid_lim - solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + test3 ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + t1 ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. 
+ user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + ill_cond_lim = (test3 ≤ ctol) + solved_lim = (test2 ≤ atol) + zero_resid_lim = (test1 ≤ ε) + + ill_cond = ill_cond_mach || ill_cond_lim + zero_resid = zero_resid_mach || zero_resid_lim + solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm) - end - (verbose > 0) && @printf("\n") + if transfer_to_lsqr # compute LSQR point + @kaxpy!(n, ζ̄ , w̄, x) + end - if transfer_to_lsqr # compute LSQR point - @kaxpy!(n, ζ̄ , w̄, x) + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err_lbnd && (status = "forward error lower bound small enough") + fwd_err_ubnd && (status = "forward error upper bound small enough") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.error_with_bnd = complex_error_bnd + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for 
this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - fwd_err_lbnd && (status = "forward error lower bound small enough") - fwd_err_ubnd && (status = "forward error upper bound small enough") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !zero_resid - stats.error_with_bnd = complex_error_bnd - stats.status = status - return solver end diff --git a/src/lsmr.jl b/src/lsmr.jl index f4d8349d1..085d941db 100644 --- a/src/lsmr.jl +++ b/src/lsmr.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSMR is formally equivalent to applying MINRES to the normal equations # but should be more stable. It is also formally equivalent to CRLS though @@ -24,17 +24,16 @@ export lsmr, lsmr! - """ (x, stats) = lsmr(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), + M=I, N=I, ldiv::Bool=false, + window::Int=5, sqd::Bool=false, λ::T=zero(T), + radius::T=zero(T), etol::T=√eps(T), axtol::T=√eps(T), btol::T=√eps(T), - atol::T=zero(T), rtol::T=zero(T), - etol::T=√eps(T), window::Int=5, - itmax::Int=0, conlim::T=1/√eps(T), - radius::T=zero(T), verbose::Int=0, - history::Bool=false, ldiv::Bool=false, - callback=solver->false) + conlim::T=1/√eps(T), atol::T=zero(T), + rtol::T=zero(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -43,24 +42,24 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSMR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSMR method, where λ ≥ 0 is a regularization parameter. 
LSMR is formally equivalent to applying MINRES to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb (and therefore to CRLS) but is more stable. -LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. +LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. It is formally equivalent to CRLS, though can be substantially more accurate. LSMR can be also used to find a null vector of a singular matrix A -by solving the problem `min ‖Aᵀx - b‖` with any nonzero vector `b`. -At a minimizer, the residual vector `r = b - Aᵀx` will satisfy `Ar = 0`. +by solving the problem `min ‖Aᴴx - b‖` with any nonzero vector `b`. +At a minimizer, the residual vector `r = b - Aᴴx` will satisfy `Ar = 0`. If `λ > 0`, we solve the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -70,23 +69,52 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSMR is then equivalent to applying MINRES to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSMR is then equivalent to applying MINRES to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. 
`r` can be recovered by computing `E⁻¹(b - Ax)`. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `etol`: stopping tolerance based on the lower bound on the error; +* `axtol`: tolerance on the backward error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LsmrStats`](@ref) structure. #### Reference @@ -94,12 +122,6 @@ and `false` otherwise. """ function lsmr end -function lsmr(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = LsmrSolver(A, b, window=window) - lsmr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = lsmr!(solver::LsmrSolver, A, b; kwargs...) @@ -109,274 +131,320 @@ See [`LsmrSolver`](@ref) for more details about the `solver`. """ function lsmr! end -function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSMR: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₙ and N = Iₘ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. 
- allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, h, hbar = solver.x, solver.Nv, solver.Aᵀu, solver.h, solver.hbar - Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - ctol = conlim > 0 ? 1/conlim : zero(T) - x .= zero(FC) - - # Initialize Golub-Kahan process. - # β₁ M u₁ = b. - Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - return solver - end - β = β₁ - - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - - ζbar = α * β - αbar = α - ρ = one(T) - ρbar = one(T) - cbar = one(T) - sbar = zero(T) - - # Initialize variables for estimation of ‖r‖. - βdd = β - βd = zero(T) - ρdold = one(T) - τtildeold = zero(T) - θtilde = zero(T) - ζ = zero(T) - d = zero(T) - - # Initialize variables for estimation of ‖A‖, cond(A) and xNorm. - Anorm² = α * α - maxrbar = zero(T) - minrbar = min(floatmax(T), T(1.0e+100)) - Acond = maxrbar / minrbar - Anorm = sqrt(Anorm²) - xNorm = zero(T) - - # Items for use in stopping rules. - ctol = conlim > 0 ? 
1 / conlim : zero(T) - rNorm = β - history && push!(rNorms, rNorm) - ArNorm = ArNorm0 = α * β - history && push!(ArNorms, ArNorm) - - xENorm² = zero(T) - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²) - - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a minimum least-squares solution" - return solver +def_args_lsmr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lsmr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; radius::T = zero(T) ), + :(; etol::T = √eps(T) ), + :(; axtol::T = √eps(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = zero(T) ), + :(; rtol::T = zero(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_lsmr = mapreduce(extract_parameters, vcat, def_kwargs_lsmr) + +args_lsmr = (:A, :b) +kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lsmr($(def_args_lsmr...); window :: Int=5, $(def_kwargs_lsmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LsmrSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lsmr!(solver, $(args_lsmr...); $(kwargs_lsmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - - h .= 
v - hbar .= zero(FC) - - status = "unknown" - on_boundary = false - solved = solved_mach = solved_lim = (rNorm ≤ axtol) - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = false - fwd_err = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - iter = iter + 1 - - # Generate next Golub-Kahan vectors. - # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - end - end - # Continue QR factorization - (chat, shat, αhat) = sym_givens(αbar, λ) - - ρold = ρ - (c, s, ρ) = sym_givens(αhat, β) - θnew = s * α - αbar = c * α - - ρbarold = ρbar - ζold = ζ - θbar = sbar * ρ - ρtemp = cbar * ρ - (cbar, sbar, ρbar) = sym_givens(ρtemp, θnew) - ζ = cbar * ζbar - ζbar = -sbar * ζbar - - xENorm² = xENorm² + ζ * ζ - err_vec[mod(iter, window) + 1] = ζ - iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) - - # Update h, hbar and x. - δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁) - @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁ - - # if a trust-region constraint is given, compute step to the boundary - # the step ϕ/ρ is not necessarily positive - σ = ζ / (ρ * ρbar) - if radius > 0 - t1, t2 = to_boundary(x, hbar, radius) - tmax, tmin = max(t1, t2), min(t1, t2) - on_boundary = σ > tmax || σ < tmin - σ = σ > 0 ? 
min(σ, tmax) : max(σ, tmin) - end + function lsmr!(solver :: LsmrSolver{T,FC,S}, $(def_args_lsmr...); $(def_kwargs_lsmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ - @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax - # Estimate ‖r‖. - βacute = chat * βdd - βcheck = -shat * βdd + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LSMR: system of %d equations in %d variables\n", m, n) - βhat = c * βacute - βdd = -s * βacute + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) - θtildeold = θtilde - (ctildeold, stildeold, ρtildeold) = sym_givens(ρdold, θbar) - θtilde = stildeold * ρbar - ρdold = ctildeold * ρbar - βd = -stildeold * βd + ctildeold * βhat + # Tests M = Iₙ and N = Iₘ + MisI = (M === I) + NisI = (N === I) - τtildeold = (ζold - θtildeold * τtildeold) / ρtildeold - τd = (ζ - θtilde * τtildeold) / ρdold - d = d + βcheck * βcheck - rNorm = sqrt(d + (βd - τd)^2 + βdd * βdd) - history && push!(rNorms, rNorm) + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' - # Estimate ‖A‖. - Anorm² += β * β - Anorm = sqrt(Anorm²) - Anorm² += α * α + # Set up workspace. 
+ allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + x, Nv, Aᴴu, h, hbar = solver.x, solver.Nv, solver.Aᴴu, solver.h, solver.hbar + Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v - # Estimate cond(A). - maxrbar = max(maxrbar, ρbarold) - iter > 1 && (minrbar = min(minrbar, ρbarold)) - Acond = max(maxrbar, ρtemp) / min(minrbar, ρtemp) + ctol = conlim > 0 ? 1/conlim : zero(T) + x .= zero(FC) - # Test for convergence. - ArNorm = abs(ζbar) + # Initialize Golub-Kahan process. + # β₁ M u₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) + β₁ = sqrt(@kdotr(m, u, Mu)) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + return solver + end + β = β₁ + + @kscal!(m, one(FC)/β₁, u) + MisI || @kscal!(m, one(FC)/β₁, Mu) + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + + ζbar = α * β + αbar = α + ρ = one(T) + ρbar = one(T) + cbar = one(T) + sbar = zero(T) + + # Initialize variables for estimation of ‖r‖. + βdd = β + βd = zero(T) + ρdold = one(T) + τtildeold = zero(T) + θtilde = zero(T) + ζ = zero(T) + d = zero(T) + + # Initialize variables for estimation of ‖A‖, cond(A) and xNorm. + Anorm² = α * α + maxrbar = zero(T) + minrbar = min(floatmax(T), T(1.0e+100)) + Acond = maxrbar / minrbar + Anorm = sqrt(Anorm²) + xNorm = zero(T) + + # Items for use in stopping rules. + ctol = conlim > 0 ? 
1 / conlim : zero(T) + rNorm = β + history && push!(rNorms, rNorm) + ArNorm = ArNorm0 = α * β history && push!(ArNorms, ArNorm) - xNorm = @knrm2(n, x) - test1 = rNorm / β₁ - test2 = ArNorm / (Anorm * rNorm) - test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xNorm / β₁) - rNormtol = btol + axtol * Anorm * xNorm / β₁ + xENorm² = zero(T) + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) + iter = 0 + itmax == 0 && (itmax = m + n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, β₁, α, β₁, α, 0, 1, Anorm², ktimer(start_time)) + + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver + end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + test3 ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + t1 ≤ one(T)) + h .= v + hbar .= zero(FC) - # Stopping conditions based on user-provided tolerances. 
- user_requested_exit = callback(solver) :: Bool + status = "unknown" + on_boundary = false + solved = solved_mach = solved_lim = (rNorm ≤ axtol) tired = iter ≥ itmax - ill_cond_lim = (test3 ≤ ctol) - solved_lim = (test2 ≤ axtol) - solved_opt = ArNorm ≤ atol + rtol * ArNorm0 - zero_resid_lim = (test1 ≤ rNormtol) - iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) - - ill_cond = ill_cond_mach | ill_cond_lim - zero_resid = zero_resid_mach | zero_resid_lim - solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid = zero_resid_mach = zero_resid_lim = false + fwd_err = false + user_requested_exit = false + overtimed = false + + while ! (solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Golub-Kahan vectors. + # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + end + end + + # Continue QR factorization + (chat, shat, αhat) = sym_givens(αbar, λ) + + ρold = ρ + (c, s, ρ) = sym_givens(αhat, β) + θnew = s * α + αbar = c * α + + ρbarold = ρbar + ζold = ζ + θbar = sbar * ρ + ρtemp = cbar * ρ + (cbar, sbar, ρbar) = sym_givens(ρtemp, θnew) + ζ = cbar * ζbar + ζbar = -sbar * ζbar + + xENorm² = xENorm² + ζ * ζ + err_vec[mod(iter, window) + 1] = ζ + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + + # Update h, hbar and x. 
+ δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁) + @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁ + + # if a trust-region constraint is given, compute step to the boundary + # the step ϕ/ρ is not necessarily positive + σ = ζ / (ρ * ρbar) + if radius > 0 + t1, t2 = to_boundary(n, x, hbar, radius) + tmax, tmin = max(t1, t2), min(t1, t2) + on_boundary = σ > tmax || σ < tmin + σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) + end + + @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ + @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ + + # Estimate ‖r‖. + βacute = chat * βdd + βcheck = -shat * βdd + + βhat = c * βacute + βdd = -s * βacute + + θtildeold = θtilde + (ctildeold, stildeold, ρtildeold) = sym_givens(ρdold, θbar) + θtilde = stildeold * ρbar + ρdold = ctildeold * ρbar + βd = -stildeold * βd + ctildeold * βhat + + τtildeold = (ζold - θtildeold * τtildeold) / ρtildeold + τd = (ζ - θtilde * τtildeold) / ρdold + d = d + βcheck * βcheck + rNorm = sqrt(d + (βd - τd)^2 + βdd * βdd) + history && push!(rNorms, rNorm) + + # Estimate ‖A‖. + Anorm² += β * β + Anorm = sqrt(Anorm²) + Anorm² += α * α + + # Estimate cond(A). + maxrbar = max(maxrbar, ρbarold) + iter > 1 && (minrbar = min(minrbar, ρbarold)) + Acond = max(maxrbar, ρtemp) / min(minrbar, ρtemp) + + # Test for convergence. + ArNorm = abs(ζbar) + history && push!(ArNorms, ArNorm) + xNorm = @knrm2(n, x) + + test1 = rNorm / β₁ + test2 = ArNorm / (Anorm * rNorm) + test3 = 1 / Acond + t1 = test1 / (one(T) + Anorm * xNorm / β₁) + rNormtol = btol + axtol * Anorm * xNorm / β₁ + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ ill_cond_mach = (one(T) + test3 ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + t1 ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + ill_cond_lim = (test3 ≤ ctol) + solved_lim = (test2 ≤ axtol) + solved_opt = ArNorm ≤ atol + rtol * ArNorm0 + zero_resid_lim = (test1 ≤ rNormtol) + iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) + + ill_cond = ill_cond_mach || ill_cond_lim + zero_resid = zero_resid_mach || zero_resid_lim + solved = solved_mach || solved_lim || solved_opt || zero_resid || fwd_err || on_boundary + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err && (status = "truncated forward error small enough") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.residual = rNorm + stats.Aresidual = ArNorm + stats.Acond = Acond + stats.Anorm = Anorm + stats.xNorm = xNorm + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - 
zero_resid && (status = "found approximate zero-residual solution") - fwd_err && (status = "truncated forward error small enough") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.residual = rNorm - stats.Aresidual = ArNorm - stats.Acond = Acond - stats.Anorm = Anorm - stats.xNorm = xNorm - stats.niter = iter - stats.solved = solved - stats.inconsistent = !zero_resid - stats.status = status - return solver end diff --git a/src/lsqr.jl b/src/lsqr.jl index dd3779dce..fe7acc37c 100644 --- a/src/lsqr.jl +++ b/src/lsqr.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSQR is formally equivalent to applying the conjugate gradient method # to the normal equations but should be more stable. It is also formally @@ -24,16 +24,16 @@ export lsqr, lsqr! - """ (x, stats) = lsqr(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), + M=I, N=I, ldiv::Bool=false, + window::Int=5, sqd::Bool=false, λ::T=zero(T), + radius::T=zero(T), etol::T=√eps(T), axtol::T=√eps(T), btol::T=√eps(T), - atol::T=zero(T), rtol::T=zero(T), - etol::T=√eps(T), window::Int=5, - itmax::Int=0, conlim::T=1/√eps(T), - radius::T=zero(T), verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + conlim::T=1/√eps(T), atol::T=zero(T), + rtol::T=zero(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,20 +42,20 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSQR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSQR method, where λ ≥ 0 is a regularization parameter. 
LSQR is formally equivalent to applying CG to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb (and therefore to CGLS) but is more stable. -LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂. It is formally equivalent to CGLS, though can be slightly more accurate. If `λ > 0`, LSQR solves the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -65,23 +65,52 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSQR is then equivalent to applying CG to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSQR is then equivalent to applying CG to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `etol`: stopping tolerance based on the lower bound on the error; +* `axtol`: tolerance on the backward error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -89,12 +118,6 @@ and `false` otherwise. 
""" function lsqr end -function lsqr(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = LsqrSolver(A, b, window=window) - lsqr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = lsqr!(solver::LsqrSolver, A, b; kwargs...) @@ -104,263 +127,309 @@ See [`LsqrSolver`](@ref) for more details about the `solver`. """ function lsqr! end -function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSQR: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₙ and N = Iₘ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, w = solver.x, solver.Nv, solver.Aᵀu, solver.w - Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - λ² = λ * λ - ctol = conlim > 0 ? 1/conlim : zero(T) - x .= zero(FC) - - # Initialize Golub-Kahan process. - # β₁ M u₁ = b. 
- Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - return solver +def_args_lsqr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lsqr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; radius::T = zero(T) ), + :(; etol::T = √eps(T) ), + :(; axtol::T = √eps(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = zero(T) ), + :(; rtol::T = zero(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_lsqr = mapreduce(extract_parameters, vcat, def_kwargs_lsqr) + +args_lsqr = (:A, :b) +kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lsqr($(def_args_lsqr...); window :: Int=5, $(def_kwargs_lsqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LsqrSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lsqr!(solver, $(args_lsqr...); $(kwargs_lsqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - β = β₁ - - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - Anorm² = @kdotr(n, v, Nv) - Anorm = sqrt(Anorm²) - α = Anorm - Acond = zero(T) - xNorm = zero(T) - xNorm² = zero(T) - dNorm² = zero(T) - c2 = -one(T) - s2 = zero(T) - z = zero(T) - - xENorm² = zero(T) - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s 
%7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᵀr‖", "compat", "backwrd", "‖A‖", "κ(A)") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond) - - rNorm = β₁ - r1Norm = rNorm - r2Norm = rNorm - res2 = zero(T) - history && push!(rNorms, r2Norm) - ArNorm = ArNorm0 = α * β - history && push!(ArNorms, ArNorm) - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a minimum least-squares solution" - return solver - end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - w .= v - - # Initialize other constants. - ϕbar = β₁ - ρbar = α - - status = "unknown" - on_boundary = false - solved_lim = ArNorm / (Anorm * rNorm) ≤ axtol - solved_mach = one(T) + ArNorm / (Anorm * rNorm) ≤ one(T) - solved = solved_mach | solved_lim - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid_lim = rNorm / β₁ ≤ axtol - zero_resid_mach = one(T) + rNorm / β₁ ≤ one(T) - zero_resid = zero_resid_mach | zero_resid_lim - fwd_err = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - iter = iter + 1 - - # Generate next Golub-Kahan vectors. - # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖² - λ > 0 && (Anorm² += λ²) - - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - end - end - # Continue QR factorization - # 1. Eliminate the regularization parameter. 
- (c1, s1, ρbar1) = sym_givens(ρbar, λ) - ψ = s1 * ϕbar - ϕbar = c1 * ϕbar - - # 2. Eliminate β. - # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : - # [ β 0 ] [ 0 ζbar ] - # - # k k+1 k k+1 k k+1 - # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] - # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] - # - # so that we obtain - # - # [ c s ] [ ζbar ] = [ ζ ] - # [ s -c ] [ 0 ] [ ζbar⁺ ] - (c, s, ρ) = sym_givens(ρbar1, β) - ϕ = c * ϕbar - ϕbar = s * ϕbar - - xENorm² = xENorm² + ϕ * ϕ - err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = norm(err_vec)) - - τ = s * ϕ - θ = s * α - ρbar = -c * α - dNorm² += @kdotr(n, w, w) / ρ^2 - - # if a trust-region constraint is give, compute step to the boundary - # the step ϕ/ρ is not necessarily positive - σ = ϕ / ρ - if radius > 0 - t1, t2 = to_boundary(x, w, radius) - tmax, tmin = max(t1, t2), min(t1, t2) - on_boundary = σ > tmax || σ < tmin - σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) + function lsqr!(solver :: LsqrSolver{T,FC,S}, $(def_args_lsqr...); $(def_kwargs_lsqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LSQR: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₙ and N = Iₘ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + x, Nv, Aᴴu, w = solver.x, solver.Nv, solver.Aᴴu, solver.w + Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + λ² = λ * λ + ctol = conlim > 0 ? 1/conlim : zero(T) + x .= zero(FC) + + # Initialize Golub-Kahan process. + # β₁ M u₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) + β₁ = sqrt(@kdotr(m, u, Mu)) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + return solver end - - @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w - @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w - - # Use a plane rotation on the right to eliminate the super-diagonal - # element (θ) of the upper-bidiagonal matrix. - # Use the result to estimate norm(x). 
- δ = s2 * ρ - γbar = -c2 * ρ - rhs = ϕ - δ * z - zbar = rhs / γbar - xNorm = sqrt(xNorm² + zbar * zbar) - (c2, s2, γ) = sym_givens(γbar, θ) - z = rhs / γ - xNorm² += z * z - + β = β₁ + + @kscal!(m, one(FC)/β₁, u) + MisI || @kscal!(m, one(FC)/β₁, Mu) + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + Anorm² = @kdotr(n, v, Nv) Anorm = sqrt(Anorm²) - Acond = Anorm * sqrt(dNorm²) - res1 = ϕbar * ϕbar - res2 += ψ * ψ - rNorm = sqrt(res1 + res2) - - ArNorm = α * abs(τ) - history && push!(ArNorms, ArNorm) - - r1sq = rNorm * rNorm - λ² * xNorm² - r1Norm = sqrt(abs(r1sq)) - r1sq < 0 && (r1Norm = -r1Norm) + α = Anorm + Acond = zero(T) + xNorm = zero(T) + xNorm² = zero(T) + dNorm² = zero(T) + c2 = -one(T) + s2 = zero(T) + z = zero(T) + + xENorm² = zero(T) + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) + + iter = 0 + itmax == 0 && (itmax = m + n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %7s %7s %7s %7s %5s\n", "k", "α", "β", "‖r‖", "‖Aᴴr‖", "compat", "backwrd", "‖A‖", "κ(A)", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond, ktimer(start_time)) + + rNorm = β₁ + r1Norm = rNorm r2Norm = rNorm + res2 = zero(T) history && push!(rNorms, r2Norm) - - test1 = rNorm / β₁ - test2 = ArNorm / (Anorm * rNorm) - test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xNorm / β₁) - rNormtol = btol + axtol * Anorm * xNorm / β₁ - - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + test3 ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + t1 ≤ one(T)) - - # Stopping conditions based on user-provided tolerances. 
- user_requested_exit = callback(solver) :: Bool + ArNorm = ArNorm0 = α * β + history && push!(ArNorms, ArNorm) + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver + end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + w .= v + + # Initialize other constants. + ϕbar = β₁ + ρbar = α + + status = "unknown" + on_boundary = false + solved_lim = ArNorm / (Anorm * rNorm) ≤ axtol + solved_mach = one(T) + ArNorm / (Anorm * rNorm) ≤ one(T) + solved = solved_mach | solved_lim tired = iter ≥ itmax - ill_cond_lim = (test3 ≤ ctol) - solved_lim = (test2 ≤ axtol) - solved_opt = ArNorm ≤ atol + rtol * ArNorm0 - zero_resid_lim = (test1 ≤ rNormtol) - iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) - - ill_cond = ill_cond_mach | ill_cond_lim + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid_lim = rNorm / β₁ ≤ axtol + zero_resid_mach = one(T) + rNorm / β₁ ≤ one(T) zero_resid = zero_resid_mach | zero_resid_lim - solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary + fwd_err = false + user_requested_exit = false + overtimed = false + + while ! (solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Golub-Kahan vectors. + # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖² + λ > 0 && (Anorm² += λ²) + + # 2. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + end + end + + # Continue QR factorization + # 1. Eliminate the regularization parameter. + (c1, s1, ρbar1) = sym_givens(ρbar, λ) + ψ = s1 * ϕbar + ϕbar = c1 * ϕbar + + # 2. Eliminate β. + # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : + # [ β 0 ] [ 0 ζbar ] + # + # k k+1 k k+1 k k+1 + # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] + # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] + # + # so that we obtain + # + # [ c s ] [ ζbar ] = [ ζ ] + # [ s -c ] [ 0 ] [ ζbar⁺ ] + (c, s, ρ) = sym_givens(ρbar1, β) + ϕ = c * ϕbar + ϕbar = s * ϕbar + + xENorm² = xENorm² + ϕ * ϕ + err_vec[mod(iter, window) + 1] = ϕ + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + + τ = s * ϕ + θ = s * α + ρbar = -c * α + dNorm² += @kdotr(n, w, w) / ρ^2 + + # if a trust-region constraint is given, compute step to the boundary + # the step ϕ/ρ is not necessarily positive + σ = ϕ / ρ + if radius > 0 + t1, t2 = to_boundary(n, x, w, radius) + tmax, tmin = max(t1, t2), min(t1, t2) + on_boundary = σ > tmax || σ < tmin + σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) + end + + @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w + @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w + + # Use a plane rotation on the right to eliminate the super-diagonal + # element (θ) of the upper-bidiagonal matrix. + # Use the result to estimate norm(x). 
+ δ = s2 * ρ + γbar = -c2 * ρ + rhs = ϕ - δ * z + zbar = rhs / γbar + xNorm = sqrt(xNorm² + zbar * zbar) + (c2, s2, γ) = sym_givens(γbar, θ) + z = rhs / γ + xNorm² += z * z + + Anorm = sqrt(Anorm²) + Acond = Anorm * sqrt(dNorm²) + res1 = ϕbar * ϕbar + res2 += ψ * ψ + rNorm = sqrt(res1 + res2) + + ArNorm = α * abs(τ) + history && push!(ArNorms, ArNorm) + + r1sq = rNorm * rNorm - λ² * xNorm² + r1Norm = sqrt(abs(r1sq)) + r1sq < 0 && (r1Norm = -r1Norm) + r2Norm = rNorm + history && push!(rNorms, r2Norm) + + test1 = rNorm / β₁ + test2 = ArNorm / (Anorm * rNorm) + test3 = 1 / Acond + t1 = test1 / (one(T) + Anorm * xNorm / β₁) + rNormtol = btol + axtol * Anorm * xNorm / β₁ + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond, ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + test3 ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + t1 ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. 
+ user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + ill_cond_lim = (test3 ≤ ctol) + solved_lim = (test2 ≤ axtol) + solved_opt = ArNorm ≤ atol + rtol * ArNorm0 + zero_resid_lim = (test1 ≤ rNormtol) + iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) + + ill_cond = ill_cond_mach || ill_cond_lim + zero_resid = zero_resid_mach || zero_resid_lim + solved = solved_mach || solved_lim || solved_opt || zero_resid || fwd_err || on_boundary + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err && (status = "truncated forward error small enough") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - fwd_err && (status = "truncated forward error small enough") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - 
stats.inconsistent = !zero_resid - stats.status = status - return solver end diff --git a/src/minres.jl b/src/minres.jl index cbaefee9f..8e6659472 100644 --- a/src/minres.jl +++ b/src/minres.jl @@ -3,7 +3,7 @@ # # minimize ‖Ax - b‖₂ # -# where A is square and symmetric. +# where A is Hermitian. # # MINRES is formally equivalent to applying the conjugate residuals method # to Ax = b when A is positive definite, but is more general and also applies @@ -21,20 +21,22 @@ export minres, minres! - """ (x, stats) = minres(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T)/100, - rtol::T=√eps(T)/100, ratol :: T=zero(T), - rrtol :: T=zero(T), etol::T=√eps(T), - window::Int=5, itmax::Int=0, - conlim::T=1/√eps(T), verbose::Int=0, - history::Bool=false, ldiv::Bool=false, - callback=solver->false) + M=I, ldiv::Bool=false, window::Int=5, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), etol::T=√eps(T), + conlim::T=1/√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = minres(A, b, x0::AbstractVector; kwargs...) + +MINRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + Solve the shifted linear least-squares problem minimize ‖b - (A + λI)x‖₂² @@ -43,26 +45,45 @@ or the shifted linear system (A + λI) x = b -using the MINRES method, where λ ≥ 0 is a shift parameter, -where A is square and symmetric. +of size n using the MINRES method, where λ ≥ 0 is a shift parameter, +where A is Hermitian. MINRES is formally equivalent to applying CR to Ax=b when A is positive definite, but is typically more stable and also applies to the case where A is indefinite. -MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. +MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. 
+ +#### Input arguments -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. -MINRES can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = minres(A, b, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `etol`: stopping tolerance based on the lower bound on the error; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
+ +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -70,18 +91,6 @@ and `false` otherwise. """ function minres end -function minres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = MinresSolver(A, b, window=window) - minres!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function minres(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = MinresSolver(A, b, window=window) - minres!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = minres!(solver::MinresSolver, A, b; kwargs...) solver = minres!(solver::MinresSolver, A, b, x0; kwargs...) @@ -92,257 +101,306 @@ See [`MinresSolver`](@ref) for more details about the `solver`. """ function minres! end -function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - minres!(solver, A, b; kwargs...) - return solver -end - -function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100, - ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T), - itmax :: Int=0, conlim :: T=1/√eps(T), verbose :: Int=0, - history :: Bool=false, ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("MINRES: system of size %d\n", n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. 
- allocate_if(!MisI, solver, :v, S, n) - Δx, x, r1, r2, w1, w2, y = solver.Δx, solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.y - err_vec, stats = solver.err_vec, solver.stats - warm_start = solver.warm_start - rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond - reset!(stats) - v = MisI ? r2 : solver.v - - ϵM = eps(T) - ctol = conlim > 0 ? 1 / conlim : zero(T) - - # Initial solution x₀ - x .= zero(FC) - - if warm_start - mul!(r1, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1) - @kaxpby!(n, one(FC), b, -one(FC), r1) - else - r1 .= b +def_args_minres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_minres = (:(x0::AbstractVector),) + +def_kwargs_minres = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; etol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_minres = mapreduce(extract_parameters, vcat, def_kwargs_minres) + +args_minres = (:A, :b) +optargs_minres = (:x0,) +kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function minres($(def_args_minres...), $(def_optargs_minres...); window :: Int=5, $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresSolver(A, b; window) + warm_start!(solver, $(optargs_minres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres!(solver, $(args_minres...); $(kwargs_minres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initialize Lanczos process. - # β₁ M v₁ = b. 
- r2 .= r1 - MisI || mulorldiv!(v, M, r1, ldiv) - β₁ = @kdotr(m, r1, v) - β₁ < 0 && error("Preconditioner is not positive definite") - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, β₁) - history && push!(ArNorms, zero(T)) - history && push!(Aconds, zero(T)) - solver.warm_start = false - return solver + function minres($(def_args_minres...); window :: Int=5, $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres!(solver, $(args_minres...); $(kwargs_minres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - β₁ = sqrt(β₁) - β = β₁ - - oldβ = zero(T) - δbar = zero(T) - ϵ = zero(T) - rNorm = β₁ - history && push!(rNorms, β₁) - ϕbar = β₁ - rhs1 = β₁ - rhs2 = zero(T) - γmax = zero(T) - γmin = T(Inf) - cs = -one(T) - sn = zero(T) - w1 .= zero(FC) - w2 .= zero(FC) - - ANorm² = zero(T) - ANorm = zero(T) - Acond = zero(T) - history && push!(Aconds, Acond) - ArNorm = zero(T) - history && push!(ArNorms, ArNorm) - xNorm = zero(T) - - xENorm² = zero(T) - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - - iter = 0 - itmax == 0 && (itmax = 2*n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond) - - tol = atol + rtol * β₁ - rNormtol = ratol + rrtol * β₁ - stats.status = "unknown" - solved = solved_mach = solved_lim = (rNorm ≤ rtol) - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ tol) - fwd_err = false - user_requested_exit = false - - while !(solved || tired || 
ill_cond || user_requested_exit) - iter = iter + 1 - - # Generate next Lanczos vector. - mul!(y, A, v) - λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v) - @kscal!(n, one(FC) / β, y) - iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1) - - α = real((@kdot(n, v, y) / β)) - @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2 - - # Compute w. - δ = cs * δbar + sn * α - if iter == 1 - w = w2 - else - iter ≥ 3 && @kscal!(n, -ϵ, w1) - w = w1 - @kaxpy!(n, -δ, w2, w) - end - @kaxpy!(n, one(FC) / β, v, w) - - @. r1 = r2 - @. r2 = y - MisI || mulorldiv!(v, M, r2, ldiv) - oldβ = β - β = @kdotr(n, r2, v) - β < 0 && error("Preconditioner is not positive definite") - β = sqrt(β) - ANorm² = ANorm² + α * α + oldβ * oldβ + β * β - - # Apply rotation to obtain - # [ δₖ ϵₖ₊₁ ] = [ cs sn ] [ δbarₖ 0 ] - # [ γbar δbarₖ₊₁ ] [ sn -cs ] [ αₖ βₖ₊₁ ] - γbar = sn * δbar - cs * α - ϵ = sn * β - δbar = -cs * β - root = sqrt(γbar * γbar + δbar * δbar) - ArNorm = ϕbar * root # = ‖Aᵀrₖ₋₁‖ - history && push!(ArNorms, ArNorm) - - # Compute the next plane rotation. - γ = sqrt(γbar * γbar + β * β) - γ = max(γ, ϵM) - cs = γbar / γ - sn = β / γ - ϕ = cs * ϕbar - ϕbar = sn * ϕbar - - # Final update of w. - @kscal!(n, one(FC) / γ, w) - # Update x. - @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w - xENorm² = xENorm² + ϕ * ϕ + function minres!(solver :: MinresSolver{T,FC,S}, $(def_args_minres...); $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - # Update directions for x. - if iter ≥ 2 - @kswap(w1, w2) - end + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax - # Compute lower bound on forward error. 
- err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = norm(err_vec)) + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "MINRES: system of size %d\n", n) - γmax = max(γmax, γ) - γmin = min(γmin, γ) - ζ = rhs1 / γ - rhs1 = rhs2 - δ * ζ - rhs2 = -ϵ * ζ + # Tests M = Iₙ + MisI = (M === I) - # Estimate various norms. - ANorm = sqrt(ANorm²) - xNorm = @knrm2(n, x) - ϵA = ANorm * ϵM - ϵx = ANorm * xNorm * ϵM - ϵr = ANorm * xNorm * rtol - d = γbar - d == 0 && (d = ϵA) + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") - rNorm = ϕbar + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + Δx, x, r1, r2, w1, w2, y = solver.Δx, solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.y + err_vec, stats = solver.err_vec, solver.stats + warm_start = solver.warm_start + rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond + reset!(stats) + v = MisI ? r2 : solver.v - test1 = rNorm / (ANorm * xNorm) - test2 = root / ANorm - history && push!(rNorms, rNorm) + ϵM = eps(T) + ctol = conlim > 0 ? 1 / conlim : zero(T) - Acond = γmax / γmin - history && push!(Aconds, Acond) + # Initial solution x₀ + x .= zero(FC) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2) + if warm_start + mul!(r1, A, Δx) + (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1) + @kaxpby!(n, one(FC), b, -one(FC), r1) + else + r1 .= b + end - if iter == 1 && β / β₁ ≤ 10 * ϵM - # Aᵀb = 0 so x = 0 is a minimum least-squares solution + # Initialize Lanczos process. + # β₁ M v₁ = b. 
+ r2 .= r1 + MisI || mulorldiv!(v, M, r1, ldiv) + β₁ = @kdotr(m, r1, v) + β₁ < 0 && error("Preconditioner is not positive definite") + if β₁ == 0 stats.niter = 0 - stats.solved, stats.inconsistent = true, true - stats.status = "x is a minimum least-squares solution" + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, β₁) + history && push!(ArNorms, zero(T)) + history && push!(Aconds, zero(T)) solver.warm_start = false return solver end + β₁ = sqrt(β₁) + β = β₁ - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + test1 ≤ one(T)) - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - # solved_mach = (ϵx ≥ β₁) - - # Stopping conditions based on user-provided tolerances. - tired = iter ≥ itmax - ill_cond_lim = (one(T) / Acond ≤ ctol) - solved_lim = (test2 ≤ tol) - zero_resid_lim = (test1 ≤ tol) - resid_decrease_lim = (rNorm ≤ rNormtol) - iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) - - user_requested_exit = callback(solver) :: Bool - zero_resid = zero_resid_mach | zero_resid_lim - resid_decrease = resid_decrease_mach | resid_decrease_lim - ill_cond = ill_cond_mach | ill_cond_lim - solved = solved_mach | solved_lim | zero_resid | fwd_err | resid_decrease + oldβ = zero(T) + δbar = zero(T) + ϵ = zero(T) + rNorm = β₁ + history && push!(rNorms, β₁) + ϕbar = β₁ + rhs1 = β₁ + rhs2 = zero(T) + γmax = zero(T) + γmin = T(Inf) + cs = -one(T) + sn = zero(T) + w1 .= zero(FC) + w2 .= zero(FC) + + ANorm² = zero(T) + ANorm = zero(T) + Acond = zero(T) + history && push!(Aconds, Acond) + ArNorm = zero(T) + history && push!(ArNorms, ArNorm) + xNorm = zero(T) + + xENorm² = zero(T) + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) + + iter = 0 + itmax 
== 0 && (itmax = 2*n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7s %7s %.2fs\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time)) + + ε = atol + rtol * β₁ + solved = solved_mach = solved_lim = (rNorm ≤ rtol) + tired = iter ≥ itmax + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ ε) + fwd_err = false + user_requested_exit = false + overtimed = false + + while !(solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Lanczos vector. + mul!(y, A, v) + λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v) + @kscal!(n, one(FC) / β, y) + iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1) + + α = real((@kdot(n, v, y) / β)) + @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2 + + # Compute w. + δ = cs * δbar + sn * α + if iter == 1 + w = w2 + else + iter ≥ 3 && @kscal!(n, -ϵ, w1) + w = w1 + @kaxpy!(n, -δ, w2, w) + end + @kaxpy!(n, one(FC) / β, v, w) + + @. r1 = r2 + @. r2 = y + MisI || mulorldiv!(v, M, r2, ldiv) + oldβ = β + β = @kdotr(n, r2, v) + β < 0 && error("Preconditioner is not positive definite") + β = sqrt(β) + ANorm² = ANorm² + α * α + oldβ * oldβ + β * β + + # Apply rotation to obtain + # [ δₖ ϵₖ₊₁ ] = [ cs sn ] [ δbarₖ 0 ] + # [ γbar δbarₖ₊₁ ] [ sn -cs ] [ αₖ βₖ₊₁ ] + γbar = sn * δbar - cs * α + ϵ = sn * β + δbar = -cs * β + root = sqrt(γbar * γbar + δbar * δbar) + ArNorm = ϕbar * root # = ‖Aᴴrₖ₋₁‖ + history && push!(ArNorms, ArNorm) + + # Compute the next plane rotation. + γ = sqrt(γbar * γbar + β * β) + γ = max(γ, ϵM) + cs = γbar / γ + sn = β / γ + ϕ = cs * ϕbar + ϕbar = sn * ϕbar + + # Final update of w. + @kscal!(n, one(FC) / γ, w) + + # Update x. 
+ @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w + xENorm² = xENorm² + ϕ * ϕ + + # Update directions for x. + if iter ≥ 2 + @kswap(w1, w2) + end + + # Compute lower bound on forward error. + err_vec[mod(iter, window) + 1] = ϕ + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + + γmax = max(γmax, γ) + γmin = min(γmin, γ) + ζ = rhs1 / γ + rhs1 = rhs2 - δ * ζ + rhs2 = -ϵ * ζ + + # Estimate various norms. + ANorm = sqrt(ANorm²) + xNorm = @knrm2(n, x) + ϵA = ANorm * ϵM + ϵx = ANorm * xNorm * ϵM + ϵr = ANorm * xNorm * rtol + d = γbar + d == 0 && (d = ϵA) + + rNorm = ϕbar + + test1 = rNorm / (ANorm * xNorm) + test2 = root / ANorm + history && push!(rNorms, rNorm) + + Acond = γmax / γmin + history && push!(Aconds, Acond) + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2, ktimer(start_time)) + + if iter == 1 && β / β₁ ≤ 10 * ϵM + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + stats.niter = 1 + stats.solved, stats.inconsistent = true, true + stats.timer = ktimer(start_time) + stats.status = "x is a minimum least-squares solution" + solver.warm_start = false + return solver + end + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + test1 ≤ one(T)) + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + # solved_mach = (ϵx ≥ β₁) + + # Stopping conditions based on user-provided tolerances. 
+ tired = iter ≥ itmax + ill_cond_lim = (one(T) / Acond ≤ ctol) + solved_lim = (test2 ≤ ε) + zero_resid_lim = MisI && (test1 ≤ eps(T)) + resid_decrease_lim = (rNorm ≤ ε) + iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) + + user_requested_exit = callback(solver) :: Bool + zero_resid = zero_resid_mach || zero_resid_lim + resid_decrease = resid_decrease_mach || resid_decrease_lim + ill_cond = ill_cond_mach || ill_cond_lim + solved = solved_mach || solved_lim || zero_resid || fwd_err || resid_decrease + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err && (status = "truncated forward error small enough") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - fwd_err && (status = "truncated forward error small enough") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, 
one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !zero_resid - stats.status = status - return solver end diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl index bbfbf856b..5bc3399eb 100644 --- a/src/minres_qlp.jl +++ b/src/minres_qlp.jl @@ -18,30 +18,53 @@ export minres_qlp, minres_qlp! """ (x, stats) = minres_qlp(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - ctol::T=√eps(T), λ::T=zero(T), itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, Artol::T=√eps(T), + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = minres_qlp(A, b, x0::AbstractVector; kwargs...) + +MINRES-QLP can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + MINRES-QLP is the only method based on the Lanczos process that returns the minimum-norm -solution on singular inconsistent systems (A + λI)x = b, where λ is a shift parameter. +solution on singular inconsistent systems (A + λI)x = b of size n, where λ is a shift parameter. It is significantly more complex but can be more reliable than MINRES when A is ill-conditioned. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. M also indicates the weighted norm in which residuals are measured. -MINRES-QLP can be warm-started from an initial guess `x0` with the method +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument - (x, stats) = minres_qlp(A, b, x0; kwargs...) 
+* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `Artol`: relative stopping tolerance based on the Aᴴ-residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -51,18 +74,6 @@ and `false` otherwise. """ function minres_qlp end -function minres_qlp(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = MinresQlpSolver(A, b) - minres_qlp!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function minres_qlp(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = MinresQlpSolver(A, b) - minres_qlp!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = minres_qlp!(solver::MinresQlpSolver, A, b; kwargs...) solver = minres_qlp!(solver::MinresQlpSolver, A, b, x0; kwargs...) @@ -73,365 +84,414 @@ See [`MinresQlpSolver`](@ref) for more details about the `solver`. """ function minres_qlp! end -function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - minres_qlp!(solver, A, b; kwargs...) - return solver -end - -function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - ctol :: T=√eps(T), λ ::T=zero(T), itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("MINRES-QLP: system of size %d\n", n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :vₖ, S, n) - wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ - Δx, x, p, stats = solver.Δx, solver.x, solver.p, solver.stats - warm_start = solver.warm_start - rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond - reset!(stats) - vₖ = MisI ? M⁻¹vₖ : solver.vₖ - vₖ₊₁ = MisI ? 
p : M⁻¹vₖ₋₁ - - # Initial solution x₀ - x .= zero(FC) - - if warm_start - mul!(M⁻¹vₖ, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ) - @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ) - else - M⁻¹vₖ .= b +def_args_minres_qlp = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_minres_qlp = (:(x0::AbstractVector),) + +def_kwargs_minres_qlp = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; Artol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_minres_qlp = mapreduce(extract_parameters, vcat, def_kwargs_minres_qlp) + +args_minres_qlp = (:A, :b) +optargs_minres_qlp = (:x0,) +kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function minres_qlp($(def_args_minres_qlp...), $(def_optargs_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresQlpSolver(A, b) + warm_start!(solver, $(optargs_minres_qlp...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres_qlp!(solver, $(args_minres_qlp...); $(kwargs_minres_qlp...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # β₁v₁ = Mb - MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ)) - if βₖ ≠ 0 - @kscal!(n, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(n, one(FC) / βₖ, vₖ) + function minres_qlp($(def_args_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresQlpSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres_qlp!(solver, $(args_minres_qlp...); $(kwargs_minres_qlp...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - rNorm = βₖ - 
ANorm² = zero(T) - ANorm = zero(T) - μmin = zero(T) - μmax = zero(T) - Acond = zero(T) - history && push!(rNorms, rNorm) - history && push!(Aconds, Acond) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver - end - - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - κ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗") - - # Set up workspace. - M⁻¹vₖ₋₁ .= zero(FC) - ζbarₖ = βₖ - ξₖ₋₁ = zero(T) - τₖ₋₂ = τₖ₋₁ = τₖ = zero(T) - ψbarₖ₋₂ = zero(T) - μbisₖ₋₂ = μbarₖ₋₁ = zero(T) - wₖ₋₁ .= zero(FC) - wₖ .= zero(FC) - cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ - sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - solved = zero_resid = zero_resid_lim = rNorm ≤ ε - zero_resid_mach = false - inconsistent = false - ill_cond_mach = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || inconsistent || ill_cond_mach || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the preconditioned Lanczos process. 
- # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ - # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁ - - mul!(p, A, vₖ) # p ← Avₖ - if λ ≠ 0 - @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ - end - - if iter ≥ 2 - @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁ - end - - αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩ - - @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ - - MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ - - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p)) - - # βₖ₊₁.ₖ ≠ 0 - if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - MisI || @kscal!(m, one(FC) / βₖ₊₁, p) - end - - ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁ - - # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # - # [ α₁ β₂ 0 • • • 0 ] [ λ₁ γ₁ ϵ₁ 0 • • 0 ] - # [ β₂ α₂ β₃ • • ] [ 0 λ₂ γ₂ • • • ] - # [ 0 • • • • • ] [ • • λ₃ • • • • ] - # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] - # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] - # [ • • • • βₖ ] [ • • • γₖ₋₁] - # [ • • βₖ αₖ ] [ 0 • • • • 0 λₖ ] - # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] - # - # If k = 1, we don't have any previous reflexion. - # If k = 2, we apply the last reflexion. - # If k ≥ 3, we only apply the two previous reflexions. - - # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ - if iter ≥ 3 - # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] - # [sₖ₋₂ -cₖ₋₂] [βₖ] [γbarₖ₋₁] - ϵₖ₋₂ = sₖ₋₂ * βₖ - γbarₖ₋₁ = -cₖ₋₂ * βₖ - end - # Apply previous Givens reflections Qₖ₋₁.ₖ - if iter ≥ 2 - iter == 2 && (γbarₖ₋₁ = βₖ) - # [cₖ₋₁ sₖ₋₁] [γbarₖ₋₁] = [γₖ₋₁ ] - # [sₖ₋₁ -cₖ₋₁] [ αₖ ] [λbarₖ] - γₖ₋₁ = cₖ₋₁ * γbarₖ₋₁ + sₖ₋₁ * αₖ - λbarₖ = sₖ₋₁ * γbarₖ₋₁ - cₖ₋₁ * αₖ - end - iter == 1 && (λbarₖ = αₖ) - - # Compute and apply current Givens reflection Qₖ.ₖ₊₁ - # [cₖ sₖ] [λbarₖ] = [λₖ] - # [sₖ -cₖ] [βₖ₊₁ ] [0 ] - (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁) - - # Compute [ zₖ ] = (Qₖ)ᵀβ₁e₁ - # [ζbarₖ₊₁] - # - # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] - # [sₖ -cₖ] [ 0 ] [ζbarₖ₊₁] - ζₖ = cₖ * ζbarₖ - ζbarₖ₊₁ = sₖ * ζbarₖ - - # Update the LQ factorization of Rₖ = LₖPₖ. 
- # [ λ₁ γ₁ ϵ₁ 0 • • 0 ] [ μ₁ 0 • • • • 0 ] - # [ 0 λ₂ γ₂ • • • ] [ ψ₁ μ₂ • • ] - # [ • • λ₃ • • • • ] [ ρ₁ ψ₂ μ₃ • • ] - # [ • • • • • 0 ] = [ 0 • • • • • ] Pₖ - # [ • • • • ϵₖ₋₂] [ • • • • μₖ₋₂ • • ] - # [ • • • γₖ₋₁] [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] - # [ 0 • • • • 0 λₖ ] [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] - - if iter == 1 - μbarₖ = λₖ - elseif iter == 2 - # [μbar₁ γ₁] [cp₂ sp₂] = [μbis₁ 0 ] - # [ 0 λ₂] [sp₂ -cp₂] [ψbar₁ μbar₂] - (cpₖ, spₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, γₖ₋₁) - ψbarₖ₋₁ = spₖ * λₖ - μbarₖ = -cpₖ * λₖ - else - # [μbisₖ₋₂ 0 ϵₖ₋₂] [cpₖ 0 spₖ] [μₖ₋₂ 0 0 ] - # [ψbarₖ₋₂ μbarₖ₋₁ γₖ₋₁] [ 0 1 0 ] = [ψₖ₋₂ μbarₖ₋₁ θₖ] - # [ 0 0 λₖ ] [spₖ 0 -cpₖ] [ρₖ₋₂ 0 ηₖ] - (cpₖ, spₖ, μₖ₋₂) = sym_givens(μbisₖ₋₂, ϵₖ₋₂) - ψₖ₋₂ = cpₖ * ψbarₖ₋₂ + spₖ * γₖ₋₁ - θₖ = spₖ * ψbarₖ₋₂ - cpₖ * γₖ₋₁ - ρₖ₋₂ = spₖ * λₖ - ηₖ = -cpₖ * λₖ - - # [μₖ₋₂ 0 0 ] [1 0 0 ] [μₖ₋₂ 0 0 ] - # [ψₖ₋₂ μbarₖ₋₁ θₖ] [0 cdₖ sdₖ] = [ψₖ₋₂ μbisₖ₋₁ 0 ] - # [ρₖ₋₂ 0 ηₖ] [0 sdₖ -cdₖ] [ρₖ₋₂ ψbarₖ₋₁ μbarₖ] - (cdₖ, sdₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, θₖ) - ψbarₖ₋₁ = sdₖ * ηₖ - μbarₖ = -cdₖ * ηₖ - end - - # Compute Lₖtₖ = zₖ - # [ μ₁ 0 • • • • 0 ] [τ₁] [ζ₁] - # [ ψ₁ μ₂ • • ] [τ₂] [ζ₂] - # [ ρ₁ ψ₂ μ₃ • • ] [τ₃] [ζ₃] - # [ 0 • • • • • ] [••] = [••] - # [ • • • • μₖ₋₂ • • ] [••] [••] - # [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] [••] [••] - # [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] [τₖ] [ζₖ] - if iter == 1 - τₖ = ζₖ / μbarₖ - elseif iter == 2 - τₖ₋₁ = τₖ - τₖ₋₁ = τₖ₋₁ * μbarₖ₋₁ / μbisₖ₋₁ - ξₖ = ζₖ - τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, $(def_args_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && 
@printf(iostream, "MINRES-QLP: system of size %d\n", n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :vₖ, S, n) + wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ + Δx, x, p, stats = solver.Δx, solver.x, solver.p, solver.stats + warm_start = solver.warm_start + rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond + reset!(stats) + vₖ = MisI ? M⁻¹vₖ : solver.vₖ + vₖ₊₁ = MisI ? p : M⁻¹vₖ₋₁ + + # Initial solution x₀ + x .= zero(FC) + + if warm_start + mul!(M⁻¹vₖ, A, Δx) + (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ) + @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ) else - τₖ₋₂ = τₖ₋₁ - τₖ₋₂ = τₖ₋₂ * μbisₖ₋₂ / μₖ₋₂ - τₖ₋₁ = (ξₖ₋₁ - ψₖ₋₂ * τₖ₋₂) / μbisₖ₋₁ - ξₖ = ζₖ - ρₖ₋₂ * τₖ₋₂ - τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + M⁻¹vₖ .= b end - # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᵀ - if iter == 1 - # w̅₁ = v₁ - @. wₖ = vₖ - elseif iter == 2 - # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ - # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ - @kswap(wₖ₋₁, wₖ) - @. 
wₖ = spₖ * wₖ₋₁ - cpₖ * vₖ - @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁) - else - # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ - # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) - # [spₖ 0 -cpₖ] [0 sdₖ -cdₖ] ⟷ w̄ₖ = sdₖ * w̄ₖ₋₁ - cdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) - ẘₖ₋₂ = wₖ₋₁ - w̄ₖ₋₁ = wₖ - # Update the solution x - @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x) - @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x) - # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ - @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂) - wₐᵤₓ = ẘₖ₋₂ - # Compute ẘₖ₋₁ and w̄ₖ - @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ) - @kswap(wₖ₋₁, wₖ) + # β₁v₁ = Mb + MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) + βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ)) + if βₖ ≠ 0 + @kscal!(n, one(FC) / βₖ, M⁻¹vₖ) + MisI || @kscal!(n, one(FC) / βₖ, vₖ) end - # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ - MisI || (vₖ .= vₖ₊₁) - M⁻¹vₖ₋₁ .= M⁻¹vₖ - M⁻¹vₖ .= p - - # Update ‖rₖ‖ estimate - # ‖ rₖ ‖ = |ζbarₖ₊₁| - rNorm = abs(ζbarₖ₊₁) + rNorm = βₖ + ANorm² = zero(T) + ANorm = zero(T) + μmin = zero(T) + μmax = zero(T) + Acond = zero(T) history && push!(rNorms, rNorm) - - # Update ‖Arₖ₋₁‖ estimate - # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²) - ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁)) - iter == 1 && (κ = atol + ctol * ArNorm) - history && push!(ArNorms, ArNorm) - - ANorm = sqrt(ANorm²) - # estimate A condition number - abs_μbarₖ = abs(μbarₖ) - if iter == 1 - μmin = abs_μbarₖ - μmax = abs_μbarₖ - elseif iter == 2 - μmax = max(μmax, μbisₖ₋₁, abs_μbarₖ) - μmin = min(μmin, μbisₖ₋₁, abs_μbarₖ) - else - μmax = max(μmax, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) - μmin = min(μmin, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) - end - Acond = μmax / μmin history && push!(Aconds, Acond) - xNorm = @knrm2(n, x) - backward = rNorm / (ANorm * xNorm) - - # Update stopping criterion. - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) - resid_decrease_mach = (one(T) + rNorm ≤ one(T)) - zero_resid_mach = (one(T) + backward ≤ one(T)) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - # Stopping conditions based on user-provided tolerances. + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + κ = zero(T) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %7s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗", ktimer(start_time)) + + # Set up workspace. + M⁻¹vₖ₋₁ .= zero(FC) + ζbarₖ = βₖ + ξₖ₋₁ = zero(T) + τₖ₋₂ = τₖ₋₁ = τₖ = zero(T) + ψbarₖ₋₂ = zero(T) + μbisₖ₋₂ = μbarₖ₋₁ = zero(T) + wₖ₋₁ .= zero(FC) + wₖ .= zero(FC) + cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ + sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ + + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) + + # Stopping criterion. 
+ breakdown = false + solved = zero_resid = zero_resid_lim = rNorm ≤ ε + zero_resid_mach = false + inconsistent = false + ill_cond_mach = false tired = iter ≥ itmax - resid_decrease_lim = (rNorm ≤ ε) - zero_resid_lim = (backward ≤ ε) - breakdown = βₖ₊₁ ≤ btol - - user_requested_exit = callback(solver) :: Bool - zero_resid = zero_resid_mach | zero_resid_lim - resid_decrease = resid_decrease_mach | resid_decrease_lim - solved = resid_decrease | zero_resid - inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ ctol) || (breakdown && !solved) + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || inconsistent || ill_cond_mach || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the preconditioned Lanczos process. + # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ + # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁ + + mul!(p, A, vₖ) # p ← Avₖ + if λ ≠ 0 + @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ + end + + if iter ≥ 2 + @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁ + end + + αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩ + + @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ + + MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ + + βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p)) + + # βₖ₊₁.ₖ ≠ 0 + if βₖ₊₁ > btol + @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + MisI || @kscal!(m, one(FC) / βₖ₊₁, p) + end + + ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁ + + # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. + # [ Oᵀ ] + # + # [ α₁ β₂ 0 • • • 0 ] [ λ₁ γ₁ ϵ₁ 0 • • 0 ] + # [ β₂ α₂ β₃ • • ] [ 0 λ₂ γ₂ • • • ] + # [ 0 • • • • • ] [ • • λ₃ • • • • ] + # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] + # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] + # [ • • • • βₖ ] [ • • • γₖ₋₁] + # [ • • βₖ αₖ ] [ 0 • • • • 0 λₖ ] + # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] + # + # If k = 1, we don't have any previous reflexion. + # If k = 2, we apply the last reflexion. + # If k ≥ 3, we only apply the two previous reflexions. 
+ + # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ + if iter ≥ 3 + # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] + # [sₖ₋₂ -cₖ₋₂] [βₖ] [γbarₖ₋₁] + ϵₖ₋₂ = sₖ₋₂ * βₖ + γbarₖ₋₁ = -cₖ₋₂ * βₖ + end + # Apply previous Givens reflections Qₖ₋₁.ₖ + if iter ≥ 2 + iter == 2 && (γbarₖ₋₁ = βₖ) + # [cₖ₋₁ sₖ₋₁] [γbarₖ₋₁] = [γₖ₋₁ ] + # [sₖ₋₁ -cₖ₋₁] [ αₖ ] [λbarₖ] + γₖ₋₁ = cₖ₋₁ * γbarₖ₋₁ + sₖ₋₁ * αₖ + λbarₖ = sₖ₋₁ * γbarₖ₋₁ - cₖ₋₁ * αₖ + end + iter == 1 && (λbarₖ = αₖ) + + # Compute and apply current Givens reflection Qₖ.ₖ₊₁ + # [cₖ sₖ] [λbarₖ] = [λₖ] + # [sₖ -cₖ] [βₖ₊₁ ] [0 ] + (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁) + + # Compute [ zₖ ] = (Qₖ)ᴴβ₁e₁ + # [ζbarₖ₊₁] + # + # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] + # [sₖ -cₖ] [ 0 ] [ζbarₖ₊₁] + ζₖ = cₖ * ζbarₖ + ζbarₖ₊₁ = sₖ * ζbarₖ + + # Update the LQ factorization of Rₖ = LₖPₖ. + # [ λ₁ γ₁ ϵ₁ 0 • • 0 ] [ μ₁ 0 • • • • 0 ] + # [ 0 λ₂ γ₂ • • • ] [ ψ₁ μ₂ • • ] + # [ • • λ₃ • • • • ] [ ρ₁ ψ₂ μ₃ • • ] + # [ • • • • • 0 ] = [ 0 • • • • • ] Pₖ + # [ • • • • ϵₖ₋₂] [ • • • • μₖ₋₂ • • ] + # [ • • • γₖ₋₁] [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] + # [ 0 • • • • 0 λₖ ] [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] + + if iter == 1 + μbarₖ = λₖ + elseif iter == 2 + # [μbar₁ γ₁] [cp₂ sp₂] = [μbis₁ 0 ] + # [ 0 λ₂] [sp₂ -cp₂] [ψbar₁ μbar₂] + (cpₖ, spₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, γₖ₋₁) + ψbarₖ₋₁ = spₖ * λₖ + μbarₖ = -cpₖ * λₖ + else + # [μbisₖ₋₂ 0 ϵₖ₋₂] [cpₖ 0 spₖ] [μₖ₋₂ 0 0 ] + # [ψbarₖ₋₂ μbarₖ₋₁ γₖ₋₁] [ 0 1 0 ] = [ψₖ₋₂ μbarₖ₋₁ θₖ] + # [ 0 0 λₖ ] [spₖ 0 -cpₖ] [ρₖ₋₂ 0 ηₖ] + (cpₖ, spₖ, μₖ₋₂) = sym_givens(μbisₖ₋₂, ϵₖ₋₂) + ψₖ₋₂ = cpₖ * ψbarₖ₋₂ + spₖ * γₖ₋₁ + θₖ = spₖ * ψbarₖ₋₂ - cpₖ * γₖ₋₁ + ρₖ₋₂ = spₖ * λₖ + ηₖ = -cpₖ * λₖ + + # [μₖ₋₂ 0 0 ] [1 0 0 ] [μₖ₋₂ 0 0 ] + # [ψₖ₋₂ μbarₖ₋₁ θₖ] [0 cdₖ sdₖ] = [ψₖ₋₂ μbisₖ₋₁ 0 ] + # [ρₖ₋₂ 0 ηₖ] [0 sdₖ -cdₖ] [ρₖ₋₂ ψbarₖ₋₁ μbarₖ] + (cdₖ, sdₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, θₖ) + ψbarₖ₋₁ = sdₖ * ηₖ + μbarₖ = -cdₖ * ηₖ + end + + # Compute Lₖtₖ = zₖ + # [ μ₁ 0 • • • • 0 ] [τ₁] [ζ₁] + # [ ψ₁ μ₂ • • ] [τ₂] [ζ₂] + # [ ρ₁ ψ₂ μ₃ • • ] [τ₃] [ζ₃] + # [ 0 • • • • • ] [••] = [••] + # [ • • • 
• μₖ₋₂ • • ] [••] [••] + # [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] [••] [••] + # [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] [τₖ] [ζₖ] + if iter == 1 + τₖ = ζₖ / μbarₖ + elseif iter == 2 + τₖ₋₁ = τₖ + τₖ₋₁ = τₖ₋₁ * μbarₖ₋₁ / μbisₖ₋₁ + ξₖ = ζₖ + τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + else + τₖ₋₂ = τₖ₋₁ + τₖ₋₂ = τₖ₋₂ * μbisₖ₋₂ / μₖ₋₂ + τₖ₋₁ = (ξₖ₋₁ - ψₖ₋₂ * τₖ₋₂) / μbisₖ₋₁ + ξₖ = ζₖ - ρₖ₋₂ * τₖ₋₂ + τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + end + + # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ + if iter == 1 + # w̅₁ = v₁ + @. wₖ = vₖ + elseif iter == 2 + # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ + # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ + @kswap(wₖ₋₁, wₖ) + @. wₖ = spₖ * wₖ₋₁ - cpₖ * vₖ + @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁) + else + # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ + # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) + # [spₖ 0 -cpₖ] [0 sdₖ -cdₖ] ⟷ w̄ₖ = sdₖ * w̄ₖ₋₁ - cdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) + ẘₖ₋₂ = wₖ₋₁ + w̄ₖ₋₁ = wₖ + # Update the solution x + @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x) + @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x) + # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ + @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂) + wₐᵤₓ = ẘₖ₋₂ + # Compute ẘₖ₋₁ and w̄ₖ + @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ) + @kswap(wₖ₋₁, wₖ) + end + + # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ + MisI || (vₖ .= vₖ₊₁) + M⁻¹vₖ₋₁ .= M⁻¹vₖ + M⁻¹vₖ .= p + + # Update ‖rₖ‖ estimate + # ‖ rₖ ‖ = |ζbarₖ₊₁| + rNorm = abs(ζbarₖ₊₁) + history && push!(rNorms, rNorm) + + # Update ‖Arₖ₋₁‖ estimate + # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²) + ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁)) + iter == 1 && (κ = atol + Artol * ArNorm) + history && push!(ArNorms, ArNorm) + + ANorm = sqrt(ANorm²) + # estimate A condition number + abs_μbarₖ = abs(μbarₖ) + if iter == 1 + μmin = abs_μbarₖ + μmax = abs_μbarₖ + elseif iter == 2 + μmax = max(μmax, μbisₖ₋₁, abs_μbarₖ) + μmin = min(μmin, μbisₖ₋₁, abs_μbarₖ) + else + μmax = max(μmax, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) + μmin 
= min(μmin, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) + end + Acond = μmax / μmin + history && push!(Aconds, Acond) + xNorm = @knrm2(n, x) + backward = rNorm / (ANorm * xNorm) + + # Update stopping criterion. + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) + resid_decrease_mach = (one(T) + rNorm ≤ one(T)) + zero_resid_mach = (one(T) + backward ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. + tired = iter ≥ itmax + resid_decrease_lim = (rNorm ≤ ε) + zero_resid_lim = MisI && (backward ≤ eps(T)) + breakdown = βₖ₊₁ ≤ btol + + user_requested_exit = callback(solver) :: Bool + zero_resid = zero_resid_mach | zero_resid_lim + resid_decrease = resid_decrease_mach | resid_decrease_lim + solved = resid_decrease | zero_resid + inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ Artol) || (breakdown && !solved) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + # Update variables + if iter ≥ 2 + sₖ₋₂ = sₖ₋₁ + cₖ₋₂ = cₖ₋₁ + ξₖ₋₁ = ξₖ + μbisₖ₋₂ = μbisₖ₋₁ + ψbarₖ₋₂ = ψbarₖ₋₁ + end + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + μbarₖ₋₁ = μbarₖ + ζbarₖ = ζbarₖ₊₁ + βₖ = βₖ₊₁ + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e %.2fs\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") - # Update variables + # Finalize the update of x if iter ≥ 2 - sₖ₋₂ = sₖ₋₁ - cₖ₋₂ = cₖ₋₁ - ξₖ₋₁ = ξₖ - μbisₖ₋₂ = μbisₖ₋₁ - ψbarₖ₋₂ = ψbarₖ₋₁ + @kaxpy!(n, τₖ₋₁, wₖ₋₁, x) + end + if !inconsistent + @kaxpy!(n, τₖ, wₖ, x) end - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - μbarₖ₋₁ = μbarₖ - ζbarₖ = ζbarₖ₊₁ - βₖ = βₖ₊₁ - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward) - end - (verbose > 0) && @printf("\n") - # Finalize the update of x - if iter ≥ 2 - @kaxpy!(n, τₖ₋₁, wₖ₋₁, 
x) - end - if !inconsistent - @kaxpy!(n, τₖ, wₖ, x) - end + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + inconsistent && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - inconsistent && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/qmr.jl b/src/qmr.jl index eb4a4eb46..995392f0c 100644 --- a/src/qmr.jl +++ b/src/qmr.jl @@ -21,28 +21,49 @@ export qmr, qmr! 
""" - (x, stats) = qmr(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + (x, stats) = qmr(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, timemax::Float64=Inf, verbose::Int=0, + history::Bool=false, callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the square linear system Ax = b using the QMR method. + (x, stats) = qmr(A, b, x0::AbstractVector; kwargs...) + +QMR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using QMR. QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. -When `A` is symmetric and `b = c`, QMR is equivalent to MINRES. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. +When `A` is Hermitian and `b = c`, QMR is equivalent to MINRES. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -QMR can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = qmr(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -52,18 +73,6 @@ and `false` otherwise. """ function qmr end -function qmr(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = QmrSolver(A, b) - qmr!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function qmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = QmrSolver(A, b) - qmr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = qmr!(solver::QmrSolver, A, b; kwargs...) solver = qmr!(solver::QmrSolver, A, b, x0; kwargs...) @@ -74,253 +83,301 @@ See [`QmrSolver`](@ref) for more details about the `solver`. """ function qmr! end -function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - qmr!(solver, A, b; kwargs...) 
- return solver -end - -function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("QMR: system of size %d\n", n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p - Δx, x, wₖ₋₂, wₖ₋₁, stats = solver.Δx, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_qmr = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_qmr = (:(x0::AbstractVector),) + +def_kwargs_qmr = (:(; c::AbstractVector{FC} = b ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_qmr = mapreduce(extract_parameters, vcat, def_kwargs_qmr) + +args_qmr = (:A, :b) +optargs_qmr = (:x0,) +kwargs_qmr = (:c, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function qmr($(def_args_qmr...), $(def_optargs_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = QmrSolver(A, b) + warm_start!(solver, $(optargs_qmr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + qmr!(solver, $(args_qmr...); $(kwargs_qmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ - - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function qmr($(def_args_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = QmrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + qmr!(solver, $(args_qmr...); $(kwargs_qmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - # Initialize the Lanczos biorthogonalization process. - cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩ - if cᵗb == 0 - stats.niter = 0 - stats.solved = false - stats.inconsistent = false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver - end - - βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀) - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ - cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ - sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ - wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ - ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ - τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate - - # Stopping criterion. - solved = rNorm ≤ ε - breakdown = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the Lanczos biorthogonalization process. 
- # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ - - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ - - # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] - # [ 0 • • • • • ] [ • • δ₃ • • • • ] - # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] - # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] - # [ • • • • γₖ ] [ • • • λₖ₋₁] - # [ • • βₖ αₖ ] [ • • δₖ ] - # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] - # - # If k = 1, we don't have any previous reflexion. - # If k = 2, we apply the last reflexion. - # If k ≥ 3, we only apply the two previous reflexions. - - # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ - if iter ≥ 3 - # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] - # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] - ϵₖ₋₂ = sₖ₋₂ * γₖ - λbarₖ₋₁ = -cₖ₋₂ * γₖ + function qmr!(solver :: QmrSolver{T,FC,S}, $(def_args_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "QMR: system of size %d\n", n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p + Δx, x, wₖ₋₂, wₖ₋₁, stats = solver.Δx, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Apply previous Givens reflections Qₖ₋₁.ₖ - if iter ≥ 2 - iter == 2 && (λbarₖ₋₁ = γₖ) - # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] - # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] - λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ - δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ - - # Update sₖ₋₂ and cₖ₋₂. - sₖ₋₂ = sₖ₋₁ - cₖ₋₂ = cₖ₋₁ - end + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ - # Compute and apply current Givens reflection Qₖ.ₖ₊₁ - iter == 1 && (δbarₖ = αₖ) - # [cₖ sₖ] [δbarₖ] = [δₖ] - # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] - (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) - - # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] - # [ 0 ] - # - # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] - # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] - ζₖ = cₖ * ζbarₖ - ζbarₖ₊₁ = conj(sₖ) * ζbarₖ - - # Update sₖ₋₁ and cₖ₋₁. - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - - # Compute the direction wₖ, the last column of Wₖ = Vₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Vₖ)ᵀ. - # w₁ = v₁ / δ₁ - if iter == 1 - wₖ = wₖ₋₁ - @kaxpy!(n, one(FC), vₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # w₂ = (v₂ - λ₁w₁) / δ₂ - if iter == 2 - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), vₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ - if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), vₖ, wₖ) - @. 
wₖ = wₖ / δₖ + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Compute solution xₖ. - # xₖ ← xₖ₋₁ + ζₖ * wₖ - @kaxpy!(n, ζₖ, wₖ, x) - - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if pᵗq ≠ zero(FC) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Initialize the Lanczos biorthogonalization process. + cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + if cᴴb == 0 + stats.niter = 0 + stats.solved = false + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver end - # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖² - τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ) + βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ + cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ + sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ + wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ + ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ + τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate + + # Stopping criterion. 
+ solved = rNorm ≤ ε + breakdown = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the Lanczos biorthogonalization process. + # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ + + @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + + αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + + @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + + # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. + # [ Oᵀ ] + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] + # [ 0 • • • • • ] [ • • δ₃ • • • • ] + # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] + # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] + # [ • • • • γₖ ] [ • • • λₖ₋₁] + # [ • • βₖ αₖ ] [ • • δₖ ] + # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] + # + # If k = 1, we don't have any previous reflexion. + # If k = 2, we apply the last reflexion. + # If k ≥ 3, we only apply the two previous reflexions. + + # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ + if iter ≥ 3 + # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] + # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] + ϵₖ₋₂ = sₖ₋₂ * γₖ + λbarₖ₋₁ = -cₖ₋₂ * γₖ + end + + # Apply previous Givens reflections Qₖ₋₁.ₖ + if iter ≥ 2 + iter == 2 && (λbarₖ₋₁ = γₖ) + # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] + # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] + λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ + δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ + + # Update sₖ₋₂ and cₖ₋₂. 
+ sₖ₋₂ = sₖ₋₁ + cₖ₋₂ = cₖ₋₁ + end + + # Compute and apply current Givens reflection Qₖ.ₖ₊₁ + iter == 1 && (δbarₖ = αₖ) + # [cₖ sₖ] [δbarₖ] = [δₖ] + # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] + (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) + + # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] + # [ 0 ] + # + # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] + # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] + ζₖ = cₖ * ζbarₖ + ζbarₖ₊₁ = conj(sₖ) * ζbarₖ + + # Update sₖ₋₁ and cₖ₋₁. + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + + # Compute the direction wₖ, the last column of Wₖ = Vₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Vₖ)ᵀ. + # w₁ = v₁ / δ₁ + if iter == 1 + wₖ = wₖ₋₁ + @kaxpy!(n, one(FC), vₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # w₂ = (v₂ - λ₁w₁) / δ₂ + if iter == 2 + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), vₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ + if iter ≥ 3 + @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), vₖ, wₖ) + @. wₖ = wₖ / δₖ + end + + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + ζₖ * wₖ + @kaxpy!(n, ζₖ, wₖ, x) + + # Compute vₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if pᴴq ≠ zero(FC) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + end + + # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖² + τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ) + + # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁ + rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁ + history && push!(rNorms, rNorm) + + # Update directions for x. + if iter ≥ 2 + @kswap(wₖ₋₂, wₖ₋₁) + end + + # Update ζbarₖ, βₖ, γₖ and τₖ. + ζbarₖ = ζbarₖ₊₁ + βₖ = βₖ₊₁ + γₖ = γₖ₊₁ + τₖ = τₖ₊₁ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + breakdown = !solved && (pᴴq == 0) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") - # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁ - rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁ - history && push!(rNorms, rNorm) + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update directions for x. - if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) - end + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Update ζbarₖ, βₖ, γₖ and τₖ. - ζbarₖ = ζbarₖ₊₁ - βₖ = βₖ₊₁ - γₖ = γₖ₊₁ - τₖ = τₖ₊₁ - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - breakdown = !solved && (pᵗq == 0) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/symmlq.jl b/src/symmlq.jl index 7b889c715..604698525 100644 --- a/src/symmlq.jl +++ b/src/symmlq.jl @@ -1,5 +1,5 @@ # An implementation of SYMMLQ for the solution of the -# linear system Ax = b, where A is square and symmetric. +# linear system Ax = b, where A is Hermitian. # # This implementation follows the original implementation by # Michael Saunders described in @@ -11,38 +11,63 @@ export symmlq, symmlq! 
- """ - (x, stats) = symmlq(A, b::AbstractVector{FC}; window::Int=0, - M=I, λ::T=zero(T), transfer_to_cg::Bool=true, - λest::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - etol::T=√eps(T), itmax::Int=0, conlim::T=1/√eps(T), - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = symmlq(A, b::AbstractVector{FC}; + M=I, ldiv::Bool=false, window::Int=5, + transfer_to_cg::Bool=true, λ::T=zero(T), + λest::T=zero(T), etol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = symmlq(A, b, x0::AbstractVector; kwargs...) + +SYMMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above + Solve the shifted linear system (A + λI) x = b -using the SYMMLQ method, where λ is a shift parameter, -and A is square and symmetric. +of size n using the SYMMLQ method, where λ is a shift parameter, and A is Hermitian. + +SYMMLQ produces monotonic errors ‖x* - x‖₂. -SYMMLQ produces monotonic errors ‖x*-x‖₂. +#### Input arguments -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. -SYMMLQ can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = symmlq(A, b, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `transfer_to_cg`: transfer from the SYMMLQ point to the CG point, when it exists. The transfer is based on the residual norm; +* `λ`: regularization parameter; +* `λest`: positive strict lower bound on the smallest eigenvalue `λₘᵢₙ` when solving a positive-definite system, such as `λest = (1-10⁻⁷)λₘᵢₙ`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `etol`: stopping tolerance based on the lower bound on the error; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SymmlqStats`](@ref) structure. #### Reference @@ -50,18 +75,6 @@ and `false` otherwise. """ function symmlq end -function symmlq(A, b :: AbstractVector{FC}, x0 :: AbstractVector; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = SymmlqSolver(A, b, window=window) - symmlq!(solver, A, b, x0; kwargs...) 
- return (solver.x, solver.stats) -end - -function symmlq(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = SymmlqSolver(A, b, window=window) - symmlq!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = symmlq!(solver::SymmlqSolver, A, b; kwargs...) solver = symmlq!(solver::SymmlqSolver, A, b, x0; kwargs...) @@ -72,182 +85,125 @@ See [`SymmlqSolver`](@ref) for more details about the `solver`. """ function symmlq! end -function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - symmlq!(solver, A, b; kwargs...) - return solver -end - -function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), transfer_to_cg :: Bool=true, - λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :v, S, n) - x, Mvold, Mv, Mv_next, w̅ = solver.x, solver.Mvold, solver.Mv, solver.Mv_next, solver.w̅ - Δx, clist, zlist, sprod, stats = solver.Δx, solver.clist, solver.zlist, solver.sprod, solver.stats - warm_start = solver.warm_start - rNorms, rcgNorms = stats.residuals, stats.residualscg - errors, errorscg = stats.errors, stats.errorscg - reset!(stats) - v = MisI ? Mv : solver.v - vold = MisI ? 
Mvold : solver.v - - ϵM = eps(T) - ctol = conlim > 0 ? 1 / conlim : zero(T) - - # Initial solution x₀ - x .= zero(FC) - - if warm_start - mul!(Mvold, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold) - @kaxpby!(n, one(FC), b, -one(FC), Mvold) - else - Mvold .= b +def_args_symmlq = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_symmlq = (:(x0::AbstractVector),) + +def_kwargs_symmlq = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_cg::Bool = true), + :(; λ::T = zero(T) ), + :(; λest::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; etol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_symmlq = mapreduce(extract_parameters, vcat, def_kwargs_symmlq) + +args_symmlq = (:A, :b) +optargs_symmlq = (:x0,) +kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, :conlim, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function symmlq($(def_args_symmlq...), $(def_optargs_symmlq...); window :: Int=5, $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = SymmlqSolver(A, b; window) + warm_start!(solver, $(optargs_symmlq...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + symmlq!(solver, $(args_symmlq...); $(kwargs_symmlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initialize Lanczos process. - # β₁ M v₁ = b. 
- MisI || mulorldiv!(vold, M, Mvold, ldiv) - β₁ = @kdotr(m, vold, Mvold) - if β₁ == 0 - stats.niter = 0 - stats.solved = true - stats.Anorm = T(NaN) - stats.Acond = T(NaN) - history && push!(rNorms, zero(T)) - history && push!(rcgNorms, zero(T)) - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function symmlq($(def_args_symmlq...); window :: Int=5, $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = SymmlqSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + symmlq!(solver, $(args_symmlq...); $(kwargs_symmlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - β₁ = sqrt(β₁) - β = β₁ - @kscal!(m, one(FC) / β, vold) - MisI || @kscal!(m, one(FC) / β, Mvold) - - w̅ .= vold - - mul!(Mv, A, vold) - α = @kdotr(m, vold, Mv) + λ - @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold - MisI || mulorldiv!(v, M, Mv, ldiv) - β = @kdotr(m, v, Mv) - β < 0 && error("Preconditioner is not positive definite") - β = sqrt(β) - @kscal!(m, one(FC) / β, v) - MisI || @kscal!(m, one(FC) / β, Mv) - - # Start QR factorization - γbar = α - δbar = β - ϵold = zero(T) - cold = one(T) - sold = zero(T) - - ηold = zero(T) - η = β₁ - ζold = zero(T) - - ANorm² = α * α + β * β - - γmax = T(-Inf) - γmin = T(Inf) - ANorm = zero(T) - Acond = zero(T) - - xNorm = zero(T) - rNorm = β₁ - history && push!(rNorms, rNorm) - - if γbar ≠ 0 - ζbar = η / γbar - xcgNorm = abs(ζbar) - rcgNorm = β₁ * abs(ζbar) - history && push!(rcgNorms, rcgNorm) - else - history && push!(rcgNorms, missing) - end - - err = T(Inf) - errcg = T(Inf) - window = length(clist) - clist .= zero(T) - zlist .= zero(T) - sprod .= one(T) - - if λest ≠ 0 - # Start QR factorization of Tₖ - λest I - ρbar = α - λest - σbar = β - ρ = sqrt(ρbar * ρbar + β * β) - cwold = -one(T) - cw = ρbar / ρ - sw = β / ρ - - history && push!(errors, abs(β₁/λest)) - if γbar ≠ 0 - history && 
push!(errorscg, sqrt(errors[1]^2 - ζbar^2)) + function symmlq!(solver :: SymmlqSolver{T,FC,S}, $(def_args_symmlq...); $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "SYMMLQ: system of size %d\n", n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + x, Mvold, Mv, Mv_next, w̅ = solver.x, solver.Mvold, solver.Mv, solver.Mv_next, solver.w̅ + Δx, clist, zlist, sprod, stats = solver.Δx, solver.clist, solver.zlist, solver.sprod, solver.stats + warm_start = solver.warm_start + rNorms, rcgNorms = stats.residuals, stats.residualscg + errors, errorscg = stats.errors, stats.errorscg + reset!(stats) + v = MisI ? Mv : solver.v + vold = MisI ? Mvold : solver.v + + ϵM = eps(T) + ctol = conlim > 0 ? 
1 / conlim : zero(T) + + # Initial solution x₀ + x .= zero(FC) + + if warm_start + mul!(Mvold, A, Δx) + (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold) + @kaxpby!(n, one(FC), b, -one(FC), Mvold) else - history && push!(errorscg, missing) + Mvold .= b end - end - iter = 0 - itmax == 0 && (itmax = 2 * n) - - (verbose > 0) && @printf("%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond) - - tol = atol + rtol * β₁ - status = "unknown" - solved_lq = solved_mach = solved_lim = (rNorm ≤ tol) - solved_cg = (γbar ≠ 0) && transfer_to_cg && rcgNorm ≤ tol - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - solved = zero_resid = solved_lq || solved_cg - fwd_err = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - iter = iter + 1 - - # Continue QR factorization - (c, s, γ) = sym_givens(γbar, β) - - # Update SYMMLQ point - ηold = η - ζ = ηold / γ - @kaxpy!(n, c * ζ, w̅, x) - @kaxpy!(n, s * ζ, v, x) - # Update w̅ - @kaxpby!(n, -c, v, s, w̅) - - # Generate next Lanczos vector - oldβ = β - mul!(Mv_next, A, v) - α = @kdotr(m, v, Mv_next) + λ - @kaxpy!(m, -oldβ, Mvold, Mv_next) - @. Mvold = Mv - @kaxpy!(m, -α, Mv, Mv_next) - @. Mv = Mv_next + # Initialize Lanczos process. + # β₁ M v₁ = b. 
+ MisI || mulorldiv!(vold, M, Mvold, ldiv) + β₁ = @kdotr(m, vold, Mvold) + if β₁ == 0 + stats.niter = 0 + stats.solved = true + stats.Anorm = T(NaN) + stats.Acond = T(NaN) + history && push!(rNorms, zero(T)) + history && push!(rcgNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + β₁ = sqrt(β₁) + β = β₁ + @kscal!(m, one(FC) / β, vold) + MisI || @kscal!(m, one(FC) / β, Mvold) + + w̅ .= vold + + mul!(Mv, A, vold) + α = @kdotr(m, vold, Mv) + λ + @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold MisI || mulorldiv!(v, M, Mv, ldiv) β = @kdotr(m, v, Mv) β < 0 && error("Preconditioner is not positive definite") @@ -255,148 +211,259 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC) / β, v) MisI || @kscal!(m, one(FC) / β, Mv) - # Continue A norm estimate - ANorm² = ANorm² + α * α + oldβ * oldβ + β * β + # Start QR factorization + γbar = α + δbar = β + ϵold = zero(T) + cold = one(T) + sold = zero(T) - if λest ≠ 0 - η = -oldβ * oldβ * cwold / ρbar - ω = λest + η - ψ = c * δbar + s * ω - ωbar = s * δbar - c * ω - end + ηold = zero(T) + η = β₁ + ζold = zero(T) + + ANorm² = α * α + β * β - # Continue QR factorization - δ = δbar * c + α * s - γbar = δbar * s - α * c - ϵ = β * s - δbar = -β * c - η = -ϵold * ζold - δ * ζ + γmax = T(-Inf) + γmin = T(Inf) + ANorm = zero(T) + Acond = zero(T) - rNorm = sqrt(γ * γ * ζ * ζ + ϵold * ϵold * ζold * ζold) - xNorm = xNorm + ζ * ζ + xNorm = zero(T) + rNorm = β₁ history && push!(rNorms, rNorm) if γbar ≠ 0 ζbar = η / γbar - rcgNorm = β * abs(s * ζ - c * ζbar) - xcgNorm = xNorm + ζbar * ζbar + xcgNorm = abs(ζbar) + rcgNorm = β₁ * abs(ζbar) history && push!(rcgNorms, rcgNorm) else history && push!(rcgNorms, missing) end - if window > 0 && λest ≠ 0 - if iter < window && window > 1 - for i = iter+1 : window - sprod[i] = s * sprod[i] - end - end + err = T(Inf) + errcg = T(Inf) - ix = ((iter-1) % 
window) + 1 - clist[ix] = c - zlist[ix] = ζ + window = length(clist) + clist .= zero(T) + zlist .= zero(T) + sprod .= one(T) - if iter ≥ window - jx = mod(iter, window) + 1 - zetabark = zlist[jx] / clist[jx] + if λest ≠ 0 + # Start QR factorization of Tₖ - λest I + ρbar = α - λest + σbar = β + ρ = sqrt(ρbar * ρbar + β * β) + cwold = -one(T) + cw = ρbar / ρ + sw = β / ρ - if γbar ≠ 0 - theta = abs(sum(clist[i] * sprod[i] * zlist[i] for i = 1 : window)) - theta = zetabark * theta + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2 - history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta))) - else - history && (errorscg[iter-window+1] = missing) - end + history && push!(errors, abs(β₁/λest)) + if γbar ≠ 0 + history && push!(errorscg, sqrt(errors[1]^2 - ζbar^2)) + else + history && push!(errorscg, missing) end + end + + iter = 0 + itmax == 0 && (itmax = 2 * n) - ix = (iter % window) + 1 - if iter ≥ window && window > 1 - sprod .= sprod ./ sprod[(ix % window) + 1] - sprod[ix] = sprod[mod(ix-2, window)+1] * s + (verbose > 0) && @printf(iostream, "%5s %7s %7s %8s %8s %7s %7s %7s %5s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7s %.2fs\n", iter, rNorm, β, cold, sold, ANorm, Acond, "✗ ✗ ✗ ✗", ktimer(start_time)) + + tol = atol + rtol * β₁ + status = "unknown" + solved_lq = solved_mach = solved_lim = (rNorm ≤ tol) + solved_cg = (γbar ≠ 0) && transfer_to_cg && rcgNorm ≤ tol + tired = iter ≥ itmax + ill_cond = ill_cond_mach = ill_cond_lim = false + solved = zero_resid = solved_lq || solved_cg + fwd_err = false + user_requested_exit = false + overtimed = false + + while ! 
(solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Continue QR factorization + (c, s, γ) = sym_givens(γbar, β) + + # Update SYMMLQ point + ηold = η + ζ = ηold / γ + @kaxpy!(n, c * ζ, w̅, x) + @kaxpy!(n, s * ζ, v, x) + # Update w̅ + @kaxpby!(n, -c, v, s, w̅) + + # Generate next Lanczos vector + oldβ = β + mul!(Mv_next, A, v) + α = @kdotr(m, v, Mv_next) + λ + @kaxpy!(m, -oldβ, Mvold, Mv_next) + @. Mvold = Mv + @kaxpy!(m, -α, Mv, Mv_next) + @. Mv = Mv_next + MisI || mulorldiv!(v, M, Mv, ldiv) + β = @kdotr(m, v, Mv) + β < 0 && error("Preconditioner is not positive definite") + β = sqrt(β) + @kscal!(m, one(FC) / β, v) + MisI || @kscal!(m, one(FC) / β, Mv) + + # Continue A norm estimate + ANorm² = ANorm² + α * α + oldβ * oldβ + β * β + + if λest ≠ 0 + η = -oldβ * oldβ * cwold / ρbar + ω = λest + η + ψ = c * δbar + s * ω + ωbar = s * δbar - c * ω end - end - if λest ≠ 0 - err = abs((ϵold * ζold + ψ * ζ) / ωbar) - history && push!(errors, err) + # Continue QR factorization + δ = δbar * c + α * s + γbar = δbar * s - α * c + ϵ = β * s + δbar = -β * c + η = -ϵold * ζold - δ * ζ + + rNorm = sqrt(γ * γ * ζ * ζ + ϵold * ϵold * ζold * ζold) + xNorm = xNorm + ζ * ζ + history && push!(rNorms, rNorm) if γbar ≠ 0 - errcg = sqrt(abs(err * err - ζbar * ζbar)) - history && push!(errorscg, errcg) + ζbar = η / γbar + rcgNorm = β * abs(s * ζ - c * ζbar) + xcgNorm = xNorm + ζbar * ζbar + history && push!(rcgNorms, rcgNorm) else - history && push!(errorscg, missing) + history && push!(rcgNorms, missing) end - ρbar = sw * σbar - cw * (α - λest) - σbar = -cw * β - ρ = sqrt(ρbar * ρbar + β * β) + if window > 0 && λest ≠ 0 + if iter < window && window > 1 + for i = iter+1 : window + sprod[i] = s * sprod[i] + end + end + + ix = ((iter-1) % window) + 1 + clist[ix] = c + zlist[ix] = ζ + + if iter ≥ window + jx = mod(iter, window) + 1 + zetabark = zlist[jx] / clist[jx] + + if γbar ≠ 0 + theta = zero(T) + for i = 1 : window + theta += clist[i] * sprod[i] * 
zlist[i] + end + theta = zetabark * abs(theta) + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2 + history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta))) + else + history && (errorscg[iter-window+1] = missing) + end + end - cwold = cw + ix = (iter % window) + 1 + if iter ≥ window && window > 1 + sprod .= sprod ./ sprod[(ix % window) + 1] + sprod[ix] = sprod[mod(ix-2, window)+1] * s + end + end - cw = ρbar / ρ - sw = β / ρ - end + if λest ≠ 0 + err = abs((ϵold * ζold + ψ * ζ) / ωbar) + history && push!(errors, err) + + if γbar ≠ 0 + errcg = sqrt(abs(err * err - ζbar * ζbar)) + history && push!(errorscg, errcg) + else + history && push!(errorscg, missing) + end - # TODO: Use γ or γbar? - γmax = max(γmax, γ) - γmin = min(γmin, γ) + ρbar = sw * σbar - cw * (α - λest) + σbar = -cw * β + ρ = sqrt(ρbar * ρbar + β * β) - Acond = γmax / γmin - ANorm = sqrt(ANorm²) - test1 = rNorm / (ANorm * xNorm) + cwold = cw - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1) + cw = ρbar / ρ + sw = β / ρ + end - # Reset variables - ϵold = ϵ - ζold = ζ - cold = c + # TODO: Use γ or γbar? + γmax = max(γmax, γ) + γmin = min(γmin, γ) + + Acond = γmax / γmin + ANorm = sqrt(ANorm²) + test1 = rNorm / (ANorm * xNorm) + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, β, c, s, ANorm, Acond, test1, ktimer(start_time)) + + # Reset variables + ϵold = ϵ + ζold = ζ + cold = c + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (one(T) + rNorm ≤ one(T)) + ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) + zero_resid_mach = (one(T) + test1 ≤ one(T)) + # solved_mach = (ϵx ≥ β₁) + + # Stopping conditions based on user-provided tolerances. 
+ tired = iter ≥ itmax + ill_cond_lim = (one(T) / Acond ≤ ctol) + zero_resid_lim = (test1 ≤ tol) + fwd_err = (err ≤ etol) || ((γbar ≠ 0) && (errcg ≤ etol)) + solved_lq = rNorm ≤ tol + solved_cg = transfer_to_cg && (γbar ≠ 0) && rcgNorm ≤ tol + + user_requested_exit = callback(solver) :: Bool + zero_resid = solved_lq || solved_cg + ill_cond = ill_cond_mach || ill_cond_lim + solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (one(T) + rNorm ≤ one(T)) - ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) - zero_resid_mach = (one(T) + test1 ≤ one(T)) - # solved_mach = (ϵx ≥ β₁) + # Compute CG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ + if solved_cg + @kaxpy!(m, ζbar, w̅, x) + end - # Stopping conditions based on user-provided tolerances. 
- tired = iter ≥ itmax - ill_cond_lim = (one(T) / Acond ≤ ctol) - zero_resid_lim = (test1 ≤ tol) - fwd_err = (err ≤ etol) || ((γbar ≠ 0) && (errcg ≤ etol)) - solved_lq = rNorm ≤ tol - solved_cg = transfer_to_cg && (γbar ≠ 0) && rcgNorm ≤ tol - - user_requested_exit = callback(solver) :: Bool - zero_resid = solved_lq || solved_cg - ill_cond = ill_cond_mach || ill_cond_lim - solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach - end - (verbose > 0) && @printf("\n") + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate solution") + solved_lq && (status = "solution xᴸ good enough given atol and rtol") + solved_cg && (status = "solution xᶜ good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Compute CG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ - if solved_cg - @kaxpy!(m, ζbar, w̅, x) + # Update stats + stats.niter = iter + stats.solved = solved + stats.Anorm = ANorm + stats.Acond = Acond + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate solution") - solved_lq && (status = "solution xᴸ good enough given atol and rtol") - solved_cg && (status = "solution xᶜ good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - 
stats.niter = iter - stats.solved = solved - stats.Anorm = ANorm - stats.Acond = Acond - stats.status = status - return solver end diff --git a/src/tricg.jl b/src/tricg.jl index 5acff2d52..8250e6dfc 100644 --- a/src/tricg.jl +++ b/src/tricg.jl @@ -13,30 +13,32 @@ export tricg, tricg! """ (x, y, stats) = tricg(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - spd::Bool=false, snd::Bool=false, flip::Bool=false, - τ::T=one(T), ν::T=-one(T), itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + spd::Bool=false, snd::Bool=false, + flip::Bool=false, τ::T=one(T), + ν::T=-one(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -TriCG solves the symmetric linear system + (x, y, stats) = tricg(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriCG can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + +Given a matrix `A` of dimension m × n, TriCG solves the Hermitian linear system [ τE A ] [ x ] = [ b ] - [ Aᵀ νF ] [ y ] [ c ], + [ Aᴴ νF ] [ y ] [ c ], -where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0. +of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0. `b` and `c` must both be nonzero. TriCG could breakdown if `τ = 0` or `ν = 0`. It's recommended to use TriMR in these cases. -By default, TriCG solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1. -If `flip = true`, TriCG solves another known variant of SQD systems where τ = -1 and ν = 1. -If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved. 
-If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved. -`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems. +By default, TriCG solves Hermitian and quasi-definite linear systems with τ = 1 and ν = -1. TriCG is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. @@ -50,17 +52,40 @@ It's the Euclidean norm when `M` and `N` are identity operators. TriCG stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -TriCG can be warm-started from initial guesses `x0` and `y0` with the method +#### Optional arguments - (x, y, stats) = tricg(A, b, c, x0, y0; kwargs...) +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear system; +* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems; +* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems; +* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -68,18 +93,6 @@ and `false` otherwise. """ function tricg end -function tricg(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = TricgSolver(A, b) - tricg!(solver, A, b, c, x0, y0; kwargs...) 
- return (solver.x, solver.y, solver.stats) -end - -function tricg(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = TricgSolver(A, b) - tricg!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = tricg!(solver::TricgSolver, A, b, c; kwargs...) solver = tricg!(solver::TricgSolver, A, b, c, x0, y0; kwargs...) @@ -90,322 +103,374 @@ See [`TricgSolver`](@ref) for more details about the `solver`. """ function tricg! end -function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - tricg!(solver, A, b, c; kwargs...) - return solver -end - -function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TriCG: system of %d equations in %d variables\n", m+n, m+n) - - # Check flip, spd and snd parameters - spd && flip && error("The matrix cannot be SPD and SQD") - snd && flip && error("The matrix cannot be SND and SQD") - spd && snd && error("The matrix cannot be SPD and SND") - - # Check M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Determine τ and ν associated to SQD, SPD or SND systems. 
- flip && (τ = -one(T) ; ν = one(T)) - spd && (τ = one(T) ; ν = one(T)) - snd && (τ = -one(T) ; ν = -one(T)) - - warm_start = solver.warm_start - warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") - warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :vₖ, S, m) - allocate_if(!NisI, solver, :uₖ, S, n) - Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p - Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q - gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ - vₖ = MisI ? M⁻¹vₖ : solver.vₖ - uₖ = NisI ? N⁻¹uₖ : solver.uₖ - vₖ₊₁ = MisI ? q : vₖ - uₖ₊₁ = NisI ? p : uₖ - b₀ = warm_start ? q : b - c₀ = warm_start ? p : c - - stats = solver.stats - rNorms = stats.residuals - reset!(stats) - - # Initial solutions x₀ and y₀. - xₖ .= zero(FC) - yₖ .= zero(FC) - - iter = 0 - itmax == 0 && (itmax = m+n) - - # Initialize preconditioned orthogonal tridiagonalization process. 
- M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 - N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 - - # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] - # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ] - if warm_start - mul!(b₀, A, Δy) - (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) - @kaxpby!(m, one(FC), b, -one(FC), b₀) - mul!(c₀, Aᵀ, Δx) - (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) - @kaxpby!(n, one(FC), c, -one(FC), c₀) - end - - # β₁Ev₁ = b ↔ β₁v₁ = Mb - M⁻¹vₖ .= b₀ - MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E - if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(m, one(FC) / βₖ, vₖ) - else - error("b must be nonzero") +def_args_tricg = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_tricg = (:(x0::AbstractVector), + :(y0::AbstractVector)) + +def_kwargs_tricg = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; spd::Bool = false ), + :(; snd::Bool = false ), + :(; flip::Bool = false ), + :(; τ::T = one(T) ), + :(; ν::T = -one(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_tricg = mapreduce(extract_parameters, vcat, def_kwargs_tricg) + +args_tricg = (:A, :b, :c) +optargs_tricg = (:x0, :y0) +kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function tricg($(def_args_tricg...), $(def_optargs_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TricgSolver(A, b) + warm_start!(solver, $(optargs_tricg...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + tricg!(solver, $(args_tricg...); $(kwargs_tricg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # γ₁Fu₁ = c ↔ γ₁u₁ = Nc - N⁻¹uₖ .= c₀ - NisI || 
mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) - γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F - if γₖ ≠ 0 - @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) - NisI || @kscal!(n, one(FC) / γₖ, uₖ) - else - error("c must be nonzero") + function tricg($(def_args_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TricgSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + tricg!(solver, $(args_tricg...); $(kwargs_tricg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ - gx₂ₖ₋₁ .= zero(FC) - gy₂ₖ₋₁ .= zero(FC) - gx₂ₖ .= zero(FC) - gy₂ₖ .= zero(FC) - - # Compute ‖r₀‖² = (γ₁)² + (β₁)² - rNorm = sqrt(γₖ^2 + βₖ^2) - history && push!(rNorms, rNorm) - ε = atol + rtol * rNorm - - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) - - # Set up workspace. - d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T) - π₂ₖ₋₃ = π₂ₖ₋₂ = zero(FC) - δₖ₋₁ = zero(FC) - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the orthogonal tridiagonalization process. 
- # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ - - if iter ≥ 2 - @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ - @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ - end - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ - @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ - - # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ - M⁻¹vₖ₋₁ .= M⁻¹vₖ - N⁻¹uₖ₋₁ .= N⁻¹uₖ - - # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] - # [0 u₁ ••• 0 uₖ] - # - # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ - # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ] - # - # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ - # [ Aᵀ νF ] [ 0 F ] - # - # TriCG subproblem : (Wₖ)ᵀ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂ - # - # Update the LDLᵀ factorization of Sₖ.ₖ. - # - # [ τ α₁ γ₂ 0 • • • • 0 ] - # [ ᾱ₁ ν β₂ • • ] - # [ β₂ τ α₂ γ₃ • • ] - # [ γ₂ ᾱ₂ ν β₃ • • ] - # [ 0 β₃ • • • • • ] - # [ • • γ₃ • • • 0 ] - # [ • • • • • γₖ ] - # [ • • • • • βₖ ] - # [ • • βₖ τ αₖ ] - # [ 0 • • • • 0 γₖ ᾱₖ ν ] - if iter == 1 - d₂ₖ₋₁ = τ - δₖ = conj(αₖ) / d₂ₖ₋₁ - d₂ₖ = ν - abs2(δₖ) * d₂ₖ₋₁ - else - σₖ = βₖ / d₂ₖ₋₂ - ηₖ = γₖ / d₂ₖ₋₃ - λₖ = -(ηₖ * conj(δₖ₋₁) * d₂ₖ₋₃) / d₂ₖ₋₂ - d₂ₖ₋₁ = τ - abs2(σₖ) * d₂ₖ₋₂ - δₖ = (conj(αₖ) - λₖ * conj(σₖ) * d₂ₖ₋₂) / d₂ₖ₋₁ - d₂ₖ = ν - abs2(ηₖ) * d₂ₖ₋₃ - abs2(λₖ) * d₂ₖ₋₂ - abs2(δₖ) * d₂ₖ₋₁ + function tricg!(solver :: TricgSolver{T,FC,S}, $(def_args_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && 
@printf(iostream, "TriCG: system of %d equations in %d variables\n", m+n, m+n) + + # Check flip, spd and snd parameters + spd && flip && error("The matrix cannot be SPD and SQD") + snd && flip && error("The matrix cannot be SND and SQD") + spd && snd && error("The matrix cannot be SPD and SND") + + # Check M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Determine τ and ν associated to SQD, SPD or SND systems. + flip && (τ = -one(T) ; ν = one(T)) + spd && (τ = one(T) ; ν = one(T)) + snd && (τ = -one(T) ; ν = -one(T)) + + warm_start = solver.warm_start + warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") + warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :vₖ, S, m) + allocate_if(!NisI, solver, :uₖ, S, n) + Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p + Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q + gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ + vₖ = MisI ? M⁻¹vₖ : solver.vₖ + uₖ = NisI ? N⁻¹uₖ : solver.uₖ + vₖ₊₁ = MisI ? q : vₖ + uₖ₊₁ = NisI ? p : uₖ + b₀ = warm_start ? q : b + c₀ = warm_start ? p : c + + stats = solver.stats + rNorms = stats.residuals + reset!(stats) + + # Initial solutions x₀ and y₀. + xₖ .= zero(FC) + yₖ .= zero(FC) + + iter = 0 + itmax == 0 && (itmax = m+n) + + # Initialize preconditioned orthogonal tridiagonalization process. 
+ M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 + N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 + + # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] + # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] + if warm_start + mul!(b₀, A, Δy) + (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) + @kaxpby!(m, one(FC), b, -one(FC), b₀) + mul!(c₀, Aᴴ, Δx) + (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) + @kaxpby!(n, one(FC), c, -one(FC), c₀) end - # Solve LₖDₖpₖ = (β₁e₁ + γ₁e₂) - # - # [ 1 0 • • • • • • • 0 ] [ d₁ ] [ β₁ ] - # [ δ₁ 1 • • ] [ d₂ ] [ γ₁ ] - # [ σ₂ 1 • • ] [ • ] [ 0 ] - # [ η₂ λ₂ δ₂ 1 • • ] [ • ] [ • ] - # [ 0 σ₃ 1 • • ] [ • ] zₖ = [ • ] - # [ • • η₃ λ₃ δ₃ 1 • • ] [ • ] [ • ] - # [ • • • • • • ] [ • ] [ • ] - # [ • • • • • • • • ] [ • ] [ • ] - # [ • • σₖ 1 0 ] [ d₂ₖ₋₁ ] [ • ] - # [ 0 • • • • 0 ηₖ λₖ δₖ 1 ] [ d₂ₖ] [ 0 ] - if iter == 1 - π₂ₖ₋₁ = βₖ / d₂ₖ₋₁ - π₂ₖ = (γₖ - δₖ * βₖ) / d₂ₖ + # β₁Ev₁ = b ↔ β₁v₁ = Mb + M⁻¹vₖ .= b₀ + MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) + βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E + if βₖ ≠ 0 + @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) + MisI || @kscal!(m, one(FC) / βₖ, vₖ) else - π₂ₖ₋₁ = -(σₖ * d₂ₖ₋₂ * π₂ₖ₋₂) / d₂ₖ₋₁ - π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ + error("b must be nonzero") end - # Solve Gₖ = Wₖ(Lₖ)⁻ᵀ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ. - if iter == 1 - # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] - # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ] - @. gx₂ₖ₋₁ = vₖ - @. gx₂ₖ = - conj(δₖ) * gx₂ₖ₋₁ - @. gy₂ₖ = uₖ + # γ₁Fu₁ = c ↔ γ₁u₁ = Nc + N⁻¹uₖ .= c₀ + NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) + γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F + if γₖ ≠ 0 + @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) + NisI || @kscal!(n, one(FC) / γₖ, uₖ) else - # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ] - # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ] - # [ gx₂ₖ₋₁ gy₂ₖ₋₁ ] - # [ gx₂ₖ gy₂ₖ ] - @. gx₂ₖ₋₁ = conj(ηₖ) * gx₂ₖ₋₁ + conj(λₖ) * gx₂ₖ - @. gy₂ₖ₋₁ = conj(ηₖ) * gy₂ₖ₋₁ + conj(λₖ) * gy₂ₖ - - @. gx₂ₖ = vₖ - conj(σₖ) * gx₂ₖ - @. gy₂ₖ = - conj(σₖ) * gy₂ₖ - - @. gx₂ₖ₋₁ = - gx₂ₖ₋₁ - conj(δₖ) * gx₂ₖ - @. 
gy₂ₖ₋₁ = uₖ - gy₂ₖ₋₁ - conj(δₖ) * gy₂ₖ - - # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁ - @kswap(gx₂ₖ₋₁, gx₂ₖ) - @kswap(gy₂ₖ₋₁, gy₂ₖ) + error("c must be nonzero") end - # Update xₖ = Gxₖ * pₖ - @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) - @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) - - # Update yₖ = Gyₖ * pₖ - @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) - @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) - - # Compute vₖ₊₁ and uₖ₊₁ - MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ - NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ + # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ + gx₂ₖ₋₁ .= zero(FC) + gy₂ₖ₋₁ .= zero(FC) + gx₂ₖ .= zero(FC) + gy₂ₖ .= zero(FC) - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E - γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F - - # βₖ₊₁ ≠ 0 - if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, q) - MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - end + # Compute ‖r₀‖² = (γ₁)² + (β₁)² + rNorm = sqrt(γₖ^2 + βₖ^2) + history && push!(rNorms, rNorm) + ε = atol + rtol * rNorm - # γₖ₊₁ ≠ 0 - if γₖ₊₁ > btol - @kscal!(n, one(FC) / γₖ₊₁, p) - NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) - end + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ, γₖ, ktimer(start_time)) - # Update M⁻¹vₖ and N⁻¹uₖ - M⁻¹vₖ .= q - N⁻¹uₖ .= p + # Set up workspace. + d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T) + π₂ₖ₋₃ = π₂ₖ₋₂ = zero(FC) + δₖ₋₁ = zero(FC) - # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|² - ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ - ζ₂ₖ = π₂ₖ - rNorm = sqrt(abs2(γₖ₊₁ * ζ₂ₖ₋₁) + abs2(βₖ₊₁ * ζ₂ₖ)) - history && push!(rNorms, rNorm) + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - # Update βₖ, γₖ, π₂ₖ₋₃, π₂ₖ₋₂, d₂ₖ₋₃, d₂ₖ₋₂, δₖ₋₁, vₖ, uₖ. - βₖ = βₖ₊₁ - γₖ = γₖ₊₁ - π₂ₖ₋₃ = π₂ₖ₋₁ - π₂ₖ₋₂ = π₂ₖ - d₂ₖ₋₃ = d₂ₖ₋₁ - d₂ₖ₋₂ = d₂ₖ - δₖ₋₁ = δₖ - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol - solved = resid_decrease_lim || resid_decrease_mach + # Stopping criterion. + breakdown = false + solved = rNorm ≤ ε tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the orthogonal tridiagonalization process. + # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ + + if iter ≥ 2 + @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ + @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + end + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ + @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + + # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ + M⁻¹vₖ₋₁ .= M⁻¹vₖ + N⁻¹uₖ₋₁ .= N⁻¹uₖ + + # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] + # [0 u₁ ••• 0 uₖ] + # + # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ + # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ] + # + # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ + # [ Aᴴ νF ] [ 0 F ] + # + # TriCG subproblem : (Wₖ)ᴴ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂ + # + # Update the LDLᴴ factorization of Sₖ.ₖ. 
+ # + # [ τ α₁ γ₂ 0 • • • • 0 ] + # [ ᾱ₁ ν β₂ • • ] + # [ β₂ τ α₂ γ₃ • • ] + # [ γ₂ ᾱ₂ ν β₃ • • ] + # [ 0 β₃ • • • • • ] + # [ • • γ₃ • • • 0 ] + # [ • • • • • γₖ ] + # [ • • • • • βₖ ] + # [ • • βₖ τ αₖ ] + # [ 0 • • • • 0 γₖ ᾱₖ ν ] + if iter == 1 + d₂ₖ₋₁ = τ + δₖ = conj(αₖ) / d₂ₖ₋₁ + d₂ₖ = ν - abs2(δₖ) * d₂ₖ₋₁ + else + σₖ = βₖ / d₂ₖ₋₂ + ηₖ = γₖ / d₂ₖ₋₃ + λₖ = -(ηₖ * conj(δₖ₋₁) * d₂ₖ₋₃) / d₂ₖ₋₂ + d₂ₖ₋₁ = τ - abs2(σₖ) * d₂ₖ₋₂ + δₖ = (conj(αₖ) - λₖ * conj(σₖ) * d₂ₖ₋₂) / d₂ₖ₋₁ + d₂ₖ = ν - abs2(ηₖ) * d₂ₖ₋₃ - abs2(λₖ) * d₂ₖ₋₂ - abs2(δₖ) * d₂ₖ₋₁ + end + + # Solve LₖDₖpₖ = (β₁e₁ + γ₁e₂) + # + # [ 1 0 • • • • • • • 0 ] [ d₁ ] [ β₁ ] + # [ δ₁ 1 • • ] [ d₂ ] [ γ₁ ] + # [ σ₂ 1 • • ] [ • ] [ 0 ] + # [ η₂ λ₂ δ₂ 1 • • ] [ • ] [ • ] + # [ 0 σ₃ 1 • • ] [ • ] zₖ = [ • ] + # [ • • η₃ λ₃ δ₃ 1 • • ] [ • ] [ • ] + # [ • • • • • • ] [ • ] [ • ] + # [ • • • • • • • • ] [ • ] [ • ] + # [ • • σₖ 1 0 ] [ d₂ₖ₋₁ ] [ • ] + # [ 0 • • • • 0 ηₖ λₖ δₖ 1 ] [ d₂ₖ] [ 0 ] + if iter == 1 + π₂ₖ₋₁ = βₖ / d₂ₖ₋₁ + π₂ₖ = (γₖ - δₖ * βₖ) / d₂ₖ + else + π₂ₖ₋₁ = -(σₖ * d₂ₖ₋₂ * π₂ₖ₋₂) / d₂ₖ₋₁ + π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ + end + + # Solve Gₖ = Wₖ(Lₖ)⁻ᴴ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ. + if iter == 1 + # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] + # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ] + @. gx₂ₖ₋₁ = vₖ + @. gx₂ₖ = - conj(δₖ) * gx₂ₖ₋₁ + @. gy₂ₖ = uₖ + else + # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ] + # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ] + # [ gx₂ₖ₋₁ gy₂ₖ₋₁ ] + # [ gx₂ₖ gy₂ₖ ] + @. gx₂ₖ₋₁ = conj(ηₖ) * gx₂ₖ₋₁ + conj(λₖ) * gx₂ₖ + @. gy₂ₖ₋₁ = conj(ηₖ) * gy₂ₖ₋₁ + conj(λₖ) * gy₂ₖ + + @. gx₂ₖ = vₖ - conj(σₖ) * gx₂ₖ + @. gy₂ₖ = - conj(σₖ) * gy₂ₖ + + @. gx₂ₖ₋₁ = - gx₂ₖ₋₁ - conj(δₖ) * gx₂ₖ + @. 
gy₂ₖ₋₁ = uₖ - gy₂ₖ₋₁ - conj(δₖ) * gy₂ₖ + + # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁ + @kswap(gx₂ₖ₋₁, gx₂ₖ) + @kswap(gy₂ₖ₋₁, gy₂ₖ) + end + + # Update xₖ = Gxₖ * pₖ + @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) + @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) + + # Update yₖ = Gyₖ * pₖ + @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) + @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) + + # Compute vₖ₊₁ and uₖ₊₁ + MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ + NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ + + βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E + γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F + + # βₖ₊₁ ≠ 0 + if βₖ₊₁ > btol + @kscal!(m, one(FC) / βₖ₊₁, q) + MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + end + + # γₖ₊₁ ≠ 0 + if γₖ₊₁ > btol + @kscal!(n, one(FC) / γₖ₊₁, p) + NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) + end + + # Update M⁻¹vₖ and N⁻¹uₖ + M⁻¹vₖ .= q + N⁻¹uₖ .= p + + # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|² + ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ + ζ₂ₖ = π₂ₖ + rNorm = sqrt(abs2(γₖ₊₁ * ζ₂ₖ₋₁) + abs2(βₖ₊₁ * ζ₂ₖ)) + history && push!(rNorms, rNorm) + + # Update βₖ, γₖ, π₂ₖ₋₃, π₂ₖ₋₂, d₂ₖ₋₃, d₂ₖ₋₂, δₖ₋₁, vₖ, uₖ. + βₖ = βₖ₊₁ + γₖ = γₖ₊₁ + π₂ₖ₋₃ = π₂ₖ₋₁ + π₂ₖ₋₂ = π₂ₖ + d₂ₖ₋₃ = d₂ₖ₋₁ + d₂ₖ₋₂ = d₂ₖ + δₖ₋₁ = δₖ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ₊₁, γₖ₊₁, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "inconsistent linear system") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(m, one(FC), Δx, xₖ) + warm_start && @kaxpy!(n, one(FC), Δy, yₖ) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !solved && breakdown + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "inconsistent linear system") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(m, one(FC), Δx, xₖ) - warm_start && @kaxpy!(n, one(FC), Δy, yₖ) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !solved && breakdown - stats.status = status - return solver end diff --git a/src/trilqr.jl b/src/trilqr.jl index edcb4c9b9..2b584c216 100644 --- a/src/trilqr.jl +++ b/src/trilqr.jl @@ -1,5 +1,5 @@ # An implementation of TRILQR for the solution of square or -# rectangular consistent linear adjoint systems Ax = b and Aᵀy = c. +# rectangular consistent linear adjoint systems Ax = b and Aᴴy = c. 
# # This method is described in # @@ -14,32 +14,54 @@ export trilqr, trilqr! """ (x, y, stats) = trilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_usymcg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, y, stats) = trilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + Combine USYMLQ and USYMQR to solve adjoint systems. [0 A] [y] = [b] - [Aᵀ 0] [x] [c] + [Aᴴ 0] [x] [c] + +USYMLQ is used for solving primal system `Ax = b` of size m × n. +USYMQR is used for solving dual system `Aᴴy = c` of size n × m. + +#### Input arguments -USYMLQ is used for solving primal system `Ax = b`. -USYMQR is used for solving dual system `Aᵀy = c`. +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -An option gives the possibility of transferring from the USYMLQ point to the -USYMCG point, when it exists. The transfer is based on the residual norm. +#### Optional arguments -TriLQR can be warm-started from initial guesses `x0` and `y0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x; +* `y0`: a vector of length m that represents an initial guess of the solution y. - (x, y, stats) = trilqr(A, b, c, x0, y0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. 
The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure. #### Reference @@ -47,18 +69,6 @@ and `false` otherwise. """ function trilqr end -function trilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = TrilqrSolver(A, b) - trilqr!(solver, A, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function trilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = TrilqrSolver(A, b) - trilqr!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = trilqr!(solver::TrilqrSolver, A, b, c; kwargs...) solver = trilqr!(solver::TrilqrSolver, A, b, c, x0, y0; kwargs...) @@ -69,349 +79,396 @@ See [`TrilqrSolver`](@ref) for more details about the `solver`. """ function trilqr! 
end -function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - trilqr!(solver, A, b, c; kwargs...) - return solver -end +def_args_trilqr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_trilqr = (:(x0::AbstractVector), + :(y0::AbstractVector)) + +def_kwargs_trilqr = (:(; transfer_to_usymcg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_trilqr = mapreduce(extract_parameters, vcat, def_kwargs_trilqr) + +args_trilqr = (:A, :b, :c) +optargs_trilqr = (:x0, :y0) +kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function trilqr($(def_args_trilqr...), $(def_optargs_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrilqrSolver(A, b) + warm_start!(solver, $(optargs_trilqr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trilqr!(solver, $(args_trilqr...); $(kwargs_trilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) + end -function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TRILQR: 
primal system of %d equations in %d variables\n", m, n) - (verbose > 0) && @printf("TRILQR: dual system of %d equations in %d variables\n", n, m) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats - vₖ₋₁, vₖ, q, t, wₖ₋₃, wₖ₋₂ = solver.vₖ₋₁, solver.vₖ, solver.q, solver.y, solver.wₖ₋₃, solver.wₖ₋₂ - Δx, Δy, warm_start = solver.Δx, solver.Δy, solver.warm_start - rNorms, sNorms = stats.residuals_primal, stats.residuals_dual - reset!(stats) - r₀ = warm_start ? q : b - s₀ = warm_start ? p : c - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - mul!(s₀, Aᵀ, Δy) - @kaxpby!(n, one(FC), c, -one(FC), s₀) + function trilqr($(def_args_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrilqrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trilqr!(solver, $(args_trilqr...); $(kwargs_trilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initial solution x₀ and residual r₀ = b - Ax₀. - x .= zero(FC) # x₀ - bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖ - - # Initial solution y₀ and residual s₀ = c - Aᵀy₀. - t .= zero(FC) # t₀ - cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ - - iter = 0 - itmax == 0 && (itmax = m+n) - - history && push!(rNorms, bNorm) - history && push!(sNorms, cNorm) - εL = atol + rtol * bNorm - εQ = atol + rtol * cNorm - ξ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm) - - # Set up workspace. 
- βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ - γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖ - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= s₀ ./ γₖ # u₁ = (c - Aᵀy₀) / γ₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations - ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ - ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ - - # Stopping criterion. - inconsistent = false - solved_lq = bNorm == 0 - solved_lq_tol = solved_lq_mach = false - solved_cg = solved_cg_tol = solved_cg_mach = false - solved_primal = solved_lq || solved_cg - solved_qr_tol = solved_qr_mach = false - solved_dual = cNorm == 0 - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !((solved_primal && solved_dual) || tired || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the SSY tridiagonalization process. - # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ - - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. 
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function trilqr!(solver :: TrilqrSolver{T,FC,S}, $(def_args_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "TRILQR: primal system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "TRILQR: dual system of %d equations in %d variables\n", n, m) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats + vₖ₋₁, vₖ, q, t, wₖ₋₃, wₖ₋₂ = solver.vₖ₋₁, solver.vₖ, solver.q, solver.y, solver.wₖ₋₃, solver.wₖ₋₂ + Δx, Δy, warm_start = solver.Δx, solver.Δy, solver.warm_start + rNorms, sNorms = stats.residuals_primal, stats.residuals_dual + reset!(stats) + r₀ = warm_start ? q : b + s₀ = warm_start ? p : c + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + mul!(s₀, Aᴴ, Δy) + @kaxpby!(n, one(FC), c, -one(FC), s₀) end - if !solved_primal - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ - end + # Initial solution x₀ and residual r₀ = b - Ax₀. + x .= zero(FC) # x₀ + bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖ + + # Initial solution y₀ and residual s₀ = c - Aᴴy₀. + t .= zero(FC) # t₀ + cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ + + iter = 0 + itmax == 0 && (itmax = m+n) + + history && push!(rNorms, bNorm) + history && push!(sNorms, cNorm) + εL = atol + rtol * bNorm + εQ = atol + rtol * cNorm + ξ = zero(T) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %5s\n", "k", "‖rₖ‖", "‖sₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time)) + + # Set up workspace. 
+ βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ + γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖ + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations + ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ + ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ + wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ + + # Stopping criterion. + inconsistent = false + solved_lq = bNorm == 0 + solved_lq_tol = solved_lq_mach = false + solved_cg = solved_cg_tol = solved_cg_mach = false + solved_primal = solved_lq || solved_cg + solved_qr_tol = solved_qr_mach = false + solved_dual = cNorm == 0 + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ. - # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - if iter ≥ 2 - # Compute solution xₖ. - # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) - end + while !((solved_primal && solved_dual) || tired || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = u₁ - @. d̅ = uₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) - end + # Continue the SSY tridiagonalization process. 
+ # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ - # Compute USYMLQ residual norm - # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) - end - history && push!(rNorms, rNorm_lq) - - # Compute USYMCG residual norm - # ‖rₖ‖ = |ρₖ| - if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) - end + mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ - # Update primal stopping criterion - solved_lq_tol = rNorm_lq ≤ εL - solved_lq_mach = rNorm_lq + 1 ≤ 1 - solved_lq = solved_lq_tol || solved_lq_mach - solved_cg_tol = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) - solved_cg_mach = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1) - solved_cg = solved_cg_tol || solved_cg_mach - solved_primal = solved_lq || solved_cg - end + @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - if !solved_dual - # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ₁e₁. 
if iter == 1 - ψbarₖ = γₖ + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ else - # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] - # [sₖ -cₖ] [ 0 ] [ ψbarₖ] - ψₖ₋₁ = cₖ * ψbarₖ₋₁ - ψbarₖ = sₖ * ψbarₖ₋₁ + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ end - # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ. - # w₁ = v₁ / δ̄₁ - if iter == 2 - wₖ₋₁ = wₖ₋₂ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @. wₖ₋₁ = vₖ₋₁ / conj(δₖ₋₁) + if !solved_primal + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = u₁ + @. 
d̅ = uₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + end + + # Compute USYMLQ residual norm + # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Compute USYMCG residual norm + # ‖rₖ‖ = |ρₖ| + if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) + end + + # Update primal stopping criterion + solved_lq_tol = rNorm_lq ≤ εL + solved_lq_mach = rNorm_lq + 1 ≤ 1 + solved_lq = solved_lq_tol || solved_lq_mach + solved_cg_tol = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) + solved_cg_mach = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1) + solved_cg = solved_cg_tol || solved_cg_mach + solved_primal = solved_lq || solved_cg end - # w₂ = (v₂ - λ̄₁w₁) / δ̄₂ - if iter == 3 - wₖ₋₁ = wₖ₋₃ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + if !solved_dual + # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ₁e₁. + if iter == 1 + ψbarₖ = γₖ + else + # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] + # [sₖ -cₖ] [ 0 ] [ ψbarₖ] + ψₖ₋₁ = cₖ * ψbarₖ₋₁ + ψbarₖ = sₖ * ψbarₖ₋₁ + end + + # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ. + # w₁ = v₁ / δ̄₁ + if iter == 2 + wₖ₋₁ = wₖ₋₂ + @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + @. wₖ₋₁ = vₖ₋₁ / conj(δₖ₋₁) + end + # w₂ = (v₂ - λ̄₁w₁) / δ̄₂ + if iter == 3 + wₖ₋₁ = wₖ₋₃ + @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ + if iter ≥ 4 + @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃) + wₖ₋₁ = wₖ₋₃ + @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + + if iter ≥ 3 + # Swap pointers. 
+ @kswap(wₖ₋₃, wₖ₋₂) + end + + if iter ≥ 2 + # Compute solution tₖ₋₁. + # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ + @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t) + end + + # Update ψbarₖ₋₁ + ψbarₖ₋₁ = ψbarₖ + + # Compute USYMQR residual norm ‖sₖ₋₁‖ = |ψbarₖ|. + sNorm = abs(ψbarₖ) + history && push!(sNorms, sNorm) + + # Compute ‖Asₖ₋₁‖ = |ψbarₖ| * √(|δbarₖ|² + |λbarₖ|²). + AsNorm = abs(ψbarₖ) * √(abs2(δbarₖ) + abs2(cₖ * βₖ₊₁)) + + # Update dual stopping criterion + iter == 1 && (ξ = atol + rtol * AsNorm) + solved_qr_tol = sNorm ≤ εQ + solved_qr_mach = sNorm + 1 ≤ 1 + inconsistent = AsNorm ≤ ξ + solved_dual = solved_qr_tol || solved_qr_mach || inconsistent end - # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ - if iter ≥ 4 - @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃) - wₖ₋₁ = wₖ₋₃ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + # Compute uₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if βₖ₊₁ ≠ zero(T) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + end + if γₖ₊₁ ≠ zero(T) + @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p end + # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. if iter ≥ 3 - # Swap pointers. - @kswap(wₖ₋₃, wₖ₋₂) + ϵₖ₋₃ = ϵₖ₋₂ end - if iter ≥ 2 - # Compute solution tₖ₋₁. - # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ - @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t) + λₖ₋₂ = λₖ₋₁ end - - # Update ψbarₖ₋₁ - ψbarₖ₋₁ = ψbarₖ - - # Compute USYMQR residual norm ‖sₖ₋₁‖ = |ψbarₖ|. - sNorm = abs(ψbarₖ) - history && push!(sNorms, sNorm) - - # Compute ‖Asₖ₋₁‖ = |ψbarₖ| * √(|δbarₖ|² + |λbarₖ|²). - AsNorm = abs(ψbarₖ) * √(abs2(δbarₖ) + abs2(cₖ * βₖ₊₁)) - - # Update dual stopping criterion - iter == 1 && (ξ = atol + rtol * AsNorm) - solved_qr_tol = sNorm ≤ εQ - solved_qr_mach = sNorm + 1 ≤ 1 - inconsistent = AsNorm ≤ ξ - solved_dual = solved_qr_tol || solved_qr_mach || inconsistent - end - - # Compute uₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if βₖ₊₁ ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - end - if γₖ₊₁ ≠ zero(T) - @. 
uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + δbarₖ₋₁ = δbarₖ + cₖ₋₁ = cₖ + sₖ₋₁ = sₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e %.2fs\n", iter, "✗ ✗ ✗ ✗", sNorm, ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s %.2fs\n", iter, rNorm_lq, "✗ ✗ ✗ ✗", ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, rNorm_lq, sNorm, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. - if iter ≥ 3 - ϵₖ₋₃ = ϵₖ₋₂ + # Compute USYMCG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - if iter ≥ 2 - λₖ₋₂ = λₖ₋₁ - end - δbarₖ₋₁ = δbarₖ - cₖ₋₁ = cₖ - sₖ₋₁ = sₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm) - kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "") - kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol") + solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") + !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") + solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") + solved_cg_tol && 
solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") + solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") + solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ") + !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") + solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") + solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") + solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") + solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") + solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") + solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && @kaxpy!(m, one(FC), Δy, t) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved_primal = solved_primal + stats.solved_dual = solved_dual + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - # Compute USYMCG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) - end - - tired && (status = "maximum number of iterations exceeded") - solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is 
good enough given atol and rtol") - solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") - !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") - solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") - solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") - solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") - solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ") - !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") - solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") - solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") - solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") - solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") - solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") - solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(n, one(FC), Δx, x) - warm_start && @kaxpy!(m, one(FC), Δy, t) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.status = status - stats.solved_primal = solved_primal - stats.solved_dual = 
solved_dual - return solver end diff --git a/src/trimr.jl b/src/trimr.jl index bc53633c2..ae61b785a 100644 --- a/src/trimr.jl +++ b/src/trimr.jl @@ -13,30 +13,31 @@ export trimr, trimr! """ (x, y, stats) = trimr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - spd::Bool=false, snd::Bool=false, flip::Bool=false, sp::Bool=false, - τ::T=one(T), ν::T=-one(T), itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + spd::Bool=false, snd::Bool=false, + flip::Bool=false, sp::Bool=false, + τ::T=one(T), ν::T=-one(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -TriMR solves the symmetric linear system + (x, y, stats) = trimr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + +Given a matrix `A` of dimension m × n, TriMR solves the symmetric linear system [ τE A ] [ x ] = [ b ] - [ Aᵀ νF ] [ y ] [ c ], + [ Aᴴ νF ] [ y ] [ c ], -where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0. +of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0. `b` and `c` must both be nonzero. TriMR handles saddle-point systems (`τ = 0` or `ν = 0`) and adjoint systems (`τ = 0` and `ν = 0`) without any risk of breakdown. By default, TriMR solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1. -If `flip = true`, TriMR solves another known variant of SQD systems where τ = -1 and ν = 1. -If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved. -If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved. 
-If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is solved. -`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems. TriMR is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. @@ -50,17 +51,41 @@ It's the Euclidean norm when `M` and `N` are identity operators. TriMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -TriMR can be warm-started from initial guesses `x0` and `y0` with the method +#### Optional arguments - (x, y, stats) = trimr(A, b, c, x0, y0; kwargs...) +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear system; +* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems; +* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems; +* `sp`: if `true`, set `τ = 1` and `ν = 0` for saddle-point systems; +* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -68,18 +93,6 @@ and `false` otherwise. """ function trimr end -function trimr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) 
where FC <: FloatOrComplex - solver = TrimrSolver(A, b) - trimr!(solver, A, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function trimr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = TrimrSolver(A, b) - trimr!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = trimr!(solver::TrimrSolver, A, b, c; kwargs...) solver = trimr!(solver::TrimrSolver, A, b, c, x0, y0; kwargs...) @@ -90,424 +103,477 @@ See [`TrimrSolver`](@ref) for more details about the `solver`. """ function trimr! end -function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - trimr!(solver, A, b, c; kwargs...) - return solver -end - -function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TriMR: system of %d equations in %d variables\n", m+n, m+n) - - # Check flip, sp, spd and snd parameters - spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !") - spd && snd && error("The matrix cannot be symmetric positive definite and symmetric negative definite !") - spd && sp && error("The matrix cannot be symmetric positive definite and a saddle-point !") - snd && flip && error("The matrix cannot be 
symmetric negative definite and symmetric quasi-definite !") - snd && sp && error("The matrix cannot be symmetric negative definite and a saddle-point !") - sp && flip && error("The matrix cannot be symmetric quasi-definite and a saddle-point !") - - # Check M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Determine τ and ν associated to SQD, SPD or SND systems. - flip && (τ = -one(T) ; ν = one(T)) - spd && (τ = one(T) ; ν = one(T)) - snd && (τ = -one(T) ; ν = -one(T)) - sp && (τ = one(T) ; ν = zero(T)) - - warm_start = solver.warm_start - warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") - warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :vₖ, S, m) - allocate_if(!NisI, solver, :uₖ, S, n) - Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p - Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q - gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ - gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ - vₖ = MisI ? M⁻¹vₖ : solver.vₖ - uₖ = NisI ? N⁻¹uₖ : solver.uₖ - vₖ₊₁ = MisI ? q : M⁻¹vₖ₋₁ - uₖ₊₁ = NisI ? p : N⁻¹uₖ₋₁ - b₀ = warm_start ? q : b - c₀ = warm_start ? p : c - - stats = solver.stats - rNorms = stats.residuals - reset!(stats) - - # Initial solutions x₀ and y₀. - xₖ .= zero(FC) - yₖ .= zero(FC) - - iter = 0 - itmax == 0 && (itmax = m+n) - - # Initialize preconditioned orthogonal tridiagonalization process. 
- M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 - N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 - - # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] - # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ] - if warm_start - mul!(b₀, A, Δy) - (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) - @kaxpby!(m, one(FC), b, -one(FC), b₀) - mul!(c₀, Aᵀ, Δx) - (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) - @kaxpby!(n, one(FC), c, -one(FC), c₀) +def_args_trimr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_trimr = (:(x0::AbstractVector), + :(y0::AbstractVector)) + +def_kwargs_trimr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; spd::Bool = false ), + :(; snd::Bool = false ), + :(; flip::Bool = false ), + :(; sp::Bool = false ), + :(; τ::T = one(T) ), + :(; ν::T = -one(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_trimr = mapreduce(extract_parameters, vcat, def_kwargs_trimr) + +args_trimr = (:A, :b, :c) +optargs_trimr = (:x0, :y0) +kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function trimr($(def_args_trimr...), $(def_optargs_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrimrSolver(A, b) + warm_start!(solver, $(optargs_trimr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trimr!(solver, $(args_trimr...); $(kwargs_trimr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # β₁Ev₁ = b ↔ β₁v₁ = Mb - M⁻¹vₖ .= b₀ - MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E - if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(m, one(FC) / βₖ, vₖ) - else - error("b must be nonzero") + function trimr($(def_args_trimr...); 
$(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrimrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trimr!(solver, $(args_trimr...); $(kwargs_trimr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # γ₁Fu₁ = c ↔ γ₁u₁ = Nc - N⁻¹uₖ .= c₀ - NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) - γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F - if γₖ ≠ 0 - @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) - NisI || @kscal!(n, one(FC) / γₖ, uₖ) - else - error("c must be nonzero") - end + function trimr!(solver :: TrimrSolver{T,FC,S}, $(def_args_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "TriMR: system of %d equations in %d variables\n", m+n, m+n) + + # Check flip, sp, spd and snd parameters + spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !") + spd && snd && error("The matrix cannot be symmetric positive definite and symmetric negative definite !") + spd && sp && error("The matrix cannot be symmetric positive definite and a saddle-point !") + snd && flip && error("The matrix cannot be symmetric negative definite and symmetric quasi-definite !") + snd && sp && error("The matrix cannot be symmetric negative definite and a saddle-point !") + sp && flip && error("The matrix cannot be symmetric quasi-definite and a saddle-point !") + + # Check M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. 
This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Determine τ and ν associated to SQD, SPD or SND systems. + flip && (τ = -one(T) ; ν = one(T)) + spd && (τ = one(T) ; ν = one(T)) + snd && (τ = -one(T) ; ν = -one(T)) + sp && (τ = one(T) ; ν = zero(T)) + + warm_start = solver.warm_start + warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") + warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :vₖ, S, m) + allocate_if(!NisI, solver, :uₖ, S, n) + Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p + Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q + gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ + gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ + vₖ = MisI ? M⁻¹vₖ : solver.vₖ + uₖ = NisI ? N⁻¹uₖ : solver.uₖ + vₖ₊₁ = MisI ? q : M⁻¹vₖ₋₁ + uₖ₊₁ = NisI ? p : N⁻¹uₖ₋₁ + b₀ = warm_start ? q : b + c₀ = warm_start ? p : c + + stats = solver.stats + rNorms = stats.residuals + reset!(stats) + + # Initial solutions x₀ and y₀. + xₖ .= zero(FC) + yₖ .= zero(FC) + + iter = 0 + itmax == 0 && (itmax = m+n) + + # Initialize preconditioned orthogonal tridiagonalization process. + M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 + N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 + + # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] + # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] + if warm_start + mul!(b₀, A, Δy) + (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) + @kaxpby!(m, one(FC), b, -one(FC), b₀) + mul!(c₀, Aᴴ, Δx) + (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) + @kaxpby!(n, one(FC), c, -one(FC), c₀) + end - # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ. 
- gx₂ₖ₋₃ .= zero(FC) - gy₂ₖ₋₃ .= zero(FC) - gx₂ₖ₋₂ .= zero(FC) - gy₂ₖ₋₂ .= zero(FC) - gx₂ₖ₋₁ .= zero(FC) - gy₂ₖ₋₁ .= zero(FC) - gx₂ₖ .= zero(FC) - gy₂ₖ .= zero(FC) - - # Compute ‖r₀‖² = (γ₁)² + (β₁)² - rNorm = sqrt(γₖ^2 + βₖ^2) - history && push!(rNorms, rNorm) - ε = atol + rtol * rNorm - - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) - - # Set up workspace. - old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T) - old_s₁ₖ = old_s₂ₖ = old_s₃ₖ = old_s₄ₖ = zero(FC) - σbar₂ₖ₋₂ = ηbar₂ₖ₋₃ = λbar₂ₖ₋₃ = μ₂ₖ₋₅ = λ₂ₖ₋₄ = μ₂ₖ₋₄ = zero(FC) - πbar₂ₖ₋₁ = βₖ - πbar₂ₖ = γₖ - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - θbarₖ = δbar₂ₖ₋₁ = δbar₂ₖ = σbar₂ₖ₋₁ = σbar₂ₖ = λbar₂ₖ₋₁ = ηbar₂ₖ₋₁ = zero(FC) - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the orthogonal tridiagonalization process. 
- # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ - - if iter ≥ 2 - @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ - @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + # β₁Ev₁ = b ↔ β₁v₁ = Mb + M⁻¹vₖ .= b₀ + MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) + βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E + if βₖ ≠ 0 + @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) + MisI || @kscal!(m, one(FC) / βₖ, vₖ) + else + error("b must be nonzero") end - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + # γ₁Fu₁ = c ↔ γ₁u₁ = Nc + N⁻¹uₖ .= c₀ + NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) + γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F + if γₖ ≠ 0 + @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) + NisI || @kscal!(n, one(FC) / γₖ, uₖ) + else + error("c must be nonzero") + end - @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ - @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ. + gx₂ₖ₋₃ .= zero(FC) + gy₂ₖ₋₃ .= zero(FC) + gx₂ₖ₋₂ .= zero(FC) + gy₂ₖ₋₂ .= zero(FC) + gx₂ₖ₋₁ .= zero(FC) + gy₂ₖ₋₁ .= zero(FC) + gx₂ₖ .= zero(FC) + gy₂ₖ .= zero(FC) + + # Compute ‖r₀‖² = (γ₁)² + (β₁)² + rNorm = sqrt(γₖ^2 + βₖ^2) + history && push!(rNorms, rNorm) + ε = atol + rtol * rNorm - # Compute vₖ₊₁ and uₖ₊₁ - MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ - NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ, γₖ, ktimer(start_time)) - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E - γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F + # Set up workspace. 
+ old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T) + old_s₁ₖ = old_s₂ₖ = old_s₃ₖ = old_s₄ₖ = zero(FC) + σbar₂ₖ₋₂ = ηbar₂ₖ₋₃ = λbar₂ₖ₋₃ = μ₂ₖ₋₅ = λ₂ₖ₋₄ = μ₂ₖ₋₄ = zero(FC) + πbar₂ₖ₋₁ = βₖ + πbar₂ₖ = γₖ - # βₖ₊₁ ≠ 0 - if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, q) - MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - end + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - # γₖ₊₁ ≠ 0 - if γₖ₊₁ > btol - @kscal!(n, one(FC) / γₖ₊₁, p) - NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) - end + # Stopping criterion. + breakdown = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false - # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] - # [0 u₁ ••• 0 uₖ] - # - # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ - # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ] - # - # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ - # [ Aᵀ νF ] [ 0 F ] - # - # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖ - # - # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. 
- # [ Oᵀ ] - if iter == 1 - θbarₖ = conj(αₖ) - δbar₂ₖ₋₁ = τ - δbar₂ₖ = ν - σbar₂ₖ₋₁ = αₖ - σbar₂ₖ = βₖ₊₁ - λbar₂ₖ₋₁ = γₖ₊₁ - ηbar₂ₖ₋₁ = zero(FC) - else - # Apply previous reflections - # [ 1 ][ 1 ][ c₂.ₖ₋₁ s₂.ₖ₋₁ ][ 1 ] - # Ζₖ₋₁ = [ c₄.ₖ₋₁ s₄.ₖ₋₁ ][ c₃.ₖ₋₁ s₃.ₖ₋₁ ][ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ][ c₁.ₖ₋₁ s₁.ₖ₋₁ ] - # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ][ 1 ][ 1 ][ 1 ] - # [ 1 ][ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ][ 1 ][ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] - # - # [ δbar₂ₖ₋₃ σbar₂ₖ₋₃ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ δ₂ₖ₋₃ σ₂ₖ₋₃ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # Ζₖ₋₁ * [ θbarₖ₋₁ δbar₂ₖ₋₂ σbar₂ₖ₋₂ 0 0 0 ] = [ 0 δ₂ₖ₋₂ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] - # [ 0 βₖ τ αₖ 0 γₖ₊₁ ] [ 0 0 δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] - # [ γₖ 0 ᾱₖ ν βₖ₊₁ 0 ] [ 0 0 θbarₖ δbar₂ₖ σbar₂ₖ 0 ] - # - # [ 1 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] - # [ c₁.ₖ₋₁ s₁.ₖ₋₁ ] [ σbar₂ₖ₋₂ 0 0 0 ] = [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] - # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] - # [ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] [ ᾱₖ ν βₖ₊₁ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] - σbis₂ₖ₋₂ = old_c₁ₖ * σbar₂ₖ₋₂ + old_s₁ₖ * conj(αₖ) - ηbis₂ₖ₋₂ = old_s₁ₖ * ν - λbis₂ₖ₋₂ = old_s₁ₖ * βₖ₊₁ - θbisₖ = conj(old_s₁ₖ) * σbar₂ₖ₋₂ - old_c₁ₖ * conj(αₖ) - δbis₂ₖ = - old_c₁ₖ * ν - σbis₂ₖ = - old_c₁ₖ * βₖ₊₁ - # [ c₂.ₖ₋₁ s₂.ₖ₋₁ ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # [ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ] [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] = [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] - # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] - # [ 1 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] - η₂ₖ₋₃ = old_c₂ₖ * ηbar₂ₖ₋₃ + old_s₂ₖ * σbis₂ₖ₋₂ - λ₂ₖ₋₃ = old_c₂ₖ * λbar₂ₖ₋₃ + old_s₂ₖ * ηbis₂ₖ₋₂ - μ₂ₖ₋₃ = old_s₂ₖ * λbis₂ₖ₋₂ - σhat₂ₖ₋₂ = conj(old_s₂ₖ) * ηbar₂ₖ₋₃ - old_c₂ₖ * σbis₂ₖ₋₂ - ηhat₂ₖ₋₂ = conj(old_s₂ₖ) * λbar₂ₖ₋₃ - old_c₂ₖ * ηbis₂ₖ₋₂ - λhat₂ₖ₋₂ = - old_c₂ₖ * λbis₂ₖ₋₂ - # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # [ c₃.ₖ₋₁ s₃.ₖ₋₁ ] [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] = [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] - # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] - # [ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] - σtmp₂ₖ₋₂ = old_c₃ₖ * 
σhat₂ₖ₋₂ + old_s₃ₖ * θbisₖ - ηtmp₂ₖ₋₂ = old_c₃ₖ * ηhat₂ₖ₋₂ + old_s₃ₖ * δbis₂ₖ - λtmp₂ₖ₋₂ = old_c₃ₖ * λhat₂ₖ₋₂ + old_s₃ₖ * σbis₂ₖ - θbarₖ = conj(old_s₃ₖ) * σhat₂ₖ₋₂ - old_c₃ₖ * θbisₖ - δbar₂ₖ = conj(old_s₃ₖ) * ηhat₂ₖ₋₂ - old_c₃ₖ * δbis₂ₖ - σbar₂ₖ = conj(old_s₃ₖ) * λhat₂ₖ₋₂ - old_c₃ₖ * σbis₂ₖ - # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # [ c₄.ₖ₋₁ s₄.ₖ₋₁ ] [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] = [ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] - # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ] [ τ αₖ 0 γₖ₊₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] - # [ 1 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] - σ₂ₖ₋₂ = old_c₄ₖ * σtmp₂ₖ₋₂ + old_s₄ₖ * τ - η₂ₖ₋₂ = old_c₄ₖ * ηtmp₂ₖ₋₂ + old_s₄ₖ * αₖ - λ₂ₖ₋₂ = old_c₄ₖ * λtmp₂ₖ₋₂ - μ₂ₖ₋₂ = old_s₄ₖ * γₖ₊₁ - δbar₂ₖ₋₁ = conj(old_s₄ₖ) * σtmp₂ₖ₋₂ - old_c₄ₖ * τ - σbar₂ₖ₋₁ = conj(old_s₄ₖ) * ηtmp₂ₖ₋₂ - old_c₄ₖ * αₖ - ηbar₂ₖ₋₁ = conj(old_s₄ₖ) * λtmp₂ₖ₋₂ - λbar₂ₖ₋₁ = - old_c₄ₖ * γₖ₊₁ - end + θbarₖ = δbar₂ₖ₋₁ = δbar₂ₖ = σbar₂ₖ₋₁ = σbar₂ₖ = λbar₂ₖ₋₁ = ηbar₂ₖ₋₁ = zero(FC) - # [ 1 ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] - # [ c₁.ₖ s₁.ₖ ] [ θbarₖ δbar₂ₖ ] = [ θₖ δbar₂ₖ ] - # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] - # [ s̄₁.ₖ -c₁.ₖ ] [ γₖ₊₁ 0 ] [ 0 gₖ ] - (c₁ₖ, s₁ₖ, θₖ) = sym_givens(θbarₖ, γₖ₊₁) - gₖ = conj(s₁ₖ) * δbar₂ₖ - δbar₂ₖ = c₁ₖ * δbar₂ₖ - - # [ c₂.ₖ s₂.ₖ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] - # [ s̄₂.ₖ -c₂.ₖ ] [ θₖ δbar₂ₖ ] = [ 0 δbis₂ₖ ] - # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] - # [ 1 ] [ 0 gₖ ] [ 0 gₖ ] - (c₂ₖ, s₂ₖ, δ₂ₖ₋₁) = sym_givens(δbar₂ₖ₋₁, θₖ) - σ₂ₖ₋₁ = c₂ₖ * σbar₂ₖ₋₁ + s₂ₖ * δbar₂ₖ - δbis₂ₖ = conj(s₂ₖ) * σbar₂ₖ₋₁ - c₂ₖ * δbar₂ₖ - - # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] - # [ c₃.ₖ s₃.ₖ ] [ 0 δbis₂ₖ ] = [ 0 δhat₂ₖ ] - # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] - # [ s̄₃.ₖ -c₃.ₖ ] [ 0 gₖ ] [ 0 0 ] - (c₃ₖ, s₃ₖ, δhat₂ₖ) = sym_givens(δbis₂ₖ, gₖ) - - # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] - # [ c₄.ₖ s₄.ₖ ] [ 0 δhat₂ₖ ] = [ 0 δ₂ₖ ] - # [ s̄₄.ₖ -c₄.ₖ ] [ 0 βₖ₊₁ ] [ 0 0 ] - # [ 1 ] [ 0 0 ] [ 0 0 ] - (c₄ₖ, s₄ₖ, δ₂ₖ) = sym_givens(δhat₂ₖ, βₖ₊₁) - - # Solve Gₖ = Wₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Gₖ)ᵀ = (Wₖ)ᵀ. 
- if iter == 1 - # [ δ₁ 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] - # [ σ₁ δ₂ ] [ gx₂ gy₂ ] [ 0 u₁ ] - @. gx₂ₖ₋₁ = vₖ / δ₂ₖ₋₁ - @. gx₂ₖ = - σ₂ₖ₋₁ / δ₂ₖ * gx₂ₖ₋₁ - @. gy₂ₖ = uₖ / δ₂ₖ - elseif iter == 2 - # [ η₁ σ₂ δ₃ 0 ] [ gx₁ gy₁ ] = [ v₂ 0 ] - # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ] - # [ gx₃ gy₃ ] - # [ gx₄ gy₄ ] - @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) - @kswap(gx₂ₖ₋₂, gx₂ₖ) - @kswap(gy₂ₖ₋₂, gy₂ₖ) - @. gx₂ₖ₋₁ = (vₖ - η₂ₖ₋₃ * gx₂ₖ₋₃ - σ₂ₖ₋₂ * gx₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. gx₂ₖ = ( - λ₂ₖ₋₃ * gx₂ₖ₋₃ - η₂ₖ₋₂ * gx₂ₖ₋₂ - σ₂ₖ₋₁ * gx₂ₖ₋₁) / δ₂ₖ - @. gy₂ₖ₋₁ = ( - η₂ₖ₋₃ * gy₂ₖ₋₃ - σ₂ₖ₋₂ * gy₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. gy₂ₖ = (uₖ - λ₂ₖ₋₃ * gy₂ₖ₋₃ - η₂ₖ₋₂ * gy₂ₖ₋₂ - σ₂ₖ₋₁ * gy₂ₖ₋₁) / δ₂ₖ - else - # μ₂ₖ₋₅ * gx₂ₖ₋₅ + λ₂ₖ₋₄ * gx₂ₖ₋₄ + η₂ₖ₋₃ * gx₂ₖ₋₃ + σ₂ₖ₋₂ * gx₂ₖ₋₂ + δ₂ₖ₋₁ * gx₂ₖ₋₁ = vₖ - # μ₂ₖ₋₄ * gx₂ₖ₋₄ + λ₂ₖ₋₃ * gx₂ₖ₋₃ + η₂ₖ₋₂ * gx₂ₖ₋₂ + σ₂ₖ₋₁ * gx₂ₖ₋₁ + δ₂ₖ * gx₂ₖ = 0 - g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ - @. g₂ₖ₋₁ = (vₖ - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. g₂ₖ = ( - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ - @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) - @kswap(gx₂ₖ₋₂, gx₂ₖ) - # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0 - # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ - g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ - @. g₂ₖ₋₁ = ( - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. g₂ₖ = (uₖ - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ - @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁) - @kswap(gy₂ₖ₋₂, gy₂ₖ) - end + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. 
+ iter = iter + 1 - # Update p̅ₖ = (Qₖ)ᵀ * (β₁e₁ + γ₁e₂) - πbis₂ₖ = c₁ₖ * πbar₂ₖ - πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ - # - π₂ₖ₋₁ = c₂ₖ * πbar₂ₖ₋₁ + s₂ₖ * πbis₂ₖ - πhat₂ₖ = conj(s₂ₖ) * πbar₂ₖ₋₁ - c₂ₖ * πbis₂ₖ - # - πtmp₂ₖ = c₃ₖ * πhat₂ₖ + s₃ₖ * πbis₂ₖ₊₂ - πbar₂ₖ₊₂ = conj(s₃ₖ) * πhat₂ₖ - c₃ₖ * πbis₂ₖ₊₂ - # - π₂ₖ = c₄ₖ * πtmp₂ₖ - πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ - - # Update xₖ = Gxₖ * pₖ - @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) - @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) - - # Update yₖ = Gyₖ * pₖ - @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) - @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) - - # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|² - rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂)) - history && push!(rNorms, rNorm) + # Continue the orthogonal tridiagonalization process. + # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ + + if iter ≥ 2 + @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ + @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + end + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ + @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + + # Compute vₖ₊₁ and uₖ₊₁ + MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ + NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ + + βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E + γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F - # Update vₖ and uₖ - MisI || (vₖ .= vₖ₊₁) - NisI || (uₖ .= uₖ₊₁) - - # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ - M⁻¹vₖ₋₁ .= M⁻¹vₖ - N⁻¹uₖ₋₁ .= N⁻¹uₖ - - # Update M⁻¹vₖ and N⁻¹uₖ - M⁻¹vₖ .= q - N⁻¹uₖ .= p - - # Update cosines and sines - old_s₁ₖ = s₁ₖ - old_s₂ₖ = s₂ₖ - old_s₃ₖ = s₃ₖ - old_s₄ₖ = s₄ₖ - old_c₁ₖ = c₁ₖ - old_c₂ₖ = c₂ₖ - old_c₃ₖ = c₃ₖ - old_c₄ₖ = c₄ₖ - - # Update workspace - βₖ = βₖ₊₁ - γₖ = γₖ₊₁ - σbar₂ₖ₋₂ = σbar₂ₖ - ηbar₂ₖ₋₃ = ηbar₂ₖ₋₁ - λbar₂ₖ₋₃ = λbar₂ₖ₋₁ - if iter ≥ 2 - μ₂ₖ₋₅ = μ₂ₖ₋₃ - μ₂ₖ₋₄ = μ₂ₖ₋₂ - λ₂ₖ₋₄ = λ₂ₖ₋₂ + # βₖ₊₁ ≠ 
0 + if βₖ₊₁ > btol + @kscal!(m, one(FC) / βₖ₊₁, q) + MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + end + + # γₖ₊₁ ≠ 0 + if γₖ₊₁ > btol + @kscal!(n, one(FC) / γₖ₊₁, p) + NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) + end + + # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] + # [0 u₁ ••• 0 uₖ] + # + # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ + # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ] + # + # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ + # [ Aᴴ νF ] [ 0 F ] + # + # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖ + # + # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. + # [ Oᵀ ] + if iter == 1 + θbarₖ = conj(αₖ) + δbar₂ₖ₋₁ = τ + δbar₂ₖ = ν + σbar₂ₖ₋₁ = αₖ + σbar₂ₖ = βₖ₊₁ + λbar₂ₖ₋₁ = γₖ₊₁ + ηbar₂ₖ₋₁ = zero(FC) + else + # Apply previous reflections + # [ 1 ][ 1 ][ c₂.ₖ₋₁ s₂.ₖ₋₁ ][ 1 ] + # Ζₖ₋₁ = [ c₄.ₖ₋₁ s₄.ₖ₋₁ ][ c₃.ₖ₋₁ s₃.ₖ₋₁ ][ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ][ c₁.ₖ₋₁ s₁.ₖ₋₁ ] + # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ][ 1 ][ 1 ][ 1 ] + # [ 1 ][ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ][ 1 ][ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] + # + # [ δbar₂ₖ₋₃ σbar₂ₖ₋₃ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ δ₂ₖ₋₃ σ₂ₖ₋₃ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # Ζₖ₋₁ * [ θbarₖ₋₁ δbar₂ₖ₋₂ σbar₂ₖ₋₂ 0 0 0 ] = [ 0 δ₂ₖ₋₂ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] + # [ 0 βₖ τ αₖ 0 γₖ₊₁ ] [ 0 0 δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] + # [ γₖ 0 ᾱₖ ν βₖ₊₁ 0 ] [ 0 0 θbarₖ δbar₂ₖ σbar₂ₖ 0 ] + # + # [ 1 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] + # [ c₁.ₖ₋₁ s₁.ₖ₋₁ ] [ σbar₂ₖ₋₂ 0 0 0 ] = [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] + # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] + # [ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] [ ᾱₖ ν βₖ₊₁ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] + σbis₂ₖ₋₂ = old_c₁ₖ * σbar₂ₖ₋₂ + old_s₁ₖ * conj(αₖ) + ηbis₂ₖ₋₂ = old_s₁ₖ * ν + λbis₂ₖ₋₂ = old_s₁ₖ * βₖ₊₁ + θbisₖ = conj(old_s₁ₖ) * σbar₂ₖ₋₂ - old_c₁ₖ * conj(αₖ) + δbis₂ₖ = - old_c₁ₖ * ν + σbis₂ₖ = - old_c₁ₖ * βₖ₊₁ + # [ c₂.ₖ₋₁ s₂.ₖ₋₁ ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # [ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ] [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] = [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] + # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] + # [ 1 ] [ θbisₖ δbis₂ₖ 
σbis₂ₖ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] + η₂ₖ₋₃ = old_c₂ₖ * ηbar₂ₖ₋₃ + old_s₂ₖ * σbis₂ₖ₋₂ + λ₂ₖ₋₃ = old_c₂ₖ * λbar₂ₖ₋₃ + old_s₂ₖ * ηbis₂ₖ₋₂ + μ₂ₖ₋₃ = old_s₂ₖ * λbis₂ₖ₋₂ + σhat₂ₖ₋₂ = conj(old_s₂ₖ) * ηbar₂ₖ₋₃ - old_c₂ₖ * σbis₂ₖ₋₂ + ηhat₂ₖ₋₂ = conj(old_s₂ₖ) * λbar₂ₖ₋₃ - old_c₂ₖ * ηbis₂ₖ₋₂ + λhat₂ₖ₋₂ = - old_c₂ₖ * λbis₂ₖ₋₂ + # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # [ c₃.ₖ₋₁ s₃.ₖ₋₁ ] [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] = [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] + # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] + # [ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] + σtmp₂ₖ₋₂ = old_c₃ₖ * σhat₂ₖ₋₂ + old_s₃ₖ * θbisₖ + ηtmp₂ₖ₋₂ = old_c₃ₖ * ηhat₂ₖ₋₂ + old_s₃ₖ * δbis₂ₖ + λtmp₂ₖ₋₂ = old_c₃ₖ * λhat₂ₖ₋₂ + old_s₃ₖ * σbis₂ₖ + θbarₖ = conj(old_s₃ₖ) * σhat₂ₖ₋₂ - old_c₃ₖ * θbisₖ + δbar₂ₖ = conj(old_s₃ₖ) * ηhat₂ₖ₋₂ - old_c₃ₖ * δbis₂ₖ + σbar₂ₖ = conj(old_s₃ₖ) * λhat₂ₖ₋₂ - old_c₃ₖ * σbis₂ₖ + # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # [ c₄.ₖ₋₁ s₄.ₖ₋₁ ] [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] = [ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] + # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ] [ τ αₖ 0 γₖ₊₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] + # [ 1 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] + σ₂ₖ₋₂ = old_c₄ₖ * σtmp₂ₖ₋₂ + old_s₄ₖ * τ + η₂ₖ₋₂ = old_c₄ₖ * ηtmp₂ₖ₋₂ + old_s₄ₖ * αₖ + λ₂ₖ₋₂ = old_c₄ₖ * λtmp₂ₖ₋₂ + μ₂ₖ₋₂ = old_s₄ₖ * γₖ₊₁ + δbar₂ₖ₋₁ = conj(old_s₄ₖ) * σtmp₂ₖ₋₂ - old_c₄ₖ * τ + σbar₂ₖ₋₁ = conj(old_s₄ₖ) * ηtmp₂ₖ₋₂ - old_c₄ₖ * αₖ + ηbar₂ₖ₋₁ = conj(old_s₄ₖ) * λtmp₂ₖ₋₂ + λbar₂ₖ₋₁ = - old_c₄ₖ * γₖ₊₁ + end + + # [ 1 ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] + # [ c₁.ₖ s₁.ₖ ] [ θbarₖ δbar₂ₖ ] = [ θₖ δbar₂ₖ ] + # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] + # [ s̄₁.ₖ -c₁.ₖ ] [ γₖ₊₁ 0 ] [ 0 gₖ ] + (c₁ₖ, s₁ₖ, θₖ) = sym_givens(θbarₖ, γₖ₊₁) + gₖ = conj(s₁ₖ) * δbar₂ₖ + δbar₂ₖ = c₁ₖ * δbar₂ₖ + + # [ c₂.ₖ s₂.ₖ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] + # [ s̄₂.ₖ -c₂.ₖ ] [ θₖ δbar₂ₖ ] = [ 0 δbis₂ₖ ] + # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] + # [ 1 ] [ 0 gₖ ] [ 0 gₖ ] + (c₂ₖ, s₂ₖ, δ₂ₖ₋₁) = sym_givens(δbar₂ₖ₋₁, θₖ) + σ₂ₖ₋₁ = 
c₂ₖ * σbar₂ₖ₋₁ + s₂ₖ * δbar₂ₖ + δbis₂ₖ = conj(s₂ₖ) * σbar₂ₖ₋₁ - c₂ₖ * δbar₂ₖ + + # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] + # [ c₃.ₖ s₃.ₖ ] [ 0 δbis₂ₖ ] = [ 0 δhat₂ₖ ] + # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] + # [ s̄₃.ₖ -c₃.ₖ ] [ 0 gₖ ] [ 0 0 ] + (c₃ₖ, s₃ₖ, δhat₂ₖ) = sym_givens(δbis₂ₖ, gₖ) + + # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] + # [ c₄.ₖ s₄.ₖ ] [ 0 δhat₂ₖ ] = [ 0 δ₂ₖ ] + # [ s̄₄.ₖ -c₄.ₖ ] [ 0 βₖ₊₁ ] [ 0 0 ] + # [ 1 ] [ 0 0 ] [ 0 0 ] + (c₄ₖ, s₄ₖ, δ₂ₖ) = sym_givens(δhat₂ₖ, βₖ₊₁) + + # Solve Gₖ = Wₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Gₖ)ᵀ = (Wₖ)ᵀ. + if iter == 1 + # [ δ₁ 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] + # [ σ₁ δ₂ ] [ gx₂ gy₂ ] [ 0 u₁ ] + @. gx₂ₖ₋₁ = vₖ / δ₂ₖ₋₁ + @. gx₂ₖ = - σ₂ₖ₋₁ / δ₂ₖ * gx₂ₖ₋₁ + @. gy₂ₖ = uₖ / δ₂ₖ + elseif iter == 2 + # [ η₁ σ₂ δ₃ 0 ] [ gx₁ gy₁ ] = [ v₂ 0 ] + # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ] + # [ gx₃ gy₃ ] + # [ gx₄ gy₄ ] + @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) + @kswap(gx₂ₖ₋₂, gx₂ₖ) + @kswap(gy₂ₖ₋₂, gy₂ₖ) + @. gx₂ₖ₋₁ = (vₖ - η₂ₖ₋₃ * gx₂ₖ₋₃ - σ₂ₖ₋₂ * gx₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. gx₂ₖ = ( - λ₂ₖ₋₃ * gx₂ₖ₋₃ - η₂ₖ₋₂ * gx₂ₖ₋₂ - σ₂ₖ₋₁ * gx₂ₖ₋₁) / δ₂ₖ + @. gy₂ₖ₋₁ = ( - η₂ₖ₋₃ * gy₂ₖ₋₃ - σ₂ₖ₋₂ * gy₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. gy₂ₖ = (uₖ - λ₂ₖ₋₃ * gy₂ₖ₋₃ - η₂ₖ₋₂ * gy₂ₖ₋₂ - σ₂ₖ₋₁ * gy₂ₖ₋₁) / δ₂ₖ + else + # μ₂ₖ₋₅ * gx₂ₖ₋₅ + λ₂ₖ₋₄ * gx₂ₖ₋₄ + η₂ₖ₋₃ * gx₂ₖ₋₃ + σ₂ₖ₋₂ * gx₂ₖ₋₂ + δ₂ₖ₋₁ * gx₂ₖ₋₁ = vₖ + # μ₂ₖ₋₄ * gx₂ₖ₋₄ + λ₂ₖ₋₃ * gx₂ₖ₋₃ + η₂ₖ₋₂ * gx₂ₖ₋₂ + σ₂ₖ₋₁ * gx₂ₖ₋₁ + δ₂ₖ * gx₂ₖ = 0 + g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ + @. g₂ₖ₋₁ = (vₖ - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. g₂ₖ = ( - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ + @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) + @kswap(gx₂ₖ₋₂, gx₂ₖ) + # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0 + # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ + g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ + @. 
g₂ₖ₋₁ = ( - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. g₂ₖ = (uₖ - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ + @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁) + @kswap(gy₂ₖ₋₂, gy₂ₖ) + end + + # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂) + πbis₂ₖ = c₁ₖ * πbar₂ₖ + πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ + # + π₂ₖ₋₁ = c₂ₖ * πbar₂ₖ₋₁ + s₂ₖ * πbis₂ₖ + πhat₂ₖ = conj(s₂ₖ) * πbar₂ₖ₋₁ - c₂ₖ * πbis₂ₖ + # + πtmp₂ₖ = c₃ₖ * πhat₂ₖ + s₃ₖ * πbis₂ₖ₊₂ + πbar₂ₖ₊₂ = conj(s₃ₖ) * πhat₂ₖ - c₃ₖ * πbis₂ₖ₊₂ + # + π₂ₖ = c₄ₖ * πtmp₂ₖ + πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ + + # Update xₖ = Gxₖ * pₖ + @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) + @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) + + # Update yₖ = Gyₖ * pₖ + @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) + @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) + + # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|² + rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂)) + history && push!(rNorms, rNorm) + + # Update vₖ and uₖ + MisI || (vₖ .= vₖ₊₁) + NisI || (uₖ .= uₖ₊₁) + + # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ + M⁻¹vₖ₋₁ .= M⁻¹vₖ + N⁻¹uₖ₋₁ .= N⁻¹uₖ + + # Update M⁻¹vₖ and N⁻¹uₖ + M⁻¹vₖ .= q + N⁻¹uₖ .= p + + # Update cosines and sines + old_s₁ₖ = s₁ₖ + old_s₂ₖ = s₂ₖ + old_s₃ₖ = s₃ₖ + old_s₄ₖ = s₄ₖ + old_c₁ₖ = c₁ₖ + old_c₂ₖ = c₂ₖ + old_c₃ₖ = c₃ₖ + old_c₄ₖ = c₄ₖ + + # Update workspace + βₖ = βₖ₊₁ + γₖ = γₖ₊₁ + σbar₂ₖ₋₂ = σbar₂ₖ + ηbar₂ₖ₋₃ = ηbar₂ₖ₋₁ + λbar₂ₖ₋₃ = λbar₂ₖ₋₁ + if iter ≥ 2 + μ₂ₖ₋₅ = μ₂ₖ₋₃ + μ₂ₖ₋₄ = μ₂ₖ₋₂ + λ₂ₖ₋₄ = λ₂ₖ₋₂ + end + πbar₂ₖ₋₁ = πbar₂ₖ₊₁ + πbar₂ₖ = πbar₂ₖ₊₂ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ₊₁, γₖ₊₁, ktimer(start_time)) end - πbar₂ₖ₋₁ = πbar₂ₖ₊₁ - πbar₂ₖ = πbar₂ₖ₊₂ - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "inconsistent linear system") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(m, one(FC), Δx, xₖ) + warm_start && @kaxpy!(n, one(FC), Δy, yₖ) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !solved && breakdown + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "inconsistent linear system") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(m, one(FC), Δx, xₖ) - warm_start && @kaxpy!(n, one(FC), Δy, 
yₖ) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !solved && breakdown - stats.status = status - return solver end diff --git a/src/usymlq.jl b/src/usymlq.jl index 71670c80f..b80f0a622 100644 --- a/src/usymlq.jl +++ b/src/usymlq.jl @@ -21,34 +21,54 @@ export usymlq, usymlq! """ (x, stats) = usymlq(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_usymcg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using the USYMLQ method. + (x, stats) = usymlq(A, b, c, x0::AbstractVector; kwargs...) + +USYMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +USYMLQ determines the least-norm solution of the consistent linear system Ax = b of size m × n. USYMLQ is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`. -The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`. +The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`. The error norm ‖x - x*‖ monotonously decreases in USYMLQ. It's considered as a generalization of SYMMLQ. It can also be applied to under-determined and over-determined problems. In all cases, problems must be consistent. -An option gives the possibility of transferring to the USYMCG point, -when it exists. The transfer is based on the residual norm. 
+#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional argument -USYMLQ can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = usymlq(A, b, c, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -58,18 +78,6 @@ and `false` otherwise. """ function usymlq end -function usymlq(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = UsymlqSolver(A, b) - usymlq!(solver, A, b, c, x0; kwargs...) 
- return (solver.x, solver.stats) -end - -function usymlq(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = UsymlqSolver(A, b) - usymlq!(solver, A, b, c; kwargs...) - return (solver.x, solver.stats) -end - """ solver = usymlq!(solver::UsymlqSolver, A, b, c; kwargs...) solver = usymlq!(solver::UsymlqSolver, A, b, c, x0; kwargs...) @@ -80,243 +88,290 @@ See [`UsymlqSolver`](@ref) for more details about the `solver`. """ function usymlq! end -function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - usymlq!(solver, A, b, c; kwargs...) - return solver -end - -function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("USYMLQ: system of %d equations in %d variables\n", m, n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x - vₖ₋₁, vₖ, q, d̅, stats = solver.vₖ₋₁, solver.vₖ, solver.q, solver.d̅, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_usymlq = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_usymlq = (:(x0::AbstractVector),) + +def_kwargs_usymlq = (:(; transfer_to_usymcg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_usymlq = mapreduce(extract_parameters, vcat, def_kwargs_usymlq) + +args_usymlq = (:A, :b, :c) +optargs_usymlq = (:x0,) +kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function usymlq($(def_args_usymlq...), $(def_optargs_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymlqSolver(A, b) + warm_start!(solver, $(optargs_usymlq...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymlq!(solver, $(args_usymlq...); $(kwargs_usymlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - bNorm = @knrm2(m, r₀) - history && push!(rNorms, bNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function usymlq($(def_args_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymlqSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymlq!(solver, $(args_usymlq...); $(kwargs_usymlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = m+n) - - ε = atol + rtol * bNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - - βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ - γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ γₖ # u₁ = c / γ₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations - - # Stopping criterion. - solved_lq = bNorm ≤ ε - solved_cg = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved_lq || solved_cg || tired || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the SSY tridiagonalization process. 
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ - - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • • • 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function usymlq!(solver :: UsymlqSolver{T,FC,S}, $(def_args_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "USYMLQ: system of %d equations in %d variables\n", m, n) + + # Check 
type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x + vₖ₋₁, vₖ, q, d̅, stats = solver.vₖ₋₁, solver.vₖ, solver.q, solver.d̅, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + bNorm = @knrm2(m, r₀) + history && push!(rNorms, bNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ. - # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - if iter ≥ 2 - # Compute solution xₖ. 
- # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + iter = 0 + itmax == 0 && (itmax = m+n) + + ε = atol + rtol * bNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) + + βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ + γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations + + # Stopping criterion. + solved_lq = bNorm ≤ ε + solved_cg = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved_lq || solved_cg || tired || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the SSY tridiagonalization process. + # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ + + @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. 
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • • • 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] + + if iter == 1 + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ + else + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + end + + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = u₁ + @. d̅ = uₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + end + + # Compute uₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if βₖ₊₁ ≠ zero(T) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + end + if γₖ₊₁ ≠ zero(T) + @. 
uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + end + + # Compute USYMLQ residual norm + # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Compute USYMCG residual norm + # ‖rₖ‖ = |ρₖ| + if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) + end + + # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ and δbarₖ₋₁. + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + δbarₖ₋₁ = δbarₖ + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + solved_lq = rNorm_lq ≤ ε + solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = u₁ - @. d̅ = uₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + # Compute USYMCG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - # Compute uₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved_lq && (status = "solution xᴸ good enough given atol and rtol") + solved_cg && (status = "solution xᶜ good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - if βₖ₊₁ ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - end - if γₖ₊₁ ≠ zero(T) - @. 
uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p - end - - # Compute USYMLQ residual norm - # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) - end - history && push!(rNorms, rNorm_lq) - - # Compute USYMCG residual norm - # ‖rₖ‖ = |ρₖ| - if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) - end - - # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ and δbarₖ₋₁. - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - δbarₖ₋₁ = δbarₖ - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - solved_lq = rNorm_lq ≤ ε - solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) - end - (verbose > 0) && @printf("\n") + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Compute USYMCG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + # Update stats + stats.niter = iter + stats.solved = solved_lq || solved_cg + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - solved_lq && (status = "solution xᴸ good enough given atol and rtol") - solved_cg && (status = "solution xᶜ good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved_lq || solved_cg - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/usymqr.jl b/src/usymqr.jl index 863390c3f..0aae23335 100644 --- a/src/usymqr.jl +++ b/src/usymqr.jl @@ -21,31 +21,53 @@ export usymqr, usymqr! 
""" (x, stats) = usymqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using the USYMQR method. + (x, stats) = usymqr(A, b, c, x0::AbstractVector; kwargs...) + +USYMQR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +USYMQR solves the linear least-squares problem min ‖b - Ax‖² of size m × n. +USYMQR solves Ax = b if it is consistent. USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`. -The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`. +The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`. The residual norm ‖b - Ax‖ monotonously decreases in USYMQR. It's considered as a generalization of MINRES. It can also be applied to under-determined and over-determined problems. USYMQR finds the minimum-norm solution if problems are inconsistent. -USYMQR can be warm-started from an initial guess `x0` with the method +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional argument - (x, stats) = usymqr(A, b, c, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. 
+#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -55,18 +77,6 @@ and `false` otherwise. """ function usymqr end -function usymqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = UsymqrSolver(A, b) - usymqr!(solver, A, b, c, x0; kwargs...) - return (solver.x, solver.stats) -end - -function usymqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = UsymqrSolver(A, b) - usymqr!(solver, A, b, c; kwargs...) - return (solver.x, solver.stats) -end - """ solver = usymqr!(solver::UsymqrSolver, A, b, c; kwargs...) solver = usymqr!(solver::UsymqrSolver, A, b, c, x0; kwargs...) @@ -77,235 +87,282 @@ See [`UsymqrSolver`](@ref) for more details about the `solver`. """ function usymqr! end -function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector; kwargs...) 
where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - usymqr!(solver, A, b, c; kwargs...) - return solver -end - -function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("USYMQR: system of %d equations in %d variables\n", m, n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p - wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats - warm_start = solver.warm_start - rNorms, AᵀrNorms = stats.residuals, stats.Aresiduals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_usymqr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_usymqr = (:(x0::AbstractVector),) + +def_kwargs_usymqr = (:(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_usymqr = mapreduce(extract_parameters, vcat, def_kwargs_usymqr) + +args_usymqr = (:A, :b, :c) +optargs_usymqr = (:x0,) +kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function usymqr($(def_args_usymqr...), $(def_optargs_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymqrSolver(A, b) + warm_start!(solver, $(optargs_usymqr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymqr!(solver, $(args_usymqr...); $(kwargs_usymqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - rNorm = @knrm2(m, r₀) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function usymqr($(def_args_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymqrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymqr!(solver, $(args_usymqr...); $(kwargs_usymqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = m+n) - - ε = atol + rtol * rNorm - κ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗") - - βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ - γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ γₖ # u₁ = c / γ₁ - cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ - sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ - wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ - ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ - - # Stopping criterion. - solved = rNorm ≤ ε - inconsistent = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || inconsistent || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the SSY tridiagonalization process. 
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ - - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ - - # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] - # [ 0 • • • • • ] [ • • δ₃ • • • • ] - # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] - # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] - # [ • • • • γₖ ] [ • • • λₖ₋₁] - # [ • • βₖ αₖ ] [ 0 • • • • 0 δₖ ] - # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] - # - # If k = 1, we don't have any previous reflexion. - # If k = 2, we apply the last reflexion. - # If k ≥ 3, we only apply the two previous reflexions. - - # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ - if iter ≥ 3 - # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] - # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] - ϵₖ₋₂ = sₖ₋₂ * γₖ - λbarₖ₋₁ = -cₖ₋₂ * γₖ + function usymqr!(solver :: UsymqrSolver{T,FC,S}, $(def_args_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "USYMQR: system of %d equations in %d variables\n", m, n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p + wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats + warm_start = solver.warm_start + rNorms, AᴴrNorms = stats.residuals, stats.Aresiduals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Apply previous Givens reflections Qₖ₋₁.ₖ - if iter ≥ 2 - iter == 2 && (λbarₖ₋₁ = γₖ) - # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] - # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] - λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ - δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + rNorm = @knrm2(m, r₀) + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Compute and apply current Givens reflection Qₖ.ₖ₊₁ - iter == 1 && (δbarₖ = αₖ) - # [cₖ sₖ] [δbarₖ] = [δₖ] - # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] - (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) - - # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] - # [ 0 ] - # - # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] - # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] - ζₖ = cₖ * ζbarₖ - ζbarₖ₊₁ = conj(sₖ) * ζbarₖ - - # Compute the direction wₖ, the last column of Wₖ = Uₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Uₖ)ᵀ. - # w₁ = u₁ / δ₁ - if iter == 1 - wₖ = wₖ₋₁ - @kaxpy!(n, one(FC), uₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # w₂ = (u₂ - λ₁w₁) / δ₂ - if iter == 2 - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), uₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ - if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), uₖ, wₖ) - @. 
wₖ = wₖ / δₖ + iter = 0 + itmax == 0 && (itmax = m+n) + + ε = atol + rtol * rNorm + κ = zero(T) + (verbose > 0) && @printf(iostream, "%5s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8s %.2fs\n", iter, rNorm, " ✗ ✗ ✗ ✗", ktimer(start_time)) + + βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ + γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ + cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ + sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ + wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ + ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ + + # Stopping criterion. + solved = rNorm ≤ ε + inconsistent = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || inconsistent || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the SSY tridiagonalization process. + # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ + + @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + + # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. 
+ # [ Oᵀ ] + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] + # [ 0 • • • • • ] [ • • δ₃ • • • • ] + # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] + # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] + # [ • • • • γₖ ] [ • • • λₖ₋₁] + # [ • • βₖ αₖ ] [ 0 • • • • 0 δₖ ] + # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] + # + # If k = 1, we don't have any previous reflexion. + # If k = 2, we apply the last reflexion. + # If k ≥ 3, we only apply the two previous reflexions. + + # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ + if iter ≥ 3 + # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] + # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] + ϵₖ₋₂ = sₖ₋₂ * γₖ + λbarₖ₋₁ = -cₖ₋₂ * γₖ + end + + # Apply previous Givens reflections Qₖ₋₁.ₖ + if iter ≥ 2 + iter == 2 && (λbarₖ₋₁ = γₖ) + # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] + # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] + λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ + δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ + end + + # Compute and apply current Givens reflection Qₖ.ₖ₊₁ + iter == 1 && (δbarₖ = αₖ) + # [cₖ sₖ] [δbarₖ] = [δₖ] + # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] + (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) + + # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] + # [ 0 ] + # + # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] + # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] + ζₖ = cₖ * ζbarₖ + ζbarₖ₊₁ = conj(sₖ) * ζbarₖ + + # Compute the direction wₖ, the last column of Wₖ = Uₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Uₖ)ᵀ. + # w₁ = u₁ / δ₁ + if iter == 1 + wₖ = wₖ₋₁ + @kaxpy!(n, one(FC), uₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # w₂ = (u₂ - λ₁w₁) / δ₂ + if iter == 2 + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), uₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ + if iter ≥ 3 + @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), uₖ, wₖ) + @. wₖ = wₖ / δₖ + end + + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + ζₖ * wₖ + @kaxpy!(n, ζₖ, wₖ, x) + + # Compute ‖rₖ‖ = |ζbarₖ₊₁|. + rNorm = abs(ζbarₖ₊₁) + history && push!(rNorms, rNorm) + + # Compute ‖Aᴴrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²). 
+ AᴴrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁)) + history && push!(AᴴrNorms, AᴴrNorm) + + # Compute uₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if βₖ₊₁ ≠ zero(T) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + end + if γₖ₊₁ ≠ zero(T) + @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + end + + # Update directions for x. + if iter ≥ 2 + @kswap(wₖ₋₂, wₖ₋₁) + end + + # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ. + if iter ≥ 2 + sₖ₋₂ = sₖ₋₁ + cₖ₋₂ = cₖ₋₁ + end + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + ζbarₖ = ζbarₖ₊₁ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + + # Update stopping criterion. + iter == 1 && (κ = atol + rtol * AᴴrNorm) + user_requested_exit = callback(solver) :: Bool + solved = rNorm ≤ ε + inconsistent = !solved && AᴴrNorm ≤ κ + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %.2fs\n", iter, rNorm, AᴴrNorm, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Compute solution xₖ. - # xₖ ← xₖ₋₁ + ζₖ * wₖ - @kaxpy!(n, ζₖ, wₖ, x) - - # Compute ‖rₖ‖ = |ζbarₖ₊₁|. - rNorm = abs(ζbarₖ₊₁) - history && push!(rNorms, rNorm) - - # Compute ‖Aᵀrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²). - AᵀrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁)) - history && push!(AᵀrNorms, AᵀrNorm) - - # Compute uₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if βₖ₊₁ ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - end - if γₖ₊₁ ≠ zero(T) - @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p - end + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update directions for x. - if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) - end + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ. 
- if iter ≥ 2 - sₖ₋₂ = sₖ₋₁ - cₖ₋₂ = cₖ₋₁ - end - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - ζbarₖ = ζbarₖ₊₁ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - - # Update stopping criterion. - iter == 1 && (κ = atol + rtol * AᵀrNorm) - user_requested_exit = callback(solver) :: Bool - solved = rNorm ≤ ε - inconsistent = !solved && AᵀrNorm ≤ κ - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᵀrNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/test/callback_utils.jl b/test/callback_utils.jl new file mode 100644 index 000000000..f88f01848 --- /dev/null +++ b/test/callback_utils.jl @@ -0,0 +1,152 @@ +mutable struct StorageGetxRestartedGmres{S} + x::S + y::S + p::S +end +StorageGetxRestartedGmres(solver::GmresSolver; N = I) = + StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x)) + +function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, + stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S} + NisI = (N === I) + x2, y2, p2 = stor.x, stor.y, stor.p + n = size(A, 2) + # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. 
+ nr = sum(1:solver.inner_iter) + y = solver.z # yᵢ = zᵢ + y2 .= y + R = solver.R + V = solver.V + x2 .= solver.Δx + for i = solver.inner_iter : -1 : 1 + pos = nr + i - solver.inner_iter # position of rᵢ.ₖ + for j = solver.inner_iter : -1 : i+1 + y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ eps(T)^(3/4) + y2[i] = zero(FC) + inconsistent = true + else + y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + end + end + + # Form xₖ = N⁻¹Vₖyₖ + for i = 1 : solver.inner_iter + Krylov.@kaxpy!(n, y2[i], V[i], x2) + end + if !NisI + p2 .= solver.p + p2 .= x2 + mul!(x2, N, p2) + end + x2 .+= solver.x +end + +mutable struct TestCallbackN2{T, S, M} + A::M + b::S + storage_vec::S + tol::T +end +TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol) + +function (cb_n2::TestCallbackN2)(solver) + mul!(cb_n2.storage_vec, cb_n2.A, solver.x) + cb_n2.storage_vec .-= cb_n2.b + return norm(cb_n2.storage_vec) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2Adjoint{T, S, M} + A::M + b::S + c::S + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol) + +function (cb_n2::TestCallbackN2Adjoint)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) + cb_n2.storage_vec1 .-= cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', solver.y) + cb_n2.storage_vec2 .-= cb_n2.c + return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) +end + +mutable struct TestCallbackN2Shifts{T, S, M} + A::M + b::S + shifts::Vector{T} + tol::T +end +TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol) + +function (cb_n2::TestCallbackN2Shifts)(solver) + r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x) + return all(map(norm, r) .≤ cb_n2.tol) +end + +mutable struct TestCallbackN2LS{T, S, M} + A::M + b::S + λ::T + storage_vec1::S + 
storage_vec2::S + tol::T +end +TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol) + +function (cb_n2::TestCallbackN2LS)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) + cb_n2.storage_vec1 .-= cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1) + cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x + return norm(cb_n2.storage_vec2) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2LN{T, S, M} + A::M + b::S + λ::T + storage_vec::S + tol::T +end +TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol) + +function (cb_n2::TestCallbackN2LN)(solver) + mul!(cb_n2.storage_vec, cb_n2.A, solver.x) + cb_n2.storage_vec .-= cb_n2.b + cb_n2.λ != 0 && (cb_n2.storage_vec .+= cb_n2.λ .* solver.x) + return norm(cb_n2.storage_vec) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2SaddlePts{T, S, M} + A::M + b::S + c::S + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2SaddlePts(A, b, c; tol = 0.1) = + TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol) + +function (cb_n2::TestCallbackN2SaddlePts)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.y) + cb_n2.storage_vec1 .+= solver.x .- cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', solver.x) + cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c + return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) +end + +function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol) + get_x_restarted_gmres!(solver, A, stor, N) + x = stor.x + mul!(storage_vec, A, x) + storage_vec .-= b + return (norm(storage_vec) ≤ tol) +end diff --git a/test/get_div_grad.jl b/test/get_div_grad.jl index 6d6bf012e..ae27e5061 100644 --- a/test/get_div_grad.jl +++ b/test/get_div_grad.jl @@ -1,8 +1,8 @@ # Identity matrix. eye(n::Int; FC=Float64) = sparse(one(FC) * I, n, n) -# Compute the energy norm ‖r‖ₚ = √(rᵀPr) where P is a symmetric and positive definite matrix. 
-metric(r, P) = sqrt(dot(r, P * r)) +# Compute the energy norm ‖r‖ₚ = √(rᴴPr) where P is a symmetric and positive definite matrix. +metric(r, P) = sqrt(real(dot(r, P * r))) # Based on Lars Ruthotto's initial implementation. function get_div_grad(n1 :: Int, n2 :: Int, n3 :: Int) diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl new file mode 100644 index 000000000..9fb6cdffd --- /dev/null +++ b/test/gpu/amd.jl @@ -0,0 +1,111 @@ +using AMDGPU + +include("gpu.jl") + +@testset "AMD -- AMDGPU.jl" begin + + @test AMDGPU.functional() + AMDGPU.allowscalar(false) + + @testset "documentation" begin + A_cpu = rand(ComplexF64, 20, 20) + A_cpu = A_cpu + A_cpu' + b_cpu = rand(ComplexF64, 20) + A_gpu = ROCMatrix(A_cpu) + b_gpu = ROCVector(b_cpu) + x, stats = minres(A_gpu, b_gpu) + end + + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = ROCVector{FC} + M = ROCMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! 
-- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl new file mode 100644 index 000000000..65e123be1 --- /dev/null +++ b/test/gpu/gpu.jl @@ -0,0 +1,52 @@ +using LinearAlgebra, SparseArrays, Test +using Krylov + +include("../test_utils.jl") + +function test_processes(S, M) + m = 250 + n = 500 + k = 20 + FC = eltype(S) + + cpu_A, cpu_b = symmetric_indefinite(n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, T = hermitian_lanczos(gpu_A, gpu_b, k) + + cpu_A, cpu_b = nonsymmetric_definite(n, FC=FC) + cpu_c = -cpu_b + gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c) + V, T, U, Tᴴ = nonhermitian_lanczos(gpu_A, gpu_b, gpu_c, k) + + cpu_A, cpu_b = nonsymmetric_indefinite(n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, H = arnoldi(gpu_A, gpu_b, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, U, L = golub_kahan(gpu_A, gpu_b, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + _, cpu_c = over_consistent(n, m, FC=FC) + gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c) + V, T, U, Tᴴ = saunders_simon_yip(gpu_A, gpu_b, gpu_c, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + cpu_B, cpu_c = over_consistent(n, m, FC=FC) + 
gpu_A, gpu_B, gpu_b, gpu_c = M(cpu_A), M(cpu_B), S(cpu_b), S(cpu_c) + V, H, U, F = montoison_orban(gpu_A, gpu_B, gpu_b, gpu_c, k) +end + +function test_solver(S, M) + n = 10 + memory = 5 + A = M(undef, n, n) + b = S(undef, n) + solver = GmresSolver(n, n, memory, S) + solve!(solver, A, b) # Test that we don't have errors +end + +function test_conversion(S, M) + @test Krylov.vector_to_matrix(S) <: M + @test Krylov.matrix_to_vector(M) <: S +end diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl new file mode 100644 index 000000000..f03176199 --- /dev/null +++ b/test/gpu/intel.jl @@ -0,0 +1,113 @@ +using oneAPI + +include("gpu.jl") + +@testset "Intel -- oneAPI.jl" begin + + @test oneAPI.functional() + oneAPI.allowscalar(false) + + @testset "documentation" begin + T = Float32 + m = 20 + n = 10 + A_cpu = rand(T, m, n) + b_cpu = rand(T, m) + A_gpu = oneMatrix(A_cpu) + b_gpu = oneVector(b_cpu) + x, stats = lsqr(A_gpu, b_gpu) + end + + for FC ∈ (Float32, ComplexF32) + S = oneVector{FC} + M = oneMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! 
-- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl new file mode 100644 index 000000000..2e684e21f --- /dev/null +++ b/test/gpu/metal.jl @@ -0,0 +1,113 @@ +using Metal + +include("gpu.jl") + +@testset "Apple M1 GPUs -- Metal.jl" begin + + # @test Metal.functional() + Metal.allowscalar(false) + + @testset "documentation" begin + T = Float32 + n = 10 + m = 20 + A_cpu = rand(T, n, m) + b_cpu = rand(T, n) + A_gpu = MtlMatrix(A_cpu) + b_gpu = MtlVector(b_cpu) + x, stats = craig(A_gpu, b_gpu) + end + + for FC in (Float32, ComplexF32) + S = MtlVector{FC} + M = MtlMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! 
-- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! -- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl new file mode 100644 index 000000000..8cb44136d --- /dev/null +++ b/test/gpu/nvidia.jl @@ -0,0 +1,215 @@ +using LinearOperators, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER + +include("gpu.jl") + +@testset "Nvidia -- CUDA.jl" begin + + @test CUDA.functional() + CUDA.allowscalar(false) + + @testset "documentation" begin + A_cpu = rand(20, 20) + b_cpu = rand(20) + A_gpu = CuMatrix(A_cpu) + b_gpu = CuVector(b_cpu) + x, stats = bilq(A_gpu, b_gpu) + + A_cpu = sprand(200, 100, 0.3) + b_cpu = rand(200) + A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + x, stats = lsmr(A_gpu, b_gpu) + + @testset "ic0" begin + A_cpu, b_cpu = sparse_laplacian() + @test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == true + + b_gpu = CuVector(b_cpu) + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + symmetric = 
hermitian = true + + A_gpu = CuSparseMatrixCSC(A_cpu) + P = ic02(A_gpu) + function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, UpperTriangular(P)', x) + ldiv!(y, UpperTriangular(P), z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z)) + x, stats = cg(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + @test stats.niter ≤ 19 + + A_gpu = CuSparseMatrixCSR(A_gpu) + P = ic02(A_gpu) + function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, LowerTriangular(P), x) + ldiv!(y, LowerTriangular(P)', z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z)) + x, stats = cg(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + @test stats.niter ≤ 19 + end + + @testset "ilu0" begin + A_cpu = Float64[1 0 0 4; + 0 0 7 8; + 9 0 0 12; + 0 14 0 16] + A_cpu = sparse(A_cpu) + b_cpu = ones(4) + @test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == false + + p = zfd(A_cpu) + p .+= 1 + invp = invperm(p) + @test reduce(&, invp .== p) == false + + b_gpu = CuVector(b_cpu) + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + symmetric = hermitian = false + + A_gpu = CuSparseMatrixCSC(A_cpu[:,p]) + P = ilu02(A_gpu) + function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, LowerTriangular(P), x) + ldiv!(y, UnitUpperTriangular(P), z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z)) + x̄, stats = gmres(A_gpu, b_gpu, M=opM) + x = Vector(x̄)[invp] + @test norm(b_gpu - A_gpu * x̄) ≤ 1e-6 + @test norm(b_cpu - A_cpu * x) ≤ 1e-6 + + A_gpu = CuSparseMatrixCSR(A_cpu[:,p]) + P = ilu02(A_gpu) + function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, UnitLowerTriangular(P), x) + ldiv!(y, UpperTriangular(P), z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z)) + x̄, stats = gmres(A_gpu, b_gpu, M=opM) + x = Vector(x̄)[invp] + @test norm(b_gpu - 
A_gpu * x̄) ≤ 1e-6 + @test norm(b_cpu - A_cpu * x) ≤ 1e-6 + end + end + + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = CuVector{FC} + V = CuSparseVector{FC} + M = CuMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! -- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "processes -- $FC" begin + test_processes(S, M) + end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + + @testset "ktypeof -- $FC" begin + dv = S(rand(FC, 10)) + b = view(dv, 4:8) + @test Krylov.ktypeof(dv) <: S + @test Krylov.ktypeof(b) <: S + + dm = M(rand(FC, 10, 10)) + b = view(dm, :, 3) + @test 
Krylov.ktypeof(b) <: S + + sv = V(sprand(FC, 10, 0.5)) + b = view(sv, 4:8) + @test Krylov.ktypeof(sv) <: S + @test Krylov.ktypeof(b) <: S + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 99ab25fda..5381fd10e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,9 @@ import Krylov.KRYLOV_SOLVERS include("test_utils.jl") include("test_aux.jl") include("test_stats.jl") +include("test_processes.jl") +include("test_fgmres.jl") include("test_gpmr.jl") include("test_fom.jl") include("test_gmres.jl") @@ -42,3 +44,5 @@ include("test_allocations.jl") include("test_mp.jl") include("test_solvers.jl") include("test_warm_start.jl") +include("test_verbose.jl") +include("test_extensions.jl") diff --git a/test/test_allocations.jl b/test/test_allocations.jl index 4c6817499..174d0ae55 100644 --- a/test/test_allocations.jl +++ b/test/test_allocations.jl @@ -1,26 +1,27 @@ @testset "allocations" begin - for FC in (Float64, ComplexF64) + for FC in (Float32, Float64, ComplexF32, ComplexF64) @testset "Data Type: $FC" begin - A = FC.(get_div_grad(16, 16, 16)) # Dimension n x n - n = size(A, 1) - m = div(n, 2) - Au = A[1:m,:] # Dimension m x n - Ao = A[:,1:m] # Dimension n x m - b = Ao * ones(FC, m) # Dimension n - c = Au * ones(FC, n) # Dimension m + A = FC.(get_div_grad(18, 18, 18)) # Dimension m x n + m,n = size(A) + k = div(n, 2) + Au = A[1:k,:] # Dimension k x n + Ao = A[:,1:k] # Dimension m x k + b = Ao * ones(FC, k) # Dimension m + c = Au * ones(FC, n) # Dimension k mem = 200 - shifts = [1.0; 2.0; 3.0; 4.0; 5.0] + T = real(FC) + shifts = T[1; 2; 3; 4; 5] nshifts = 5 - nbits = sizeof(FC) # 8 bits for Float64 and 16 bits for ComplexF64 + nbits_FC = sizeof(FC) # 8 bits for ComplexF32 and 16 bits for ComplexF64 + nbits_T = sizeof(T) # 4 bits for Float32 and 8 bits for Float64 @testset "SYMMLQ" begin # SYMMLQ needs: # 5 n-vectors: x, Mvold, Mv, Mv_next, w̅ - storage_symmlq(n) = 5 * n - storage_symmlq_bytes(n) = nbits * storage_symmlq(n) + 
storage_symmlq_bytes(n) = nbits_FC * 5 * n expected_symmlq_bytes = storage_symmlq_bytes(n) symmlq(A, b) # warmup @@ -36,8 +37,7 @@ @testset "CG" begin # CG needs: # 4 n-vectors: x, r, p, Ap - storage_cg(n) = 4 * n - storage_cg_bytes(n) = nbits * storage_cg(n) + storage_cg_bytes(n) = nbits_FC * 4 * n expected_cg_bytes = storage_cg_bytes(n) cg(A, b) # warmup @@ -53,8 +53,7 @@ @testset "CG-LANCZOS" begin # CG-LANCZOS needs: # 5 n-vectors: x, Mv, Mv_prev, p, Mv_next - storage_cg_lanczos(n) = 5 * n - storage_cg_lanczos_bytes(n) = nbits * storage_cg_lanczos(n) + storage_cg_lanczos_bytes(n) = nbits_FC * 5 * n expected_cg_lanczos_bytes = storage_cg_lanczos_bytes(n) cg_lanczos(A, b) # warmup @@ -73,9 +72,7 @@ # - 2 (n*nshifts)-matrices: x, p # - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms # - 3 nshifts-bitVector: indefinite, converged, not_cv - storage_cg_lanczos_shift(n, nshifts) = (3 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64) - storage_cg_lanczos_shift_bytes(n, nshifts) = nbits * storage_cg_lanczos_shift(n, nshifts) - + storage_cg_lanczos_shift_bytes(n, nshifts) = nbits_FC * ((3 * n) + (2 * n * nshifts)) + nbits_T * (5 * nshifts) + (3 * nshifts) expected_cg_lanczos_shift_bytes = storage_cg_lanczos_shift_bytes(n, nshifts) cg_lanczos_shift(A, b, shifts) # warmup actual_cg_lanczos_shift_bytes = @allocated cg_lanczos_shift(A, b, shifts) @@ -90,8 +87,7 @@ @testset "CR" begin # CR needs: # 5 n-vectors: x, r, p, q, Ar - storage_cr(n) = 5 * n - storage_cr_bytes(n) = nbits * storage_cr(n) + storage_cr_bytes(n) = nbits_FC * 5 * n expected_cr_bytes = storage_cr_bytes(n) cr(A, b) # warmup @@ -107,8 +103,7 @@ @testset "MINRES" begin # MINRES needs: # 6 n-vectors: x, r1, r2, w1, w2, y - storage_minres(n) = 6 * n - storage_minres_bytes(n) = nbits * storage_minres(n) + storage_minres_bytes(n) = nbits_FC * 6 * n expected_minres_bytes = storage_minres_bytes(n) minres(A, b) # warmup @@ -124,8 +119,7 @@ @testset "MINRES-QLP" begin # MINRES-QLP needs: # - 6 n-vectors: wₖ₋₁, 
wₖ, vₖ₋₁, vₖ, x, p - storage_minres_qlp(n) = 6 * n - storage_minres_qlp_bytes(n) = nbits * storage_minres_qlp(n) + storage_minres_qlp_bytes(n) = nbits_FC * 6 * n expected_minres_qlp_bytes = storage_minres_qlp_bytes(n) minres_qlp(A, b) # warmup @@ -141,11 +135,11 @@ @testset "DIOM" begin # DIOM needs: # - 2 n-vectors: x, t - # - 2 (n*mem)-matrices: P, V - # - 1 mem-vector: L - # - 1 (mem+2)-vector: H - storage_diom(mem, n) = (2 * n) + (2 * n * mem) + (mem) + (mem + 2) - storage_diom_bytes(mem, n) = nbits * storage_diom(mem, n) + # - 1 (n*mem)-matrix: V + # - 1 n*(mem-1)-matrix: P + # - 1 (mem-1)-vector: L + # - 1 mem-vector: H + storage_diom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (n * (mem-1)) + (mem-1) + (mem)) expected_diom_bytes = storage_diom_bytes(mem, n) diom(A, b, memory=mem) # warmup @@ -164,8 +158,7 @@ # - 1 (n*mem)-matrix: V # - 2 mem-vectors: l, z # - 1 (mem*(mem+1)/2)-vector: U - storage_fom(mem, n) = (2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2) - storage_fom_bytes(mem, n) = nbits * storage_fom(mem, n) + storage_fom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) expected_fom_bytes = storage_fom_bytes(mem, n) fom(A, b, memory=mem) # warmup @@ -183,9 +176,8 @@ # - 2 n-vectors: x, t # - 2 (n*mem)-matrices: P, V # - 2 mem-vectors: c, s - # - 1 (mem+2)-vector: H - storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 2) - storage_dqgmres_bytes(mem, n) = nbits * storage_dqgmres(mem, n) + # - 1 (mem+1)-vector: H + storage_dqgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + mem + (mem + 1)) + nbits_T * mem expected_dqgmres_bytes = storage_dqgmres_bytes(mem, n) dqgmres(A, b, memory=mem) # warmup @@ -204,8 +196,7 @@ # - 1 n*(mem)-matrix: V # - 3 mem-vectors: c, s, z # - 1 (mem*(mem+1)/2)-vector: R - storage_gmres(mem, n) = (2 * n) + (n * mem) + (3 * mem) + (mem * (mem+1) / 2) - storage_gmres_bytes(mem, n) = nbits * storage_gmres(mem, n) + storage_gmres_bytes(mem, n) = nbits_FC * ((2 
* n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem expected_gmres_bytes = storage_gmres_bytes(mem, n) gmres(A, b, memory=mem) # warmup @@ -218,11 +209,29 @@ @test inplace_gmres_bytes == 0 end + @testset "FGMRES" begin + # FGMRES needs: + # - 2 n-vectors: x, w + # - 2 n*(mem)-matrix: V, Z + # - 3 mem-vectors: c, s, z + # - 1 (mem*(mem+1)/2)-vector: R + storage_fgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem + + expected_fgmres_bytes = storage_fgmres_bytes(mem, n) + fgmres(A, b, memory=mem) # warmup + actual_fgmres_bytes = @allocated fgmres(A, b, memory=mem) + @test expected_fgmres_bytes ≤ actual_fgmres_bytes ≤ 1.02 * expected_fgmres_bytes + + solver = FgmresSolver(A, b, mem) + fgmres!(solver, A, b) # warmup + inplace_fgmres_bytes = @allocated fgmres!(solver, A, b) + @test inplace_fgmres_bytes == 0 + end + @testset "CGS" begin # CGS needs: # 6 n-vectors: x, r, u, p, q, ts - storage_cgs(n) = 6 * n - storage_cgs_bytes(n) = nbits * storage_cgs(n) + storage_cgs_bytes(n) = nbits_FC * 6 * n expected_cgs_bytes = storage_cgs_bytes(n) cgs(A, b) # warmup @@ -238,8 +247,7 @@ @testset "BICGSTAB" begin # BICGSTAB needs: # 6 n-vectors: x, r, p, v, s, qd - storage_bicgstab(n) = 6 * n - storage_bicgstab_bytes(n) = nbits * storage_bicgstab(n) + storage_bicgstab_bytes(n) = nbits_FC * 6 * n expected_bicgstab_bytes = storage_bicgstab_bytes(n) bicgstab(A, b) # warmup @@ -254,12 +262,11 @@ @testset "CGNE" begin # CGNE needs: - # - 3 n-vectors: x, p, Aᵀz + # - 3 n-vectors: x, p, Aᴴz # - 2 m-vectors: r, q - storage_cgne(n, m) = 3 * n + 2 * m - storage_cgne_bytes(n, m) = nbits * storage_cgne(n, m) + storage_cgne_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_cgne_bytes = storage_cgne_bytes(n, m) + expected_cgne_bytes = storage_cgne_bytes(k, n) (x, stats) = cgne(Au, c) # warmup actual_cgne_bytes = @allocated cgne(Au, c) @test expected_cgne_bytes ≤ actual_cgne_bytes ≤ 1.02 * expected_cgne_bytes @@ -272,12 
+279,11 @@ @testset "CRMR" begin # CRMR needs: - # - 3 n-vectors: x, p, Aᵀr + # - 3 n-vectors: x, p, Aᴴr # - 2 m-vectors: r, q - storage_crmr(n, m) = 3 * n + 2 * m - storage_crmr_bytes(n, m) = nbits * storage_crmr(n, m) + storage_crmr_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_crmr_bytes = storage_crmr_bytes(n, m) + expected_crmr_bytes = storage_crmr_bytes(k, n) (x, stats) = crmr(Au, c) # warmup actual_crmr_bytes = @allocated crmr(Au, c) @test expected_crmr_bytes ≤ actual_crmr_bytes ≤ 1.02 * expected_crmr_bytes @@ -290,12 +296,11 @@ @testset "LNLQ" begin # LNLQ needs: - # - 3 n-vectors: x, v, Aᵀu + # - 3 n-vectors: x, v, Aᴴu # - 4 m-vectors: y, w̄, u, Av - storage_lnlq(n, m) = 3 * n + 4 * m - storage_lnlq_bytes(n, m) = nbits * storage_lnlq(n, m) + storage_lnlq_bytes(m, n) = nbits_FC * (3 * n + 4 * m) - expected_lnlq_bytes = storage_lnlq_bytes(n, m) + expected_lnlq_bytes = storage_lnlq_bytes(k, n) lnlq(Au, c) # warmup actual_lnlq_bytes = @allocated lnlq(Au, c) @test expected_lnlq_bytes ≤ actual_lnlq_bytes ≤ 1.02 * expected_lnlq_bytes @@ -308,12 +313,11 @@ @testset "CRAIG" begin # CRAIG needs: - # - 3 n-vectors: x, v, Aᵀu + # - 3 n-vectors: x, v, Aᴴu # - 4 m-vectors: y, w, u, Av - storage_craig(n, m) = 3 * n + 4 * m - storage_craig_bytes(n, m) = nbits * storage_craig(n, m) + storage_craig_bytes(m, n) = nbits_FC * (3 * n + 4 * m) - expected_craig_bytes = storage_craig_bytes(n, m) + expected_craig_bytes = storage_craig_bytes(k, n) craig(Au, c) # warmup actual_craig_bytes = @allocated craig(Au, c) @test expected_craig_bytes ≤ actual_craig_bytes ≤ 1.02 * expected_craig_bytes @@ -326,12 +330,11 @@ @testset "CRAIGMR" begin # CRAIGMR needs: - # - 4 n-vectors: x, v, Aᵀu, d + # - 4 n-vectors: x, v, Aᴴu, d # - 5 m-vectors: y, u, w, wbar, Av - storage_craigmr(n, m) = 4 * n + 5 * m - storage_craigmr_bytes(n, m) = nbits * storage_craigmr(n, m) + storage_craigmr_bytes(m, n) = nbits_FC * (4 * n + 5 * m) - expected_craigmr_bytes = storage_craigmr_bytes(n, m) + 
expected_craigmr_bytes = storage_craigmr_bytes(k, n) craigmr(Au, c) # warmup actual_craigmr_bytes = @allocated craigmr(Au, c) @test expected_craigmr_bytes ≤ actual_craigmr_bytes ≤ 1.02 * expected_craigmr_bytes @@ -344,12 +347,11 @@ @testset "CGLS" begin # CGLS needs: - # - 3 m-vectors: x, p, s - # - 2 n-vectors: r, q - storage_cgls(n, m) = 3 * m + 2 * n - storage_cgls_bytes(n, m) = nbits * storage_cgls(n, m) + # - 3 n-vectors: x, p, s + # - 2 m-vectors: r, q + storage_cgls_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_cgls_bytes = storage_cgls_bytes(n, m) + expected_cgls_bytes = storage_cgls_bytes(m, k) (x, stats) = cgls(Ao, b) # warmup actual_cgls_bytes = @allocated cgls(Ao, b) @test expected_cgls_bytes ≤ actual_cgls_bytes ≤ 1.02 * expected_cgls_bytes @@ -362,12 +364,11 @@ @testset "LSLQ" begin # LSLQ needs: - # - 4 m-vectors: x_lq, v, Aᵀu, w̄ (= x_cg) - # - 2 n-vectors: u, Av - storage_lslq(n, m) = 4 * m + 2 * n - storage_lslq_bytes(n, m) = nbits * storage_lslq(n, m) + # - 4 n-vectors: x_lq, v, Aᴴu, w̄ (= x_cg) + # - 2 m-vectors: u, Av + storage_lslq_bytes(m, n) = nbits_FC * (4 * n + 2 * m) - expected_lslq_bytes = storage_lslq_bytes(n, m) + expected_lslq_bytes = storage_lslq_bytes(m, k) (x, stats) = lslq(Ao, b) # warmup actual_lslq_bytes = @allocated lslq(Ao, b) @test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.02 * expected_lslq_bytes @@ -380,12 +381,11 @@ @testset "CRLS" begin # CRLS needs: - # - 4 m-vectors: x, p, Ar, q - # - 3 n-vectors: r, Ap, s - storage_crls(n, m) = 4 * m + 3 * n - storage_crls_bytes(n, m) = nbits * storage_crls(n, m) + # - 4 n-vectors: x, p, Ar, q + # - 3 m-vectors: r, Ap, s + storage_crls_bytes(m, n) = nbits_FC * (4 * n + 3 * m) - expected_crls_bytes = storage_crls_bytes(n, m) + expected_crls_bytes = storage_crls_bytes(m, k) (x, stats) = crls(Ao, b) # warmup actual_crls_bytes = @allocated crls(Ao, b) @test expected_crls_bytes ≤ actual_crls_bytes ≤ 1.02 * expected_crls_bytes @@ -398,12 +398,11 @@ @testset "LSQR" begin # LSQR needs: 
- # - 4 m-vectors: x, v, w, Aᵀu - # - 2 n-vectors: u, Av - storage_lsqr(n, m) = 4 * m + 2 * n - storage_lsqr_bytes(n, m) = nbits * storage_lsqr(n, m) + # - 4 n-vectors: x, v, w, Aᴴu + # - 2 m-vectors: u, Av + storage_lsqr_bytes(m, n) = nbits_FC * (4 * n + 2 * m) - expected_lsqr_bytes = storage_lsqr_bytes(n, m) + expected_lsqr_bytes = storage_lsqr_bytes(m, k) (x, stats) = lsqr(Ao, b) # warmup actual_lsqr_bytes = @allocated lsqr(Ao, b) @test expected_lsqr_bytes ≤ actual_lsqr_bytes ≤ 1.02 * expected_lsqr_bytes @@ -416,12 +415,11 @@ @testset "LSMR" begin # LSMR needs: - # - 5 m-vectors: x, v, h, hbar, Aᵀu - # - 2 n-vectors: u, Av - storage_lsmr(n, m) = 5 * m + 2 * n - storage_lsmr_bytes(n, m) = nbits * storage_lsmr(n, m) + # - 5 n-vectors: x, v, h, hbar, Aᴴu + # - 2 m-vectors: u, Av + storage_lsmr_bytes(m, n) = nbits_FC * (5 * n + 2 * m) - expected_lsmr_bytes = storage_lsmr_bytes(n, m) + expected_lsmr_bytes = storage_lsmr_bytes(m, k) (x, stats) = lsmr(Ao, b) # warmup actual_lsmr_bytes = @allocated lsmr(Ao, b) @test expected_lsmr_bytes ≤ actual_lsmr_bytes ≤ 1.02 * expected_lsmr_bytes @@ -435,8 +433,7 @@ @testset "BiLQ" begin # BILQ needs: # - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q - storage_bilq(n) = 8 * n - storage_bilq_bytes(n) = nbits * storage_bilq(n) + storage_bilq_bytes(n) = nbits_FC * 8 * n expected_bilq_bytes = storage_bilq_bytes(n) bilq(A, b) # warmup @@ -452,8 +449,7 @@ @testset "QMR" begin # QMR needs: # - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q - storage_qmr(n) = 9 * n - storage_qmr_bytes(n) = nbits * storage_qmr(n) + storage_qmr_bytes(n) = nbits_FC * 9 * n expected_qmr_bytes = storage_qmr_bytes(n) qmr(A, b) # warmup @@ -469,8 +465,7 @@ @testset "BiLQR" begin # BILQR needs: # - 11 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ, p, q - storage_bilqr(n) = 11 * n - storage_bilqr_bytes(n) = nbits * storage_bilqr(n) + storage_bilqr_bytes(n) = nbits_FC * 11 * n expected_bilqr_bytes = storage_bilqr_bytes(n) bilqr(A, b, b) # warmup @@ -487,10 
+482,9 @@ # USYMLQ needs: # - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p # - 3 m-vectors: vₖ₋₁, vₖ, q - storage_usymlq(n, m) = 5 * n + 3 * m - storage_usymlq_bytes(n, m) = nbits * storage_usymlq(n, m) + storage_usymlq_bytes(m, n) = nbits_FC * (5 * n + 3 * m) - expected_usymlq_bytes = storage_usymlq_bytes(n, m) + expected_usymlq_bytes = storage_usymlq_bytes(k, n) usymlq(Au, c, b) # warmup actual_usymlq_bytes = @allocated usymlq(Au, c, b) @test expected_usymlq_bytes ≤ actual_usymlq_bytes ≤ 1.02 * expected_usymlq_bytes @@ -503,12 +497,11 @@ @testset "USYMQR" begin # USYMQR needs: - # - 6 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p - # - 3 n-vectors: uₖ₋₁, uₖ, q - storage_usymqr(n, m) = 6 * m + 3 * n - storage_usymqr_bytes(n, m) = nbits * storage_usymqr(n, m) + # - 6 n-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p + # - 3 m-vectors: uₖ₋₁, uₖ, q + storage_usymqr_bytes(m, n) = nbits_FC * (6 * n + 3 * m) - expected_usymqr_bytes = storage_usymqr_bytes(n, m) + expected_usymqr_bytes = storage_usymqr_bytes(m, k) (x, stats) = usymqr(Ao, b, c) # warmup actual_usymqr_bytes = @allocated usymqr(Ao, b, c) @test expected_usymqr_bytes ≤ actual_usymqr_bytes ≤ 1.02 * expected_usymqr_bytes @@ -523,8 +516,7 @@ # TRILQR needs: # - 6 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ, q # - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p - storage_trilqr(n, m) = 6 * m + 5 * n - storage_trilqr_bytes(n, m) = nbits * storage_trilqr(n, m) + storage_trilqr_bytes(m, n) = nbits_FC * (6 * m + 5 * n) expected_trilqr_bytes = storage_trilqr_bytes(n, n) trilqr(A, b, b) # warmup @@ -541,10 +533,9 @@ # TriCG needs: # - 6 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ, p # - 6 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ, q - storage_tricg(n, m) = 6 * n + 6 * m - storage_tricg_bytes(n, m) = nbits * storage_tricg(n, m) + storage_tricg_bytes(m, n) = nbits_FC * (6 * n + 6 * m) - expected_tricg_bytes = storage_tricg_bytes(n, m) + expected_tricg_bytes = storage_tricg_bytes(k, n) tricg(Au, c, b) # warmup actual_tricg_bytes = @allocated tricg(Au, c, b) @test expected_tricg_bytes ≤ 
actual_tricg_bytes ≤ 1.02 * expected_tricg_bytes @@ -559,10 +550,9 @@ # TriMR needs: # - 8 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, p # - 8 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, q - storage_trimr(n, m) = 8 * n + 8 * m - storage_trimr_bytes(n, m) = nbits * storage_trimr(n, m) + storage_trimr_bytes(m, n) = nbits_FC * (8 * n + 8 * m) - expected_trimr_bytes = storage_trimr_bytes(n, m) + expected_trimr_bytes = storage_trimr_bytes(k, n) trimr(Au, c, b) # warmup actual_trimr_bytes = @allocated trimr(Au, c, b) @test expected_trimr_bytes ≤ actual_trimr_bytes ≤ 1.02 * expected_trimr_bytes @@ -575,17 +565,16 @@ @testset "GPMR" begin # GPMR needs: - # - 2 n-vectors: x, q - # - 2 m-vectors: y, p - # - 1 (n*mem)-matrix: V - # - 1 (m*mem)-matrix: U + # - 2 m-vectors: x, q + # - 2 n-vectors: y, p + # - 1 (m*mem)-matrix: V + # - 1 (n*mem)-matrix: U # - 1 (2*mem)-vector: zt # - 2 (4*mem)-vectors: gc, gs # - 1 (mem*(2mem+1))-vector: R - storage_gpmr(mem, n, m) = (mem + 2) * (n + m) + mem * (2 * mem + 11) - storage_gpmr_bytes(mem, n, m) = nbits * storage_gpmr(mem, n, m) + storage_gpmr_bytes(mem, m, n) = nbits_FC * ((mem + 2) * (n + m) + mem * (2 * mem + 7)) + nbits_T * 4 * mem - expected_gpmr_bytes = storage_gpmr_bytes(mem, n, m) + expected_gpmr_bytes = storage_gpmr_bytes(mem, m, k) gpmr(Ao, Au, b, c, memory=mem, itmax=mem) # warmup actual_gpmr_bytes = @allocated gpmr(Ao, Au, b, c, memory=mem, itmax=mem) @test expected_gpmr_bytes ≤ actual_gpmr_bytes ≤ 1.02 * expected_gpmr_bytes diff --git a/test/test_aux.jl b/test/test_aux.jl index 11bdb7c2d..6c43142c0 100644 --- a/test/test_aux.jl +++ b/test/test_aux.jl @@ -1,119 +1,203 @@ @testset "aux" begin - # test Givens reflector corner cases - (c, s, ρ) = Krylov.sym_givens(0.0, 0.0) - @test (c == 1.0) && (s == 0.0) && (ρ == 0.0) - - a = 3.14 - (c, s, ρ) = Krylov.sym_givens(a, 0.0) - @test (c == 1.0) && (s == 0.0) && (ρ == a) - (c, s, ρ) = Krylov.sym_givens(-a, 0.0) - @test (c == -1.0) && (s == 0.0) && (ρ == a) - 
- b = 3.14 - (c, s, ρ) = Krylov.sym_givens(0.0, b) - @test (c == 0.0) && (s == 1.0) && (ρ == b) - (c, s, ρ) = Krylov.sym_givens(0.0, -b) - @test (c == 0.0) && (s == -1.0) && (ρ == b) - - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0)) - - a = Complex(1.0, 1.0) - (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a) - (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a) - - b = Complex(1.0, 1.0) - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b) - @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b) - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b) - @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b) - - # test roots of a quadratic - roots = Krylov.roots_quadratic(0.0, 0.0, 0.0) - @test length(roots) == 1 - @test roots[1] == 0.0 - - roots = Krylov.roots_quadratic(0.0, 0.0, 1.0) - @test length(roots) == 0 - - roots = Krylov.roots_quadratic(0.0, 3.14, -1.0) - @test length(roots) == 1 - @test roots[1] == 1.0 / 3.14 - - roots = Krylov.roots_quadratic(1.0, 0.0, 1.0) - @test length(roots) == 0 - - roots = Krylov.roots_quadratic(1.0, 0.0, 0.0) - @test length(roots) == 2 - @test roots[1] == 0.0 - @test roots[2] == 0.0 - - roots = Krylov.roots_quadratic(1.0, 3.0, 2.0) - @test length(roots) == 2 - @test roots[1] ≈ -2.0 - @test roots[2] ≈ -1.0 - - roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0) - @test length(roots) == 0 - - # ill-conditioned quadratic - roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) - @test length(roots) == 2 - @test roots[1] == 1.0e+13 - @test roots[2] == 0.0 - - # iterative refinement is crucial! 
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) - @test length(roots) == 2 - @test roots[1] == 1.0e+13 - @test roots[2] == -1.0e-05 - - # not ill-conditioned quadratic - roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) - @test length(roots) == 2 - @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) - @test isapprox(roots[2], -1.0, rtol=1.0e-6) - - roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) - @test length(roots) == 2 - @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) - @test isapprox(roots[2], -1.0, rtol=1.0e-6) - - # test trust-region boundary - x = ones(5) - d = ones(5); d[1:2:5] .= -1 - @test_throws ErrorException Krylov.to_boundary(x, d, -1.0) - @test_throws ErrorException Krylov.to_boundary(x, d, 0.5) - @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0) - @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178 - @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782 - @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782 - @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178 - - # test kzeros and kones - @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10) - @test Krylov.kones(Vector{Float64}, 10) == ones(10) - - # test ktypeof - a = rand(Float32, 10) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float32} - @test Krylov.ktypeof(b) == Vector{Float32} - - a = rand(Float64, 10) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float64} - @test Krylov.ktypeof(b) == Vector{Float64} - - a = sprand(Float32, 10, 0.5) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float32} - @test Krylov.ktypeof(b) == Vector{Float32} - - a = sprand(Float64, 10, 0.5) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float64} - @test Krylov.ktypeof(b) == Vector{Float64} + + @testset "sym_givens" begin + # test Givens reflector corner cases + (c, s, ρ) = Krylov.sym_givens(0.0, 0.0) + @test (c == 1.0) && (s == 0.0) && (ρ == 0.0) + + a = 3.14 + (c, 
s, ρ) = Krylov.sym_givens(a, 0.0) + @test (c == 1.0) && (s == 0.0) && (ρ == a) + (c, s, ρ) = Krylov.sym_givens(-a, 0.0) + @test (c == -1.0) && (s == 0.0) && (ρ == a) + + b = 3.14 + (c, s, ρ) = Krylov.sym_givens(0.0, b) + @test (c == 0.0) && (s == 1.0) && (ρ == b) + (c, s, ρ) = Krylov.sym_givens(0.0, -b) + @test (c == 0.0) && (s == -1.0) && (ρ == b) + + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0)) + + a = Complex(1.0, 1.0) + (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a) + (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a) + + b = Complex(1.0, 1.0) + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b) + @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b) + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b) + @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b) + end + + @testset "roots_quadratic" begin + # test roots of a quadratic + roots = Krylov.roots_quadratic(0.0, 0.0, 0.0) + @test roots[1] == 0.0 + @test roots[2] == 0.0 + + @test_throws ErrorException Krylov.roots_quadratic(0.0, 0.0, 1.0) + + roots = Krylov.roots_quadratic(0.0, 3.14, -1.0) + @test roots[1] == 1.0 / 3.14 + @test roots[2] == 1.0 / 3.14 + + @test_throws ErrorException Krylov.roots_quadratic(1.0, 0.0, 1.0) + + roots = Krylov.roots_quadratic(1.0, 0.0, 0.0) + @test roots[1] == 0.0 + @test roots[2] == 0.0 + + roots = Krylov.roots_quadratic(1.0, 3.0, 2.0) + @test roots[1] ≈ -2.0 + @test roots[2] ≈ -1.0 + + @test_throws ErrorException Krylov.roots_quadratic(1.0e+8, 1.0, 1.0) + + # ill-conditioned quadratic + roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) + @test roots[1] == 1.0e+13 + @test roots[2] == 0.0 + + # iterative refinement is crucial! 
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) + @test roots[1] == 1.0e+13 + @test roots[2] == -1.0e-05 + + # not ill-conditioned quadratic + roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) + @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) + @test isapprox(roots[2], -1.0, rtol=1.0e-6) + + roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) + @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) + @test isapprox(roots[2], -1.0, rtol=1.0e-6) + + allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) + @test allocations == 0 + end + + @testset "to_boundary" begin + # test trust-region boundary + n = 5 + x = ones(n) + d = ones(n); d[1:2:n] .= -1 + @test_throws ErrorException Krylov.to_boundary(n, x, d, -1.0) + @test_throws ErrorException Krylov.to_boundary(n, x, d, 0.5) + @test_throws ErrorException Krylov.to_boundary(n, x, zeros(n), 1.0) + @test maximum(Krylov.to_boundary(n, x, d, 5.0)) ≈ 2.209975124224178 + @test minimum(Krylov.to_boundary(n, x, d, 5.0)) ≈ -1.8099751242241782 + @test maximum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ 1.8099751242241782 + @test minimum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ -2.209975124224178 + end + + @testset "kzeros" begin + # test kzeros + @test Krylov.kzeros(Vector{Float64}, 10) == 
zeros(Float64, 10) + @test Krylov.kzeros(Vector{ComplexF32}, 10) == zeros(ComplexF32, 10) + end + + @testset "kones" begin + # test kones + @test Krylov.kones(Vector{Float64}, 10) == ones(Float64, 10) + @test Krylov.kones(Vector{ComplexF32}, 10) == ones(ComplexF32, 10) + end + + @testset "ktypeof" begin + # test ktypeof + for FC in (Float32, Float64, ComplexF32, ComplexF64) + dv = rand(FC, 10) + b = view(dv, 4:8) + @test Krylov.ktypeof(dv) == Vector{FC} + @test Krylov.ktypeof(b) == Vector{FC} + + dm = rand(FC, 10, 10) + b = view(dm, :, 3) + @test Krylov.ktypeof(b) == Vector{FC} + + sv = sprand(FC, 10, 0.5) + b = view(sv, 4:8) + @test Krylov.ktypeof(sv) == Vector{FC} + @test Krylov.ktypeof(b) == Vector{FC} + end + end + + @testset "vector_to_matrix" begin + # test vector_to_matrix + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = Vector{FC} + M = Krylov.vector_to_matrix(S) + @test M == Matrix{FC} + end + end + + @testset "matrix_to_vector" begin + # test matrix_to_vector + for FC in (Float32, Float64, ComplexF32, ComplexF64) + M = Matrix{FC} + S = Krylov.matrix_to_vector(M) + @test S == Vector{FC} + end + end + + @testset "macros" begin + # test macros + for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64) + n = 10 + x = rand(FC, n) + y = rand(FC, n) + a = rand(FC) + b = rand(FC) + c = rand(FC) + s = rand(FC) + + T = real(FC) + a2 = rand(T) + b2 = rand(T) + + Krylov.@kdot(n, x, y) + + Krylov.@kdotr(n, x, y) + + Krylov.@knrm2(n, x) + + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + + Krylov.@kcopy!(n, x, y) + + Krylov.@kswap(x, y) + + Krylov.@kref!(n, x, y, c, s) + end + end end diff --git a/test/test_bicgstab.jl b/test/test_bicgstab.jl index ce4e6dcd4..6817acf3d 100644 --- a/test/test_bicgstab.jl +++ b/test/test_bicgstab.jl @@ -82,10 +82,10 @@ @test(resid ≤ bicgstab_tol) 
@test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = bicgstab(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function solver = BicgstabSolver(A, b) diff --git a/test/test_bilq.jl b/test/test_bilq.jl index 900d1f6e5..40b9872db 100644 --- a/test/test_bilq.jl +++ b/test/test_bilq.jl @@ -66,10 +66,10 @@ @test(resid ≤ bilq_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = bilq(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function diff --git a/test/test_bilqr.jl b/test/test_bilqr.jl index 6dab06ec7..fd46aade4 100644 --- a/test/test_bilqr.jl +++ b/test/test_bilqr.jl @@ -46,10 +46,10 @@ @test(resid_dual ≤ bilqr_tol) @test(stats.solved_dual) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, t, stats) = bilqr(A, b, c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function A, b, c = adjoint_pde(FC=FC) diff --git a/test/test_cgne.jl b/test/test_cgne.jl index 64cbc0ea7..c1a3e798b 100644 --- a/test/test_cgne.jl +++ b/test/test_cgne.jl @@ -1,6 +1,6 @@ -function test_cgne(A, b; λ=0.0, M=I) +function test_cgne(A, b; λ=0.0, N=I, history=false) (nrow, ncol) = size(A) - (x, stats) = cgne(A, b, λ=λ, M=M) + (x, stats) = cgne(A, b, λ=λ, N=N, history=history) r = b - A * x if λ > 0 s = r / sqrt(λ) @@ -69,8 +69,8 @@ end @test stats.status == "x = 0 is a zero-residual solution" # Test with Jacobi (or diagonal) preconditioner - A, b, M = square_preconditioned(FC=FC) - (x, stats, resid) = test_cgne(A, b, M=M) + A, b, N = square_preconditioned(FC=FC) + (x, stats, resid) = test_cgne(A, b, N=N) @test(resid ≤ cgne_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -81,8 +81,8 @@ end A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0; 2.0 2.0 2.0 2.0 2.0 2.0 
2.0 2.0 2.0 2.0] b = [1.0; 0.0] - M = Diagonal(1 ./ (A * A')) - (x, stats, resid) = test_cgne(A, b, M=M) + N = Diagonal(1 ./ (A * A')) + (x, stats, resid) = test_cgne(A, b, N=N) @test(resid ≤ cgne_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -92,7 +92,7 @@ end for transpose ∈ (false, true) A, b, c, D = small_sp(transpose, FC=FC) D⁻¹ = inv(D) - (x, stats) = cgne(A, b, M=D⁻¹, λ=1.0) + (x, stats) = cgne(A, b, N=D⁻¹, λ=1.0) end # test callback function diff --git a/test/test_cgs.jl b/test/test_cgs.jl index 5c505bb70..832cd76c3 100644 --- a/test/test_cgs.jl +++ b/test/test_cgs.jl @@ -74,10 +74,10 @@ @test(resid ≤ cgs_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = cgs(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function A, b = sparse_laplacian(FC=FC) diff --git a/test/test_crmr.jl b/test/test_crmr.jl index 6354f329f..d0f902df6 100644 --- a/test/test_crmr.jl +++ b/test/test_crmr.jl @@ -1,6 +1,6 @@ -function test_crmr(A, b; λ=0.0, M=I, history=false) +function test_crmr(A, b; λ=0.0, N=I, history=false) (nrow, ncol) = size(A) - (x, stats) = crmr(A, b, λ=λ, M=M, history=history) + (x, stats) = crmr(A, b, λ=λ, N=N, history=history) r = b - A * x if λ > 0 s = r / sqrt(λ) @@ -76,8 +76,8 @@ end A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0; 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0] b = [1.0; 0.0] - M = Diagonal(1 ./ (A * A')) - (x, stats, resid) = test_crmr(A, b, M=M) + N = Diagonal(1 ./ (A * A')) + (x, stats, resid) = test_crmr(A, b, N=N) @test(resid ≤ crmr_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -87,7 +87,7 @@ end for transpose ∈ (false, true) A, b, c, D = small_sp(transpose, FC=FC) D⁻¹ = inv(D) - (x, stats) = crmr(A, b, M=D⁻¹, λ=1.0) + (x, stats) = crmr(A, b, N=D⁻¹, λ=1.0) end # test callback function diff --git a/test/test_diom.jl b/test/test_diom.jl index 
4f1a8ecea..62a38b198 100644 --- a/test/test_diom.jl +++ b/test/test_diom.jl @@ -60,7 +60,7 @@ # Poisson equation in polar coordinates. A, b = polar_poisson(FC=FC) - (x, stats) = diom(A, b, memory=200) + (x, stats) = diom(A, b, memory=150) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ diom_tol) diff --git a/test/test_extensions.jl b/test/test_extensions.jl new file mode 100644 index 000000000..81bbe12ca --- /dev/null +++ b/test/test_extensions.jl @@ -0,0 +1,56 @@ +using ComponentArrays +using FillArrays +using StaticArrays + +@testset "extensions" begin + @testset "ComponentArrays" begin + n = 5 + for T in (Float32, Float64) + A = rand(T, n, n) + + b = ComponentVector(; b1=rand(T, n - 1), b2=rand(T)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + end + end + + @testset "FillArrays" begin + n = 5 + for T in (Float32, Float64) + A = rand(T, n, n) + + b = Ones(T, n) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + + b = Zeros(T, n) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + end + end + + @testset "StaticArrays" begin + n = 5 + for T in (Float32, Float64) + A = rand(T, n, n) + + b = SVector{n}(rand(T, n)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + + b = MVector{n}(rand(T, n)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + + b = SizedVector{n}(rand(T, n)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + end + end +end diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl new file mode 100644 index 000000000..9bb73d3e4 --- /dev/null +++ b/test/test_fgmres.jl @@ -0,0 +1,154 @@ +import LinearAlgebra.mul! 
+ +mutable struct FlexiblePreconditioner{T,S} + D::Diagonal{T, S} + ω::T +end + +function mul!(y::Vector, P::FlexiblePreconditioner, x::Vector) + P.ω = -P.ω + mul!(y, P.D, x) + y .*= P.ω +end + +@testset "fgmres" begin + fgmres_tol = 1.0e-6 + + for FC in (Float64, ComplexF64) + @testset "Data Type: $FC" begin + + # Symmetric and positive definite system. + A, b = symmetric_definite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Symmetric indefinite variant. + A, b = symmetric_indefinite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Nonsymmetric and positive definite systems. + A, b = nonsymmetric_definite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Nonsymmetric indefinite variant. + A, b = nonsymmetric_indefinite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Symmetric indefinite variant, almost singular. + A, b = almost_singular(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ 100 * fgmres_tol) + @test(stats.solved) + + # Singular system. + A, b = square_inconsistent(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + Aresid = norm(A' * r) / norm(A' * b) + @test(Aresid ≤ fgmres_tol) + @test(stats.inconsistent) + + # Test b == 0 + A, b = zero_rhs(FC=FC) + (x, stats) = fgmres(A, b) + @test norm(x) == 0 + @test stats.status == "x = 0 is a zero-residual solution" + + # Poisson equation in polar coordinates. 
+ A, b = polar_poisson(FC=FC) + (x, stats) = fgmres(A, b, reorthogonalization=true) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Left preconditioning + A, b, M = square_preconditioned(FC=FC) + (x, stats) = fgmres(A, b, M=M) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Right preconditioning + A, b, N = square_preconditioned(FC=FC) + (x, stats) = fgmres(A, b, N=N) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Split preconditioning + A, b, M, N = two_preconditioners(FC=FC) + (x, stats) = fgmres(A, b, M=M, N=N) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Restart + for restart ∈ (false, true) + memory = 10 + + A, b = sparse_laplacian(FC=FC) + (x, stats) = fgmres(A, b, restart=restart, memory=memory) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + M = Diagonal(1 ./ diag(A)) + (x, stats) = fgmres(A, b, M=M, restart=restart, memory=memory) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + N = Diagonal(1 ./ diag(A)) + (x, stats) = fgmres(A, b, N=N, restart=restart, memory=memory) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + N = Diagonal(1 ./ sqrt.(diag(A))) + N = Diagonal(1 ./ sqrt.(diag(A))) + (x, stats) = fgmres(A, b, M=M, N=N, restart=restart, memory=memory) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + end + + A, b = polar_poisson(FC=FC) + J = inv(Diagonal(A)) # Jacobi preconditioner + N = FlexiblePreconditioner(J, 1.0) + (x, stats) = fgmres(A, b, N=N) + r = b - A * x + resid = norm(r) / norm(b) + 
@test(resid ≤ fgmres_tol) + @test(stats.solved) + end + end +end diff --git a/test/test_fom.jl b/test/test_fom.jl index 9469b6b9c..0500d139f 100644 --- a/test/test_fom.jl +++ b/test/test_fom.jl @@ -126,13 +126,6 @@ end # test callback function - solver = FomSolver(A, b) - tol = 1.0e-1 - cb_n2 = TestCallbackN2(A, b, tol = tol) - fom!(solver, A, b, restart = true, callback = cb_n2) - @test solver.stats.status == "user-requested exit" - @test cb_n2(solver) - @test_throws TypeError fom(A, b, restart = true, callback = solver -> "string", history = true) end end diff --git a/test/test_lnlq.jl b/test/test_lnlq.jl index 888119db8..b308609fa 100644 --- a/test/test_lnlq.jl +++ b/test/test_lnlq.jl @@ -1,5 +1,5 @@ function test_lnlq(A, b,transfer_to_craig) - (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0) + (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0) r = b - A * x resid = norm(r) / norm(b) return (x, y, stats, resid) @@ -61,8 +61,8 @@ end # Test regularization A, b, λ = regularization(FC=FC) - (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0) - (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, etolx=1e-10, etoly=1e-10, λ=λ) + (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0) + (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, utolx=1e-10, utoly=1e-10, λ=λ) for (x, y) in ((x, y), (xₛ, yₛ)) s = λ * y r = b - (A * x + λ * s) diff --git a/test/test_minres_qlp.jl b/test/test_minres_qlp.jl index 6e983e49a..0b4d2046d 100644 --- a/test/test_minres_qlp.jl +++ b/test/test_minres_qlp.jl @@ -80,7 +80,7 @@ solver = MinresQlpSolver(A, b) tol = 1.0 cb_n2 = TestCallbackN2(A, b, tol = tol) - minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, ctol = 0.0, callback = cb_n2) + minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, Artol = 0.0, callback = cb_n2) @test 
solver.stats.status == "user-requested exit" @test cb_n2(solver) diff --git a/test/test_mp.jl b/test/test_mp.jl index b7aa43d38..96300bea6 100644 --- a/test/test_mp.jl +++ b/test/test_mp.jl @@ -3,55 +3,57 @@ for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, - :cg_lanczos_shift) - for T in (Float16, Float32, Float64, BigFloat) - for FC in (T, Complex{T}) - A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1)) - B = spdiagm(-1 => -ones(FC,n-1), 0 => 5*ones(FC,n), 1 => -ones(FC,n-1)) - b = ones(FC, n) - c = -ones(FC, n) - shifts = [-one(T), one(T)] - if fn in (:usymlq, :usymqr) - x, _ = @eval $fn($A, $b, $c) - elseif fn in (:trilqr, :bilqr) - x, t, _ = @eval $fn($A, $b, $c) - elseif fn in (:tricg, :trimr) - x, y, _ = @eval $fn($A, $b, $c) - elseif fn == :gpmr - x, y, _ = @eval $fn($A, $B, $b, $c) - elseif fn in (:lnlq, :craig, :craigmr) - x, y, _ = @eval $fn($A, $b) - elseif fn == :cg_lanczos_shift - x, _ = @eval $fn($A, $b, $shifts) - else - x, _ = @eval $fn($A, $b) - end - atol = √eps(T) - rtol = √eps(T) - Κ = (T == Float16 ? 
10 : 1) - if fn in (:tricg, :trimr) - @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) - @test norm(A' * x - y - c) ≤ Κ * (atol + norm([b; c]) * rtol) - @test eltype(y) == FC - elseif fn == :gpmr - @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) - @test norm(B * x + y - c) ≤ Κ * (atol + norm([b; c]) * rtol) - @test eltype(y) == FC - elseif fn == :cg_lanczos_shift - @test norm((A - I) * x[1] - b) ≤ Κ * (atol + norm(b) * rtol) - @test norm((A + I) * x[2] - b) ≤ Κ * (atol + norm(b) * rtol) - @test eltype(x) == Vector{FC} - else - @test norm(A * x - b) ≤ Κ * (atol + norm(b) * rtol) - @test eltype(x) == FC - end - if fn in (:trilqr, :bilqr) - @test norm(A' * t - c) ≤ Κ * (atol + norm(c) * rtol) - @test eltype(t) == FC - end - if fn in (:lnlq, :craig, :craigmr) - @test norm(A * A' * y - b) ≤ Κ * (atol + norm(b) * rtol) - @test eltype(y) == FC + :fgmres, :cg_lanczos_shift) + @testset "$fn" begin + for T in (Float16, Float32, Float64, BigFloat) + for FC in (T, Complex{T}) + A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1)) + B = spdiagm(-1 => -ones(FC,n-1), 0 => 5*ones(FC,n), 1 => -ones(FC,n-1)) + b = ones(FC, n) + c = -ones(FC, n) + shifts = [-one(T), one(T)] + if fn in (:usymlq, :usymqr) + x, _ = @eval $fn($A, $b, $c) + elseif fn in (:trilqr, :bilqr) + x, t, _ = @eval $fn($A, $b, $c) + elseif fn in (:tricg, :trimr) + x, y, _ = @eval $fn($A, $b, $c) + elseif fn == :gpmr + x, y, _ = @eval $fn($A, $B, $b, $c) + elseif fn in (:lnlq, :craig, :craigmr) + x, y, _ = @eval $fn($A, $b) + elseif fn == :cg_lanczos_shift + x, _ = @eval $fn($A, $b, $shifts) + else + x, _ = @eval $fn($A, $b) + end + atol = √eps(T) + rtol = √eps(T) + Κ = (T == Float16 ? 
10 : 1) + if fn in (:tricg, :trimr) + @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) + @test norm(A' * x - y - c) ≤ Κ * (atol + norm([b; c]) * rtol) + @test eltype(y) == FC + elseif fn == :gpmr + @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) + @test norm(B * x + y - c) ≤ Κ * (atol + norm([b; c]) * rtol) + @test eltype(y) == FC + elseif fn == :cg_lanczos_shift + @test norm((A - I) * x[1] - b) ≤ Κ * (atol + norm(b) * rtol) + @test norm((A + I) * x[2] - b) ≤ Κ * (atol + norm(b) * rtol) + @test eltype(x) == Vector{FC} + else + @test norm(A * x - b) ≤ Κ * (atol + norm(b) * rtol) + @test eltype(x) == FC + end + if fn in (:trilqr, :bilqr) + @test norm(A' * t - c) ≤ Κ * (atol + norm(c) * rtol) + @test eltype(t) == FC + end + if fn in (:lnlq, :craig, :craigmr) + @test norm(A * A' * y - b) ≤ Κ * (atol + norm(b) * rtol) + @test eltype(y) == FC + end end end end diff --git a/test/test_processes.jl b/test/test_processes.jl new file mode 100644 index 000000000..eb3ad19af --- /dev/null +++ b/test/test_processes.jl @@ -0,0 +1,146 @@ +""" + P = permutation_paige(k) + +Return the sparse (2k) × (2k) matrix + + [e₁ • eₖ ] + [ e₁ • eₖ] +""" +function permutation_paige(k) + P = spzeros(Float64, 2k, 2k) + for i = 1:k + P[i,2i-1] = 1.0 + P[i+k,2i] = 1.0 + end + return P +end + +@testset "processes" begin + m = 250 + n = 500 + k = 20 + + for FC in (Float64, ComplexF64) + R = real(FC) + nbits_FC = sizeof(FC) + nbits_R = sizeof(R) + nbits_I = sizeof(Int) + + @testset "Data Type: $FC" begin + + @testset "Hermitian Lanczos" begin + A, b = symmetric_indefinite(n, FC=FC) + V, T = hermitian_lanczos(A, b, k) + + @test A * V[:,1:k] ≈ V * T + + storage_hermitian_lanczos_bytes(n, k) = 4k * nbits_I + (3k-1) * nbits_R + n*(k+1) * nbits_FC + + expected_hermitian_lanczos_bytes = storage_hermitian_lanczos_bytes(n, k) + actual_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, b, k) + @test expected_hermitian_lanczos_bytes ≤ actual_hermitian_lanczos_bytes ≤ 1.02 * 
expected_hermitian_lanczos_bytes + end + + @testset "Non-Hermitian Lanczos" begin + A, b = nonsymmetric_definite(n, FC=FC) + c = -b + V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) + + @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]' + @test A * V[:,1:k] ≈ V * T + @test A' * U[:,1:k] ≈ U * Tᴴ + + storage_nonhermitian_lanczos_bytes(n, k) = 4k * nbits_I + (6k-2) * nbits_FC + 2*n*(k+1) * nbits_FC + + expected_nonhermitian_lanczos_bytes = storage_nonhermitian_lanczos_bytes(n, k) + actual_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, b, c, k) + @test expected_nonhermitian_lanczos_bytes ≤ actual_nonhermitian_lanczos_bytes ≤ 1.02 * expected_nonhermitian_lanczos_bytes + end + + @testset "Arnoldi" begin + A, b = nonsymmetric_indefinite(n, FC=FC) + V, H = arnoldi(A, b, k) + + @test A * V[:,1:k] ≈ V * H + + function storage_arnoldi_bytes(n, k) + return k*(k+1) * nbits_FC + n*(k+1) * nbits_FC + end + + expected_arnoldi_bytes = storage_arnoldi_bytes(n, k) + actual_arnoldi_bytes = @allocated arnoldi(A, b, k) + @test expected_arnoldi_bytes ≤ actual_arnoldi_bytes ≤ 1.02 * expected_arnoldi_bytes + end + + @testset "Golub-Kahan" begin + A, b = under_consistent(m, n, FC=FC) + V, U, L = golub_kahan(A, b, k) + B = L[1:k+1,1:k] + + @test A * V[:,1:k] ≈ U * B + @test A' * U ≈ V * L' + @test A' * A * V[:,1:k] ≈ V * L' * B + @test A * A' * U[:,1:k] ≈ U * B * L[1:k,1:k]' + + storage_golub_kahan_bytes(m, n, k) = 3*(k+1) * nbits_I + (2k+1) * nbits_R + (n+m)*(k+1) * nbits_FC + + expected_golub_kahan_bytes = storage_golub_kahan_bytes(m, n, k) + actual_golub_kahan_bytes = @allocated golub_kahan(A, b, k) + @test expected_golub_kahan_bytes ≤ actual_golub_kahan_bytes ≤ 1.02 * expected_golub_kahan_bytes + end + + @testset "Saunders-Simon-Yip" begin + A, b = under_consistent(m, n, FC=FC) + _, c = over_consistent(n, m, FC=FC) + V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k) + + @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]' + @test A * U[:,1:k] ≈ V * T + @test A' * V[:,1:k] ≈ U * Tᴴ + @test A' * A * U[:,1:k-1] ≈ U * 
Tᴴ * T[1:k,1:k-1] + @test A * A' * V[:,1:k-1] ≈ V * T * Tᴴ[1:k,1:k-1] + + K = [zeros(FC,m,m) A; A' zeros(FC,n,n)] + Pₖ = permutation_paige(k) + Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ + Pₖ₊₁ = permutation_paige(k+1) + Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁ + G = Pₖ₊₁' * [zeros(FC,k+1,k) T; Tᴴ zeros(FC,k+1,k)] * Pₖ + @test K * Wₖ ≈ Wₖ₊₁ * G + + storage_saunders_simon_yip_bytes(m, n, k) = 4k * nbits_I + (6k-2) * nbits_FC + (n+m)*(k+1) * nbits_FC + + expected_saunders_simon_yip_bytes = storage_saunders_simon_yip_bytes(m, n, k) + actual_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, b, c, k) + @test expected_saunders_simon_yip_bytes ≤ actual_saunders_simon_yip_bytes ≤ 1.02 * expected_saunders_simon_yip_bytes + end + + @testset "Montoison-Orban" begin + A, b = under_consistent(m, n, FC=FC) + B, c = over_consistent(n, m, FC=FC) + V, H, U, F = montoison_orban(A, B, b, c, k) + + @test A * U[:,1:k] ≈ V * H + @test B * V[:,1:k] ≈ U * F + @test B * A * U[:,1:k-1] ≈ U * F * H[1:k,1:k-1] + @test A * B * V[:,1:k-1] ≈ V * H * F[1:k,1:k-1] + + K = [zeros(FC,m,m) A; B zeros(FC,n,n)] + Pₖ = permutation_paige(k) + Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ + Pₖ₊₁ = permutation_paige(k+1) + Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁ + G = Pₖ₊₁' * [zeros(FC,k+1,k) H; F zeros(FC,k+1,k)] * Pₖ + @test K * Wₖ ≈ Wₖ₊₁ * G + + function storage_montoison_orban_bytes(m, n, k) + return 2*k*(k+1) * nbits_FC + (n+m)*(k+1) * nbits_FC + end + + expected_montoison_orban_bytes = storage_montoison_orban_bytes(m, n, k) + actual_montoison_orban_bytes = @allocated montoison_orban(A, B, b, c, k) + @test expected_montoison_orban_bytes ≤ actual_montoison_orban_bytes ≤ 1.02 * expected_montoison_orban_bytes + end + end + end +end diff --git a/test/test_qmr.jl b/test/test_qmr.jl index 184b9877d..4a6b8c1c9 100644 --- a/test/test_qmr.jl +++ b/test/test_qmr.jl @@ -58,10 +58,10 @@ @test(resid ≤ qmr_tol) @test(stats.solved) - # Test bᵀc == 0 + # 
Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = qmr(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function solver = QmrSolver(A, b) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index 468fa5a05..71885029f 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -11,1139 +11,183 @@ function test_solvers(FC) nshifts = 5 T = real(FC) S = Vector{FC} + solvers = Dict{Symbol, KrylovSolver}() @eval begin - cg_solver = $(KRYLOV_SOLVERS[:cg])($n, $n, $S) - symmlq_solver = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S) - minres_solver = $(KRYLOV_SOLVERS[:minres])($n, $n, $S) - cg_lanczos_solver = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S) - diom_solver = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S) - fom_solver = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S) - dqgmres_solver = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S) - gmres_solver = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S) - cr_solver = $(KRYLOV_SOLVERS[:cr])($n, $n, $S) - crmr_solver = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S) - cgs_solver = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S) - bicgstab_solver = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S) - craigmr_solver = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S) - cgne_solver = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S) - lnlq_solver = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S) - craig_solver = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) - lslq_solver = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S) - cgls_solver = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) - lsqr_solver = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) - crls_solver = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) - lsmr_solver = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) - usymqr_solver = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S) - trilqr_solver = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S) - bilq_solver = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S) - bilqr_solver = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S) - minres_qlp_solver = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S) - qmr_solver = $(KRYLOV_SOLVERS[:qmr])($n, $n, 
$S) - usymlq_solver = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S) - tricg_solver = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S) - trimr_solver = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S) - gpmr_solver = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S) - cg_lanczos_shift_solver = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $m, $nshifts, $S) + $solvers[:cg] = $(KRYLOV_SOLVERS[:cg])($n, $n, $S) + $solvers[:symmlq] = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S) + $solvers[:minres] = $(KRYLOV_SOLVERS[:minres])($n, $n, $S) + $solvers[:cg_lanczos] = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S) + $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S) + $solvers[:diom] = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S) + $solvers[:fom] = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S) + $solvers[:dqgmres] = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S) + $solvers[:gmres] = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S) + $solvers[:fgmres] = $(KRYLOV_SOLVERS[:fgmres])($n, $n, $mem, $S) + $solvers[:cr] = $(KRYLOV_SOLVERS[:cr])($n, $n, $S) + $solvers[:crmr] = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S) + $solvers[:cgs] = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S) + $solvers[:bicgstab] = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S) + $solvers[:craigmr] = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S) + $solvers[:cgne] = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S) + $solvers[:lnlq] = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S) + $solvers[:craig] = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) + $solvers[:lslq] = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S) + $solvers[:cgls] = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) + $solvers[:lsqr] = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) + $solvers[:crls] = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) + $solvers[:lsmr] = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) + $solvers[:usymqr] = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S) + $solvers[:trilqr] = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S) + $solvers[:bilq] = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S) + $solvers[:bilqr] = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S) + $solvers[:minres_qlp] = 
$(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S) + $solvers[:qmr] = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S) + $solvers[:usymlq] = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S) + $solvers[:tricg] = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S) + $solvers[:trimr] = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S) + $solvers[:gpmr] = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S) + $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S) end - for i = 1 : 3 - A = i * A - Au = i * Au - Ao = i * Ao - b = 5 * b - c = 3 * c - - solver = solve!(cg_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(symmlq_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(minres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cg_lanczos_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cg_lanczos_shift_solver, A, b, shifts) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = 
solve!(diom_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(fom_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(dqgmres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(gmres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cr_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(crmr_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cgs_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == 2 * niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test 
issolved(solver) - - solver = solve!(bicgstab_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == 2 * niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(craigmr_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(cgne_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lnlq_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(craig_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(lslq_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cgls_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) 
== niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lsqr_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(crls_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lsmr_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(usymqr_solver, Ao, b, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(trilqr_solver, A, b, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved_primal(solver) - @test issolved_dual(solver) - @test issolved(solver) - - solver = solve!(bilq_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test 
issolved(solver) - - solver = solve!(bilqr_solver, A, b, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved_primal(solver) - @test issolved_dual(solver) - @test issolved(solver) - - solver = solve!(minres_qlp_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(qmr_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(usymlq_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(tricg_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(trimr_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = 
solve!(gpmr_solver, Ao, Au, b, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test Bprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) + @testset "Check compatibility between KrylovSolvers and the dimension of the linear problems" begin + A2 = FC.(get_div_grad(2, 2, 2)) + n2 = size(A2, 1) + m2 = div(n2, 2) + Au2 = A2[1:m2,:] + Ao2 = A2[:,1:m2] + b2 = Ao2 * ones(FC, m2) + c2 = Au2 * ones(FC, n2) + shifts2 = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0] + for (method, solver) in solvers + if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr) + @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2) + end + method == :cg_lanczos_shift && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, shifts2) + method == :cg_lanczos_shift && @test_throws ErrorException("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $(length(shifts2))") solve!(solver, A, b, shifts2) + method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2) + method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2) + method ∈ (:bilqr, :trilqr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, b2) + method == :gpmr && @test_throws 
ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, Au2, b2, c2) + method ∈ (:tricg, :trimr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2) + method == :usymlq && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2, b2) + method == :usymqr && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2) + end end - io = IOBuffer() - show(io, cg_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CgSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Ap│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, symmlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │SymmlqSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Mvold│ Vector{$FC}│ 64│ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ w̅│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ clist│ Vector{$T}│ 5│ - │ zlist│ Vector{$T}│ 5│ - │ sprod│ Vector{$T}│ 5│ - │ warm_start│ Bool│ 0│ - 
└────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, minres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │MinresSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r1│ Vector{$FC}│ 64│ - │ r2│ Vector{$FC}│ 64│ - │ w1│ Vector{$FC}│ 64│ - │ w2│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cg_lanczos_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────────┬───────────────┬─────────────────┐ - │CgLanczosSolver│Precision: $FC │Architecture: CPU│ - ├───────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_prev│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cg_lanczos_shift_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────────────┬───────────────────┬─────────────────┐ - │CgLanczosShiftSolver│ Precision: $FC │Architecture: CPU│ - 
├────────────────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────────────┼───────────────────┼─────────────────┤ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_prev│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ x│Vector{Vector{$FC}}│ 5 x 64│ - │ p│Vector{Vector{$FC}}│ 5 x 64│ - │ σ│ Vector{$T}│ 5│ - │ δhat│ Vector{$T}│ 5│ - │ ω│ Vector{$T}│ 5│ - │ γ│ Vector{$T}│ 5│ - │ rNorms│ Vector{$T}│ 5│ - │ converged│ BitVector│ 5│ - │ not_cv│ BitVector│ 5│ - └────────────────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, diom_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │DiomSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ t│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │ w│ Vector{$FC}│ 0│ - │ P│Vector{Vector{$FC}}│ 10 x 64│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ L│ Vector{$FC}│ 10│ - │ H│ Vector{$FC}│ 12│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, fom_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │ FomSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ w│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - │ 
V│Vector{Vector{$FC}}│ 10 x 64│ - │ l│ Vector{$FC}│ 10│ - │ z│ Vector{$FC}│ 10│ - │ U│ Vector{$FC}│ 55│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, dqgmres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌─────────────┬───────────────────┬─────────────────┐ - │DqgmresSolver│ Precision: $FC │Architecture: CPU│ - ├─────────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├─────────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ t│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │ w│ Vector{$FC}│ 0│ - │ P│Vector{Vector{$FC}}│ 10 x 64│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ c│ Vector{$T}│ 10│ - │ s│ Vector{$FC}│ 10│ - │ H│ Vector{$FC}│ 12│ - │ warm_start│ Bool│ 0│ - └─────────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, gmres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────────┬─────────────────┐ - │GmresSolver│ Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ w│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ c│ Vector{$T}│ 10│ - │ s│ Vector{$FC}│ 10│ - │ z│ Vector{$FC}│ 10│ - │ R│ Vector{$FC}│ 55│ - │ warm_start│ Bool│ 0│ - │ inner_iter│ Int64│ 0│ - └───────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" 
=> "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Ar│ Vector{$FC}│ 64│ - │ Mq│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, crmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CrmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Aᵀr│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ Mq│ Vector{$FC}│ 0│ - │ s│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgs_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CgsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │Attribute │ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ ts│ Vector{$FC}│ 64│ - │ yz│ Vector{$FC}│ 0│ - │ vw│ Vector{$FC}│ 0│ - 
│warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bicgstab_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────────┬───────────────┬─────────────────┐ - │BicgstabSolver│Precision: $FC │Architecture: CPU│ - ├──────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 64│ - │ s│ Vector{$FC}│ 64│ - │ qd│ Vector{$FC}│ 64│ - │ yz│ Vector{$FC}│ 0│ - │ t│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └──────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, craigmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌─────────────┬───────────────┬─────────────────┐ - │CraigmrSolver│Precision: $FC │Architecture: CPU│ - ├─────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├─────────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ d│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ wbar│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - └─────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgne_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - 
┌──────────┬───────────────┬─────────────────┐ - │CgneSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Aᵀz│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ s│ Vector{$FC}│ 0│ - │ z│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lnlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LnlqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ w̄│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, craig_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │CraigSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ w2│ Vector{$FC}│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - 
@test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lslq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LslqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ w̄│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgls_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CglsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ s│ Vector{$FC}│ 32│ - │ r│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Mr│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lsqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LsqrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - 
│ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, crls_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CrlsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ Ar│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ r│ Vector{$FC}│ 64│ - │ Ap│ Vector{$FC}│ 64│ - │ s│ Vector{$FC}│ 64│ - │ Ms│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lsmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LsmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ h│ Vector{$FC}│ 32│ - │ hbar│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, usymqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - 
│UsymqrSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 32│ - │ wₖ₋₂│ Vector{$FC}│ 32│ - │ wₖ₋₁│ Vector{$FC}│ 32│ - │ uₖ₋₁│ Vector{$FC}│ 32│ - │ uₖ│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, trilqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │TrilqrSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Δy│ Vector{$FC}│ 0│ - │ y│ Vector{$FC}│ 64│ - │ wₖ₋₃│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bilq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │BilqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ 
Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bilqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │BilqrSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Δy│ Vector{$FC}│ 0│ - │ y│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ wₖ₋₃│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, minres_qlp_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────────┬───────────────┬─────────────────┐ - │MinresQlpSolver│Precision: $FC │Architecture: CPU│ - ├───────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ wₖ₋₁│ Vector{$FC}│ 64│ - │ wₖ│ Vector{$FC}│ 64│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 64│ - │ M⁻¹vₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], 
init=expected) - - io = IOBuffer() - show(io, qmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ QmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ wₖ₋₁│ Vector{$FC}│ 64│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, usymlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │UsymlqSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 32│ - │ vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, tricg_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │TricgSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ y│ Vector{$FC}│ 64│ - │ N⁻¹uₖ₋₁│ Vector{$FC}│ 
64│ - │ N⁻¹uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₁│ Vector{$FC}│ 64│ - │ gy₂ₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 32│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│ - │ M⁻¹vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₁│ Vector{$FC}│ 32│ - │ gx₂ₖ│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ uₖ│ Vector{$FC}│ 0│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, trimr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │TrimrSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ y│ Vector{$FC}│ 64│ - │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│ - │ N⁻¹uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₃│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₂│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₁│ Vector{$FC}│ 64│ - │ gy₂ₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 32│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│ - │ M⁻¹vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₃│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₂│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₁│ Vector{$FC}│ 32│ - │ gx₂ₖ│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ uₖ│ Vector{$FC}│ 0│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) + @testset "Test the keyword argument timemax" begin + timemax = 0.0 + for (method, solver) in solvers + method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr) && solve!(solver, A, b, timemax=timemax) + method == :cg_lanczos_shift 
&& solve!(solver, A, b, shifts, timemax=timemax) + method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && solve!(solver, Au, c, timemax=timemax) + method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && solve!(solver, Ao, b, timemax=timemax) + method ∈ (:bilqr, :trilqr) && solve!(solver, A, b, b, timemax=timemax) + method == :gpmr && solve!(solver, Ao, Au, b, c, timemax=timemax) + method ∈ (:tricg, :trimr) && solve!(solver, Au, c, b, timemax=timemax) + method == :usymlq && solve!(solver, Au, c, b, timemax=timemax) + method == :usymqr && solve!(solver, Ao, b, c, timemax=timemax) + @test solver.stats.status == "time limit exceeded" + end + end - io = IOBuffer() - show(io, gpmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │GpmrSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ wA│ Vector{$FC}│ 0│ - │ wB│ Vector{$FC}│ 0│ - │ dA│ Vector{$FC}│ 64│ - │ dB│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 0│ - │ p│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ U│Vector{Vector{$FC}}│ 10 x 32│ - │ gs│ Vector{$FC}│ 40│ - │ gc│ Vector{$T}│ 40│ - │ zt│ Vector{$FC}│ 20│ - │ R│ Vector{$FC}│ 210│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) + for (method, solver) in solvers + @testset "$(method)" begin + for i = 1 : 3 + A = i * A + Au = i * Au + Ao = i * Ao + b = 5 * b + c = 3 * c + + if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, + :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr, :cg_lanczos_shift) + method == :cg_lanczos_shift ? 
solve!(solver, A, b, shifts) : solve!(solver, A, b) + niter = niterations(solver) + @test Aprod(solver) == (method ∈ (:cgs, :bicgstab) ? 2 * niter : niter) + @test Atprod(solver) == (method ∈ (:bilq, :qmr) ? niter : 0) + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + if method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) + solve!(solver, Au, c) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver, 1) === solver.x + @test nsolution(solver) == (method ∈ (:cgne, :crmr) ? 1 : 2) + (nsolution == 2) && (@test solution(solver, 2) == solver.y) + end + + if method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) + solve!(solver, Ao, b) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + if method ∈ (:bilqr, :trilqr) + solve!(solver, A, b, b) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver, 1) === solver.x + @test solution(solver, 2) === solver.y + @test nsolution(solver) == 2 + @test issolved_primal(solver) + @test issolved_dual(solver) + end + + if method ∈ (:tricg, :trimr, :gpmr) + method == :gpmr ? solve!(solver, Ao, Au, b, c) : solve!(solver, Au, c, b) + niter = niterations(solver) + @test Aprod(solver) == niter + method != :gpmr && (@test Atprod(solver) == niter) + method == :gpmr && (@test Bprod(solver) == niter) + @test solution(solver, 1) === solver.x + @test solution(solver, 2) === solver.y + @test nsolution(solver) == 2 + end + + if method ∈ (:usymlq, :usymqr) + method == :usymlq ? 
solve!(solver, Au, c, b) : solve!(solver, Ao, b, c) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + @test niter > 0 + @test statistics(solver) === solver.stats + @test issolved(solver) + end + + io = IOBuffer() + show(io, solver, show_stats=false) + showed = String(take!(io)) + + # Test that the lines have the same length + str = split(showed, '\n', keepempty=false) + len_row = length(str[1]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_row, &, str) + + # Test that the columns have the same length + str2 = split(showed, ['│','┌','┬','┐','├','┼','┤','└','┴','┴','┘','\n'], keepempty=false) + len_col1 = length(str2[1]) + len_col2 = length(str2[2]) + len_col3 = length(str2[3]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col1, &, str2[1:3:end-2]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col2, &, str2[2:3:end-1]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col3, &, str2[3:3:end]) + + # Code coverage + show(io, solver, show_stats=true) + end + end end @testset "solvers" begin diff --git a/test/test_stats.jl b/test/test_stats.jl index 4289a78a3..f4c212d50 100644 --- a/test/test_stats.jl +++ b/test/test_stats.jl @@ -1,28 +1,30 @@ @testset "stats" begin - stats = Krylov.SimpleStats(0, true, true, Float64[1.0], Float64[2.0], Float64[], "t") + stats = Krylov.SimpleStats(0, true, true, Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Simple stats + expected = """SimpleStats niter: 0 solved: true inconsistent: true residuals: [ 1.0e+00 ] Aresiduals: [ 2.0e+00 ] κ₂(A): [] - status: t""" + timer: 1.23s + status: unknown""" @test 
strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), "t") + stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), 0.1234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Lsmr stats + expected = """LsmrStats niter: 0 solved: true inconsistent: true @@ -33,53 +35,59 @@ κ₂(A): 5.0 ‖A‖F: 6.0 xNorm: 7.0 - status: t""" + timer: 123.40ms + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, "t") + stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Lanczos stats + expected = """LanczosStats niter: 0 solved: true residuals: [ 3.0e+00 ] indefinite: true ‖A‖F: NaN κ₂(A): NaN - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, "t") + stats = 
Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, 0.00056789, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LanczosShift stats + expected = """LanczosShiftStats niter: 0 solved: true residuals: [[0.9, 0.5], [0.6, 0.4, 0.1]] indefinite: Bool[0, 1] ‖A‖F: NaN κ₂(A): NaN - status: t""" - @test (VERSION < v"1.5") || strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) + timer: 567.89μs + status: unknown""" + @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, "t") + stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Symmlq stats + expected = """SymmlqStats niter: 0 solved: true residuals: [ 4.0e+00 ] @@ -88,53 +96,59 @@ errors (cg): [ 7.0e+00 ✗✗✗✗ ] ‖A‖F: NaN κ₂(A): NaN - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], "t") + stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = 
"""Adjoint stats + expected = """AdjointStats niter: 0 solved primal: true solved dual: true residuals primal: [ 8.0e+00 ] residuals dual: [ 9.0e+00 ] - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], "t") + stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LNLQ stats + expected = """LNLQStats niter: 0 solved: true residuals: [ 1.0e+01 ] error with bnd: false error bnd x: [ 1.1e+01 ] error bnd y: [ 1.2e+01 ] - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], "t") + stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LSLQ stats + expected = """LSLQStats niter: 0 solved: true inconsistent: false @@ -144,9 +158,11 @@ error with bnd: false error bound LQ: [ 1.6e+01 ] error bound CG: [ 1.7e+01 ] - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION 
< v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 end diff --git a/test/test_trilqr.jl b/test/test_trilqr.jl index 7d7927372..baf8a597e 100644 --- a/test/test_trilqr.jl +++ b/test/test_trilqr.jl @@ -74,7 +74,7 @@ @test(resid_dual ≤ trilqr_tol) @test(stats.solved_dual) - # Test consistent Ax = b and inconsistent Aᵀt = c. + # Test consistent Ax = b and inconsistent Aᴴt = c. A, b, c = rectangular_adjoint(FC=FC) (x, t, stats) = trilqr(A, b, c) diff --git a/test/test_utils.jl b/test/test_utils.jl index ed72056b6..f1c3ca44e 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -1,50 +1,51 @@ include("get_div_grad.jl") include("gen_lsq.jl") include("check_min_norm.jl") +include("callback_utils.jl") # Symmetric and positive definite systems. function symmetric_definite(n :: Int=10; FC=Float64) - α = FC <: Complex ? im : 1 + α = FC <: Complex ? FC(im) : one(FC) A = spdiagm(-1 => α * ones(FC, n-1), 0 => 4 * ones(FC, n), 1 => conj(α) * ones(FC, n-1)) - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Symmetric and indefinite systems. function symmetric_indefinite(n :: Int=10; FC=Float64) - α = FC <: Complex ? im : 1 + α = FC <: Complex ? FC(im) : one(FC) A = spdiagm(-1 => α * ones(FC, n-1), 0 => ones(FC, n), 1 => conj(α) * ones(FC, n-1)) - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Nonsymmetric and positive definite systems. function nonsymmetric_definite(n :: Int=10; FC=Float64) if FC <: Complex - A = [i == j ? n * one(FC) : im * one(FC) for i=1:n, j=1:n] + A = [i == j ? n * one(FC) : FC(im) * one(FC) for i=1:n, j=1:n] else A = [i == j ? n * one(FC) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] end - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Nonsymmetric and indefinite systems. function nonsymmetric_indefinite(n :: Int=10; FC=Float64) if FC <: Complex - A = [i == j ? n * (-one(FC))^(i*j) : im * one(FC) for i=1:n, j=1:n] + A = [i == j ? 
n * (-one(FC))^(i*j) : FC(im) * one(FC) for i=1:n, j=1:n] else A = [i == j ? n * (-one(FC))^(i*j) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] end - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Underdetermined and consistent systems. function under_consistent(n :: Int=10, m :: Int=25; FC=Float64) n < m || error("Square or overdetermined system!") - α = FC <: Complex ? im : 1 - A = [i/j - α * j/i for i=1:n, j=1:m] + α = FC <: Complex ? FC(im) : one(FC) + A = FC[i/j - α * j/i for i=1:n, j=1:m] b = A * ones(FC, m) return A, b end @@ -52,7 +53,7 @@ end # Underdetermined and inconsistent systems. function under_inconsistent(n :: Int=10, m :: Int=25; FC=Float64) n < m || error("Square or overdetermined system!") - α = FC <: Complex ? 1 + im : 1 + α = FC <: Complex ? FC(1 + im) : one(FC) A = α * ones(FC, n, m) b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n] return A, b @@ -84,8 +85,8 @@ end # Overdetermined and consistent systems. function over_consistent(n :: Int=25, m :: Int=10; FC=Float64) n > m || error("Underdetermined or square system!") - α = FC <: Complex ? im : 1 - A = [i/j - α * j/i for i=1:n, j=1:m] + α = FC <: Complex ? FC(im) : one(FC) + A = FC[i/j - α * j/i for i=1:n, j=1:m] b = A * ones(FC, m) return A, b end @@ -93,7 +94,7 @@ end # Overdetermined and inconsistent systems. function over_inconsistent(n :: Int=25, m :: Int=10; FC=Float64) n > m || error("Underdetermined or square system!") - α = FC <: Complex ? 1 + im : 1 + α = FC <: Complex ? FC(1 + im) : one(FC) A = α * ones(FC, n, m) b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n] return A, b @@ -162,23 +163,23 @@ end function underdetermined_adjoint(n :: Int=100, m :: Int=200; FC=Float64) n < m || error("Square or overdetermined system!") A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m] - b = A * [1:m;] - c = A' * [-n:-1;] + b = A * FC[1:m;] + c = A' * FC[-n:-1;] return A, b, c end # Square consistent adjoint systems. 
function square_adjoint(n :: Int=100; FC=Float64) A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] - b = A * [1:n;] - c = A' * [-n:-1;] + b = A * FC[1:n;] + c = A' * FC[-n:-1;] return A, b, c end -# Adjoint systems with Ax = b underdetermined consistent and Aᵀt = c overdetermined insconsistent. +# Adjoint systems with Ax = b underdetermined consistent and Aᴴt = c overdetermined insconsistent. function rectangular_adjoint(n :: Int=10, m :: Int=25; FC=Float64) - Aᵀ, c = over_inconsistent(m, n; FC=FC) - A = adjoint(Aᵀ) + Aᴴ, c = over_inconsistent(m, n; FC=FC) + A = adjoint(Aᴴ) b = A * ones(FC, m) return A, b, c end @@ -187,8 +188,8 @@ end function overdetermined_adjoint(n :: Int=200, m :: Int=100; FC=Float64) n > m || error("Underdetermined or square system!") A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m] - b = A * [1:m;] - c = A' * [-n:-1;] + b = A * FC[1:m;] + c = A' * FC[-n:-1;] return A, b, c end @@ -251,7 +252,7 @@ end # Square and preconditioned problems. 
function square_preconditioned(n :: Int=10; FC=Float64) A = ones(FC, n, n) + (n-1) * eye(n) - b = FC(10.0) * [1:n;] + b = 10 * FC[1:n;] M⁻¹ = FC(1/n) * eye(n) return A, b, M⁻¹ end @@ -363,110 +364,3 @@ function check_reset(stats :: KS) where KS <: Krylov.KrylovStats end end end - -# Test callback -mutable struct TestCallbackN2{T, S, M} - A::M - b::S - storage_vec::S - tol::T -end -TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol) - -function (cb_n2::TestCallbackN2)(solver) - mul!(cb_n2.storage_vec, cb_n2.A, solver.x) - cb_n2.storage_vec .-= cb_n2.b - return norm(cb_n2.storage_vec) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2Adjoint{T, S, M} - A::M - b::S - c::S - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol) - -function (cb_n2::TestCallbackN2Adjoint)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) - cb_n2.storage_vec1 .-= cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', solver.y) - cb_n2.storage_vec2 .-= cb_n2.c - return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) -end - -mutable struct TestCallbackN2Shifts{T, S, M} - A::M - b::S - shifts::Vector{T} - tol::T -end -TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol) - -function (cb_n2::TestCallbackN2Shifts)(solver) - r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x) - return all(map(norm, r) .≤ cb_n2.tol) -end - -mutable struct TestCallbackN2LS{T, S, M} - A::M - b::S - λ::T - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol) - -function (cb_n2::TestCallbackN2LS)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) - cb_n2.storage_vec1 .-= cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1) - cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x - return norm(cb_n2.storage_vec2) ≤ cb_n2.tol -end - 
-mutable struct TestCallbackN2LN{T, S, M} - A::M - b::S - λ::T - storage_vec::S - tol::T -end -TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol) - -function (cb_n2::TestCallbackN2LN)(solver) - mul!(cb_n2.storage_vec, cb_n2.A, solver.x) - cb_n2.storage_vec .-= cb_n2.b - cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s) - return norm(cb_n2.storage_vec) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2SaddlePts{T, S, M} - A::M - b::S - c::S - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2SaddlePts(A, b, c; tol = 0.1) = - TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol) - -function (cb_n2::TestCallbackN2SaddlePts)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.y) - cb_n2.storage_vec1 .+= solver.x .- cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', solver.x) - cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c - return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) -end - -function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol) - get_x_restarted_gmres!(solver, A, stor, N) - x = stor.x - mul!(storage_vec, A, x) - storage_vec .-= b - return (norm(storage_vec) ≤ tol) -end diff --git a/test/test_verbose.jl b/test/test_verbose.jl new file mode 100644 index 000000000..ebc42c8f7 --- /dev/null +++ b/test/test_verbose.jl @@ -0,0 +1,60 @@ +function test_verbose(FC) + A = FC.(get_div_grad(4, 4, 4)) # Dimension m x n + m,n = size(A) + k = div(n, 2) + Au = A[1:k,:] # Dimension k x n + Ao = A[:,1:k] # Dimension m x k + b = Ao * ones(FC, k) # Dimension m + c = Au * ones(FC, n) # Dimension k + mem = 10 + + T = real(FC) + shifts = T[1; 2; 3; 4; 5] + nshifts = 5 + + for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, + :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, + :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, + :fgmres, :cg_lanczos_shift) 
+ + @testset "$fn" begin + io = IOBuffer() + if fn in (:trilqr, :bilqr) + @eval $fn($A, $b, $b, verbose=1, iostream=$io) + elseif fn in (:tricg, :trimr) + @eval $fn($Au, $c, $b, verbose=1, iostream=$io) + elseif fn in (:lnlq, :craig, :craigmr, :cgne, :crmr) + @eval $fn($Au, $c, verbose=1, iostream=$io) + elseif fn in (:lslq, :lsqr, :lsmr, :cgls, :crls) + @eval $fn($Ao, $b, verbose=1, iostream=$io) + elseif fn == :usymlq + @eval $fn($Au, $c, $b, verbose=1, iostream=$io) + elseif fn == :usymqr + @eval $fn($Ao, $b, $c, verbose=1, iostream=$io) + elseif fn == :gpmr + @eval $fn($Ao, $Au, $b, $c, verbose=1, iostream=$io) + elseif fn == :cg_lanczos_shift + @eval $fn($A, $b, $shifts, verbose=1, iostream=$io) + else + @eval $fn($A, $b, verbose=1, iostream=$io) + end + + showed = String(take!(io)) + str = split(showed, '\n', keepempty=false) + nrows = length(str) + first_row = fn in (:bilqr, :trilqr) ? 3 : 2 + last_row = fn == :cg ? nrows-1 : nrows + str = str[first_row:last_row] + len_header = length(str[1]) + @test mapreduce(x -> length(x) == len_header, &, str) + end + end +end + +@testset "verbose" begin + for FC in (Float64, ComplexF64) + @testset "Data Type: $FC" begin + test_verbose(FC) + end + end +end diff --git a/test/test_warm_start.jl b/test/test_warm_start.jl index 66a1cbea7..c788ed7e8 100644 --- a/test/test_warm_start.jl +++ b/test/test_warm_start.jl @@ -8,41 +8,126 @@ function test_warm_start(FC) nshifts = 5 tol = 1.0e-6 + x, y, stats = bilqr(A, b, c, x0, y0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * y + resid = norm(s) / norm(c) + @test(resid ≤ tol) + + solver = BilqrSolver(A, b) + solve!(solver, A, b, c, x0, y0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * solver.y + resid = norm(s) / norm(c) + @test(resid ≤ tol) + + x, y, stats = trilqr(A, b, c, x0, y0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * y + resid = norm(s) / norm(c) + @test(resid ≤ 
tol) + + solver = TrilqrSolver(A, b) + solve!(solver, A, b, c, x0, y0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * solver.y + resid = norm(s) / norm(c) + @test(resid ≤ tol) + x, y, stats = tricg(A, b, b, x0, y0) r = [b - x - A * y; b - A' * x + y] resid = norm(r) / norm([b; b]) @test(resid ≤ tol) + solver = TricgSolver(A, b) + solve!(solver, A, b, b, x0, y0) + r = [b - solver.x - A * solver.y; b - A' * solver.x + solver.y] + resid = norm(r) / norm([b; b]) + @test(resid ≤ tol) + x, y, stats = trimr(A, b, b, x0, y0) r = [b - x - A * y; b - A' * x + y] resid = norm(r) / norm([b; b]) @test(resid ≤ tol) + solver = TrimrSolver(A, b) + solve!(solver, A, b, b, x0, y0) + r = [b - solver.x - A * solver.y; b - A' * solver.x + solver.y] + resid = norm(r) / norm([b; b]) + @test(resid ≤ tol) + x, y, stats = gpmr(A, A', b, b, x0, y0) r = [b - x - A * y; b - A' * x - y] resid = norm(r) / norm([b; b]) @test(resid ≤ tol) + solver = GpmrSolver(A, b) + solve!(solver, A, A', b, b, x0, y0) + r = [b - solver.x - A * solver.y; b - A' * solver.x - solver.y] + resid = norm(r) / norm([b; b]) + @test(resid ≤ tol) + x, stats = minres_qlp(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = MinresQlpSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = symmlq(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = SymmlqSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = cg(A, b, x0) r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = CgSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) x, stats = cr(A, b, x0) r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = CrSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) 
@test(resid ≤ tol) x, stats = cg_lanczos(A, b, x0) r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = CgLanczosSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) x, stats = minres(A, b, x0) @@ -50,70 +135,131 @@ function test_warm_start(FC) resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = MinresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = diom(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = DiomSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = dqgmres(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = DqgmresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = fom(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - + + solver = FomSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = gmres(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = GmresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + x, stats = fgmres(A, b, x0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = FgmresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = bicgstab(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = BicgstabSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = cgs(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = CgsSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / 
norm(b) + @test(resid ≤ tol) + x, stats = bilq(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, stats = qmr(A, b, x0) - r = b - A * x + solver = BilqSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, stats = usymlq(A, b, c, x0) + x, stats = qmr(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, stats = usymqr(A, b, c, x0) - r = b - A * x + solver = QmrSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, y, stats = bilqr(A, b, c, x0, y0) + x, stats = usymlq(A, b, c, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - s = c - A' * y - resid = norm(s) / norm(c) + + solver = UsymlqSolver(A, b) + solve!(solver, A, b, c, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) - x, y, stats = trilqr(A, b, c, x0, y0) + x, stats = usymqr(A, b, c, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - s = c - A' * y - resid = norm(s) / norm(c) + + solver = UsymqrSolver(A, b) + solve!(solver, A, b, c, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) end