diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
new file mode 100644
index 000000000..92ac58c74
--- /dev/null
+++ b/.buildkite/pipeline.yml
@@ -0,0 +1,71 @@
+steps:
+ - label: "Nvidia GPUs -- CUDA.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.9
+ agents:
+ queue: "juliagpu"
+ cuda: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("CUDA")
+ Pkg.add("LinearOperators")
+ Pkg.instantiate()
+ using CUDA
+ # CUDA.set_runtime_version!(v"11.8")'
+
+ julia --color=yes --project -e '
+ include("test/gpu/nvidia.jl")'
+ timeout_in_minutes: 30
+
+ # - label: "AMD GPUs -- AMDGPU.jl"
+ # plugins:
+ # - JuliaCI/julia#v1:
+ # version: 1.9
+ # agents:
+ # queue: "juliagpu"
+ # rocm: "*"
+ # rocmgpu: "gfx1031"
+ # env:
+ # JULIA_AMDGPU_CORE_MUST_LOAD: "1"
+ # JULIA_AMDGPU_HIP_MUST_LOAD: "1"
+ # JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
+ # command: |
+ # julia --color=yes --project -e '
+ # using Pkg
+ # Pkg.add("AMDGPU")
+ # Pkg.instantiate()
+ # include("test/gpu/amd.jl")'
+ # timeout_in_minutes: 30
+
+ - label: "Intel GPUs -- oneAPI.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.9
+ agents:
+ queue: "juliagpu"
+ intel: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("oneAPI")
+ Pkg.instantiate()
+ include("test/gpu/intel.jl")'
+ timeout_in_minutes: 30
+
+ - label: "Apple M1 GPUs -- Metal.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.9
+ agents:
+ queue: "juliaecosystem"
+ os: "macos"
+ arch: "aarch64"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("Metal")
+ Pkg.instantiate()
+ include("test/gpu/metal.jl")'
+ timeout_in_minutes: 30
diff --git a/.cirrus.yml b/.cirrus.yml
index d559cf609..792aad121 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,15 +1,36 @@
-freebsd_instance:
- image: freebsd-13-0-release-amd64
task:
- name: FreeBSD
- env:
- matrix:
- - JULIA_VERSION: 1.6
- - JULIA_VERSION: 1
- - JULIA_VERSION: nightly
- allow_failures: $JULIA_VERSION == 'nightly'
- install_script:
- - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)"
+ matrix:
+ - name: FreeBSD
+ freebsd_instance:
+ image_family: freebsd-13-1
+ env:
+ matrix:
+ - JULIA_VERSION: 1.6
+ - JULIA_VERSION: 1
+ - name: musl Linux
+ container:
+ image: alpine:3.14
+ env:
+ - JULIA_VERSION: 1
+ - name: MacOS M1
+ macos_instance:
+ image: ghcr.io/cirruslabs/macos-monterey-base:latest
+ env:
+ - JULIA_VERSION: 1
+ install_script: |
+ URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh"
+ set -x
+ if [ "$(uname -s)" = "Linux" ] && command -v apt; then
+ apt update
+ apt install -y curl
+ fi
+ if command -v curl; then
+ sh -c "$(curl ${URL})"
+ elif command -v wget; then
+ sh -c "$(wget ${URL} -q -O-)"
+ elif command -v fetch; then
+ sh -c "$(fetch ${URL} -o -)"
+ fi
build_script:
- cirrusjl build
test_script:
diff --git a/.github/codecov.yml b/.github/codecov.yml
new file mode 100644
index 000000000..e3469746f
--- /dev/null
+++ b/.github/codecov.yml
@@ -0,0 +1,11 @@
+# Drops on the order of 0.01% are typical even when no change occurs
+# Having the threshold set a little higher (0.5%) than that makes it
+# a little more tolerant to fluctuations
+coverage:
+ status:
+ project:
+ default:
+ threshold: 0.5%
+ patch:
+ default:
+ threshold: 0.5%
diff --git a/.github/workflows/Aqua.yml b/.github/workflows/Aqua.yml
new file mode 100644
index 000000000..da872e225
--- /dev/null
+++ b/.github/workflows/Aqua.yml
@@ -0,0 +1,17 @@
+name: Aqua
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ types: [opened, synchronize, reopened]
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: julia-actions/setup-julia@latest
+ with:
+ version: '1'
+ - name: Aqua.jl
+ run: julia --color=yes -e 'using Pkg; Pkg.add("Aqua"); Pkg.develop(path="."); using Aqua, Krylov; Aqua.test_all(Krylov)'
diff --git a/.github/workflows/Breakage.yml b/.github/workflows/Breakage.yml
index 266eed3cc..4a907d631 100644
--- a/.github/workflows/Breakage.yml
+++ b/.github/workflows/Breakage.yml
@@ -19,19 +19,20 @@ jobs:
"JuliaSmoothOptimizers/JSOSolvers.jl",
"JuliaSmoothOptimizers/LLSModels.jl",
"JuliaSmoothOptimizers/Percival.jl",
- "JuliaSmoothOptimizers/RipQP.jl"
+ "JuliaSmoothOptimizers/RipQP.jl",
+ "SciML/LinearSolve.jl"
]
pkgversion: [latest, stable]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
# Install Julia
- uses: julia-actions/setup-julia@v1
with:
version: '1'
arch: x64
- - uses: actions/cache@v1
+ - uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
@@ -85,7 +86,7 @@ jobs:
end;
end'
- - uses: actions/upload-artifact@v2
+ - uses: actions/upload-artifact@v3
with:
name: pr
path: pr/
@@ -94,9 +95,9 @@ jobs:
needs: break
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- - uses: actions/download-artifact@v2
+ - uses: actions/download-artifact@v3
with:
name: pr
path: pr/
@@ -127,7 +128,7 @@ jobs:
fi
done >> MSG
- - uses: actions/upload-artifact@v2
+ - uses: actions/upload-artifact@v3
with:
name: pr
path: pr/
diff --git a/.github/workflows/CI_M1.yml b/.github/workflows/CI_M1.yml
deleted file mode 100644
index 6f9aa720b..000000000
--- a/.github/workflows/CI_M1.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: CI_M1
-on:
- push:
- branches:
- - main
- pull_request:
- types: [opened, synchronize, reopened]
-jobs:
- test:
- name: Julia ${{ matrix.version }} - macOS - ${{ matrix.arch }} - ${{ github.event_name }}
- runs-on: self-hosted
- strategy:
- fail-fast: false
- matrix:
- version:
- - '1'
- arch:
- - aarch64
- steps:
- - uses: actions/checkout@v3
- - uses: julia-actions/setup-julia@v1
- with:
- version: ${{ matrix.version }}
- arch: ${{ matrix.arch }}
- - name: Version Info
- shell: julia --color=yes {0}
- run: |
- using InteractiveUtils
- versioninfo()
- - uses: julia-actions/julia-buildpkg@v1
- - uses: julia-actions/julia-runtest@v1
diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml
index 14f6dcd47..043113f74 100644
--- a/.github/workflows/CommentPR.yml
+++ b/.github/workflows/CommentPR.yml
@@ -39,16 +39,36 @@ jobs:
- run: unzip pr.zip
- name: 'Comment on PR'
- uses: actions/github-script@v3
+ uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
- var fs = require('fs');
- var issue_number = Number(fs.readFileSync('./NR'));
- var msg = fs.readFileSync('./MSG', 'utf8');
- await github.issues.createComment({
+ var fs = require('fs')
+ var issue_number = Number(fs.readFileSync('./NR'))
+ var msg = fs.readFileSync('./MSG', 'utf8')
+
+ // Get the existing comments.
+ const {data: comments} = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
- issue_number: issue_number,
- body: msg
- });
+ issue_number: issue_number
+ })
+
+ // Find any comment already made by the bot.
+ const botComment = comments.find(comment => comment.user.id === 41898282)
+
+ if (botComment) {
+ await github.rest.issues.updateComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ comment_id: botComment.id,
+ body: msg
+ })
+ } else {
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issue_number,
+ body: msg
+ })
+ }
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index b546a8082..7a9c79fd4 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -1,19 +1,44 @@
name: CompatHelper
-
on:
schedule:
- - cron: '00 00 * * *'
-
+ - cron: 0 0 * * *
+ workflow_dispatch:
+permissions:
+ contents: write
+ pull-requests: write
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- - uses: julia-actions/setup-julia@latest
+ - name: Check if Julia is already available in the PATH
+ id: julia_in_path
+ run: which julia
+ continue-on-error: true
+ - name: Install Julia, but only if it is not already available in the PATH
+ uses: julia-actions/setup-julia@v1
with:
version: '1'
- - name: CompatHelper
- run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- - name: CompatHelper.main()
+ arch: ${{ runner.arch }}
+ if: steps.julia_in_path.outcome != 'success'
+ - name: "Add the General registry via Git"
+ run: |
+ import Pkg
+ ENV["JULIA_PKG_SERVER"] = ""
+ Pkg.Registry.add("General")
+ shell: julia --color=yes {0}
+ - name: "Install CompatHelper"
+ run: |
+ import Pkg
+ name = "CompatHelper"
+ uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+ version = "3"
+ Pkg.add(; name, uuid, version)
+ shell: julia --color=yes {0}
+ - name: "Run CompatHelper"
+ run: |
+ import CompatHelper
+ CompatHelper.main()
+ shell: julia --color=yes {0}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: julia -e 'using CompatHelper; CompatHelper.main()'
+ COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
index be0b86584..406f15e0d 100644
--- a/.github/workflows/Documentation.yml
+++ b/.github/workflows/Documentation.yml
@@ -10,12 +10,12 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@latest
with:
version: '1'
- name: Install dependencies
- run: julia --project=docs -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+ run: julia --project=docs --color=yes -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- name: Build and deploy
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml
new file mode 100644
index 000000000..b0c37e05f
--- /dev/null
+++ b/.github/workflows/Invalidations.yml
@@ -0,0 +1,43 @@
+name: Invalidations
+# Uses SnoopCompile to evaluate number of invalidations caused by `using` the package
+# using https://github.com/julia-actions/julia-invalidations
+# Based on https://github.com/julia-actions/julia-invalidations
+
+on:
+ pull_request:
+
+concurrency:
+ # Skip intermediate builds: always.
+ # Cancel intermediate builds: always.
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ evaluate:
+ # Only run on PRs to the default branch.
+ # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch
+ if: github.base_ref == github.event.repository.default_branch
+ runs-on: ubuntu-latest
+ steps:
+ - uses: julia-actions/setup-julia@v1
+ with:
+ version: '1'
+ - uses: actions/checkout@v3
+ - uses: julia-actions/julia-buildpkg@v1
+ - uses: julia-actions/julia-invalidations@v1
+ id: invs_pr
+
+ - uses: actions/checkout@v3
+ with:
+ ref: ${{ github.event.repository.default_branch }}
+ - uses: julia-actions/julia-buildpkg@v1
+ - uses: julia-actions/julia-invalidations@v1
+ id: invs_default
+
+ - name: Report invalidation counts
+ run: |
+ echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY
+ echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY
+ - name: Check if the PR does increase number of invalidations
+ if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total
+ run: exit 1
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 409e0d146..9e1791f48 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,12 +31,12 @@ jobs:
arch: x64
allow_failure: true
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- - uses: actions/cache@v1
+ - uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
@@ -49,6 +49,6 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- - uses: codecov/codecov-action@v1
+ - uses: codecov/codecov-action@v3
with:
- file: lcov.info
+ files: lcov.info
diff --git a/LICENSE.md b/LICENSE.md
index 1533671ce..befba1c4d 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,6 +1,6 @@
-Copyright (c) 2015-2019: Dominique Orban
+Copyright (c) 2015-present: Alexis Montoison, Dominique Orban, and other contributors
-Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.0/).
+[Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.0/).
## License
@@ -11,83 +11,83 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.
--------------
1.1. "Contributor"
- means each individual or legal entity that creates, contributes to
- the creation of, or owns Covered Software.
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
1.2. "Contributor Version"
- means the combination of the Contributions of others (if any) used
- by a Contributor and that particular Contributor's Contribution.
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
- means Covered Software of a particular Contributor.
+ means Covered Software of a particular Contributor.
1.4. "Covered Software"
- means Source Code Form to which the initial Contributor has attached
- the notice in Exhibit A, the Executable Form of such Source Code
- Form, and Modifications of such Source Code Form, in each case
- including portions thereof.
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
1.5. "Incompatible With Secondary Licenses"
- means
+ means
- (a) that the initial Contributor has attached the notice described
- in Exhibit B to the Covered Software; or
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
- (b) that the Covered Software was made available under the terms of
- version 1.1 or earlier of the License, but not also under the
- terms of a Secondary License.
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
1.6. "Executable Form"
- means any form of the work other than Source Code Form.
+ means any form of the work other than Source Code Form.
1.7. "Larger Work"
- means a work that combines Covered Software with other material, in
- a separate file or files, that is not Covered Software.
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
1.8. "License"
- means this document.
+ means this document.
1.9. "Licensable"
- means having the right to grant, to the maximum extent possible,
- whether at the time of the initial grant or subsequently, any and
- all of the rights conveyed by this License.
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
1.10. "Modifications"
- means any of the following:
+ means any of the following:
- (a) any file in Source Code Form that results from an addition to,
- deletion from, or modification of the contents of Covered
- Software; or
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
- (b) any new file in Source Code Form that contains any Covered
- Software.
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
1.11. "Patent Claims" of a Contributor
- means any patent claim(s), including without limitation, method,
- process, and apparatus claims, in any patent Licensable by such
- Contributor that would be infringed, but for the grant of the
- License, by the making, using, selling, offering for sale, having
- made, import, or transfer of either its Contributions or its
- Contributor Version.
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
1.12. "Secondary License"
- means either the GNU General Public License, Version 2.0, the GNU
- Lesser General Public License, Version 2.1, the GNU Affero General
- Public License, Version 3.0, or any later versions of those
- licenses.
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
1.13. "Source Code Form"
- means the form of the work preferred for making modifications.
+ means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
- means an individual or a legal entity exercising rights under this
- License. For legal entities, "You" includes any entity that
- controls, is controlled by, or is under common control with You. For
- purposes of this definition, "control" means (a) the power, direct
- or indirect, to cause the direction or management of such entity,
- whether by contract or otherwise, or (b) ownership of more than
- fifty percent (50%) of the outstanding shares or beneficial
- ownership of such entity.
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
2. License Grants and Conditions
--------------------------------
@@ -98,14 +98,14 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
- Licensable by such Contributor to use, reproduce, make available,
- modify, display, perform, distribute, and otherwise exploit its
- Contributions, either on an unmodified basis, with Modifications, or
- as part of a Larger Work; and
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
- for sale, have made, import, and otherwise transfer either its
- Contributions or its Contributor Version.
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
2.2. Effective Date
@@ -122,15 +122,15 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
- or
+ or
(b) for infringements caused by: (i) Your and any other third party's
- modifications of Covered Software, or (ii) the combination of its
- Contributions with other software (except as part of its Contributor
- Version); or
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
- its Contributions.
+ its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
@@ -178,15 +178,15 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
- Form, as described in Section 3.1, and You must inform recipients of
- the Executable Form how they can obtain a copy of such Source Code
- Form by reasonable means in a timely manner, at a charge no more
- than the cost of distribution to the recipient; and
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
- License, or sublicense it under different terms, provided that the
- license for the Executable Form does not attempt to limit or alter
- the recipients' rights in the Source Code Form under this License.
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
@@ -363,7 +363,7 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
- file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ file, You can obtain one at https://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
diff --git a/Project.toml b/Project.toml
index a91e07b8a..c711f565c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,18 +1,33 @@
name = "Krylov"
uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
-version = "0.8.3"
+version = "0.9.2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+[weakdeps]
+ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[extensions]
+KrylovComponentArraysExt = "ComponentArrays"
+KrylovFillArraysExt = "FillArrays"
+KrylovStaticArraysExt = "StaticArrays"
+
[compat]
+PackageExtensionCompat = "1.0.1"
julia = "^1.6.0"
[extras]
+ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
-test = ["Random", "Test"]
+test = ["ComponentArrays", "FillArrays", "Random", "StaticArrays", "Test"]
diff --git a/README.md b/README.md
index a4664e187..57bcd1d81 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ should be solved when **_b_** is not in the range of **_A_** (inconsistent syste
* **_A_** is square and singular,
* **_A_** is tall and thin.
-Underdetermined sytems are less common but also occur.
+Underdetermined systems are less common but also occur.
If there are infinitely many such **_x_** (because **_A_** is column rank-deficient), one with minimum norm is identified
@@ -61,32 +61,32 @@ If there are infinitely many such **_x_** (because **_A_** is column rank-defici
minimize ‖x‖ subject to Ax = b
-sould be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**.
+should be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**.
This situation mainly occurs when
* **_A_** is square and singular,
* **_A_** is short and wide.
-Overdetermined sytems are less common but also occur.
+Overdetermined systems are less common but also occur.
4. Adjoint systems
- Ax = b and Aᵀy = c
+ Ax = b and Aᴴy = c
where **_A_** can have any shape.
-5. Saddle-point and symmetric quasi-definite (SQD) systems
+5. Saddle-point and Hermitian quasi-definite systems
[M A] [x] = [b]
- [Aᵀ -N] [y] [c]
+ [Aᴴ -N] [y] [c]
where **_A_** can have any shape.
-6. Generalized saddle-point and unsymmetric partitioned systems
+6. Generalized saddle-point and non-Hermitian partitioned systems
[M A] [x] = [b]
@@ -94,7 +94,7 @@ where **_A_** can have any shape.
[B N] [y] [c]
-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**.
+where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**.
**_A_**, **_B_**, **_b_** and **_c_** must be all nonzero.
Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because:
@@ -121,3 +121,10 @@ julia> ]
pkg> add Krylov
pkg> test Krylov
```
+
+## Bug reports and discussions
+
+If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues).
+Focused suggestions and requests can also be opened as issues. Please start an issue or a discussion on the topic before opening a pull request.
+
+If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome.
diff --git a/docs/make.jl b/docs/make.jl
index 57ad87cd2..441ddb3ee 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,23 +6,26 @@ makedocs(
linkcheck = true,
strict = true,
format = Documenter.HTML(assets = ["assets/style.css"],
- ansicolor=true,
+ ansicolor = true,
prettyurls = get(ENV, "CI", nothing) == "true",
collapselevel = 1),
sitename = "Krylov.jl",
pages = ["Home" => "index.md",
"API" => "api.md",
- "Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md",
- "Symmetric indefinite linear systems" => "solvers/sid.md",
- "Unsymmetric linear systems" => "solvers/unsymmetric.md",
+ "Krylov processes" => "processes.md",
+ "Krylov methods" => ["Hermitian positive definite linear systems" => "solvers/spd.md",
+ "Hermitian indefinite linear systems" => "solvers/sid.md",
+ "Non-Hermitian square linear systems" => "solvers/unsymmetric.md",
"Least-norm problems" => "solvers/ln.md",
"Least-squares problems" => "solvers/ls.md",
"Adjoint systems" => "solvers/as.md",
- "Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md",
- "Generalized saddle-point and unsymmetric partitioned systems" => "solvers/gsp.md"],
+ "Saddle-point and Hermitian quasi-definite systems" => "solvers/sp_sqd.md",
+ "Generalized saddle-point and non-Hermitian partitioned systems" => "solvers/gsp.md"],
"In-place methods" => "inplace.md",
+ "Preconditioners" => "preconditioners.md",
+ "Storage requirements" => "storage.md",
"GPU support" => "gpu.md",
- "Warm start" => "warm_start.md",
+ "Warm-start" => "warm-start.md",
"Factorization-free operators" => "factorization-free.md",
"Callbacks" => "callbacks.md",
"Performance tips" => "tips.md",
diff --git a/docs/src/api.md b/docs/src/api.md
index 7f2f4dff7..238c86f1a 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -48,6 +48,7 @@ LnlqSolver
CraigSolver
CraigmrSolver
GpmrSolver
+FgmresSolver
```
## Utilities
@@ -60,4 +61,6 @@ Krylov.vec2str
Krylov.ktypeof
Krylov.kzeros
Krylov.kones
+Krylov.vector_to_matrix
+Krylov.matrix_to_vector
```
diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md
index f44018687..91e0b521c 100644
--- a/docs/src/callbacks.md
+++ b/docs/src/callbacks.md
@@ -1,43 +1,80 @@
-## Callbacks
+# [Callbacks](@id callbacks)
-Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. The callback should return `true` if the main loop should terminate, and `false` otherwise.
+Each Krylov method is able to call a callback function as `callback(solver)` at each iteration.
+The callback should return `true` if the main loop should terminate, and `false` otherwise.
If the method terminated because of the callback, the output status will be `"user-requested exit"`.
-For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using
+For example, if the user defines `minres_callback(solver::MinresSolver)`, it can be passed to the solver using
```julia
-(x, stats) = minres(A, b, callback = my_callback)
+(x, stats) = minres(A, b, callback = minres_callback)
```
-If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure:
+If you need to write a callback that uses variables that are not in a `KrylovSolver`, use a closure:
```julia
-function my_callback2(solver::MinresSolver, A, b, storage_vec, tol::Float64)
- mul!(storage_vec, A, solver.x)
- storage_vec .-= b
- return norm(storage_vec) ≤ tol # tolerance based on the 2-norm of the residual
+function custom_stopping_condition(solver::KrylovSolver, A, b, r, tol)
+ mul!(r, A, solver.x)
+ r .-= b # r := b - Ax
+ bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual
+ return bool
end
-storage_vec = similar(b)
-(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, storage_vec, 0.1))
+
+r = similar(b)  # residual workspace
+tol = 1e-6      # tolerance for the 2-norm of the residual
+cg_callback(solver) = custom_stopping_condition(solver, A, b, r, tol)
+(x, stats) = cg(A, b, callback = cg_callback)
```
Alternatively, use a structure and make it callable:
```julia
-mutable struct MyCallback3{S, M}
- A::M
- b::S
- storage_vec::S
- tol::Float64
+mutable struct CallbackWorkspace{T}
+ A::Matrix{T}
+ b::Vector{T}
+ r::Vector{T}
+ tol::T
end
-MyCallback3(A, b; tol = 0.1) = MyCallback3(A, b, similar(b), tol)
-function (my_cb::MyCallback3)(solver)
- mul!(my_cb.storage_vec, my_cb.A, solver.x)
- my_cb.storage_vec .-= my_cb.b
- return norm(my_cb.storage_vec) ≤ my_cb.tol # tolerance based on the 2-norm of the residual
+function (workspace::CallbackWorkspace)(solver::KrylovSolver)
+ mul!(workspace.r, workspace.A, solver.x)
+ workspace.r .-= workspace.b
+ bool = norm(workspace.r) ≤ workspace.tol
+ return bool
end
-my_cb = MyCallback3(A, b; tol = 0.1)
-(x, stats) = minres(A, b, callback = my_cb)
+r = similar(b)
+tol = 1e-6
+bicgstab_callback = CallbackWorkspace(A, b, r, tol)
+(x, stats) = bicgstab(A, b, callback = bicgstab_callback)
+```
+
+Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method as the iterations proceed.
+We now illustrate how to store all iterates $x_k$ of the GMRES method.
+
+```julia
+S = Krylov.ktypeof(b)
+global X = S[] # Storage for GMRES iterates
+
+function gmres_callback(solver)
+ z = solver.z
+ k = solver.inner_iter
+ nr = sum(1:k)
+ V = solver.V
+ R = solver.R
+ y = copy(z)
+
+ # Solve Rk * yk = zk
+ for i = k : -1 : 1
+ pos = nr + i - k
+ for j = k : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j]
+ pos = pos - j + 1
+ end
+ y[i] = y[i] / R[pos]
+ end
+
+ # xk = Vk * yk
+ xk = sum(V[i] * y[i] for i = 1:k)
+ push!(X, xk)
+
+ return false # We don't want to add new stopping conditions
+end
+
+(x, stats) = gmres(A, b, callback = gmres_callback)
```
diff --git a/docs/src/examples/tricg.md b/docs/src/examples/tricg.md
index e981c2f7e..61750de5f 100644
--- a/docs/src/examples/tricg.md
+++ b/docs/src/examples/tricg.md
@@ -14,7 +14,7 @@ N = diagm(0 => [5.0 * i for i = 1:n])
c = -b
# [I A] [x] = [b]
-# [Aᵀ -I] [y] [c]
+# [Aᴴ -I] [y] [c]
(x, y, stats) = tricg(A, b, c)
K = [eye(m) A; A' -eye(n)]
B = [b; c]
@@ -23,7 +23,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [-I A] [x] = [b]
-# [ Aᵀ I] [y] [c]
+# [ Aᴴ I] [y] [c]
(x, y, stats) = tricg(A, b, c, flip=true)
K = [-eye(m) A; A' eye(n)]
B = [b; c]
@@ -32,7 +32,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [I A] [x] = [b]
-# [Aᵀ I] [y] [c]
+# [Aᴴ I] [y] [c]
(x, y, stats) = tricg(A, b, c, spd=true)
K = [eye(m) A; A' eye(n)]
B = [b; c]
@@ -41,7 +41,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [-I A] [x] = [b]
-# [ Aᵀ -I] [y] [c]
+# [ Aᴴ -I] [y] [c]
(x, y, stats) = tricg(A, b, c, snd=true)
K = [-eye(m) A; A' -eye(n)]
B = [b; c]
@@ -50,7 +50,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [τI A] [x] = [b]
-# [ Aᵀ νI] [y] [c]
+# [ Aᴴ νI] [y] [c]
(τ, ν) = (1e-4, 1e2)
(x, y, stats) = tricg(A, b, c, τ=τ, ν=ν)
K = [τ*eye(m) A; A' ν*eye(n)]
@@ -60,7 +60,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [M⁻¹ A ] [x] = [b]
-# [Aᵀ -N⁻¹] [y] [c]
+# [Aᴴ -N⁻¹] [y] [c]
(x, y, stats) = tricg(A, b, c, M=M, N=N, verbose=1)
K = [inv(M) A; A' -inv(N)]
H = BlockDiagonalOperator(M, N)
diff --git a/docs/src/examples/trimr.md b/docs/src/examples/trimr.md
index 2aa48be1e..adc4e82e5 100644
--- a/docs/src/examples/trimr.md
+++ b/docs/src/examples/trimr.md
@@ -14,7 +14,7 @@ m, n = size(A)
c = -b
# [D A] [x] = [b]
-# [Aᵀ 0] [y] [c]
+# [Aᴴ 0] [y] [c]
llt_D = cholesky(D)
opD⁻¹ = LinearOperator(Float64, 5, 5, true, true, (y, v) -> ldiv!(y, llt_D, v))
opH⁻¹ = BlockDiagonalOperator(opD⁻¹, eye(n))
@@ -34,7 +34,7 @@ N = diagm(0 => [5.0 * i for i = 1:n])
c = -b
# [I A] [x] = [b]
-# [Aᵀ -I] [y] [c]
+# [Aᴴ -I] [y] [c]
(x, y, stats) = trimr(A, b, c)
K = [eye(m) A; A' -eye(n)]
B = [b; c]
@@ -43,7 +43,7 @@ resid = norm(r)
@printf("TriMR: Relative residual: %8.1e\n", resid)
# [M A] [x] = [b]
-# [Aᵀ -N] [y] [c]
+# [Aᴴ -N] [y] [c]
ldlt_M = ldl(M)
ldlt_N = ldl(N)
opM⁻¹ = LinearOperator(Float64, size(M,1), size(M,2), true, true, (y, v) -> ldiv!(y, ldlt_M, v))
diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md
index aa0f51f07..0bff49d4c 100644
--- a/docs/src/factorization-free.md
+++ b/docs/src/factorization-free.md
@@ -1,3 +1,32 @@
+```@raw html
+
+```
+
## [Factorization-free operators](@id factorization-free)
All methods are factorization-free, which means that you only need to provide operator-vector products.
@@ -10,8 +39,11 @@ Some methods only require `A * v` products, whereas other ones also require `A'
|:--------------------------------------:|:----------------------------------------:|
| CG, CR | CGLS, CRLS, CGNE, CRMR |
| SYMMLQ, CG-LANCZOS, MINRES, MINRES-QLP | LSLQ, LSQR, LSMR, LNLQ, CRAIG, CRAIGMR |
-| DIOM, FOM, DQGMRES, GMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
-| CGS, BICGSTAB | TriCG, TriMR, USYMLQR |
+| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
+| CGS, BICGSTAB | TriCG, TriMR |
+
+!!! info
+ GPMR is the only method that requires `A * v` and `B * w` products.
Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`.
@@ -27,9 +59,9 @@ where
* `type` is the operator element type;
* `nrow` and `ncol` are its dimensions;
* `symmetric` and `hermitian` should be set to `true` or `false`;
-* `prod(y, v)`, `tprod(y, w)` and `ctprod(u, w)` are called when writing `mul!(y, A, v)`, `mul!(y, tranpose(A), w)`, and `mul!(y, A', u)`, respectively.
+* `prod(y, v)`, `tprod(y, w)` and `ctprod(u, w)` are called when writing `mul!(y, A, v)`, `mul!(y, transpose(A), w)`, and `mul!(y, A', u)`, respectively.
-See the [tutorial](https://juliasmoothoptimizers.github.io/tutorials/introduction-to-linear-operators/) and the detailed [documentation](https://juliasmoothoptimizers.github.io/LinearOperators.jl/dev/) for more informations on `LinearOperators.jl`.
+See the [tutorial](https://juliasmoothoptimizers.github.io/tutorials/introduction-to-linear-operators/) and the detailed [documentation](https://juliasmoothoptimizers.github.io/LinearOperators.jl/dev/) for more information on `LinearOperators.jl`.
## Examples
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 4c9887f24..33b76b421 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -1,38 +1,51 @@
-## GPU support
+# [GPU support](@id gpu)
-All solvers in Krylov.jl can be used with `CuArrays` and allow computations with Nvidia GPU. Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to GPU format (`CuMatrix` and `CuVector`).
+Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are highly parallelizable.
+
+The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia.
+Those allow the implementations to be specialized automatically by the compiler for both CPU and GPU.
+Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).
+
+## Nvidia GPUs
+
+All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations on Nvidia GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`CuMatrix` and `CuVector`).
```julia
using CUDA, Krylov
-# CPU Arrays
-A_cpu = rand(20, 20)
-b_cpu = rand(20)
+if CUDA.functional()
+ # CPU Arrays
+ A_cpu = rand(20, 20)
+ b_cpu = rand(20)
-# GPU Arrays
-A_gpu = CuMatrix(A_cpu)
-b_gpu = CuVector(b_cpu)
+ # GPU Arrays
+ A_gpu = CuMatrix(A_cpu)
+ b_gpu = CuVector(b_cpu)
-# Solve a square and dense system on GPU
-x, stats = bilq(A_gpu, b_gpu)
+ # Solve a square and dense system on an Nvidia GPU
+ x, stats = bilq(A_gpu, b_gpu)
+end
```
-Sparse matrices have a specific storage on GPU (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`):
+Sparse matrices have a specific storage on Nvidia GPUs (`CuSparseMatrixCSC`, `CuSparseMatrixCSR` or `CuSparseMatrixCOO`):
```julia
using CUDA, Krylov
using CUDA.CUSPARSE, SparseArrays
-# CPU Arrays
-A_cpu = sprand(200, 100, 0.3)
-b_cpu = rand(200)
+if CUDA.functional()
+ # CPU Arrays
+ A_cpu = sprand(200, 100, 0.3)
+ b_cpu = rand(200)
-# GPU Arrays
-A_gpu = CuSparseMatrixCSC(A_cpu)
-b_gpu = CuVector(b_cpu)
+ # GPU Arrays
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ b_gpu = CuVector(b_cpu)
-# Solve a rectangular and sparse system on GPU
-x, stats = lsmr(A_gpu, b_gpu)
+ # Solve a rectangular and sparse system on an Nvidia GPU
+ x, stats = lsmr(A_gpu, b_gpu)
+end
```
Optimized operator-vector products that exploit GPU features can be also used by means of linear operators.
@@ -46,64 +59,168 @@ can be applied directly on GPU thanks to efficient operators that take advantage
using SparseArrays, Krylov, LinearOperators
using CUDA, CUDA.CUSPARSE
-# Transfer the linear system from the CPU to the GPU
-A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
-b_gpu = CuVector(b_cpu)
+if CUDA.functional()
+ # Transfer the linear system from the CPU to the GPU
+ A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu)
+ b_gpu = CuVector(b_cpu)
+
+ # IC(0) decomposition LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
+ P = ic02(A_gpu)
+
+ # Additional vector required for solving triangular systems
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ z = CUDA.zeros(T, n)
+
+ # Solve Py = x
+ function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z)
+ ldiv!(z, LowerTriangular(P), x) # Forward substitution with L
+ ldiv!(y, LowerTriangular(P)', z) # Backward substitution with Lᴴ
+ return y
+ end
+
+ function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z)
+ ldiv!(z, UpperTriangular(P)', x) # Forward substitution with L
+ ldiv!(y, UpperTriangular(P), z) # Backward substitution with Lᴴ
+ return y
+ end
+
+ # Operator that models P⁻¹
+ symmetric = hermitian = true
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z))
+
+ # Solve a Hermitian positive definite system with an IC(0) preconditioner on GPU
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+end
+```
-# LLᵀ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
-P = ic02(A_gpu, 'O')
+### Example with a general square system
+
+```julia
+using SparseArrays, Krylov, LinearOperators
+using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
-# Solve Py = x
-function ldiv!(y, P, x)
- copyto!(y, x) # Variant for CuSparseMatrixCSR
- sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O')
- sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O')
- return y
+if CUDA.functional()
+ # Optional -- Compute a permutation vector p such that A[:,p] has no zero diagonal
+ p = zfd(A_cpu)
+ p .+= 1
+ A_cpu = A_cpu[:,p]
+
+ # Transfer the linear system from the CPU to the GPU
+ A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu)
+ b_gpu = CuVector(b_cpu)
+
+ # ILU(0) decomposition LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
+ P = ilu02(A_gpu)
+
+ # Additional vector required for solving triangular systems
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ z = CUDA.zeros(T, n)
+
+ # Solve Py = x
+ function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z)
+ ldiv!(z, UnitLowerTriangular(P), x) # Forward substitution with L
+ ldiv!(y, UpperTriangular(P), z) # Backward substitution with U
+ return y
+ end
+
+ function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z)
+ ldiv!(z, LowerTriangular(P), x) # Forward substitution with L
+ ldiv!(y, UnitUpperTriangular(P), z) # Backward substitution with U
+ return y
+ end
+
+ # Operator that models P⁻¹
+ symmetric = hermitian = false
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z))
+
+ # Solve a non-Hermitian system with an ILU(0) preconditioner on GPU
+ x̄, stats = bicgstab(A_gpu, b_gpu, M=opM)
+
+ # Recover the solution of Ax = b with the solution of A[:,p]x̄ = b
+ invp = invperm(p)
+ x = x̄[invp]
end
+```
+
+## AMD GPUs
+
+All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`ROCMatrix` and `ROCVector`).
+
+```julia
+using Krylov, AMDGPU
-# Operator that model P⁻¹
-n = length(b_gpu)
-T = eltype(b_gpu)
-symmetric = hermitian = true
-opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x))
+if AMDGPU.functional()
+ # CPU Arrays
+ A_cpu = rand(ComplexF64, 20, 20)
+ A_cpu = A_cpu + A_cpu'
+ b_cpu = rand(ComplexF64, 20)
-# Solve a symmetric positive definite system with an incomplete Cholesky preconditioner on GPU
-(x, stats) = cg(A_gpu, b_gpu, M=opM)
+ A_gpu = ROCMatrix(A_cpu)
+ b_gpu = ROCVector(b_cpu)
+
+ # Solve a dense Hermitian system on an AMD GPU
+ x, stats = minres(A_gpu, b_gpu)
+end
```
-### Example with a general square system
+!!! info
+ The library `rocSPARSE` is not yet interfaced in AMDGPU.jl, so only dense linear systems are supported.
+
+## Intel GPUs
+
+All solvers in Krylov.jl can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations on Intel GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`oneMatrix` and `oneVector`).
```julia
-using SparseArrays, Krylov, LinearOperators
-using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
+using Krylov, oneAPI
+
+if oneAPI.functional()
+ T = Float32 # oneAPI.jl also works with ComplexF32
+ m = 20
+ n = 10
+
+ # CPU Arrays
+ A_cpu = rand(T, m, n)
+ b_cpu = rand(T, m)
-# Optional -- Compute a permutation vector p such that A[p,:] has no zero diagonal
-p = zfd(A_cpu, 'O')
-p .+= 1
-A_cpu = A_cpu[p,:]
-b_cpu = b_cpu[p]
-
-# Transfer the linear system from the CPU to the GPU
-A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
-b_gpu = CuVector(b_cpu)
-
-# LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
-P = ilu02(A_gpu, 'O')
-
-# Solve Py = x
-function ldiv!(y, P, x)
- copyto!(y, x) # Variant for CuSparseMatrixCSR
- sv2!('N', 'L', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'U', 1.0, P, y, 'O')
- sv2!('N', 'U', 'U', 1.0, P, y, 'O') # sv2!('N', 'U', 'N', 1.0, P, y, 'O')
- return y
+ # GPU Arrays
+ A_gpu = oneMatrix(A_cpu)
+ b_gpu = oneVector(b_cpu)
+
+ # Solve a dense least-squares problem on an Intel GPU
+ x, stats = lsqr(A_gpu, b_gpu)
end
+```
-# Operator that model P⁻¹
-n = length(b_gpu)
-T = eltype(b_gpu)
-symmetric = hermitian = false
-opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x))
+!!! note
+ The library `oneMKL` is interfaced in oneAPI.jl and accelerates linear algebra operations on Intel GPUs. Only dense linear systems are supported for the time being because sparse linear algebra routines are not interfaced yet.
-# Solve an unsymmetric system with an incomplete LU preconditioner on GPU
-(x, stats) = bicgstab(A_gpu, b_gpu, M=opM)
+## Apple M1 GPUs
+
+All solvers in Krylov.jl can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations on Apple M1 GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`MtlMatrix` and `MtlVector`).
+
+```julia
+using Krylov, Metal
+
+T = Float32 # Metal.jl also works with ComplexF32
+n = 10
+m = 20
+
+# CPU Arrays
+A_cpu = rand(T, n, m)
+b_cpu = rand(T, n)
+
+# GPU Arrays
+A_gpu = MtlMatrix(A_cpu)
+b_gpu = MtlVector(b_cpu)
+
+# Solve a dense least-norm problem on an Apple M1 GPU
+x, stats = craig(A_gpu, b_gpu)
```
+
+!!! warning
+ Metal.jl is under heavy development and is considered experimental for now.
diff --git a/docs/src/graphics/arnoldi.png b/docs/src/graphics/arnoldi.png
new file mode 100644
index 000000000..9ef8bd3a3
Binary files /dev/null and b/docs/src/graphics/arnoldi.png differ
diff --git a/docs/src/graphics/golub_kahan.png b/docs/src/graphics/golub_kahan.png
new file mode 100644
index 000000000..32fc3d7b8
Binary files /dev/null and b/docs/src/graphics/golub_kahan.png differ
diff --git a/docs/src/graphics/hermitian_lanczos.png b/docs/src/graphics/hermitian_lanczos.png
new file mode 100644
index 000000000..c70082e72
Binary files /dev/null and b/docs/src/graphics/hermitian_lanczos.png differ
diff --git a/docs/src/graphics/montoison_orban.png b/docs/src/graphics/montoison_orban.png
new file mode 100644
index 000000000..5a14eda04
Binary files /dev/null and b/docs/src/graphics/montoison_orban.png differ
diff --git a/docs/src/graphics/nonhermitian_lanczos.png b/docs/src/graphics/nonhermitian_lanczos.png
new file mode 100644
index 000000000..b8d83961c
Binary files /dev/null and b/docs/src/graphics/nonhermitian_lanczos.png differ
diff --git a/docs/src/graphics/saunders_simon_yip.png b/docs/src/graphics/saunders_simon_yip.png
new file mode 100644
index 000000000..c3acfd181
Binary files /dev/null and b/docs/src/graphics/saunders_simon_yip.png differ
diff --git a/docs/src/index.md b/docs/src/index.md
index ce657436d..1cc2c3302 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -22,7 +22,7 @@ should be solved when **_b_** is not in the range of **_A_** (inconsistent syste
* **_A_** is square and singular,
* **_A_** is tall and thin.
-Underdetermined sytems are less common but also occur.
+Underdetermined systems are less common but also occur.
If there are infinitely many such **_x_** (because **_A_** is column rank-deficient), one with minimum norm is identified
@@ -36,36 +36,36 @@ If there are infinitely many such **_x_** (because **_A_** is column rank-defici
\min \|x\| \quad \text{subject to} \quad Ax = b
```
-sould be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**.
+should be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**.
This situation mainly occurs when
* **_A_** is square and singular,
* **_A_** is short and wide.
-Overdetermined sytems are less common but also occur.
+Overdetermined systems are less common but also occur.
4 - Adjoint systems
```math
- Ax = b \quad \text{and} \quad A^T y = c
+ Ax = b \quad \text{and} \quad A^H y = c
```
where **_A_** can have any shape.
-5 - Saddle-point and symmetric quasi-definite (SQD) systems
+5 - Saddle-point and Hermitian quasi-definite systems
```math
- \begin{bmatrix} M & \phantom{-}A \\ A^T & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right)
+ \begin{bmatrix} M & \phantom{-}A \\ A^H & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right)
```
where **_A_** can have any shape.
-6 - Generalized saddle-point and unsymmetric partitioned systems
+6 - Generalized saddle-point and non-Hermitian partitioned systems
```math
\begin{bmatrix} M & A \\ B & N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}
```
-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**.
+where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**.
**_A_**, **_B_**, **_b_** and **_c_** must be all nonzero.
Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because:
@@ -92,3 +92,10 @@ julia> ]
pkg> add Krylov
pkg> test Krylov
```
+
+# Bug reports and discussions
+
+If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues).
+Focused suggestions and requests can also be opened as issues. Please start an issue or a discussion on the topic before opening a pull request.
+
+If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome.
diff --git a/docs/src/inplace.md b/docs/src/inplace.md
index 71a4e25de..9950575fe 100644
--- a/docs/src/inplace.md
+++ b/docs/src/inplace.md
@@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver`
For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU.
!!! note
- `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`).
+ `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`).
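+
+For example, here is a minimal sketch (the square system `A` and right-hand side `b` below are hypothetical) that allocates a GMRES workspace with its `memory` argument and solves in place:
+
+```julia
+using Krylov
+A = rand(100, 100)              # hypothetical square system
+b = rand(100)
+solver = GmresSolver(A, b, 20)  # the third argument is the `memory`
+gmres!(solver, A, b)            # in-place solve that reuses the workspace
+```
+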
The workspace is always the first argument of the in-place methods:
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
new file mode 100644
index 000000000..fd203dddb
--- /dev/null
+++ b/docs/src/preconditioners.md
@@ -0,0 +1,237 @@
+# [Preconditioners](@id preconditioners)
+
+The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear system $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
+Preconditioning can be used, for instance, to reduce the condition number of the problem or to cluster its eigenvalues or singular values.
+
+The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account.
+Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations.
+
+The construction of a preconditioner necessitates trade-offs because we need to apply it at least once per iteration within a Krylov method.
+Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense.
+
+There exist three variants of preconditioning:
+
+| Left preconditioning | Two-sided preconditioning | Right preconditioning |
+|:----------------------------------:|:----------------------------------------------------------------------:|:--------------------------------------------:|
+| $P_{\ell}^{-1}Ax = P_{\ell}^{-1}b$ | $P_{\ell}^{-1}AP_r^{-1}y = P_{\ell}^{-1}b~~\text{with}~~x = P_r^{-1}y$ | $AP_r^{-1}y = b~~\text{with}~~x = P_r^{-1}y$ |
+
+where $P_{\ell}$ and $P_r$ are square and nonsingular.
+
+In Krylov.jl, we call $P_{\ell}^{-1}$ and $P_r^{-1}$ the preconditioners and we assume that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available.
+Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov solvers.
+
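+As a minimal sketch (the diagonally dominant matrix and the Jacobi preconditioner below are only illustrative), both conventions look like this:
+
+```julia
+using Krylov, LinearAlgebra
+n = 100
+A = rand(n, n) + 2n * I    # hypothetical diagonally dominant system
+b = rand(n)
+
+P⁻¹ = inv(Diagonal(A))     # applied with mul!(y, P⁻¹, x)
+x, stats = gmres(A, b, M = P⁻¹, ldiv = false)
+
+P = Diagonal(A)            # applied with ldiv!(y, P, x)
+x, stats = gmres(A, b, M = P, ldiv = true)
+```
+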
+## How to use preconditioners in Krylov.jl?
+
+!!! info
+ - A preconditioner need only support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+ - The default value of a preconditioner in Krylov.jl is the identity operator `I`.
+
+### Square non-Hermitian linear systems
+
+Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`FGMRES`](@ref fgmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+
+A Krylov method dedicated to non-Hermitian linear systems allows the three variants of preconditioning.
+
+| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
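+For instance, a minimal sketch (hypothetical data, Jacobi preconditioner) of left and right preconditioning:
+
+```julia
+using Krylov, LinearAlgebra
+n = 100
+A = rand(n, n) + 2n * I   # hypothetical nonsymmetric system
+b = rand(n)
+P = Diagonal(A)           # Jacobi preconditioner
+
+x, stats = bicgstab(A, b, M = P, ldiv = true)  # left preconditioning
+x, stats = gmres(A, b, N = P, ldiv = true)     # right preconditioning
+```
+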
+### Hermitian linear systems
+
+Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+
+When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-H}y = L^{-1}b$ with $x = L^{-H}y$.
+Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^H$ that maintains hermiticity.
+However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its inverse directly.
+
+| Preconditioners | $P_c^{-1}$ | $P_c$ |
+|:---------------:|:-------------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` |
+
+!!! warning
+ The preconditioner `M` must be Hermitian and positive definite.
+
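+For example, a minimal sketch (the SPD system below is hypothetical) with $P_c = \mathrm{diag}(A)$:
+
+```julia
+using Krylov, LinearAlgebra
+n = 100
+C = rand(n, n)
+A = C' * C + n * I   # hypothetical SPD matrix
+b = rand(n)
+P = Diagonal(A)      # SPD, applied with ldiv!
+
+x, stats = cg(A, b, M = P, ldiv = true)
+```
+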
+### Linear least-squares problems
+
+Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:------------------------------------:|:-------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ |
+| Normal equation | $A^HAx = A^Hb$ | $A^HE^{-1}Ax = A^HE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
+[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
+| Normal equation | $(A^HA + \lambda^2 I)x = A^Hb$ | $(A^HE^{-1}A + \lambda^2 F)x = A^HE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^H & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be Hermitian and positive definite.
+
+### Linear least-norm problems
+
+Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:|
+| Minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Hy = b~~\text{with}~~x = A^Hy$ | $AF^{-1}A^Hy = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+[`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr) also handle penalized minimum-norm problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:|
+| Minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| Normal equation | $(AA^H + \lambda^2 I)y = b~~\text{with}~~x = A^Hy$ | $(AF^{-1}A^H + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be Hermitian and positive definite.
+
+### Saddle-point and symmetric quasi-definite systems
+
+[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of Hermitian systems $Kz = d$ with the 2×2 block structure
+```math
+ \begin{bmatrix} \tau E & \phantom{-}A \\ A^H & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}.
+```
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be Hermitian and positive definite.
+
+### Generalized saddle-point and unsymmetric partitioned systems
+
+[`GPMR`](@ref gpmr) can take advantage of general square systems $Kz = d$ with the 2×2 block structure
+```math
+ \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}.
+```
+| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
+|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
+| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` |
+
+!!! note
+ Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
+
+## Packages that provide preconditioners
+
+- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions.
+- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in.
+- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) provides limited-memory LDLᵀ factorizations of symmetric matrices.
+- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners.
+- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices.
+- [BasicLU.jl](https://github.com/JuliaSmoothOptimizers/BasicLU.jl) uses a sparse LU factorization to compute a maximum volume basis that can be used as a preconditioner for least-norm and least-squares problems.
+
+## Examples
+
+```julia
+using Krylov, LinearAlgebra  # LinearAlgebra provides diagm
+n, m = size(A)
+d = [A[i,i] ≠ 0 ? 1 / abs(A[i,i]) : 1 for i=1:n] # Jacobi preconditioner
+P⁻¹ = diagm(d)
+x, stats = symmlq(A, b, M=P⁻¹)
+```
+
+```julia
+using Krylov, LinearAlgebra  # LinearAlgebra provides norm and diagm
+n, m = size(A)
+d = [1 / norm(A[:,i]) for i=1:m] # diagonal preconditioner
+P⁻¹ = diagm(d)
+x, stats = minres(A, b, M=P⁻¹)
+```
+
+```julia
+using IncompleteLU, Krylov
+Pℓ = ilu(A)
+x, stats = gmres(A, b, M=Pℓ, ldiv=true) # left preconditioning
+```
+
+```julia
+using LimitedLDLFactorizations, Krylov
+P = lldl(A)
+P.D .= abs.(P.D)
+x, stats = cg(A, b, M=P, ldiv=true) # centered preconditioning
+```
+
+```julia
+using ILUZero, Krylov
+Pᵣ = ilu0(A)
+x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning
+```
+
+```julia
+using LDLFactorizations, Krylov
+
+M = ldl(E)
+N = ldl(F)
+
+# [E A] [x] = [b]
+# [Aᴴ -F] [y] [c]
+x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true)
+```
+
+```julia
+using LinearAlgebra, SuiteSparse, Krylov
+import LinearAlgebra.ldiv!
+
+M = cholesky(E)
+
+# ldiv! is not implemented for the sparse Cholesky factorization (SuiteSparse.CHOLMOD)
+ldiv!(y::Vector{T}, F::SuiteSparse.CHOLMOD.Factor{T}, x::Vector{T}) where T = (y .= F \ x)
+
+# [E A] [x] = [b]
+# [Aᴴ 0] [y] [c]
+x, y, stats = trimr(A, b, c, M=M, sp=true, ldiv=true)
+```
+
+```julia
+using Krylov, LinearAlgebra  # LinearAlgebra provides lu
+
+C = lu(M)
+
+# [M A] [x] = [b]
+# [B 0] [y] [c]
+x, y, stats = gpmr(A, B, b, c, C=C, gsp=true, ldiv=true)
+```
+
+```julia
+import BasicLU
+using LinearOperators, SparseArrays, Krylov
+
+# Least-squares problem
+m, n = size(A)
+Aᴴ = sparse(A')
+basis, B = BasicLU.maxvolbasis(Aᴴ)
+opA = LinearOperator(A)
+B⁻ᴴ = LinearOperator(Float64, n, n, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')))
+
+d, stats = lsmr(opA * B⁻ᴴ, b) # min ‖AB⁻ᴴd - b‖₂
+x = B⁻ᴴ * d # recover the solution of min ‖Ax - b‖₂
+
+# Least-norm problem
+m, n = size(A)
+basis, B = BasicLU.maxvolbasis(A)
+opA = LinearOperator(A)
+B⁻¹ = LinearOperator(Float64, m, m, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')))
+
+x, y, stats = craigmr(B⁻¹ * opA, B⁻¹ * b) # min ‖x‖₂ s.t. B⁻¹Ax = B⁻¹b
+```
diff --git a/docs/src/processes.md b/docs/src/processes.md
new file mode 100644
index 000000000..e9d4066d2
--- /dev/null
+++ b/docs/src/processes.md
@@ -0,0 +1,334 @@
+```@raw html
+
+```
+
+# [Krylov processes](@id krylov-processes)
+
+Krylov processes are the foundation of Krylov methods; they generate bases of Krylov subspaces.
+Depending on the Krylov subspaces generated, Krylov processes are more or less specialized for a subset of linear problems.
+The following table summarizes the most relevant processes for each linear problem.
+
+| Linear problems | Processes |
+|:--------------------------------------------------------------:|:---------------------------------:|
+| Hermitian linear systems | Hermitian Lanczos |
+| Square non-Hermitian linear systems                             | Non-Hermitian Lanczos -- Arnoldi   |
+| Least-squares problems | Golub-Kahan -- Saunders-Simon-Yip |
+| Least-norm problems | Golub-Kahan -- Saunders-Simon-Yip |
+| Saddle-point and Hermitian quasi-definite systems | Golub-Kahan -- Saunders-Simon-Yip |
+| Generalized saddle-point and non-Hermitian partitioned systems | Montoison-Orban |
+
+### Notation
+
+For a matrix $A$, $A^H$ denotes the conjugate transpose of $A$.
+It coincides with $A^T$, the transpose of $A$, for real matrices.
+Define $V_k := \begin{bmatrix} v_1 & \ldots & v_k \end{bmatrix} \enspace$ and $\enspace U_k := \begin{bmatrix} u_1 & \ldots & u_k \end{bmatrix}$.
+
+For a matrix $C \in \mathbb{C}^{n \times n}$ and a vector $t \in \mathbb{C}^{n}$, the $k$-th Krylov subspace generated by $C$ and $t$ is
+```math
+\mathcal{K}_k(C, t) :=
+\left\{\sum_{i=0}^{k-1} \omega_i C^i t \, \middle \vert \, \omega_i \in \mathbb{C},~0 \le i \le k-1 \right\}.
+```
+
+For matrices $C \in \mathbb{C}^{n \times n} \enspace$ and $\enspace T \in \mathbb{C}^{n \times p}$, the $k$-th block Krylov subspace generated by $C$ and $T$ is
+```math
+\mathcal{K}_k^{\square}(C, T) :=
+\left\{\sum_{i=0}^{k-1} C^i T \, \Omega_i \, \middle \vert \, \Omega_i \in \mathbb{C}^{p \times p},~0 \le i \le k-1 \right\}.
+```
+
+## Hermitian Lanczos
+
+![hermitian_lanczos](./graphics/hermitian_lanczos.png)
+
+After $k$ iterations of the Hermitian Lanczos process, the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+ V_k^H V_k &= I_k,
+\end{align*}
+```
+where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$,
+```math
+T_k =
+\begin{bmatrix}
+ \alpha_1 & \beta_2 & & \\
+ \beta_2 & \alpha_2 & \ddots & \\
+ & \ddots & \ddots & \beta_k \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+ T_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}.
+```
+Note that $T_{k+1,k}$ is a real tridiagonal matrix even if $A$ is a complex matrix.
+
+The function [`hermitian_lanczos`](@ref hermitian_lanczos) returns $V_{k+1}$ and $T_{k+1,k}$.
+
+Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift).
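+
+Below is a minimal sketch of a call to this process, assuming the `hermitian_lanczos(A, b, k)` signature and the return values described above; the check mirrors the relation above:
+
+```julia
+using Krylov, LinearAlgebra
+
+n, k = 100, 20
+A = rand(n, n); A = A + A'  # Hermitian matrix
+b = rand(n)
+
+V, T = hermitian_lanczos(A, b, k)  # V is n × (k+1), T is (k+1) × k
+A * V[:, 1:k] ≈ V * T              # AVₖ = Vₖ₊₁Tₖ₊₁.ₖ
+```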
+
+```@docs
+hermitian_lanczos
+```
+
+## Non-Hermitian Lanczos
+
+![nonhermitian_lanczos](./graphics/nonhermitian_lanczos.png)
+
+After $k$ iterations of the non-Hermitian Lanczos process (also named the Lanczos biorthogonalization process), the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+ A^H U_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\
+ V_k^H U_k &= U_k^H V_k = I_k,
+\end{align*}
+```
+where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A,b)$ and $\mathcal{K}_k (A^H,c)$, respectively,
+```math
+T_k =
+\begin{bmatrix}
+ \alpha_1 & \gamma_2 & & \\
+ \beta_2 & \alpha_2 & \ddots & \\
+ & \ddots & \ddots & \gamma_k \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+ T_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}
+, \qquad
+T_{k,k+1} =
+\begin{bmatrix}
+ T_{k} & \gamma_{k+1} e_k
+\end{bmatrix}.
+```
+
+The function [`nonhermitian_lanczos`](@ref nonhermitian_lanczos) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$.
+
+Related methods: [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`CGS`](@ref cgs) and [`BICGSTAB`](@ref bicgstab).
+
+!!! note
+ The scaling factors used in our implementation are $\beta_k = |u_k^H v_k|^{\tfrac{1}{2}}$ and $\gamma_k = (u_k^H v_k) / \beta_k$.
+ With these scaling factors, the non-Hermitian Lanczos process coincides with the Hermitian Lanczos process when $A = A^H$ and $b = c$.
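+
+A minimal sketch, assuming the `nonhermitian_lanczos(A, b, c, k)` signature; the checks mirror the relations above:
+
+```julia
+using Krylov, LinearAlgebra
+
+n, k = 100, 20
+A = rand(n, n)
+b = rand(n); c = rand(n)  # bᴴc ≠ 0 holds almost surely for random vectors
+
+V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
+A * V[:, 1:k] ≈ V * T    # AVₖ = Vₖ₊₁Tₖ₊₁.ₖ
+A' * U[:, 1:k] ≈ U * Tᴴ  # AᴴUₖ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+```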
+
+```@docs
+nonhermitian_lanczos
+```
+
+## Arnoldi
+
+![arnoldi](./graphics/arnoldi.png)
+
+After $k$ iterations of the Arnoldi process, the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\
+ V_k^H V_k &= I_k,
+\end{align*}
+```
+where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$,
+```math
+H_k =
+\begin{bmatrix}
+ h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\
+ h_{2,1}~ & \ddots~ & \ddots & \vdots \\
+ & \ddots~ & \ddots & h_{k-1,k} \\
+ & & h_{k,k-1} & h_{k,k}
+\end{bmatrix}
+, \qquad
+H_{k+1,k} =
+\begin{bmatrix}
+ H_{k} \\
+ h_{k+1,k} e_{k}^T
+\end{bmatrix}.
+```
+
+The function [`arnoldi`](@ref arnoldi) returns $V_{k+1}$ and $H_{k+1,k}$.
+
+Related methods: [`DIOM`](@ref diom), [`FOM`](@ref fom), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres) and [`FGMRES`](@ref fgmres).
+
+!!! note
+ The Arnoldi process coincides with the Hermitian Lanczos process when $A$ is Hermitian.
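+
+A minimal sketch, assuming the `arnoldi(A, b, k)` signature; the check mirrors the relation above:
+
+```julia
+using Krylov, LinearAlgebra
+
+n, k = 100, 20
+A = rand(n, n)
+b = rand(n)
+
+V, H = arnoldi(A, b, k)  # V is n × (k+1), H is (k+1) × k
+A * V[:, 1:k] ≈ V * H    # AVₖ = Vₖ₊₁Hₖ₊₁.ₖ
+```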
+
+```@docs
+arnoldi
+```
+
+## Golub-Kahan
+
+![golub_kahan](./graphics/golub_kahan.png)
+
+After $k$ iterations of the Golub-Kahan bidiagonalization process, the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= U_{k+1} B_k, \\
+ A^H U_{k+1} &= V_k B_k^H + \alpha_{k+1} v_{k+1} e_{k+1}^T = V_{k+1} L_{k+1}^H, \\
+ V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A^HA,A^Hb)$ and $\mathcal{K}_k (AA^H,b)$, respectively,
+```math
+L_k =
+\begin{bmatrix}
+ \alpha_1 & & & \\
+ \beta_2 & \alpha_2 & & \\
+ & \ddots & \ddots & \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+B_k =
+\begin{bmatrix}
+ \alpha_1 & & & \\
+ \beta_2 & \alpha_2 & & \\
+ & \ddots & \ddots & \\
+ & & \beta_k & \alpha_k \\
+ & & & \beta_{k+1} \\
+\end{bmatrix}
+=
+\begin{bmatrix}
+ L_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}.
+```
+Note that $L_k$ is a real bidiagonal matrix even if $A$ is a complex matrix.
+
+The function [`golub_kahan`](@ref golub_kahan) returns $V_{k+1}$, $U_{k+1}$ and $L_{k+1}$.
+
+Related methods: [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig), [`CRAIGMR`](@ref craigmr), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+
+!!! note
+ The Golub-Kahan process coincides with the Hermitian Lanczos process applied to the normal equations $A^HA x = A^Hb$ and $AA^H x = b$.
+ It is also related to the Hermitian Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial vector $\begin{bmatrix} b \\ 0 \end{bmatrix}$.
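+
+A minimal sketch, assuming the `golub_kahan(A, b, k)` signature; the checks mirror the relations above:
+
+```julia
+using Krylov, LinearAlgebra
+
+m, n, k = 120, 80, 20
+A = rand(m, n)
+b = rand(m)
+
+V, U, L = golub_kahan(A, b, k)  # L is (k+1) × (k+1), Bₖ is its first k columns
+A * V[:, 1:k] ≈ U * L[:, 1:k]   # AVₖ = Uₖ₊₁Bₖ
+A' * U ≈ V * L'                 # AᴴUₖ₊₁ = Vₖ₊₁Lₖ₊₁ᴴ
+```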
+
+```@docs
+golub_kahan
+```
+
+## Saunders-Simon-Yip
+
+![saunders_simon_yip](./graphics/saunders_simon_yip.png)
+
+After $k$ iterations of the Saunders-Simon-Yip process (also named the orthogonal tridiagonalization process), the situation may be summarized as
+```math
+\begin{align*}
+ A U_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+ A^H V_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\
+ V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$,
+```math
+T_k =
+\begin{bmatrix}
+ \alpha_1 & \gamma_2 & & \\
+ \beta_2 & \alpha_2 & \ddots & \\
+ & \ddots & \ddots & \gamma_k \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+ T_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}
+, \qquad
+T_{k,k+1} =
+\begin{bmatrix}
+ T_{k} & \gamma_{k+1} e_{k}
+\end{bmatrix}.
+```
+
+The function [`saunders_simon_yip`](@ref saunders_simon_yip) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$.
+
+Related methods: [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr), [`TriLQR`](@ref trilqr), [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr).
+
+```@docs
+saunders_simon_yip
+```
+
+!!! note
+ The Saunders-Simon-Yip process is equivalent to the block-Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
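+
+A minimal sketch, assuming the `saunders_simon_yip(A, b, c, k)` signature; the checks mirror the relations above:
+
+```julia
+using Krylov, LinearAlgebra
+
+m, n, k = 120, 80, 20
+A = rand(m, n)
+b = rand(m); c = rand(n)
+
+V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
+A * U[:, 1:k] ≈ V * T    # AUₖ = Vₖ₊₁Tₖ₊₁.ₖ
+A' * V[:, 1:k] ≈ U * Tᴴ  # AᴴVₖ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+```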
+
+## Montoison-Orban
+
+![montoison_orban](./graphics/montoison_orban.png)
+
+After $k$ iterations of the Montoison-Orban process (also named the orthogonal Hessenberg reduction process), the situation may be summarized as
+```math
+\begin{align*}
+ A U_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\
+ B V_k &= U_k F_k + f_{k+1,k} u_{k+1} e_k^T = U_{k+1} F_{k+1,k}, \\
+ V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$,
+```math
+H_k =
+\begin{bmatrix}
+ h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\
+ h_{2,1}~ & \ddots~ & \ddots & \vdots \\
+ & \ddots~ & \ddots & h_{k-1,k} \\
+ & & h_{k,k-1} & h_{k,k}
+\end{bmatrix}
+, \qquad
+F_k =
+\begin{bmatrix}
+ f_{1,1}~ & f_{1,2}~ & \ldots & f_{1,k} \\
+ f_{2,1}~ & \ddots~ & \ddots & \vdots \\
+ & \ddots~ & \ddots & f_{k-1,k} \\
+ & & f_{k,k-1} & f_{k,k}
+\end{bmatrix},
+```
+```math
+H_{k+1,k} =
+\begin{bmatrix}
+ H_{k} \\
+ h_{k+1,k} e_{k}^T
+\end{bmatrix}
+, \qquad
+F_{k+1,k} =
+\begin{bmatrix}
+ F_{k} \\
+ f_{k+1,k} e_{k}^T
+\end{bmatrix}.
+```
+
+The function [`montoison_orban`](@ref montoison_orban) returns $V_{k+1}$, $H_{k+1,k}$, $U_{k+1}$ and $F_{k+1,k}$.
+
+Related methods: [`GPMR`](@ref gpmr).
+
+!!! note
+ The Montoison-Orban process is equivalent to the block-Arnoldi process applied to $\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+ It also coincides with the Saunders-Simon-Yip process when $B = A^H$.
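+
+A minimal sketch, assuming the `montoison_orban(A, B, b, c, k)` signature; the checks mirror the relations above:
+
+```julia
+using Krylov, LinearAlgebra
+
+m, n, k = 120, 80, 20
+A = rand(m, n); B = rand(n, m)
+b = rand(m); c = rand(n)
+
+V, H, U, F = montoison_orban(A, B, b, c, k)
+A * U[:, 1:k] ≈ V * H    # AUₖ = Vₖ₊₁Hₖ₊₁.ₖ
+B * V[:, 1:k] ≈ U * F    # BVₖ = Uₖ₊₁Fₖ₊₁.ₖ
+```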
+
+```@docs
+montoison_orban
+```
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 0896e1639..be0ac5288 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -10,5 +10,7 @@ Krylov.FloatOrComplex
Krylov.niterations
Krylov.Aprod
Krylov.Atprod
+Krylov.kstdout
+Krylov.extract_parameters
Base.show
```
diff --git a/docs/src/solvers/gsp.md b/docs/src/solvers/gsp.md
index 10aaccbe0..33c580b8a 100644
--- a/docs/src/solvers/gsp.md
+++ b/docs/src/solvers/gsp.md
@@ -1,5 +1,5 @@
```@meta
-# Generalized saddle-point and unsymmetric partitioned systems
+# Generalized saddle-point and non-Hermitian partitioned systems
```
## GPMR
diff --git a/docs/src/solvers/ln.md b/docs/src/solvers/ln.md
index c5396ffdd..b638b8247 100644
--- a/docs/src/solvers/ln.md
+++ b/docs/src/solvers/ln.md
@@ -36,3 +36,10 @@ craig!
craigmr
craigmr!
```
+
+## USYMLQ
+
+```@docs
+usymlq
+usymlq!
+```
diff --git a/docs/src/solvers/ls.md b/docs/src/solvers/ls.md
index f77057d94..fecfbc417 100644
--- a/docs/src/solvers/ls.md
+++ b/docs/src/solvers/ls.md
@@ -36,3 +36,10 @@ lsqr!
lsmr
lsmr!
```
+
+## USYMQR
+
+```@docs
+usymqr
+usymqr!
+```
diff --git a/docs/src/solvers/sid.md b/docs/src/solvers/sid.md
index 1bd459cd2..e911681be 100644
--- a/docs/src/solvers/sid.md
+++ b/docs/src/solvers/sid.md
@@ -1,5 +1,5 @@
```@meta
-# Symmetric indefinite linear systems
+# Hermitian indefinite linear systems
```
## SYMMLQ
diff --git a/docs/src/solvers/sp_sqd.md b/docs/src/solvers/sp_sqd.md
index 518684b5b..4ee4ab09b 100644
--- a/docs/src/solvers/sp_sqd.md
+++ b/docs/src/solvers/sp_sqd.md
@@ -1,5 +1,5 @@
```@meta
-# Saddle-point and symmetric quasi-definite systems
+# Saddle-point and Hermitian quasi-definite systems
```
## TriCG
diff --git a/docs/src/solvers/spd.md b/docs/src/solvers/spd.md
index 79bb6e9e8..aebda285b 100644
--- a/docs/src/solvers/spd.md
+++ b/docs/src/solvers/spd.md
@@ -1,5 +1,5 @@
```@meta
-# Symmetric positive definite linear systems
+# Hermitian positive definite linear systems
```
## CG
diff --git a/docs/src/solvers/unsymmetric.md b/docs/src/solvers/unsymmetric.md
index 280908ea5..c9e77f787 100644
--- a/docs/src/solvers/unsymmetric.md
+++ b/docs/src/solvers/unsymmetric.md
@@ -1,5 +1,5 @@
```@meta
-# Unsymmetric linear systems
+# Non-Hermitian square linear systems
```
## BiLQ
@@ -16,20 +16,6 @@ qmr
qmr!
```
-## USYMLQ
-
-```@docs
-usymlq
-usymlq!
-```
-
-## USYMQR
-
-```@docs
-usymqr
-usymqr!
-```
-
## CGS
```@docs
@@ -71,3 +57,10 @@ dqgmres!
gmres
gmres!
```
+
+## FGMRES
+
+```@docs
+fgmres
+fgmres!
+```
diff --git a/docs/src/storage.md b/docs/src/storage.md
new file mode 100644
index 000000000..903cc0558
--- /dev/null
+++ b/docs/src/storage.md
@@ -0,0 +1,152 @@
+```@meta
+# Thanks Morten Piibeleht for the hack with the tables!
+```
+
+```@raw html
+
+```
+
+# [Storage requirements](@id storage-requirements)
+
+This section provides the storage requirements of all Krylov methods available in Krylov.jl.
+
+### Notation
+
+We denote by $m$ and $n$ the number of rows and columns of the linear problem.
+The memory parameter of DIOM, FOM, DQGMRES, GMRES, FGMRES and GPMR is $k$.
+The number of shifts of CG-LANCZOS-SHIFT is $p$.
+
+## Theoretical storage requirements
+
+The following tables provide the number of coefficients that must be allocated for each Krylov method.
+The coefficients have the same type as those that compose the linear problem we seek to solve.
+Each table summarizes the storage requirements of the Krylov methods recommended for a specific linear problem.
+
+#### Hermitian positive definite linear systems
+
+| Methods | [`CG`](@ref cg) | [`CR`](@ref cr) | [`CG-LANCZOS`](@ref cg_lanczos) | [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift) |
+|:-------:|:---------------:|:---------------:|:-------------------------------:|:-------------------------------------------:|
+| Storage | $4n$            | $5n$            | $5n$                            | $3n + 2np + 5p$                             |
+
+#### Hermitian indefinite linear systems
+
+| Methods | [`SYMMLQ`](@ref symmlq) | [`MINRES`](@ref minres) | [`MINRES-QLP`](@ref minres_qlp) |
+|:-------:|:-----------------------:|:-----------------------:|:-------------------------------:|
+| Storage | $5n$ | $6n$ | $6n$ |
+
+#### Non-Hermitian square linear systems
+
+| Methods | [`CGS`](@ref cgs) | [`BICGSTAB`](@ref bicgstab) | [`BiLQ`](@ref bilq) | [`QMR`](@ref qmr) |
+|:-------:|:-----------------:|:---------------------------:|:-------------------:|:-----------------:|
+| Storage | $6n$ | $6n$ | $8n$ | $9n$ |
+
+| Methods | [`DIOM`](@ref diom) | [`DQGMRES`](@ref dqgmres) |
+|:-------:|:-------------------:|:-------------------------:|
+| Storage | $n(2k+1) + 2k - 1$ | $n(2k+2) + 3k + 1$ |
+
+| Methods | [`FOM`](@ref fom) | [`GMRES`](@ref gmres) | [`FGMRES`](@ref fgmres) |
+|:-------:|:--------------------------------------------------:|:---------------------------------------:|:----------------------------------------:|
+| Storage$\dfrac{}{}$ | $\!n(2+k) + 2k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+k) + 3k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+2k) + 3k + \dfrac{k(k + 1)}{2}\!$ |
+
+#### Least-norm problems
+
+| Methods | [`USYMLQ`](@ref usymlq) | [`CGNE`](@ref cgne) | [`CRMR`](@ref crmr) | [`LNLQ`](@ref lnlq) | [`CRAIG`](@ref craig) | [`CRAIGMR`](@ref craigmr) |
+|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:---------------------:|:-------------------------:|
+| Storage | $5n + 3m$ | $3n + 2m$ | $3n + 2m$ | $3n + 4m$ | $3n + 4m$ | $4n + 5m$ |
+
+#### Least-squares problems
+
+| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) |
+|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|
+| Storage | $6n + 3m$ | $3n + 2m$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ |
+
+#### Adjoint systems
+
+| Methods | [`BiLQR`](@ref bilqr) | [`TriLQR`](@ref trilqr) |
+|:-------:|:---------------------:|:-----------------------:|
+| Storage | $11n$ | $6m + 5n$ |
+
+#### Saddle-point and Hermitian quasi-definite systems
+
+| Methods | [`TriCG`](@ref tricg) | [`TriMR`](@ref trimr) |
+|:--------:|:---------------------:|:---------------------:|
+| Storage | $6n + 6m$ | $8n + 8m$ |
+
+#### Generalized saddle-point and non-Hermitian partitioned systems
+
+| Method | [`GPMR`](@ref gpmr) |
+|:-------:|:-------------------------:|
+| Storage | $(2+k)(n+m) + 2k^2 + 11k$ |
+
+## Practical storage requirements
+
+Each method has its own `KrylovSolver` that contains all the storage needed by the method.
+In the REPL, the size in bytes of each attribute and the total amount of memory allocated by the solver are displayed when we show a `KrylovSolver`.
+
+```@example storage
+using Krylov
+
+m = 5000
+n = 12000
+A = rand(Float64, m, n)
+b = rand(Float64, m)
+solver = LsmrSolver(A, b)
+show(stdout, solver, show_stats=false)
+```
+
+If we want the total number of bytes used by the solver, we can call `nbytes = sizeof(solver)`.
+
+```@example storage
+nbytes = sizeof(solver)
+```
+
+Thereafter, we can use `Base.format_bytes(nbytes)` to recover what is displayed in the REPL.
+
+```@example storage
+Base.format_bytes(nbytes)
+```
+
+To verify that we match the theoretical results, we just need to multiply the storage requirement of a method by the number of bytes associated with the precision of the linear problem.
+For instance, we need 4 bytes for the precision `Float32`, 8 bytes for precisions `Float64` and `ComplexF32`, and 16 bytes for the precision `ComplexF64`.
+
+```@example storage
+FC = Float64 # precision of the least-squares problem
+ncoefs_lsmr = 5*n + 2*m # number of coefficients
+nbytes_lsmr = sizeof(FC) * ncoefs_lsmr # number of bytes
+```
+
+Therefore, you can check that you have enough memory in RAM to allocate a `KrylovSolver`.
+
+```@example storage
+free_nbytes = Sys.free_memory()
+Base.format_bytes(free_nbytes) # Total free memory in RAM in bytes.
+```
+
+!!! note
+ - Beyond having faster operations, using low precisions, such as single precision, makes it possible to store more coefficients in RAM and solve larger linear problems.
+ - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to test that we match the expected storage requirement of each method with a tolerance of 2%.
diff --git a/docs/src/tips.md b/docs/src/tips.md
index 604c0633d..e08567ae1 100644
--- a/docs/src/tips.md
+++ b/docs/src/tips.md
@@ -16,14 +16,14 @@ If you don't know the maximum number of threads available on your computer, you
NMAX = Sys.CPU_THREADS
```
-and define the number of OpenBLAS/MKL threads at runtine with
+and define the number of OpenBLAS/MKL threads at runtime with
```julia
BLAS.set_num_threads(N) # 1 ≤ N ≤ NMAX
BLAS.get_num_threads()
```
-The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2`.
+The recommended number of BLAS threads is the number of physical cores, not logical cores; it is in general `N = NMAX / 2` if your CPU supports simultaneous multithreading (SMT).
By default Julia ships with OpenBLAS but it's also possible to use Intel MKL BLAS and LAPACK with [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl).
diff --git a/docs/src/warm_start.md b/docs/src/warm-start.md
similarity index 59%
rename from docs/src/warm_start.md
rename to docs/src/warm-start.md
index 030cad6c0..6b830bff3 100644
--- a/docs/src/warm_start.md
+++ b/docs/src/warm-start.md
@@ -1,9 +1,10 @@
-## Warm Start
+# [Warm-start](@id warm-start)
-Most Krylov methods in this module accept a starting point as argument. The starting point is used as initial approximation to a solution.
+Most Krylov methods in this module accept a starting point as an argument.
+The starting point is used as an initial approximation to a solution.
```julia
-solver = CgSolver(n, n, S)
+solver = CgSolver(A, b)
cg!(solver, A, b, itmax=100)
if !issolved(solver)
cg!(solver, A, b, solver.x, itmax=100) # cg! uses the approximate solution `solver.x` as starting point
@@ -28,7 +29,7 @@ If a Krylov method doesn't have the option to warm start, it can still be done e
We provide an example with `cg_lanczos!`.
```julia
-solver = CgLanczosSolver(n, n, S)
+solver = CgLanczosSolver(A, b)
cg_lanczos!(solver, A, b)
x₀ = solver.x # Ax₀ ≈ b
r = b - A * x₀ # r = b - Ax₀
@@ -41,33 +42,34 @@ Explicit restarts cannot be avoided in certain block methods, such as TriMR, due
```julia
# [E A] [x] = [b]
-# [Aᵀ F] [y] [c]
+# [Aᴴ F] [y] [c]
M = inv(E)
N = inv(F)
x₀, y₀, stats = trimr(A, b, c, M=M, N=N)
# E and F are not available inside TriMR
b₀ = b - Ex₀ - Ay
-c₀ = c - Aᵀx₀ - Fy
+c₀ = c - Aᴴx₀ - Fy
Δx, Δy, stats = trimr(A, b₀, c₀, M=M, N=N)
x = x₀ + Δx
y = y₀ + Δy
```
-
-## Restarted methods
-
-The storage requierements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses.
-For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are prefered.
-In this section, we show how to use warm starts to implement GMRES(k) and FOM(k).
-
-```julia
-k = 50
-solver = GmresSolver(A, b, k) # FomSolver(A, b, k)
-solver.x .= 0 # solver.x .= x₀
-nrestart = 0
-while !issolved(solver) || nrestart ≤ 10
- solve!(solver, A, b, solver.x, itmax=k)
- nrestart += 1
-end
+```@meta
+# ## Restarted methods
+#
+# The storage requirements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses.
+# For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are preferred.
+# In this section, we show how to use warm starts to implement GMRES(k) and FOM(k).
+#
+# ```julia
+# k = 50
+# solver = GmresSolver(A, b, k) # FomSolver(A, b, k)
+# solver.x .= 0 # solver.x .= x₀
+# nrestart = 0
+# while !issolved(solver) || nrestart ≤ 10
+# solve!(solver, A, b, solver.x, itmax=k)
+# nrestart += 1
+# end
+# ```
```
diff --git a/ext/KrylovComponentArraysExt.jl b/ext/KrylovComponentArraysExt.jl
new file mode 100644
index 000000000..68cc3e7cf
--- /dev/null
+++ b/ext/KrylovComponentArraysExt.jl
@@ -0,0 +1,13 @@
+module KrylovComponentArraysExt
+
+using Krylov: Krylov
+using ComponentArrays: ComponentVector
+
+"""
+ Krylov.ktypeof(::ComponentVector{T,V}) where {T,V}
+
+Return the underlying `V` type.
+"""
+Krylov.ktypeof(::ComponentVector{T,V}) where {T,V} = V
+
+end
diff --git a/ext/KrylovFillArraysExt.jl b/ext/KrylovFillArraysExt.jl
new file mode 100644
index 000000000..636533942
--- /dev/null
+++ b/ext/KrylovFillArraysExt.jl
@@ -0,0 +1,13 @@
+module KrylovFillArraysExt
+
+using Krylov: Krylov
+using FillArrays: AbstractFill
+
+"""
+ Krylov.ktypeof(::AbstractFill{T,1}) where {T}
+
+Return the corresponding `Vector{T}` type.
+"""
+Krylov.ktypeof(::AbstractFill{T,1}) where {T} = Vector{T}
+
+end
diff --git a/ext/KrylovStaticArraysExt.jl b/ext/KrylovStaticArraysExt.jl
new file mode 100644
index 000000000..f24bd34cc
--- /dev/null
+++ b/ext/KrylovStaticArraysExt.jl
@@ -0,0 +1,13 @@
+module KrylovStaticArraysExt
+
+using Krylov: Krylov
+using StaticArrays: StaticVector
+
+"""
+ Krylov.ktypeof(::StaticVector{S,T}) where {S,T}
+
+Return the corresponding `Vector{T}` type.
+"""
+Krylov.ktypeof(::StaticVector{S,T}) where {S,T} = Vector{T}
+
+end
diff --git a/src/Krylov.jl b/src/Krylov.jl
index b714ccd79..013ea3e65 100644
--- a/src/Krylov.jl
+++ b/src/Krylov.jl
@@ -1,10 +1,16 @@
module Krylov
using LinearAlgebra, SparseArrays, Printf
+using PackageExtensionCompat
+
+function __init__()
+ @require_extensions
+end
include("krylov_utils.jl")
include("krylov_stats.jl")
include("krylov_solvers.jl")
+include("krylov_processes.jl")
include("cg.jl")
include("cr.jl")
@@ -19,6 +25,7 @@ include("diom.jl")
include("fom.jl")
include("dqgmres.jl")
include("gmres.jl")
+include("fgmres.jl")
include("gpmr.jl")
@@ -49,6 +56,6 @@ include("lnlq.jl")
include("craig.jl")
include("craigmr.jl")
-include("callback_utils.jl")
+include("krylov_solve.jl")
end
diff --git a/src/bicgstab.jl b/src/bicgstab.jl
index c3b914599..16a3ceae9 100644
--- a/src/bicgstab.jl
+++ b/src/bicgstab.jl
@@ -16,40 +16,60 @@
export bicgstab, bicgstab!
"""
- (x, stats) = bicgstab(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = bicgstab(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, M=I, N=I,
+ ldiv::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the BICGSTAB method.
+ (x, stats) = bicgstab(A, b, x0::AbstractVector; kwargs...)
+
+BICGSTAB can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using BICGSTAB.
BICGSTAB requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
The Biconjugate Gradient Stabilized method is a variant of BiCG, like CGS,
-but using different updates for the Aᵀ-sequence in order to obtain smoother
+but using different updates for the Aᴴ-sequence in order to obtain smoother
convergence than CGS.
If BICGSTAB stagnates, we recommend DQGMRES and BiLQ as alternative methods for unsymmetric square systems.
BICGSTAB stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖b‖ * rtol`.
-`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
-This implementation allows a left preconditioner `M` and a right preconditioner `N`.
+#### Optional argument
-BICGSTAB can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = bicgstab(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -58,18 +78,6 @@ and `false` otherwise.
"""
function bicgstab end
-function bicgstab(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = BicgstabSolver(A, b)
- bicgstab!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function bicgstab(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = BicgstabSolver(A, b)
- bicgstab!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = bicgstab!(solver::BicgstabSolver, A, b; kwargs...)
solver = bicgstab!(solver::BicgstabSolver, A, b, x0; kwargs...)
@@ -80,150 +88,201 @@ See [`BicgstabSolver`](@ref) for more details about the `solver`.
"""
function bicgstab! end
-function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- bicgstab!(solver, A, b; kwargs...)
- return solver
-end
-
-function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("BICGSTAB: system of size %d\n", n)
-
- # Check M = Iₙ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :t , S, n)
- allocate_if(!NisI, solver, :yz, S, n)
- Δx, x, r, p, v, s, qd, stats = solver.Δx, solver.x, solver.r, solver.p, solver.v, solver.s, solver.qd, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- q = d = solver.qd
- t = MisI ? d : solver.t
- y = NisI ? p : solver.yz
- z = NisI ? s : solver.yz
- r₀ = MisI ? r : solver.qd
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
- else
- r₀ .= b
+def_args_bicgstab = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_bicgstab = (:(x0::AbstractVector),)
+
+def_kwargs_bicgstab = (:(; c::AbstractVector{FC} = b ),
+ :(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_bicgstab = mapreduce(extract_parameters, vcat, def_kwargs_bicgstab)
+
+args_bicgstab = (:A, :b)
+optargs_bicgstab = (:x0,)
+kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function bicgstab($(def_args_bicgstab...), $(def_optargs_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = BicgstabSolver(A, b)
+ warm_start!(solver, $(optargs_bicgstab...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ bicgstab!(solver, $(args_bicgstab...); $(kwargs_bicgstab...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- x .= zero(FC) # x₀
- s .= zero(FC) # s₀
- v .= zero(FC) # v₀
- MisI || mulorldiv!(r, M, r₀, ldiv) # r₀
- p .= r # p₁
-
- α = one(FC) # α₀
- ω = one(FC) # ω₀
- ρ = one(FC) # ρ₀
-
- # Compute residual norm ‖r₀‖₂.
- rNorm = @knrm2(n, r)
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function bicgstab($(def_args_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = BicgstabSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ bicgstab!(solver, $(args_bicgstab...); $(kwargs_bicgstab...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω))
-
- next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩
- if next_ρ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = false, false
- stats.status = "Breakdown bᵀc = 0"
- solver.warm_start = false
- return solver
- end
-
- # Stopping criterion.
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- breakdown = false
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || breakdown || user_requested_exit)
- # Update iteration index and ρ.
- iter = iter + 1
- ρ = next_ρ
-
- NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ
- mul!(q, A, y) # qₖ = Ayₖ
- mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ
- α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩
- @kcopy!(n, r, s) # sₖ = rₖ₋₁
- @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ
- @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ
- NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ
- mul!(d, A, z) # dₖ = Azₖ
- MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ
- ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩
- @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ
- @kcopy!(n, s, r) # rₖ = sₖ
- @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ
- next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩
- β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ)
- @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ
- @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ
-
- # Compute residual norm ‖rₖ‖₂.
+ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, $(def_args_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "BICGSTAB: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :t , S, n)
+ allocate_if(!NisI, solver, :yz, S, n)
+ Δx, x, r, p, v, s, qd, stats = solver.Δx, solver.x, solver.r, solver.p, solver.v, solver.s, solver.qd, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ q = d = solver.qd
+ t = MisI ? d : solver.t
+ y = NisI ? p : solver.yz
+ z = NisI ? s : solver.yz
+ r₀ = MisI ? r : solver.qd
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
+ else
+ r₀ .= b
+ end
+
+ x .= zero(FC) # x₀
+ s .= zero(FC) # s₀
+ v .= zero(FC) # v₀
+ MisI || mulorldiv!(r, M, r₀, ldiv) # r₀
+ p .= r # p₁
+
+ α = one(FC) # α₀
+ ω = one(FC) # ω₀
+ ρ = one(FC) # ρ₀
+
+ # Compute residual norm ‖r₀‖₂.
rNorm = @knrm2(n, r)
history && push!(rNorms, rNorm)
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- solved = resid_decrease_lim || resid_decrease_mach
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %5s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time))
+
+ next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩
+ if next_ρ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = false, false
+ stats.timer = ktimer(start_time)
+ stats.status = "Breakdown bᴴc = 0"
+ solver.warm_start = false
+ return solver
+ end
+
+ # Stopping criterion.
+ solved = rNorm ≤ ε
tired = iter ≥ itmax
- breakdown = (α == 0 || isnan(α))
- kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω))
+ breakdown = false
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+ # Update iteration index and ρ.
+ iter = iter + 1
+ ρ = next_ρ
+
+ NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ
+ mul!(q, A, y) # qₖ = Ayₖ
+ mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ
+ α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩
+ @kcopy!(n, r, s) # sₖ = rₖ₋₁
+ @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ
+ @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ
+ NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ
+ mul!(d, A, z) # dₖ = Azₖ
+ MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ
+ ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩
+ @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ
+ @kcopy!(n, s, r) # rₖ = sₖ
+ @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ
+ next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩
+ β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ)
+ @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ
+ @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ
+
+ # Compute residual norm ‖rₖ‖₂.
+ rNorm = @knrm2(n, r)
+ history && push!(rNorms, rNorm)
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ breakdown = (α == 0 || isnan(α))
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time))
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "breakdown αₖ == 0")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "breakdown αₖ == 0")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
diff --git a/src/bilq.jl b/src/bilq.jl
index 39725fbfe..2e8823e93 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -13,50 +13,58 @@
export bilq, bilq!
"""
- (x, stats) = bilq(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ (x, stats) = bilq(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, transfer_to_bicg::Bool=true,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the BiLQ method.
+ (x, stats) = bilq(A, b, x0::AbstractVector; kwargs...)
+BiLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using BiLQ.
BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
-When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
+When `A` is Hermitian and `b = c`, BiLQ is equivalent to SYMMLQ.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-An option gives the possibility of transferring to the BiCG point,
-when it exists. The transfer is based on the residual norm.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-BiLQ can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = bilq(A, b, x0; kwargs...)
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
-#### Reference
+#### References
* A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
+* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, Springer, pp. 73--89, 1976.
"""
function bilq end
-function bilq(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = BilqSolver(A, b)
- bilq!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function bilq(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = BilqSolver(A, b)
- bilq!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = bilq!(solver::BilqSolver, A, b; kwargs...)
solver = bilq!(solver::BilqSolver, A, b, x0; kwargs...)
@@ -67,263 +75,312 @@ See [`BilqSolver`](@ref) for more details about the `solver`.
"""
function bilq! end
-function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- bilq!(solver, A, b; kwargs...)
- return solver
-end
-
-function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("BILQ: system of size %d\n", n)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
- p, Δx, x, d̅, stats = solver.p, solver.Δx, solver.x, solver.d̅, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- r₀ = warm_start ? q : b
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
+def_args_bilq = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_bilq = (:(x0::AbstractVector),)
+
+def_kwargs_bilq = (:(; c::AbstractVector{FC} = b ),
+ :(; transfer_to_bicg::Bool = true),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_bilq = mapreduce(extract_parameters, vcat, def_kwargs_bilq)
+
+args_bilq = (:A, :b)
+optargs_bilq = (:x0,)
+kwargs_bilq = (:c, :transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function bilq($(def_args_bilq...), $(def_optargs_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = BilqSolver(A, b)
+ warm_start!(solver, $(optargs_bilq...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ bilq!(solver, $(args_bilq...); $(kwargs_bilq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Initial solution x₀ and residual norm ‖r₀‖.
- x .= zero(FC)
- bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖
-
- history && push!(rNorms, bNorm)
- if bNorm == 0
- stats.niter = 0
- stats.solved = true
- stats.inconsistent = false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function bilq($(def_args_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = BilqSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ bilq!(solver, $(args_bilq...); $(kwargs_bilq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- ε = atol + rtol * bNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
-
- # Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩
- if cᵗb == 0
- stats.niter = 0
- stats.solved = false
- stats.inconsistent = false
- stats.status = "Breakdown bᵀc = 0"
- solver.warm_start = false
- return solver
- end
-
- βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀)
- vₖ₋₁ .= zero(FC) # v₀ = 0
- uₖ₋₁ .= zero(FC) # u₀ = 0
- vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
- cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
- sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
- ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
- ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
- δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
- norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
-
- # Stopping criterion.
- solved_lq = bNorm ≤ ε
- solved_cg = false
- breakdown = false
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved_lq || solved_cg || tired || breakdown || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the Lanczos biorthogonalization process.
- # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
-
- @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
- @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
-
- αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩
-
- @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
- @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
-
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
-
- # Update the LQ factorization of Tₖ = L̅ₖQₖ.
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
- # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
- # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
- # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
- # [ • • • • • 0 ] [ • • • • • • • ]
- # [ • • • • γₖ] [ • • • • • 0 ]
- # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
-
- if iter == 1
- δbarₖ = αₖ
- elseif iter == 2
- # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
- # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
- δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
- else
- # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
- # [sₖ₋₁ -cₖ₋₁ 0]
- # [ 0 0 1]
- #
- # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
- # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
- # [0 sₖ -cₖ]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- ϵₖ₋₂ = sₖ₋₁ * βₖ
- λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
- δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
+ function bilq!(solver :: BilqSolver{T,FC,S}, $(def_args_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "BILQ: system of size %d\n", n)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
+ p, Δx, x, d̅, stats = solver.p, solver.Δx, solver.x, solver.d̅, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ r₀ = warm_start ? q : b
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
end
- # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
- # [δbar₁] [ζbar₁] = [β₁]
- if iter == 1
- ηₖ = βₖ
- end
- # [δ₁ 0 ] [ ζ₁ ] = [β₁]
- # [λ₁ δbar₂] [ζbar₂] [0 ]
- if iter == 2
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -λₖ₋₁ * ζₖ₋₁
- end
- # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
- # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
- # [ζbarₖ]
- if iter ≥ 3
- ζₖ₋₂ = ζₖ₋₁
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
+ # Initial solution x₀ and residual norm ‖r₀‖.
+ x .= zero(FC)
+ bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖
+
+ history && push!(rNorms, bNorm)
+ if bNorm == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ.
- # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
- # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
- if iter ≥ 2
- # Compute solution xₖ.
- # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁
- @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
- @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x)
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ ε = atol + rtol * bNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time))
+
+ # Initialize the Lanczos biorthogonalization process.
+ cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩
+ if cᴴb == 0
+ stats.niter = 0
+ stats.solved = false
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "Breakdown bᴴc = 0"
+ solver.warm_start = false
+ return solver
end
- # Compute d̅ₖ.
- if iter == 1
- # d̅₁ = v₁
- @. d̅ = vₖ
- else
- # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
- @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
+ vₖ₋₁ .= zero(FC) # v₀ = 0
+ uₖ₋₁ .= zero(FC) # u₀ = 0
+ vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
+ uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
+ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
+ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
+ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
+ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
+ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
+ norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
+
+ # Stopping criterion.
+ solved_lq = bNorm ≤ ε
+ solved_cg = false
+ breakdown = false
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved_lq || solved_cg || tired || breakdown || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
+
+ # Continue the Lanczos biorthogonalization process.
+ # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+
+ mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
+
+ @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
+ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
+
+ αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩
+
+ @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
+ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
+
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
+
+ # Update the LQ factorization of Tₖ = L̅ₖQₖ.
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
+ # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
+ # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
+ # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
+ # [ • • • • • 0 ] [ • • • • • • • ]
+ # [ • • • • γₖ] [ • • • • • 0 ]
+ # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
+
+ if iter == 1
+ δbarₖ = αₖ
+ elseif iter == 2
+ # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
+ # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
+ δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
+ else
+ # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
+ # [sₖ₋₁ -cₖ₋₁ 0]
+ # [ 0 0 1]
+ #
+ # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
+ # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
+ # [0 sₖ -cₖ]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ ϵₖ₋₂ = sₖ₋₁ * βₖ
+ λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
+ δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
+ end
+
+ # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
+ # [δbar₁] [ζbar₁] = [β₁]
+ if iter == 1
+ ηₖ = βₖ
+ end
+ # [δ₁ 0 ] [ ζ₁ ] = [β₁]
+ # [λ₁ δbar₂] [ζbar₂] [0 ]
+ if iter == 2
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -λₖ₋₁ * ζₖ₋₁
+ end
+ # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
+ # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
+ # [ζbarₖ]
+ if iter ≥ 3
+ ζₖ₋₂ = ζₖ₋₁
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
+ end
+
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ.
+ # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
+ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
+ if iter ≥ 2
+ # Compute solution xₖ.
+ # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁
+ @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
+ @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x)
+ end
+
+ # Compute d̅ₖ.
+ if iter == 1
+ # d̅₁ = v₁
+ @. d̅ = vₖ
+ else
+ # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
+ @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅)
+ end
+
+ # Compute vₖ₊₁ and uₖ₊₁.
+ @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
+ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+
+ if pᴴq ≠ 0
+ @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
+ @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
+ end
+
+ # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
+ vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
+ norm_vₖ₊₁ = @knrm2(n, vₖ)
+
+ # Compute BiLQ residual norm
+ # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩)
+ if iter == 1
+ rNorm_lq = bNorm
+ else
+ μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
+ ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
+ θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁
+ rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
+ end
+ history && push!(rNorms, rNorm_lq)
+
+ # Compute BiCG residual norm
+ # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖
+ if transfer_to_bicg && (abs(δbarₖ) > eps(T))
+ ζbarₖ = ηₖ / δbarₖ
+ ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
+ rNorm_cg = abs(ρₖ) * norm_vₖ₊₁
+ end
+
+ # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ, δbarₖ₋₁ and norm_vₖ.
+ sₖ₋₁ = sₖ
+ cₖ₋₁ = cₖ
+ γₖ = γₖ₊₁
+ βₖ = βₖ₊₁
+ δbarₖ₋₁ = δbarₖ
+ norm_vₖ = norm_vₖ₊₁
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ solved_lq = rNorm_lq ≤ ε
+ solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
+ tired = iter ≥ itmax
+ breakdown = !solved_lq && !solved_cg && (pᴴq == 0)
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time))
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Compute vₖ₊₁ and uₖ₊₁.
- @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
- @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
-
- if pᵗq ≠ 0
- @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
- @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
+ # Compute BICG point
+ # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
+ if solved_cg
+ @kaxpy!(n, ζbarₖ, d̅, x)
end
- # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
- vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
- norm_vₖ₊₁ = @knrm2(n, vₖ)
-
- # Compute BiLQ residual norm
- # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩)
- if iter == 1
- rNorm_lq = bNorm
- else
- μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
- ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁
- rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
- end
- history && push!(rNorms, rNorm_lq)
-
- # Compute BiCG residual norm
- # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖
- if transfer_to_bicg && (abs(δbarₖ) > eps(T))
- ζbarₖ = ηₖ / δbarₖ
- ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
- rNorm_cg = abs(ρₖ) * norm_vₖ₊₁
- end
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
+ solved_lq && (status = "solution xᴸ good enough given atol and rtol")
+ solved_cg && (status = "solution xᶜ good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ, δbarₖ₋₁ and norm_vₖ.
- sₖ₋₁ = sₖ
- cₖ₋₁ = cₖ
- γₖ = γₖ₊₁
- βₖ = βₖ₊₁
- δbarₖ₋₁ = δbarₖ
- norm_vₖ = norm_vₖ₊₁
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- solved_lq = rNorm_lq ≤ ε
- solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
- tired = iter ≥ itmax
- breakdown = !solved_lq && !solved_cg && (pᵗq == 0)
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
- end
- (verbose > 0) && @printf("\n")
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- # Compute BICG point
- # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
- if solved_cg
- @kaxpy!(n, ζbarₖ, d̅, x)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved_lq || solved_cg
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
- solved_lq && (status = "solution xᴸ good enough given atol and rtol")
- solved_cg && (status = "solution xᶜ good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved_lq || solved_cg
- stats.inconsistent = false
- stats.status = status
- return solver
end
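
The rewritten `bilq` entry points above add `timemax`, `iostream` and a `stats.timer` field. As a hedged usage sketch (the random test system, tolerances and time limit below are illustrative assumptions, not part of the patch):

    using Krylov, LinearAlgebra

    n = 100
    A = rand(n, n) + 5I    # square nonsymmetric system, illustrative only
    b = rand(n)

    # Allocating API with the new time limit and logging stream.
    x, stats = bilq(A, b; atol=1e-8, rtol=1e-8, itmax=200, timemax=10.0, verbose=1)

    # Warm start from an initial guess x0 (positional optional argument).
    x0 = zeros(n)
    x, stats = bilq(A, b, x0; history=true)
    println(stats.status, " in ", stats.niter, " iterations (", stats.timer, "s)")
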
diff --git a/src/bilqr.jl b/src/bilqr.jl
index 09fef1f6c..486ccceec 100644
--- a/src/bilqr.jl
+++ b/src/bilqr.jl
@@ -1,5 +1,5 @@
# An implementation of BILQR for the solution of square
-# consistent linear adjoint systems Ax = b and Aᵀy = c.
+# consistent linear adjoint systems Ax = b and Aᴴy = c.
#
# This method is described in
#
@@ -14,33 +14,55 @@ export bilqr, bilqr!
"""
(x, y, stats) = bilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ transfer_to_bicg::Bool=true, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, y, stats) = bilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+BiLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
Combine BiLQ and QMR to solve adjoint systems.
[0 A] [y] = [b]
- [Aᵀ 0] [x] [c]
+ [Aᴴ 0] [x] [c]
+
+The relation `bᴴc ≠ 0` must be satisfied.
+BiLQ is used for solving the primal system `Ax = b` of size n.
+QMR is used for solving the dual system `Aᴴy = c` of size n.
+
+#### Input arguments
-The relation `bᵀc ≠ 0` must be satisfied.
-BiLQ is used for solving primal system `Ax = b`.
-QMR is used for solving dual system `Aᵀy = c`.
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n;
+* `c`: a vector of length n.
-An option gives the possibility of transferring from the BiLQ point to the
-BiCG point, when it exists. The transfer is based on the residual norm.
+#### Optional arguments
-BiLQR can be warm-started from initial guesses `x0` and `y0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
- (x, y, stats) = bilqr(A, b, c, x0, y0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure.
#### Reference
@@ -48,18 +70,6 @@ and `false` otherwise.
"""
function bilqr end
-function bilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = BilqrSolver(A, b)
- bilqr!(solver, A, b, c, x0, y0; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
-function bilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = BilqrSolver(A, b)
- bilqr!(solver, A, b, c; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = bilqr!(solver::BilqrSolver, A, b, c; kwargs...)
solver = bilqr!(solver::BilqrSolver, A, b, c, x0, y0; kwargs...)
@@ -70,369 +80,417 @@ See [`BilqrSolver`](@ref) for more details about the `solver`.
"""
function bilqr! end
-function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0, y0)
- bilqr!(solver, A, b, c; kwargs...)
- return solver
-end
-
-function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("Systems must be square")
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("BILQR: systems of size %d\n", n)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
- p, Δx, Δy, x, t = solver.p, solver.Δx, solver.Δy, solver.x, solver.y
- d̅, wₖ₋₃, wₖ₋₂, stats = solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂, solver.stats
- warm_start = solver.warm_start
- rNorms, sNorms = stats.residuals_primal, stats.residuals_dual
- reset!(stats)
- r₀ = warm_start ? q : b
- s₀ = warm_start ? p : c
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
- mul!(s₀, Aᵀ, Δy)
- @kaxpby!(n, one(FC), c, -one(FC), s₀)
+def_args_bilqr = (:(A ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_bilqr = (:(x0 :: AbstractVector),
+ :(y0 :: AbstractVector))
+
+def_kwargs_bilqr = (:(; transfer_to_bicg::Bool = true),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_bilqr = mapreduce(extract_parameters, vcat, def_kwargs_bilqr)
+
+args_bilqr = (:A, :b, :c)
+optargs_bilqr = (:x0, :y0)
+kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function bilqr($(def_args_bilqr...), $(def_optargs_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = BilqrSolver(A, b)
+ warm_start!(solver, $(optargs_bilqr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ bilqr!(solver, $(args_bilqr...); $(kwargs_bilqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖.
- x .= zero(FC) # x₀
- bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖
-
- # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀy₀‖.
- t .= zero(FC) # t₀
- cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
-
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- history && push!(rNorms, bNorm)
- history && push!(sNorms, cNorm)
- εL = atol + rtol * bNorm
- εQ = atol + rtol * cNorm
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
-
- # Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᵀy₀,b - Ax₀⟩
- if cᵗb == 0
- stats.niter = 0
- stats.solved_primal = false
- stats.solved_dual = false
- stats.status = "Breakdown bᵀc = 0"
- solver.warm_start = false
- return solver
+ function bilqr($(def_args_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = BilqrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ bilqr!(solver, $(args_bilqr...); $(kwargs_bilqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # Set up workspace.
- βₖ = √(abs(cᵗb)) # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀)
- vₖ₋₁ .= zero(FC) # v₀ = 0
- uₖ₋₁ .= zero(FC) # u₀ = 0
- vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᵀy₀) / γ̄₁
- cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
- sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
- ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
- ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
- δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
- ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
- norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
- ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
- wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ
- τₖ = zero(T) # τₖ is used for the dual residual norm estimate
-
- # Stopping criterion.
- solved_lq = bNorm == 0
- solved_lq_tol = solved_lq_mach = false
- solved_cg = solved_cg_tol = solved_cg_mach = false
- solved_primal = solved_lq || solved_cg
- solved_qr_tol = solved_qr_mach = false
- solved_dual = cNorm == 0
- tired = iter ≥ itmax
- breakdown = false
- status = "unknown"
- user_requested_exit = false
-
- while !((solved_primal && solved_dual) || tired || breakdown || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the Lanczos biorthogonalization process.
- # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
-
- @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
- @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
-
- αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩
-
- @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
- @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
-
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
-
- # Update the LQ factorization of Tₖ = L̅ₖQₖ.
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
- # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
- # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
- # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
- # [ • • • • • 0 ] [ • • • • • • • ]
- # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ]
- # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
-
- if iter == 1
- δbarₖ = αₖ
- elseif iter == 2
- # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
- # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
- δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
- else
- # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
- # [sₖ₋₁ -cₖ₋₁ 0]
- # [ 0 0 1]
- #
- # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
- # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
- # [0 sₖ -cₖ]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- ϵₖ₋₂ = sₖ₋₁ * βₖ
- λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
- δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
+ function bilqr!(solver :: BilqrSolver{T,FC,S}, $(def_args_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("Systems must be square")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "BILQR: systems of size %d\n", n)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
+ p, Δx, Δy, x, t = solver.p, solver.Δx, solver.Δy, solver.x, solver.y
+ d̅, wₖ₋₃, wₖ₋₂, stats = solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂, solver.stats
+ warm_start = solver.warm_start
+ rNorms, sNorms = stats.residuals_primal, stats.residuals_dual
+ reset!(stats)
+ r₀ = warm_start ? q : b
+ s₀ = warm_start ? p : c
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
+ mul!(s₀, Aᴴ, Δy)
+ @kaxpby!(n, one(FC), c, -one(FC), s₀)
end
- if !solved_primal
- # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
- # [δbar₁] [ζbar₁] = [β₁]
- if iter == 1
- ηₖ = βₖ
- end
- # [δ₁ 0 ] [ ζ₁ ] = [β₁]
- # [λ₁ δbar₂] [ζbar₂] [0 ]
- if iter == 2
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -λₖ₋₁ * ζₖ₋₁
- end
- # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
- # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
- # [ζbarₖ]
- if iter ≥ 3
- ζₖ₋₂ = ζₖ₋₁
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
- end
+ # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖.
+ x .= zero(FC) # x₀
+ bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖
+
+ # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖.
+ t .= zero(FC) # t₀
+ cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
+
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ history && push!(rNorms, bNorm)
+ history && push!(sNorms, cNorm)
+ εL = atol + rtol * bNorm
+ εQ = atol + rtol * cNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %5s\n", "k", "‖rₖ‖", "‖sₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time))
+
+ # Initialize the Lanczos biorthogonalization process.
+ cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩
+ if cᴴb == 0
+ stats.niter = 0
+ stats.solved_primal = false
+ stats.solved_dual = false
+ stats.timer = ktimer(start_time)
+ stats.status = "Breakdown bᴴc = 0"
+ solver.warm_start = false
+ return solver
+ end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ.
- # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
- # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
- if iter ≥ 2
- # Compute solution xₖ.
- # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁
- @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
- @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x)
- end
+ # Set up workspace.
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
+ vₖ₋₁ .= zero(FC) # v₀ = 0
+ uₖ₋₁ .= zero(FC) # u₀ = 0
+ vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
+ uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁
+ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
+ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
+ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
+ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
+ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
+ ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
+ norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
+ ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
+ wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
+ τₖ = zero(T) # τₖ is used for the dual residual norm estimate
+
+ # Stopping criterion.
+ solved_lq = bNorm == 0
+ solved_lq_tol = solved_lq_mach = false
+ solved_cg = solved_cg_tol = solved_cg_mach = false
+ solved_primal = solved_lq || solved_cg
+ solved_qr_tol = solved_qr_mach = false
+ solved_dual = cNorm == 0
+ tired = iter ≥ itmax
+ breakdown = false
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
- # Compute d̅ₖ.
- if iter == 1
- # d̅₁ = v₁
- @. d̅ = vₖ
- else
- # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
- @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅)
- end
+ while !((solved_primal && solved_dual) || tired || breakdown || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
- # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
- vₖᵀvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁
- norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁
+ # Continue the Lanczos biorthogonalization process.
+ # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
- # Compute BiLQ residual norm
- # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩)
- if iter == 1
- rNorm_lq = bNorm
- else
- μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
- ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁
- rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
- end
- history && push!(rNorms, rNorm_lq)
+ mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
- # Update ‖vₖ‖
- norm_vₖ = norm_vₖ₊₁
+ @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
+ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
- # Compute BiCG residual norm
- # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖
- if transfer_to_bicg && (abs(δbarₖ) > eps(T))
- ζbarₖ = ηₖ / δbarₖ
- ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
- rNorm_cg = abs(ρₖ) * norm_vₖ₊₁
- end
+ αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩
- # Update primal stopping criterion
- solved_lq_tol = rNorm_lq ≤ εL
- solved_lq_mach = rNorm_lq + 1 ≤ 1
- solved_lq = solved_lq_tol || solved_lq_mach
- solved_cg_tol = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL)
- solved_cg_mach = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1)
- solved_cg = solved_cg_tol || solved_cg_mach
- solved_primal = solved_lq || solved_cg
- end
+ @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
+ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
+
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
+
+ # Update the LQ factorization of Tₖ = L̅ₖQₖ.
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
+ # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
+ # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
+ # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
+ # [ • • • • • 0 ] [ • • • • • • • ]
+ # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ]
+ # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
- if !solved_dual
- # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ̄₁e₁.
if iter == 1
- ψbarₖ = conj(γₖ)
+ δbarₖ = αₖ
+ elseif iter == 2
+ # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
+ # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
+ δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
else
- # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ]
- # [sₖ -cₖ] [ 0 ] [ ψbarₖ]
- ψₖ₋₁ = cₖ * ψbarₖ₋₁
- ψbarₖ = sₖ * ψbarₖ₋₁
+ # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
+ # [sₖ₋₁ -cₖ₋₁ 0]
+ # [ 0 0 1]
+ #
+ # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
+ # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
+ # [0 sₖ -cₖ]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ ϵₖ₋₂ = sₖ₋₁ * βₖ
+ λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
+ δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
end
- # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ.
- # w₁ = u₁ / δ̄₁
- if iter == 2
- wₖ₋₁ = wₖ₋₂
- @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
- @. wₖ₋₁ = uₖ₋₁ / conj(δₖ₋₁)
+ if !solved_primal
+ # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
+ # [δbar₁] [ζbar₁] = [β₁]
+ if iter == 1
+ ηₖ = βₖ
+ end
+ # [δ₁ 0 ] [ ζ₁ ] = [β₁]
+ # [λ₁ δbar₂] [ζbar₂] [0 ]
+ if iter == 2
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -λₖ₋₁ * ζₖ₋₁
+ end
+ # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
+ # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
+ # [ζbarₖ]
+ if iter ≥ 3
+ ζₖ₋₂ = ζₖ₋₁
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
+ end
+
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ.
+ # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
+ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
+ if iter ≥ 2
+ # Compute solution xₖ.
+ # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁
+ @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
+ @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x)
+ end
+
+ # Compute d̅ₖ.
+ if iter == 1
+ # d̅₁ = v₁
+ @. d̅ = vₖ
+ else
+ # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
+ @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅)
+ end
+
+ # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
+ vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁
+ norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁
+
+ # Compute BiLQ residual norm
+ # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩)
+ if iter == 1
+ rNorm_lq = bNorm
+ else
+ μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
+ ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
+ θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁
+ rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
+ end
+ history && push!(rNorms, rNorm_lq)
+
+ # Update ‖vₖ‖
+ norm_vₖ = norm_vₖ₊₁
+
+ # Compute BiCG residual norm
+ # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖
+ if transfer_to_bicg && (abs(δbarₖ) > eps(T))
+ ζbarₖ = ηₖ / δbarₖ
+ ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
+ rNorm_cg = abs(ρₖ) * norm_vₖ₊₁
+ end
+
+ # Update primal stopping criterion
+ solved_lq_tol = rNorm_lq ≤ εL
+ solved_lq_mach = rNorm_lq + 1 ≤ 1
+ solved_lq = solved_lq_tol || solved_lq_mach
+ solved_cg_tol = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL)
+ solved_cg_mach = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1)
+ solved_cg = solved_cg_tol || solved_cg_mach
+ solved_primal = solved_lq || solved_cg
end
- # w₂ = (u₂ - λ̄₁w₁) / δ̄₂
- if iter == 3
- wₖ₋₁ = wₖ₋₃
- @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
- @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
- @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+
+ if !solved_dual
+ # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ̄₁e₁.
+ if iter == 1
+ ψbarₖ = conj(γₖ)
+ else
+ # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ]
+ # [sₖ -cₖ] [ 0 ] [ ψbarₖ]
+ ψₖ₋₁ = cₖ * ψbarₖ₋₁
+ ψbarₖ = sₖ * ψbarₖ₋₁
+ end
+
+ # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ.
+ # w₁ = u₁ / δ̄₁
+ if iter == 2
+ wₖ₋₁ = wₖ₋₂
+ @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
+ @. wₖ₋₁ = uₖ₋₁ / conj(δₖ₋₁)
+ end
+ # w₂ = (u₂ - λ̄₁w₁) / δ̄₂
+ if iter == 3
+ wₖ₋₁ = wₖ₋₃
+ @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
+ @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
+ @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+ end
+ # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁
+ if iter ≥ 4
+ @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃)
+ wₖ₋₁ = wₖ₋₃
+ @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
+ @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
+ @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+ end
+
+ if iter ≥ 3
+ # Swap pointers.
+ @kswap(wₖ₋₃, wₖ₋₂)
+ end
+
+ if iter ≥ 2
+ # Compute solution tₖ₋₁.
+ # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁
+ @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t)
+ end
+
+ # Update ψbarₖ₋₁
+ ψbarₖ₋₁ = ψbarₖ
+
+ # Compute τₖ = τₖ₋₁ + ‖uₖ‖²
+ τₖ += @kdotr(n, uₖ, uₖ)
+
+ # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ
+ sNorm = abs(ψbarₖ) * √τₖ
+ history && push!(sNorms, sNorm)
+
+ # Update dual stopping criterion
+ solved_qr_tol = sNorm ≤ εQ
+ solved_qr_mach = sNorm + 1 ≤ 1
+ solved_dual = solved_qr_tol || solved_qr_mach
end
- # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁
- if iter ≥ 4
- @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃)
- wₖ₋₁ = wₖ₋₃
- @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
- @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
- @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+
+ # Compute vₖ₊₁ and uₖ₊₁.
+ @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
+ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+
+ if pᴴq ≠ zero(FC)
+ @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
+ @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
+ # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ.
if iter ≥ 3
- # Swap pointers.
- @kswap(wₖ₋₃, wₖ₋₂)
+ ϵₖ₋₃ = ϵₖ₋₂
end
-
if iter ≥ 2
- # Compute solution tₖ₋₁.
- # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁
- @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t)
+ λₖ₋₂ = λₖ₋₁
end
-
- # Update ψbarₖ₋₁
- ψbarₖ₋₁ = ψbarₖ
-
- # Compute τₖ = τₖ₋₁ + ‖uₖ‖²
- τₖ += @kdotr(n, uₖ, uₖ)
-
- # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ
- sNorm = abs(ψbarₖ) * √τₖ
- history && push!(sNorms, sNorm)
-
- # Update dual stopping criterion
- solved_qr_tol = sNorm ≤ εQ
- solved_qr_mach = sNorm + 1 ≤ 1
- solved_dual = solved_qr_tol || solved_qr_mach
- end
-
- # Compute vₖ₊₁ and uₖ₊₁.
- @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
- @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
-
- if pᵗq ≠ zero(FC)
- @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
- @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
+ δbarₖ₋₁ = δbarₖ
+ cₖ₋₁ = cₖ
+ sₖ₋₁ = sₖ
+ γₖ = γₖ₊₁
+ βₖ = βₖ₊₁
+
+ user_requested_exit = callback(solver) :: Bool
+ tired = iter ≥ itmax
+ breakdown = !solved_lq && !solved_cg && (pᴴq == 0)
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+
+ kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e %.2fs\n", iter, "✗ ✗ ✗ ✗", sNorm, ktimer(start_time))
+ kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s %.2fs\n", iter, rNorm_lq, "✗ ✗ ✗ ✗", ktimer(start_time))
+ kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, rNorm_lq, sNorm, ktimer(start_time))
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ.
- if iter ≥ 3
- ϵₖ₋₃ = ϵₖ₋₂
- end
- if iter ≥ 2
- λₖ₋₂ = λₖ₋₁
+ # Compute BICG point
+ # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
+ if solved_cg
+ @kaxpy!(n, ζbarₖ, d̅, x)
end
- δbarₖ₋₁ = δbarₖ
- cₖ₋₁ = cₖ
- sₖ₋₁ = sₖ
- γₖ = γₖ₊₁
- βₖ = βₖ₊₁
- user_requested_exit = callback(solver) :: Bool
- tired = iter ≥ itmax
- breakdown = !solved_lq && !solved_cg && (pᵗq == 0)
-
- kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm)
- kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "")
- kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm)
- end
- (verbose > 0) && @printf("\n")
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
+ solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol")
+ solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol")
+ !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol")
+ solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol")
+ solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol")
+ solved_lq_mach && !solved_dual && (status = "Only found an approximate zero-residual primal solution xᴸ")
+ solved_cg_mach && !solved_dual && (status = "Only found an approximate zero-residual primal solution xᶜ")
+ !solved_primal && solved_qr_mach && (status = "Only found an approximate zero-residual dual solution t")
+ solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)")
+ solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)")
+ solved_lq_mach && solved_qr_tol && (status = "Found an approximate zero-residual primal solution xᴸ and a dual solution t good enough given atol and rtol")
+ solved_cg_mach && solved_qr_tol && (status = "Found an approximate zero-residual primal solution xᶜ and a dual solution t good enough given atol and rtol")
+ solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solution t")
+ solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solution t")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x and y
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ warm_start && @kaxpy!(n, one(FC), Δy, t)
+ solver.warm_start = false
- # Compute BICG point
- # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
- if solved_cg
- @kaxpy!(n, ζbarₖ, d̅, x)
+ # Update stats
+ stats.niter = iter
+ stats.solved_primal = solved_primal
+ stats.solved_dual = solved_dual
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
- solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol")
- solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol")
- !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol")
- solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol")
- solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol")
- solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ")
- solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ")
- !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t")
- solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)")
- solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)")
- solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol")
- solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol")
- solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t")
- solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x and y
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- warm_start && @kaxpy!(n, one(FC), Δy, t)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.status = status
- stats.solved_primal = solved_primal
- stats.solved_dual = solved_dual
- return solver
end
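
To exercise the updated `bilqr` above on an adjoint pair, a minimal sketch assuming this patch; the complex test data is illustrative and satisfies bᴴc ≠ 0 almost surely:

    using Krylov, LinearAlgebra

    n = 50
    A = rand(ComplexF64, n, n) + 5I
    b = rand(ComplexF64, n)
    c = rand(ComplexF64, n)

    # Solve Ax = b and Aᴴy = c simultaneously, with the new time limit.
    x, y, stats = bilqr(A, b, c; transfer_to_bicg=true, timemax=5.0, history=true)
    stats.solved_primal && stats.solved_dual && println("both systems solved: ", stats.status)

    # Warm-started variant; keyword arguments are identical.
    x0 = zeros(ComplexF64, n); y0 = zeros(ComplexF64, n)
    x, y, stats = bilqr(A, b, c, x0, y0)
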
diff --git a/src/callback_utils.jl b/src/callback_utils.jl
deleted file mode 100644
index eac362e5d..000000000
--- a/src/callback_utils.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-export StorageGetxRestartedGmres
-
-export get_x_restarted_gmres!
-
-mutable struct StorageGetxRestartedGmres{S}
- x::S
- y::S
- p::S
-end
-StorageGetxRestartedGmres(solver::GmresSolver; N = I) =
- StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x))
-
-function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A,
- stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S}
- NisI = (N === I)
- x2, y2, p2 = stor.x, stor.y, stor.p
- n = size(A, 2)
- # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
- nr = sum(1:solver.inner_iter)
- y = solver.z # yᵢ = zᵢ
- y2 .= y
- R = solver.R
- V = solver.V
- x2 .= solver.Δx
- for i = solver.inner_iter : -1 : 1
- pos = nr + i - solver.inner_iter # position of rᵢ.ₖ
- for j = solver.inner_iter : -1 : i+1
- y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
- pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
- end
- # Rₖ can be singular if the system is inconsistent
- if abs(R[pos]) ≤ eps(T)^(3/4)
- y2[i] = zero(FC)
- inconsistent = true
- else
- y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
- end
- end
-
- # Form xₖ = N⁻¹Vₖyₖ
- for i = 1 : solver.inner_iter
- @kaxpy!(n, y2[i], V[i], x2)
- end
- if !NisI
- p2 .= solver.p
- p2 .= x2
- mul!(x2, N, p2)
- end
- x2 .+= solver.x
-end
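
With `callback_utils.jl` removed, early termination is handled entirely through the generic `callback` keyword that every solver still accepts. A user-side sketch; the stopping rule below is an assumption for illustration, not a replacement shipped by the library:

    using Krylov, LinearAlgebra

    n = 100
    A = rand(n, n) + 5I
    b = rand(n)

    # Stop once the last recorded residual drops below 1e-3.
    # Requires history=true so that solver.stats.residuals is updated each iteration.
    stop_early(solver) = !isempty(solver.stats.residuals) &&
                         solver.stats.residuals[end] < 1e-3

    x, stats = bilq(A, b; history=true, callback=stop_early)
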
diff --git a/src/cg.jl b/src/cg.jl
index 8a974accc..1345a6232 100644
--- a/src/cg.jl
+++ b/src/cg.jl
@@ -15,36 +15,54 @@
export cg, cg!
-
"""
(x, stats) = cg(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, radius::T=zero(T), linesearch::Bool=false,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ linesearch::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-The conjugate gradient method to solve the symmetric linear system Ax=b.
+ (x, stats) = cg(A, b, x0::AbstractVector; kwargs...)
-The method does _not_ abort if A is not definite.
+CG can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
+The conjugate gradient method to solve the Hermitian linear system Ax = b of size n.
+
+The method does _not_ abort if A is not definite.
M also indicates the weighted norm in which residuals are measured.
-If `itmax=0`, the default number of iterations is set to `2 * n`,
-with `n = length(b)`.
+#### Input arguments
-CG can be warm-started from an initial guess `x0` with the method
+* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n;
+* `b`: a vector of length n.
- (x, stats) = cg(A, b, x0; kwargs...)
+#### Optional argument
-where `kwargs` are the same keyword arguments as above.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient);
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -52,18 +70,6 @@ and `false` otherwise.
"""
function cg end
-function cg(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = CgSolver(A, b)
- cg!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function cg(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CgSolver(A, b)
- cg!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = cg!(solver::CgSolver, A, b; kwargs...)
solver = cg!(solver::CgSolver, A, b, x0; kwargs...)
@@ -74,152 +80,200 @@ See [`CgSolver`](@ref) for more details about the `solver`.
"""
function cg! end
-function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- cg!(solver, A, b; kwargs...)
- return solver
-end
-
-function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0")
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("CG: system of %d equations in %d variables\n", n, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :z, S, n)
- Δx, x, r, p, Ap, stats = solver.Δx, solver.x, solver.r, solver.p, solver.Ap, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- z = MisI ? r : solver.z
-
- x .= zero(FC)
- if warm_start
- mul!(r, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r)
- else
- r .= b
+def_args_cg = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_cg = (:(x0::AbstractVector),)
+
+def_kwargs_cg = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; radius::T = zero(T) ),
+ :(; linesearch::Bool = false ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cg = mapreduce(extract_parameters, vcat, def_kwargs_cg)
+
+args_cg = (:A, :b)
+optargs_cg = (:x0,)
+kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function cg($(def_args_cg...), $(def_optargs_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgSolver(A, b)
+ warm_start!(solver, $(optargs_cg...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cg!(solver, $(args_cg...); $(kwargs_cg...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(z, M, r, ldiv)
- p .= z
- γ = @kdotr(n, r, z)
- rNorm = sqrt(γ)
- history && push!(rNorms, rNorm)
- if γ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+
+ function cg($(def_args_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cg!(solver, $(args_cg...); $(kwargs_cg...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2 * n)
-
- pAp = zero(T)
- pNorm² = γ
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s %8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ")
- kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm)
-
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- inconsistent = false
- on_boundary = false
- zero_curvature = false
- user_requested_exit = false
-
- status = "unknown"
-
- while !(solved || tired || zero_curvature || user_requested_exit)
- mul!(Ap, A, p)
- pAp = @kdotr(n, p, Ap)
- if (pAp ≤ eps(T) * pNorm²) && (radius == 0)
- if abs(pAp) ≤ eps(T) * pNorm²
- zero_curvature = true
- inconsistent = !linesearch
- end
- if linesearch
- iter == 0 && (x .= b)
- solved = true
- end
+ function cg!(solver :: CgSolver{T,FC,S}, $(def_args_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == n || error("Inconsistent problem size")
+ linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0")
+ (verbose > 0) && @printf(iostream, "CG: system of %d equations in %d variables\n", n, n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :z, S, n)
+ Δx, x, r, p, Ap, stats = solver.Δx, solver.x, solver.r, solver.p, solver.Ap, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ z = MisI ? r : solver.z
+
+ x .= zero(FC)
+ if warm_start
+ mul!(r, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r)
+ else
+ r .= b
+ end
+ MisI || mulorldiv!(z, M, r, ldiv)
+ p .= z
+ γ = @kdotr(n, r, z)
+ rNorm = sqrt(γ)
+ history && push!(rNorms, rNorm)
+ if γ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
end
- (zero_curvature || solved) && continue
- α = γ / pAp
+ iter = 0
+ itmax == 0 && (itmax = 2 * n)
- # Compute step size to boundary if applicable.
- σ = radius > 0 ? maximum(to_boundary(x, p, radius, dNorm2=pNorm²)) : α
+ pAp = zero(T)
+ pNorm² = γ
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %8s %5s\n", "k", "‖r‖", "pAp", "α", "σ", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e", iter, rNorm)
- kdisplay(iter, verbose) && @printf("%8.1e %8.1e %8.1e\n", pAp, α, σ)
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ inconsistent = false
+ on_boundary = false
+ zero_curvature = false
+ user_requested_exit = false
+ overtimed = false
+
+ status = "unknown"
+
+ while !(solved || tired || zero_curvature || user_requested_exit || overtimed)
+ mul!(Ap, A, p)
+ pAp = @kdotr(n, p, Ap)
+ if (pAp ≤ eps(T) * pNorm²) && (radius == 0)
+ if abs(pAp) ≤ eps(T) * pNorm²
+ zero_curvature = true
+ inconsistent = !linesearch
+ end
+ if linesearch
+ iter == 0 && (x .= b)
+ solved = true
+ end
+ end
+ (zero_curvature || solved) && continue
- # Move along p from x to the boundary if either
- # the next step leads outside the trust region or
- # we have nonpositive curvature.
- if (radius > 0) && ((pAp ≤ 0) || (α > σ))
- α = σ
- on_boundary = true
- end
+ α = γ / pAp
- @kaxpy!(n, α, p, x)
- @kaxpy!(n, -α, Ap, r)
- MisI || mulorldiv!(z, M, r, ldiv)
- γ_next = @kdotr(n, r, z)
- rNorm = sqrt(γ_next)
- history && push!(rNorms, rNorm)
+ # Compute step size to boundary if applicable.
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius, dNorm2=pNorm²)) : α
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ kdisplay(iter, verbose) && @printf(iostream, " %8.1e %8.1e %8.1e %.2fs\n", pAp, α, σ, ktimer(start_time))
+
+ # Move along p from x to the boundary if either
+ # the next step leads outside the trust region or
+ # we have nonpositive curvature.
+ if (radius > 0) && ((pAp ≤ 0) || (α > σ))
+ α = σ
+ on_boundary = true
+ end
- resid_decrease_lim = rNorm ≤ ε
- resid_decrease = resid_decrease_lim || resid_decrease_mach
- solved = resid_decrease || on_boundary
+ @kaxpy!(n, α, p, x)
+ @kaxpy!(n, -α, Ap, r)
+ MisI || mulorldiv!(z, M, r, ldiv)
+ γ_next = @kdotr(n, r, z)
+ rNorm = sqrt(γ_next)
+ history && push!(rNorms, rNorm)
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ resid_decrease_lim = rNorm ≤ ε
+ resid_decrease = resid_decrease_lim || resid_decrease_mach
+ solved = resid_decrease || on_boundary
+
+ if !solved
+ β = γ_next / γ
+ pNorm² = γ_next + β^2 * pNorm²
+ γ = γ_next
+ @kaxpby!(n, one(FC), z, β, p)
+ end
- if !solved
- β = γ_next / γ
- pNorm² = γ_next + β^2 * pNorm²
- γ = γ_next
- @kaxpby!(n, one(FC), z, β, p)
+ iter = iter + 1
+ tired = iter ≥ itmax
+ user_requested_exit = callback(solver) :: Bool
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e", iter, rNorm)
end
+ (verbose > 0) && @printf(iostream, "\n\n")
+
+ # Termination status
+ solved && on_boundary && (status = "on trust-region boundary")
+ solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected")
+ solved && (status == "unknown") && (status = "solution good enough given atol and rtol")
+ zero_curvature && (status = "zero curvature detected")
+ tired && (status = "maximum number of iterations exceeded")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- iter = iter + 1
- tired = iter ≥ itmax
- user_requested_exit = callback(solver) :: Bool
- kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- solved && on_boundary && (status = "on trust-region boundary")
- solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected")
- solved && (status == "unknown") && (status = "solution good enough given atol and rtol")
- zero_curvature && (status = "zero curvature detected")
- tired && (status = "maximum number of iterations exceeded")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl
index a8e24f02f..2c5d72a64 100644
--- a/src/cg_lanczos.jl
+++ b/src/cg_lanczos.jl
@@ -12,34 +12,53 @@
export cg_lanczos, cg_lanczos!
-
"""
(x, stats) = cg_lanczos(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
- check_curvature::Bool=false, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false,
+ check_curvature::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-The Lanczos version of the conjugate gradient method to solve the
-symmetric linear system
+ (x, stats) = cg_lanczos(A, b, x0::AbstractVector; kwargs...)
+
+CG-LANCZOS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
- Ax = b
+The Lanczos version of the conjugate gradient method to solve the
+Hermitian linear system Ax = b of size n.
The method does _not_ abort if A is not definite.
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be hermitian and positive definite.
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-CG-LANCZOS can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = cg_lanczos(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`LanczosStats`](@ref) structure.
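+
+#### Example
+
+A minimal sketch of the documented API, assuming a Hermitian positive-definite `A`; the tridiagonal matrix below is illustrative only:
+
+    using Krylov, LinearAlgebra
+    n = 100
+    A = SymTridiagonal(fill(2.0, n), fill(-1.0, n - 1))  # Hermitian positive definite
+    b = ones(n)
+    x, stats = cg_lanczos(A, b)
+    stats.solved  # true once ‖rₖ‖ ≤ atol + rtol * ‖r₀‖
+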
#### References
@@ -48,18 +67,6 @@ and `false` otherwise.
"""
function cg_lanczos end
-function cg_lanczos(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = CgLanczosSolver(A, b)
- cg_lanczos!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function cg_lanczos(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CgLanczosSolver(A, b)
- cg_lanczos!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = cg_lanczos!(solver::CgLanczosSolver, A, b; kwargs...)
solver = cg_lanczos!(solver::CgLanczosSolver, A, b, x0; kwargs...)
@@ -70,150 +77,199 @@ See [`CgLanczosSolver`](@ref) for more details about the `solver`.
"""
function cg_lanczos! end
-function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- cg_lanczos!(solver, A, b; kwargs...)
- return solver
-end
-
-function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
- check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $T")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :v, S, n)
- Δx, x, Mv, Mv_prev = solver.Δx, solver.x, solver.Mv, solver.Mv_prev
- p, Mv_next, stats = solver.p, solver.Mv_next, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- v = MisI ? Mv : solver.v
-
- # Initial state.
- x .= zero(FC)
- if warm_start
- mul!(Mv, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), Mv)
- else
- Mv .= b
+def_args_cg_lanczos = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_cg_lanczos = (:(x0::AbstractVector),)
+
+def_kwargs_cg_lanczos = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; check_curvature::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cg_lanczos = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos)
+
+args_cg_lanczos = (:A, :b)
+optargs_cg_lanczos = (:x0,)
+kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
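+# The tuples above store the positional, optional and keyword parameters of
+# cg_lanczos as expressions; @eval splices them into the method definitions
+# below so that the allocating and in-place variants share one argument list.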
+@eval begin
+ function cg_lanczos($(def_args_cg_lanczos...), $(def_optargs_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgLanczosSolver(A, b)
+ warm_start!(solver, $(optargs_cg_lanczos...))
+ elapsed_time = ktimer(start_time)
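+    # Deduct the setup time (solver allocation and warm start) from the time
+    # budget; it is added back to stats.timer after the solve so the reported
+    # timer covers the total elapsed time.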
+ timemax -= elapsed_time
+ cg_lanczos!(solver, $(args_cg_lanczos...); $(kwargs_cg_lanczos...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀
- β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁
- σ = β
- rNorm = σ
- history && push!(rNorms, rNorm)
- if β == 0
- stats.niter = 0
- stats.solved = true
- stats.Anorm = zero(T)
- stats.indefinite = false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+
+ function cg_lanczos($(def_args_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgLanczosSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cg_lanczos!(solver, $(args_cg_lanczos...); $(kwargs_cg_lanczos...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- p .= v
-
- # Initialize Lanczos process.
- # β₁Mv₁ = b
- @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
- MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
- Mv_prev .= Mv
-
- iter = 0
- itmax == 0 && (itmax = 2 * n)
-
- # Initialize some constants used in recursions below.
- ω = zero(T)
- γ = one(T)
- Anorm2 = zero(T)
- β_prev = zero(T)
-
- # Define stopping tolerance.
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
-
- indefinite = false
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- # Main loop.
- while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit)
- # Form next Lanczos vector.
- # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
- mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
- δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ
-
- # Check curvature. Exit fast if requested.
- # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ A pₖ.
- γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁)
- indefinite |= (γ ≤ 0)
- (check_curvature & indefinite) && continue
-
- @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ
- if iter > 0
- @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁
- @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ
+
+ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, $(def_args_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CG-LANCZOS: system of %d equations in %d variables\n", n, n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :v, S, n)
+ Δx, x, Mv, Mv_prev = solver.Δx, solver.x, solver.Mv, solver.Mv_prev
+ p, Mv_next, stats = solver.p, solver.Mv_next, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ v = MisI ? Mv : solver.v
+
+ # Initial state.
+ x .= zero(FC)
+ if warm_start
+ mul!(Mv, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), Mv)
+ else
+ Mv .= b
end
- @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
- MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
- β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
- @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
- MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
- Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂.
- β_prev = β
-
- # Compute next CG iterate.
- @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ
- ω = β * γ
- σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ
- ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)²
- @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ
- rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1
+ MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀
+    β = sqrt(@kdotr(n, v, Mv))          # β₁ = √(v₁ᴴ M v₁)
+ σ = β
+ rNorm = σ
history && push!(rNorms, rNorm)
- iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- solved = resid_decrease_lim || resid_decrease_mach
+ if β == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.Anorm = zero(T)
+ stats.indefinite = false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+ p .= v
+
+ # Initialize Lanczos process.
+ # β₁Mv₁ = b
+ @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
+ MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
+ Mv_prev .= Mv
+
+ iter = 0
+ itmax == 0 && (itmax = 2 * n)
+
+ # Initialize some constants used in recursions below.
+ ω = zero(T)
+ γ = one(T)
+ Anorm2 = zero(T)
+ β_prev = zero(T)
+
+ # Define stopping tolerance.
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ indefinite = false
+ solved = rNorm ≤ ε
tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ # Main loop.
+ while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit || overtimed)
+ # Form next Lanczos vector.
+ # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
+ mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
+ δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ
+
+ # Check curvature. Exit fast if requested.
+ # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ.
+ γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁)
+ indefinite |= (γ ≤ 0)
+ (check_curvature & indefinite) && continue
+
+ @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ
+ if iter > 0
+ @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁
+ @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ
+ end
+ @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
+ MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
+      β = sqrt(@kdotr(n, v, Mv))        # βₖ₊₁ = √(vₖ₊₁ᴴ M vₖ₊₁)
+ @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
+ MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
+ Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂.
+ β_prev = β
+
+ # Compute next CG iterate.
+ @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ
+ ω = β * γ
+ σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ
+ ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)²
+ @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ
+ rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1
+ history && push!(rNorms, rNorm)
+ iter = iter + 1
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ (check_curvature & indefinite) && (status = "negative curvature")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats. TODO: Estimate Acond.
+ stats.niter = iter
+ stats.solved = solved
+ stats.Anorm = sqrt(Anorm2)
+ stats.indefinite = indefinite
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- (check_curvature & indefinite) && (status = "negative curvature")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats. TODO: Estimate Acond.
- stats.niter = iter
- stats.solved = solved
- stats.Anorm = sqrt(Anorm2)
- stats.indefinite = indefinite
- stats.status = status
- return solver
end
diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl
index 01f11e41f..b523e5cc3 100644
--- a/src/cg_lanczos_shift.jl
+++ b/src/cg_lanczos_shift.jl
@@ -13,13 +13,13 @@
export cg_lanczos_shift, cg_lanczos_shift!
-
"""
(x, stats) = cg_lanczos_shift(A, b::AbstractVector{FC}, shifts::AbstractVector{T};
- M=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, check_curvature::Bool=false,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false,
+ check_curvature::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -27,25 +27,42 @@ export cg_lanczos_shift, cg_lanczos_shift!
The Lanczos version of the conjugate gradient method to solve a family
of shifted systems
- (A + αI) x = b (α = α₁, ..., αₙ)
+ (A + αI) x = b (α = α₁, ..., αₚ)
+
+of size n. The method does _not_ abort if A + αI is not definite.
+
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n;
+* `shifts`: a vector of length p.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and stop iterating on any shifted system for which it is not;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
-The method does _not_ abort if A + αI is not definite.
+* `x`: a vector of p dense vectors, each one of length n;
+* `stats`: statistics collected on the run in a [`LanczosShiftStats`](@ref) structure.
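+
+#### Example
+
+A minimal sketch of solving a family of shifted systems with the documented API, assuming a Hermitian positive-definite `A`; the data below is illustrative only:
+
+    using Krylov, LinearAlgebra
+    n = 50
+    A = SymTridiagonal(fill(2.0, n), fill(-1.0, n - 1))
+    b = ones(n)
+    shifts = [1.0, 2.0, 4.0]
+    x, stats = cg_lanczos_shift(A, b, shifts)
+    x[2]  # approximate solution of (A + 2I) x = b
+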
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be hermitian and positive definite.
+#### References
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* A. Frommer and P. Maass, [*Fast CG-Based Methods for Tikhonov-Phillips Regularization*](https://doi.org/10.1137/S1064827596313310), SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999.
+* C. C. Paige and M. A. Saunders, [*Solution of Sparse Indefinite Systems of Linear Equations*](https://doi.org/10.1137/0712047), SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975.
"""
function cg_lanczos_shift end
-function cg_lanczos_shift(A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
- nshifts = length(shifts)
- solver = CgLanczosShiftSolver(A, b, nshifts)
- cg_lanczos_shift!(solver, A, b, shifts; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
    solver = cg_lanczos_shift!(solver::CgLanczosShiftSolver, A, b, shifts; kwargs...)
@@ -55,174 +72,213 @@ See [`CgLanczosShiftSolver`](@ref) for more details about the `solver`.
"""
function cg_lanczos_shift! end
-function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: AbstractVector{FC}, shifts :: AbstractVector{T};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, check_curvature :: Bool=false,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == n || error("Inconsistent problem size")
-
- nshifts = length(shifts)
- (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :v, S, n)
- Mv, Mv_prev, Mv_next = solver.Mv, solver.Mv_prev, solver.Mv_next
- x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat
- ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged
- not_cv, stats = solver.not_cv, solver.stats
- rNorms_history, indefinite = stats.residuals, stats.indefinite
- reset!(stats)
- v = MisI ? Mv : solver.v
-
- # Initial state.
- ## Distribute x similarly to shifts.
- for i = 1 : nshifts
- x[i] .= zero(FC) # x₀
- end
- Mv .= b # Mv₁ ← b
- MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁
- β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁
- rNorms .= β
- if history
- for i = 1 : nshifts
- push!(rNorms_history[i], rNorms[i])
- end
+def_args_cg_lanczos_shift = (:(A ),
+ :(b::AbstractVector{FC} ),
+ :(shifts::AbstractVector{T}))
+
+def_kwargs_cg_lanczos_shift = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; check_curvature::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cg_lanczos_shift = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos_shift)
+
+args_cg_lanczos_shift = (:A, :b, :shifts)
+kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function cg_lanczos_shift($(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ nshifts = length(shifts)
+ solver = CgLanczosShiftSolver(A, b, nshifts)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cg_lanczos_shift!(solver, $(args_cg_lanczos_shift...); $(kwargs_cg_lanczos_shift...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Keep track of shifted systems with negative curvature if required.
- indefinite .= false
+ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, $(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
- if β == 0
- stats.niter = 0
- stats.solved = true
- stats.status = "x = 0 is a zero-residual solution"
- return solver
- end
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
- # Initialize each p to v.
- for i = 1 : nshifts
- p[i] .= v
- end
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == n || error("Inconsistent problem size")
- # Initialize Lanczos process.
- # β₁Mv₁ = b
- @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
- MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
- Mv_prev .= Mv
-
- # Initialize some constants used in recursions below.
- ρ = one(T)
- σ .= β
- δhat .= zero(T)
- ω .= zero(T)
- γ .= one(T)
-
- # Define stopping tolerance.
- ε = atol + rtol * β
-
- # Keep track of shifted systems that have converged.
- for i = 1 : nshifts
- converged[i] = rNorms[i] ≤ ε
- not_cv[i] = !converged[i]
- end
- iter = 0
- itmax == 0 && (itmax = 2 * n)
-
- # Build format strings for printing.
- if kdisplay(iter, verbose)
- fmt = "%5d" * repeat(" %8.1e", nshifts) * "\n"
- # precompile printf for our particular format
- local_printf(data...) = Core.eval(Main, :(@printf($fmt, $(data)...)))
- local_printf(iter, rNorms...)
- end
+ nshifts = length(shifts)
+ nshifts == solver.nshifts || error("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $nshifts")
+ (verbose > 0) && @printf(iostream, "CG-LANCZOS-SHIFT: system of %d equations in %d variables with %d shifts\n", n, n, nshifts)
- solved = sum(not_cv) == 0
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- # Main loop.
- while ! (solved || tired || user_requested_exit)
- # Form next Lanczos vector.
- # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
- mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
- δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ
- @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ
- if iter > 0
- @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁
- @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ
- end
- @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
- MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
- β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
- @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
- MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
-
- # Check curvature: vₖᵀ(A + sᵢI)vₖ = vₖᵀAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
- # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ (A + sᵢ I) pₖ.
- MisI || (ρ = @kdotr(n, v, v))
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :v, S, n)
+ Mv, Mv_prev, Mv_next = solver.Mv, solver.Mv_prev, solver.Mv_next
+ x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat
+ ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged
+ not_cv, stats = solver.not_cv, solver.stats
+ rNorms_history, indefinite = stats.residuals, stats.indefinite
+ reset!(stats)
+ v = MisI ? Mv : solver.v
+
+ # Initial state.
+ ## Distribute x similarly to shifts.
for i = 1 : nshifts
- δhat[i] = δ + ρ * shifts[i]
- γ[i] = 1 / (δhat[i] - ω[i] / γ[i])
+ x[i] .= zero(FC) # x₀
end
- for i = 1 : nshifts
- indefinite[i] |= γ[i] ≤ 0
+ Mv .= b # Mv₁ ← b
+ MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁
+    β = sqrt(@kdotr(n, v, Mv))          # β₁ = √(v₁ᴴ M v₁)
+ rNorms .= β
+ if history
+ for i = 1 : nshifts
+ push!(rNorms_history[i], rNorms[i])
+ end
end
- # Compute next CG iterate for each shifted system that has not yet converged.
- # Stop iterating on indefinite problems if requested.
- for i = 1 : nshifts
- not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
- if not_cv[i]
- @kaxpy!(n, γ[i], p[i], x[i])
- ω[i] = β * γ[i]
- σ[i] *= -ω[i]
- ω[i] *= ω[i]
- @kaxpby!(n, σ[i], v, ω[i], p[i])
-
- # Update list of systems that have not converged.
- rNorms[i] = abs(σ[i])
- converged[i] = rNorms[i] ≤ ε
- end
+ # Keep track of shifted systems with negative curvature if required.
+ indefinite .= false
+
+ if β == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ return solver
end
- if length(not_cv) > 0 && history
- for i = 1 : nshifts
- not_cv[i] && push!(rNorms_history[i], rNorms[i])
- end
+ # Initialize each p to v.
+ for i = 1 : nshifts
+ p[i] .= v
end
- # Is there a better way than to update this array twice per iteration?
+ # Initialize Lanczos process.
+ # β₁Mv₁ = b
+ @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
+ MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
+ Mv_prev .= Mv
+
+ # Initialize some constants used in recursions below.
+ ρ = one(T)
+ σ .= β
+ δhat .= zero(T)
+ ω .= zero(T)
+ γ .= one(T)
+
+ # Define stopping tolerance.
+ ε = atol + rtol * β
+
+ # Keep track of shifted systems that have converged.
for i = 1 : nshifts
- not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
+ converged[i] = rNorms[i] ≤ ε
+ not_cv[i] = !converged[i]
end
- iter = iter + 1
- kdisplay(iter, verbose) && local_printf(iter, rNorms...)
+ iter = 0
+ itmax == 0 && (itmax = 2 * n)
+
+ # Build format strings for printing.
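+    # The number of columns depends on nshifts, so the format is built at
+    # run time with Printf.Format rather than with the @printf macro.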
+ (verbose > 0) && (fmt = Printf.Format("%5d" * repeat(" %8.1e", nshifts) * " %.2fs\n"))
+ kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time))
- user_requested_exit = callback(solver) :: Bool
- solved = sum(not_cv) == 0
+ solved = !reduce(|, not_cv)
tired = iter ≥ itmax
- end
- (verbose > 0) && @printf("\n")
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ # Main loop.
+ while ! (solved || tired || user_requested_exit || overtimed)
+ # Form next Lanczos vector.
+ # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
+ mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
+ δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ
+ @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ
+ if iter > 0
+ @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁
+ @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ
+ end
+ @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
+ MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
+      β = sqrt(@kdotr(n, v, Mv))        # βₖ₊₁ = √(vₖ₊₁ᴴ M vₖ₊₁)
+ @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
+ MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
+
+ # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
+ # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ.
+ MisI || (ρ = @kdotr(n, v, v))
+ for i = 1 : nshifts
+ δhat[i] = δ + ρ * shifts[i]
+ γ[i] = 1 / (δhat[i] - ω[i] / γ[i])
+ end
+ for i = 1 : nshifts
+ indefinite[i] |= γ[i] ≤ 0
+ end
+
+ # Compute next CG iterate for each shifted system that has not yet converged.
+ # Stop iterating on indefinite problems if requested.
+ for i = 1 : nshifts
+ not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
+ if not_cv[i]
+ @kaxpy!(n, γ[i], p[i], x[i])
+ ω[i] = β * γ[i]
+ σ[i] *= -ω[i]
+ ω[i] *= ω[i]
+ @kaxpby!(n, σ[i], v, ω[i], p[i])
+
+ # Update list of systems that have not converged.
+ rNorms[i] = abs(σ[i])
+ converged[i] = rNorms[i] ≤ ε
+ end
+ end
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
+ if length(not_cv) > 0 && history
+ for i = 1 : nshifts
+ not_cv[i] && push!(rNorms_history[i], rNorms[i])
+ end
+ end
- # Update stats. TODO: Estimate Anorm and Acond.
- stats.niter = iter
- stats.solved = solved
- stats.status = status
- return solver
+ # Is there a better way than to update this array twice per iteration?
+ for i = 1 : nshifts
+ not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
+ end
+ iter = iter + 1
+ kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time))
+
+ user_requested_exit = callback(solver) :: Bool
+ solved = !reduce(|, not_cv)
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats. TODO: Estimate Anorm and Acond.
+ stats.niter = iter
+ stats.solved = solved
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
+ end
end
diff --git a/src/cgls.jl b/src/cgls.jl
index f5529fbfb..e36d5acbd 100644
--- a/src/cgls.jl
+++ b/src/cgls.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# CGLS is formally equivalent to applying the conjugate gradient method
# to the normal equations but should be more stable. It is also formally
@@ -28,12 +28,13 @@
export cgls, cgls!
-
"""
(x, stats) = cgls(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
+ itmax::Int=0, timemax::Float64=Inf,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -42,19 +43,41 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ‖x‖₂²
-using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
+of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CG to the normal equations
- (AᵀA + λI) x = Aᵀb
+ (AᴴA + λI) x = Aᴴb
but is more stable.
-CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂.
+CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSQR, though it can be slightly less accurate,
but it is simpler to implement.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
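+
+#### Example
+
+A minimal sketch of a regularized least-squares solve with the documented API; the random data is illustrative only:
+
+    using Krylov
+    m, n = 100, 20
+    A = rand(m, n)
+    b = rand(m)
+    x, stats = cgls(A, b, λ=1.0e-2)  # minimize ‖b - Ax‖₂² + λ‖x‖₂²
+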
#### References
@@ -63,12 +86,6 @@ and `false` otherwise.
"""
function cgls end
-function cgls(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CglsSolver(A, b)
- cgls!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = cgls!(solver::CglsSolver, A, b; kwargs...)
@@ -78,110 +95,151 @@ See [`CglsSolver`](@ref) for more details about the `solver`.
"""
function cgls! end
-function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CGLS: system of %d equations in %d variables\n", m, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :Mr, S, m)
- x, p, s, r, q, stats = solver.x, solver.p, solver.s, solver.r, solver.q, solver.stats
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- Mr = MisI ? r : solver.Mr
- Mq = MisI ? q : solver.Mr
-
- x .= zero(FC)
- r .= b
- bNorm = @knrm2(m, r) # Marginally faster than norm(b)
- if bNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(rNorms, zero(T))
- history && push!(ArNorms, zero(T))
- return solver
+def_args_cgls = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_cgls = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; radius::T = zero(T) ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cgls = mapreduce(extract_parameters, vcat, def_kwargs_cgls)
+
+args_cgls = (:A, :b)
+kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function cgls($(def_args_cgls...); $(def_kwargs_cgls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CglsSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cgls!(solver, $(args_cgls...); $(kwargs_cgls...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(s, Aᵀ, Mr)
- p .= s
- γ = @kdotr(n, s, s) # γ = sᵀs
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- rNorm = bNorm
- ArNorm = sqrt(γ)
- history && push!(rNorms, rNorm)
- history && push!(ArNorms, ArNorm)
- ε = atol + rtol * ArNorm
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
-
- status = "unknown"
- on_boundary = false
- solved = ArNorm ≤ ε
- tired = iter ≥ itmax
- user_requested_exit = false
-
- while ! (solved || tired || user_requested_exit)
- mul!(q, A, p)
- MisI || mulorldiv!(Mq, M, q, ldiv)
- δ = @kdotr(m, q, Mq) # δ = qᵀMq
- λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᵀp
- α = γ / δ
-
- # if a trust-region constraint is give, compute step to the boundary
- σ = radius > 0 ? maximum(to_boundary(x, p, radius)) : α
- if (radius > 0) & (α > σ)
- α = σ
- on_boundary = true
- end
- @kaxpy!(n, α, p, x) # Faster than x = x + α * p
- @kaxpy!(m, -α, q, r) # Faster than r = r - α * q
+ function cgls!(solver :: CglsSolver{T,FC,S}, $(def_args_cgls...); $(def_kwargs_cgls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CGLS: system of %d equations in %d variables\n", m, n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :Mr, S, m)
+ x, p, s, r, q, stats = solver.x, solver.p, solver.s, solver.r, solver.q, solver.stats
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ Mr = MisI ? r : solver.Mr
+ Mq = MisI ? q : solver.Mr
+
+ x .= zero(FC)
+ r .= b
+ bNorm = @knrm2(m, r) # Marginally faster than norm(b)
+ if bNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(rNorms, zero(T))
+ history && push!(ArNorms, zero(T))
+ return solver
+ end
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(s, Aᵀ, Mr)
- λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x
- γ_next = @kdotr(n, s, s) # γ_next = sᵀs
- β = γ_next / γ
- @kaxpby!(n, one(FC), s, β, p) # p = s + βp
- γ = γ_next
- rNorm = @knrm2(m, r) # Marginally faster than norm(r)
+ mul!(s, Aᴴ, Mr)
+ p .= s
+ γ = @kdotr(n, s, s) # γ = sᴴs
+ iter = 0
+ itmax == 0 && (itmax = m + n)
+
+ rNorm = bNorm
ArNorm = sqrt(γ)
history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
- iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
- user_requested_exit = callback(solver) :: Bool
- solved = (ArNorm ≤ ε) | on_boundary
+ ε = atol + rtol * ArNorm
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time))
+
+ status = "unknown"
+ on_boundary = false
+ solved = ArNorm ≤ ε
tired = iter ≥ itmax
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || user_requested_exit || overtimed)
+ mul!(q, A, p)
+ MisI || mulorldiv!(Mq, M, q, ldiv)
+ δ = @kdotr(m, q, Mq) # δ = qᴴMq
+ λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᴴp
+ α = γ / δ
+
+      # if a trust-region constraint is given, compute the step to the boundary
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius)) : α
+ if (radius > 0) & (α > σ)
+ α = σ
+ on_boundary = true
+ end
+
+ @kaxpy!(n, α, p, x) # Faster than x = x + α * p
+ @kaxpy!(m, -α, q, r) # Faster than r = r - α * q
+ MisI || mulorldiv!(Mr, M, r, ldiv)
+ mul!(s, Aᴴ, Mr)
+ λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x
+ γ_next = @kdotr(n, s, s) # γ_next = sᴴs
+ β = γ_next / γ
+ @kaxpby!(n, one(FC), s, β, p) # p = s + βp
+ γ = γ_next
+ rNorm = @knrm2(m, r) # Marginally faster than norm(r)
+ ArNorm = sqrt(γ)
+ history && push!(rNorms, rNorm)
+ history && push!(ArNorms, ArNorm)
+ iter = iter + 1
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time))
+ user_requested_exit = callback(solver) :: Bool
+ solved = (ArNorm ≤ ε) || on_boundary
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ on_boundary && (status = "on trust-region boundary")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- on_boundary && (status = "on trust-region boundary")
- user_requested_exit && (status = "user-requested exit")
-
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
diff --git a/src/cgne.jl b/src/cgne.jl
index 2859414e1..8a4e6dddb 100644
--- a/src/cgne.jl
+++ b/src/cgne.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate gradient method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is also known as Craig's method, CGME, and other
# names, and is described in
@@ -28,12 +28,13 @@
export cgne, cgne!
-
"""
(x, stats) = cgne(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ N=I, ldiv::Bool=false,
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -42,11 +43,11 @@ Solve the consistent linear system
Ax + √λs = b
-using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
+of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CG to the normal equations
of the second kind
- (AAᵀ + λI) y = b
+ (AAᴴ + λI) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -60,10 +61,29 @@ CGNE produces monotonic errors ‖x-x*‖₂ but not residuals ‖r‖₂.
It is formally equivalent to CRAIG, though it can be slightly less accurate,
but it is simpler to implement. Only the x-part of the solution is returned.
-A preconditioner M may be provided in the form of a linear operator.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
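+
+#### Example
+
+A minimal sketch of a minimum-norm solve of an underdetermined consistent system; the random data is illustrative only:
+
+    using Krylov
+    m, n = 20, 100
+    A = rand(m, n)
+    b = A * ones(n)        # consistent right-hand side
+    x, stats = cgne(A, b)  # minimum-norm solution of Ax = b when λ = 0
+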
#### References
@@ -72,12 +92,6 @@ and `false` otherwise.
"""
function cgne end
-function cgne(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CgneSolver(A, b)
- cgne!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = cgne!(solver::CgneSolver, A, b; kwargs...)
@@ -87,113 +101,154 @@ See [`CgneSolver`](@ref) for more details about the `solver`.
"""
function cgne! end
-function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CGNE: system of %d equations in %d variables\n", m, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :z, S, m)
- allocate_if(λ > 0, solver, :s, S, m)
- x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats
- rNorms = stats.residuals
- reset!(stats)
- z = MisI ? r : solver.z
-
- x .= zero(FC)
- r .= b
- MisI || mulorldiv!(z, M, r, ldiv)
- rNorm = @knrm2(m, r) # Marginally faster than norm(r)
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- return solver
+def_args_cgne = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_cgne = (:(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cgne = mapreduce(extract_parameters, vcat, def_kwargs_cgne)
+
+args_cgne = (:A, :b)
+kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function cgne($(def_args_cgne...); $(def_kwargs_cgne...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgneSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cgne!(solver, $(args_cgne...); $(kwargs_cgne...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- λ > 0 && (s .= r)
- mul!(p, Aᵀ, z)
-
- # Use ‖p‖ to detect inconsistent system.
- # An inconsistent system will necessarily have AA' singular.
- # Because CGNE is equivalent to CG applied to AA'y = b, there will be a
- # conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this
- # implementation, p is a substitute for A'u.
- pNorm = @knrm2(n, p)
-
- γ = @kdotr(m, r, z) # Faster than γ = dot(r, z)
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
- ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems.
- (verbose > 0) && @printf("%5s %8s\n", "k", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm)
-
- status = "unknown"
- solved = rNorm ≤ ɛ_c
- inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i)
- tired = iter ≥ itmax
- user_requested_exit = false
-
- while ! (solved || inconsistent || tired || user_requested_exit)
- mul!(q, A, p)
- λ > 0 && @kaxpy!(m, λ, s, q)
- δ = @kdotr(n, p, p) # Faster than dot(p, p)
- λ > 0 && (δ += λ * @kdotr(m, s, s))
- α = γ / δ
- @kaxpy!(n, α, p, x) # Faster than x = x + α * p
- @kaxpy!(m, -α, q, r) # Faster than r = r - α * q
- MisI || mulorldiv!(z, M, r, ldiv)
- γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z)
- β = γ_next / γ
- mul!(Aᵀz, Aᵀ, z)
- @kaxpby!(n, one(FC), Aᵀz, β, p) # Faster than p = Aᵀz + β * p
- pNorm = @knrm2(n, p)
- if λ > 0
- @kaxpby!(m, one(FC), r, β, s) # s = r + β * s
- end
- γ = γ_next
- rNorm = sqrt(γ_next)
+
+ function cgne!(solver :: CgneSolver{T,FC,S}, $(def_args_cgne...); $(def_kwargs_cgne...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CGNE: system of %d equations in %d variables\n", m, n)
+
+ # Tests N = Iₙ
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!NisI, solver, :z, S, m)
+ allocate_if(λ > 0, solver, :s, S, m)
+ x, p, Aᴴz, r, q, s, stats = solver.x, solver.p, solver.Aᴴz, solver.r, solver.q, solver.s, solver.stats
+ rNorms = stats.residuals
+ reset!(stats)
+ z = NisI ? r : solver.z
+
+ x .= zero(FC)
+ r .= b
+ NisI || mulorldiv!(z, N, r, ldiv)
+ rNorm = @knrm2(m, r) # Marginally faster than norm(r)
history && push!(rNorms, rNorm)
- iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm)
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ return solver
+ end
+ λ > 0 && (s .= r)
+ mul!(p, Aᴴ, z)
+
+ # Use ‖p‖ to detect inconsistent system.
+ # An inconsistent system will necessarily have AA' singular.
+ # Because CGNE is equivalent to CG applied to AA'y = b, there will be a
+ # conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this
+ # implementation, p is a substitute for A'u.
+ pNorm = @knrm2(n, p)
+
+ γ = @kdotr(m, r, z) # Faster than γ = dot(r, z)
+ iter = 0
+ itmax == 0 && (itmax = m + n)
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
+ ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems.
+ (verbose > 0) && @printf(iostream, "%5s %8s %5s\n", "k", "‖r‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %.2fs\n", iter, rNorm, ktimer(start_time))
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ɛ_c
- solved = resid_decrease_lim || resid_decrease_mach
+ status = "unknown"
+ solved = rNorm ≤ ɛ_c
inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i)
tired = iter ≥ itmax
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || inconsistent || tired || user_requested_exit || overtimed)
+ mul!(q, A, p)
+ λ > 0 && @kaxpy!(m, λ, s, q)
+ δ = @kdotr(n, p, p) # Faster than dot(p, p)
+ λ > 0 && (δ += λ * @kdotr(m, s, s))
+ α = γ / δ
+ @kaxpy!(n, α, p, x) # Faster than x = x + α * p
+ @kaxpy!(m, -α, q, r) # Faster than r = r - α * q
+ NisI || mulorldiv!(z, N, r, ldiv)
+ γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z)
+ β = γ_next / γ
+ mul!(Aᴴz, Aᴴ, z)
+ @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p
+ pNorm = @knrm2(n, p)
+ if λ > 0
+ @kaxpby!(m, one(FC), r, β, s) # s = r + β * s
+ end
+ γ = γ_next
+ rNorm = sqrt(γ_next)
+ history && push!(rNorms, rNorm)
+ iter = iter + 1
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ɛ_c
+ solved = resid_decrease_lim || resid_decrease_mach
+ inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i)
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ inconsistent && (status = "system probably inconsistent")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- inconsistent && (status = "system probably inconsistent")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
diff --git a/src/cgs.jl b/src/cgs.jl
index c1eb1056e..e95e74d17 100644
--- a/src/cgs.jl
+++ b/src/cgs.jl
@@ -11,17 +11,23 @@
export cgs, cgs!
"""
- (x, stats) = cgs(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = cgs(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, M=I, N=I,
+ ldiv::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using conjugate gradient squared algorithm.
+ (x, stats) = cgs(A, b, x0::AbstractVector; kwargs...)
+
+CGS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the consistent linear system Ax = b of size n using CGS.
CGS requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
From "Iterative Methods for Sparse Linear Systems (Y. Saad)" :
@@ -38,16 +44,34 @@ to become inaccurate.
TFQMR and BICGSTAB were developed to remedy this difficulty.»
-This implementation allows a left preconditioner M and a right preconditioner N.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
-CGS can be warm-started from an initial guess `x0` with the method
+#### Optional argument
- (x, stats) = cgs(A, b, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -55,18 +79,6 @@ and `false` otherwise.
"""
function cgs end
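For orientation, a minimal usage sketch of the API documented above; the matrix and right-hand side are illustrative, while `cgs`, its keyword arguments, and the `stats` fields are those introduced in this diff:

    using Krylov, LinearAlgebra

    n = 100
    A = I + 0.1 * randn(n, n)      # illustrative square system
    b = A * ones(n)

    # plain call with tolerances and the new time limit
    x, stats = cgs(A, b; rtol = 1e-8, timemax = 10.0)
    stats.solved, stats.niter, stats.timer   # timer is the new SimpleStats field

    # warm start from an initial guess x0
    x0 = 0.9 * ones(n)
    x, stats = cgs(A, b, x0; rtol = 1e-8)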
-function cgs(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = CgsSolver(A, b)
- cgs!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function cgs(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CgsSolver(A, b)
- cgs!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = cgs!(solver::CgsSolver, A, b; kwargs...)
solver = cgs!(solver::CgsSolver, A, b, x0; kwargs...)
@@ -77,153 +89,204 @@ See [`CgsSolver`](@ref) for more details about the `solver`.
"""
function cgs! end
-function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- cgs!(solver, A, b; kwargs...)
- return solver
-end
-
-function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CGS: system of size %d\n", n)
-
- # Check M = Iₙ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :vw, S, n)
- allocate_if(!NisI, solver, :yz, S, n)
- Δx, x, r, u, p, q, ts, stats = solver.Δx, solver.x, solver.r, solver.u, solver.p, solver.q, solver.ts, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- t = s = solver.ts
- v = MisI ? t : solver.vw
- w = MisI ? s : solver.vw
- y = NisI ? p : solver.yz
- z = NisI ? u : solver.yz
- r₀ = MisI ? r : solver.ts
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
- else
- r₀ .= b
+def_args_cgs = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_cgs = (:(x0::AbstractVector),)
+
+def_kwargs_cgs = (:(; c::AbstractVector{FC} = b ),
+ :(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cgs = mapreduce(extract_parameters, vcat, def_kwargs_cgs)
+
+args_cgs = (:A, :b)
+optargs_cgs = (:x0,)
+kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function cgs($(def_args_cgs...), $(def_optargs_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgsSolver(A, b)
+ warm_start!(solver, $(optargs_cgs...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cgs!(solver, $(args_cgs...); $(kwargs_cgs...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- x .= zero(FC) # x₀
- MisI || mulorldiv!(r, M, r₀, ldiv) # r₀
-
- # Compute residual norm ‖r₀‖₂.
- rNorm = @knrm2(n, r)
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
- end
-
- # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩
- ρ = @kdot(n, c, r)
- if ρ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = false, false
- stats.status = "Breakdown bᵀc = 0"
- solver.warm_start =false
- return solver
+ function cgs($(def_args_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CgsSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cgs!(solver, $(args_cgs...); $(kwargs_cgs...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
-
- u .= r # u₀
- p .= r # p₀
- q .= zero(FC) # q₋₁
-
- # Stopping criterion.
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- breakdown = false
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || breakdown || user_requested_exit)
-
- NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ
- mul!(t, A, y) # tₖ = Ayₖ
- MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ
- σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩
- α = ρ / σ # αₖ = ρₖ / σₖ
- @kcopy!(n, u, q) # qₖ = uₖ
- @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ
- @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ
- NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½
- @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ)
- mul!(s, A, z) # sₖ = Azₖ
- MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ
- @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ)
- ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩
- β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ
- @kcopy!(n, r, u) # uₖ₊₁ = rₖ₊₁
- @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ
- @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ
- @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ
-
- # Update ρ.
- ρ = ρ_next # ρₖ ← ρₖ₊₁
-
- # Update iteration index.
- iter = iter + 1
-
- # Compute residual norm ‖rₖ‖₂.
+ function cgs!(solver :: CgsSolver{T,FC,S}, $(def_args_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CGS: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :vw, S, n)
+ allocate_if(!NisI, solver, :yz, S, n)
+ Δx, x, r, u, p, q, ts, stats = solver.Δx, solver.x, solver.r, solver.u, solver.p, solver.q, solver.ts, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ t = s = solver.ts
+ v = MisI ? t : solver.vw
+ w = MisI ? s : solver.vw
+ y = NisI ? p : solver.yz
+ z = NisI ? u : solver.yz
+ r₀ = MisI ? r : solver.ts
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
+ else
+ r₀ .= b
+ end
+
+ x .= zero(FC) # x₀
+ MisI || mulorldiv!(r, M, r₀, ldiv) # r₀
+
+ # Compute residual norm ‖r₀‖₂.
rNorm = @knrm2(n, r)
history && push!(rNorms, rNorm)
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- solved = resid_decrease_lim || resid_decrease_mach
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩
+ ρ = @kdot(n, c, r)
+ if ρ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = false, false
+ stats.timer = ktimer(start_time)
+ stats.status = "Breakdown bᴴc = 0"
+      solver.warm_start = false
+ return solver
+ end
+
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ u .= r # u₀
+ p .= r # p₀
+ q .= zero(FC) # q₋₁
+
+ # Stopping criterion.
+ solved = rNorm ≤ ε
tired = iter ≥ itmax
- breakdown = (α == 0 || isnan(α))
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ breakdown = false
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+
+ NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ
+ mul!(t, A, y) # tₖ = Ayₖ
+ MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ
+ σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩
+ α = ρ / σ # αₖ = ρₖ / σₖ
+ @kcopy!(n, u, q) # qₖ = uₖ
+ @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ
+ @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ
+ NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½
+ @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ)
+ mul!(s, A, z) # sₖ = Azₖ
+ MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ
+ @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ)
+ ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩
+ β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ
+ @kcopy!(n, r, u) # uₖ₊₁ = rₖ₊₁
+ @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ
+ @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ
+ @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ
+
+ # Update ρ.
+ ρ = ρ_next # ρₖ ← ρₖ₊₁
+
+ # Update iteration index.
+ iter = iter + 1
+
+ # Compute residual norm ‖rₖ‖₂.
+ rNorm = @knrm2(n, r)
+ history && push!(rNorms, rNorm)
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ breakdown = (α == 0 || isnan(α))
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "breakdown αₖ == 0")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "breakdown αₖ == 0")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
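Stepping back from the hunk above: the `def_args_cgs`/`def_kwargs_cgs` tuples and the `@eval begin ... end` block implement a code-generation pattern. The shared signature is written once as quoted expressions, `extract_parameters` (a helper defined elsewhere in the package) unwraps each quoted `:(; kw = default)` into a plain `kw = default` expression, and `@eval` splices the lists into the out-of-place and in-place methods so the signatures cannot drift apart. A self-contained toy of the splicing step, using hand-built `Expr(:kw, ...)` nodes rather than the package helper (whose exact output shape is assumed here):

    # keyword expressions of the kind extract_parameters is assumed to produce
    toy_kwargs = [Expr(:kw, :atol, 1e-8), Expr(:kw, :itmax, 0)]

    # splice them after the `;` of a generated method, as the @eval blocks do
    @eval function toy_solver(b; $(toy_kwargs...))
        itmax == 0 && (itmax = 2 * length(b))   # same default-resolution idiom as cgs!
        return (atol, itmax)
    end

    toy_solver(ones(4))              # -> (1.0e-8, 8)
    toy_solver(ones(4); itmax = 3)   # -> (1.0e-8, 3)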
diff --git a/src/cr.jl b/src/cr.jl
index c678c7d29..96194f459 100644
--- a/src/cr.jl
+++ b/src/cr.jl
@@ -6,6 +6,9 @@
# E. Stiefel, Relaxationsmethoden bester Strategie zur Losung linearer Gleichungssysteme.
# Commentarii Mathematici Helvetici, 29(1), pp. 157--179, 1955.
#
+# D. G. Luenberger, The conjugate residual method for constrained minimization problems.
+# SIAM Journal on Numerical Analysis, 7(3), pp. 390--398, 1970.
+#
# M-A. Dahito and D. Orban, The Conjugate Residual Method in Linesearch and Trust-Region Methods.
# SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019.
#
@@ -16,53 +19,63 @@ export cr, cr!
"""
(x, stats) = cr(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T), γ::T=√eps(T), itmax::Int=0,
- radius::T=zero(T), verbose::Int=0, linesearch::Bool=false, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ linesearch::Bool=false, γ::T=√eps(T),
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-A truncated version of Stiefel’s Conjugate Residual method to solve the symmetric linear system Ax = b or the least-squares problem min ‖b - Ax‖.
-The matrix A must be positive semi-definite.
+ (x, stats) = cr(A, b, x0::AbstractVector; kwargs...)
+
+CR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
-A preconditioner M may be provided in the form of a linear operator and is assumed to be symmetric and positive definite.
+A truncated version of Stiefel’s Conjugate Residual method to solve the Hermitian linear system Ax = b
+of size n or the least-squares problem min ‖b - Ax‖ if A is singular.
+The matrix A must be Hermitian semi-definite.
M also indicates the weighted norm in which residuals are measured.
-In a linesearch context, 'linesearch' must be set to 'true'.
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian positive semi-definite matrix of dimension n;
+* `b`: a vector of length n.
-If `itmax=0`, the default number of iterations is set to `2 * n`,
-with `n = length(b)`.
+#### Optional argument
-CR can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = cr(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient);
+* `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
* M. R. Hestenes and E. Stiefel, [*Methods of conjugate gradients for solving linear systems*](https://doi.org/10.6028/jres.049.044), Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952.
* E. Stiefel, [*Relaxationsmethoden bester Strategie zur Losung linearer Gleichungssysteme*](https://doi.org/10.1007/BF02564277), Commentarii Mathematici Helvetici, 29(1), pp. 157--179, 1955.
+* D. G. Luenberger, [*The conjugate residual method for constrained minimization problems*](https://doi.org/10.1137/0707032), SIAM Journal on Numerical Analysis, 7(3), pp. 390--398, 1970.
* M-A. Dahito and D. Orban, [*The Conjugate Residual Method in Linesearch and Trust-Region Methods*](https://doi.org/10.1137/18M1204255), SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019.
"""
function cr end
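To make the `radius` and `linesearch` semantics above concrete, here is a sketch with an illustrative Hermitian positive-definite system; only `cr`, its keywords, and the status strings come from this diff:

    using Krylov, LinearAlgebra

    n = 50
    B = randn(n, n)
    A = B' * B + I           # Hermitian positive definite (illustrative)
    g = randn(n)

    # unconstrained solve of Ax = g
    x, stats = cr(A, g; atol = 1e-10, rtol = 1e-8)

    # trust-region step: same quadratic, but constrained to ‖x‖ ≤ Δ
    Δ = 0.1
    x, stats = cr(A, g; radius = Δ)
    stats.status             # "on trust-region boundary" when the constraint is active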
-function cr(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = CrSolver(A, b)
- cr!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function cr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CrSolver(A, b)
- cr!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = cr!(solver::CrSolver, A, b; kwargs...)
solver = cr!(solver::CrSolver, A, b, x0; kwargs...)
@@ -73,286 +86,339 @@ See [`CrSolver`](@ref) for more details about the `solver`.
"""
function cr! end
-function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- cr!(solver, A, b; kwargs...)
- return solver
-end
-
-function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0,
- radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0")
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("CR: system of %d equations in %d variables\n", n, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace
- allocate_if(!MisI, solver, :Mq, S, n)
- Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats
- warm_start = solver.warm_start
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- Mq = MisI ? q : solver.Mq
-
- # Initial state.
- x .= zero(FC)
- if warm_start
- mul!(p, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), p)
- else
- p .= b
+def_args_cr = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_cr = (:(x0::AbstractVector),)
+
+def_kwargs_cr = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; radius::T = zero(T) ),
+ :(; linesearch::Bool = false ),
+ :(; γ::T = √eps(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_cr = mapreduce(extract_parameters, vcat, def_kwargs_cr)
+
+args_cr = (:A, :b)
+optargs_cr = (:x0,)
+kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function cr($(def_args_cr...), $(def_optargs_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CrSolver(A, b)
+ warm_start!(solver, $(optargs_cr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cr!(solver, $(args_cr...); $(kwargs_cr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- mulorldiv!(r, M, p, ldiv)
- mul!(Ar, A, r)
- ρ = @kdotr(n, r, Ar)
-
- rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖
- history && push!(rNorms, rNorm) # Values of ‖r‖
-
- if ρ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(ArNorms, zero(T))
- solver.warm_start = false
- return solver
+
+ function cr($(def_args_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ cr!(solver, $(args_cr...); $(kwargs_cr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- p .= r
- q .= Ar
- (verbose > 0) && (m = zero(T)) # quadratic model
-
- iter = 0
- itmax == 0 && (itmax = 2 * n)
-
- rNorm² = rNorm * rNorm
- pNorm = rNorm
- pNorm² = rNorm²
- pr = rNorm²
- abspr = pr
- pAp = ρ
- abspAp = abs(pAp)
- xNorm = zero(T)
- ArNorm = @knrm2(n, Ar) # ‖Ar‖
- history && push!(ArNorms, ArNorm)
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad")
- kdisplay(iter, verbose) && @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
-
- descent = pr > 0 # pᵀr > 0 means p is a descent direction
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- on_boundary = false
- npcurv = false
- status = "unknown"
- user_requested_exit = false
-
- while ! (solved || tired || user_requested_exit)
- if linesearch
- if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²)
- npcurv = true
- (verbose > 0) && @printf("nonpositive curvature detected: pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
- stats.solved = solved
- stats.inconsistent = false
- stats.status = "nonpositive curvature"
- return solver
- end
- elseif pAp ≤ 0 && radius == 0
- error("Indefinite system and no trust region")
+
+ function cr!(solver :: CrSolver{T,FC,S}, $(def_args_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == n || error("Inconsistent problem size")
+ linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0")
+ (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace
+ allocate_if(!MisI, solver, :Mq, S, n)
+ Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats
+ warm_start = solver.warm_start
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ Mq = MisI ? q : solver.Mq
+
+ # Initial state.
+ x .= zero(FC)
+ if warm_start
+ mul!(p, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), p)
+ else
+ p .= b
end
- MisI || mulorldiv!(Mq, M, q, ldiv)
-
- if radius > 0
- (verbose > 0) && @printf("radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm)
- # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2)
- xNorm² = xNorm * xNorm
- t = to_boundary(x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²)
- t1 = maximum(t) # > 0
- t2 = minimum(t) # < 0
- tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
- (verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr)
-
- if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᵀAp ≃ 0
- npcurv = true # nonpositive curvature
- (verbose > 0) && @printf("pᵀAp = %8.1e ≃ 0\n", pAp)
- if abspr ≤ γ * pNorm * rNorm # pᵀr ≃ 0
- (verbose > 0) && @printf("pᵀr = %8.1e ≃ 0, redefining p := r\n", pr)
- p = r # - ∇q(x)
- q = Ar
- # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᵀAr
- # 1) if rᵀAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᵀAr
- # 2) if rᵀAr ≤ 0, the quadratic decreases to -∞ in the direction r
- if ρ > 0 # case 1
- (verbose > 0) && @printf("quadratic is convex in direction r, curv = %8.1e\n", ρ)
- α = min(tr, rNorm² / ρ)
- else # case 2
- (verbose > 0) && @printf("r is a direction of nonpositive curvature: %8.1e\n", ρ)
+ mulorldiv!(r, M, p, ldiv)
+ mul!(Ar, A, r)
+ ρ = @kdotr(n, r, Ar)
+
+ rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖
+ history && push!(rNorms, rNorm) # Values of ‖r‖
+
+ if ρ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(ArNorms, zero(T))
+ solver.warm_start = false
+ return solver
+ end
+ p .= r
+ q .= Ar
+ (verbose > 0) && (m = zero(T)) # quadratic model
+
+ iter = 0
+ itmax == 0 && (itmax = 2 * n)
+
+ rNorm² = rNorm * rNorm
+ pNorm = rNorm
+ pNorm² = rNorm²
+ pr = rNorm²
+ abspr = pr
+ pAp = ρ
+ abspAp = abs(pAp)
+ xNorm = zero(T)
+ ArNorm = @knrm2(n, Ar) # ‖Ar‖
+ history && push!(ArNorms, ArNorm)
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %5s\n", "k", "‖x‖", "‖r‖", "quad", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.1e %8.1e %8.1e %.2fs\n", iter, xNorm, rNorm, m, ktimer(start_time))
+
+ descent = pr > 0 # pᴴr > 0 means p is a descent direction
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ on_boundary = false
+ npcurv = false
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || user_requested_exit || overtimed)
+ if linesearch
+ if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²)
+ npcurv = true
+ (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "nonpositive curvature"
+ return solver
+ end
+ elseif pAp ≤ 0 && radius == 0
+ error("Indefinite system and no trust region")
+ end
+ MisI || mulorldiv!(Mq, M, q, ldiv)
+
+ if radius > 0
+ (verbose > 0) && @printf(iostream, "radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm)
+ # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2)
+ xNorm² = xNorm * xNorm
+ t = to_boundary(n, x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²)
+ t1 = maximum(t) # > 0
+ t2 = minimum(t) # < 0
+ tr = maximum(to_boundary(n, x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
+ (verbose > 0) && @printf(iostream, "t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr)
+
+ if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᴴAp ≃ 0
+ npcurv = true # nonpositive curvature
+ (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp)
+ if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0
+ (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr)
+ p = r # - ∇q(x)
+ q = Ar
+ # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr
+ # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr
+ # 2) if rᴴAr ≤ 0, the quadratic decreases to -∞ in the direction r
+ if ρ > 0 # case 1
+ (verbose > 0) && @printf(iostream, "quadratic is convex in direction r, curv = %8.1e\n", ρ)
+ α = min(tr, rNorm² / ρ)
+ else # case 2
+ (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ)
+ α = tr
+ end
+ else
+ # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp
+ # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᴴAr
+ # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed
+ α = descent ? t1 : t2
+ ρ > 0 && (tr = min(tr, rNorm² / ρ))
+ Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0
+ if Δ > 0 # direction r engenders a better decrease
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
+ p = r
+ q = Ar
+ α = tr
+ else
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ end
+ end
+
+ elseif pAp > 0 && ρ > 0 # no negative curvature
+ (verbose > 0) && @printf(iostream, "positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
+ α = ρ / @kdotr(n, q, Mq)
+ if α ≥ t1
+ α = t1
+ on_boundary = true
+ end
+
+ elseif pAp > 0 && ρ < 0
+ npcurv = true
+ (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ)
+ # q_p is minimal for α_p = rᴴp / pᴴAp
+ α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp)
+ Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
+ if Δ > 0
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
+ p = r
+ q = Ar
α = tr
+ else
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
end
- else
- # q_p = q(x + α_p * p) - q(x) = -α_p * rᵀp + ½ (α_p)² * pᵀAp
- # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᵀAr
- # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed
+
+ elseif pAp < 0 && ρ > 0
+ npcurv = true
+ (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ)
α = descent ? t1 : t2
- ρ > 0 && (tr = min(tr, rNorm² / ρ))
- Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᵀAp = 0
- if Δ > 0 # direction r engenders a better decrease
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
+ tr = min(tr, rNorm² / ρ)
+ Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
+ if Δ > 0
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
p = r
q = Ar
α = tr
else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
end
- end
-
- elseif pAp > 0 && ρ > 0 # no negative curvature
- (verbose > 0) && @printf("positive curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
- α = ρ / @kdotr(n, q, Mq)
- if α ≥ t1
- α = t1
- on_boundary = true
- end
-
- elseif pAp > 0 && ρ < 0
- npcurv = true
- (verbose > 0) && @printf("pᵀAp = %8.1e > 0 and rᵀAr = %8.1e < 0\n", pAp, ρ)
- # q_p is minimal for α_p = rᵀp / pᵀAp
- α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp)
- Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
- if Δ > 0
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
- p = r
- q = Ar
- α = tr
- else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
- end
- elseif pAp < 0 && ρ > 0
- npcurv = true
- (verbose > 0) && @printf("pᵀAp = %8.1e < 0 and rᵀAr = %8.1e > 0\n", pAp, ρ)
- α = descent ? t1 : t2
- tr = min(tr, rNorm² / ρ)
- Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
- if Δ > 0
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
- p = r
- q = Ar
- α = tr
- else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ elseif pAp < 0 && ρ < 0
+ npcurv = true
+ (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
+ α = descent ? t1 : t2
+ Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
+ if Δ > 0
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
+ p = r
+ q = Ar
+ α = tr
+ else
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ end
end
- elseif pAp < 0 && ρ < 0
- npcurv = true
- (verbose > 0) && @printf("negative curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
- α = descent ? t1 : t2
- Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
- if Δ > 0
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
- p = r
- q = Ar
- α = tr
- else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
- end
+ elseif radius == 0
+ α = ρ / @kdotr(n, q, Mq) # step
end
- elseif radius == 0
- α = ρ / @kdotr(n, q, Mq) # step
- end
-
- @kaxpy!(n, α, p, x)
- xNorm = @knrm2(n, x)
- xNorm ≈ radius && (on_boundary = true)
- @kaxpy!(n, -α, Mq, r) # residual
- if MisI
- rNorm² = @kdotr(n, r, r)
- rNorm = sqrt(rNorm²)
- else
- ω = sqrt(α) * sqrt(ρ)
- rNorm = sqrt(abs(rNorm + ω)) * sqrt(abs(rNorm - ω))
- rNorm² = rNorm * rNorm # rNorm² = rNorm² - α * ρ
- end
- history && push!(rNorms, rNorm)
- mul!(Ar, A, r)
- ArNorm = @knrm2(n, Ar)
- history && push!(ArNorms, ArNorm)
-
- iter = iter + 1
- if kdisplay(iter, verbose)
- m = m - α * pr + α^2 * pAp / 2
- @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
- end
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ @kaxpy!(n, α, p, x)
+ xNorm = @knrm2(n, x)
+ xNorm ≈ radius && (on_boundary = true)
+ @kaxpy!(n, -α, Mq, r) # residual
+ if MisI
+ rNorm² = @kdotr(n, r, r)
+ rNorm = sqrt(rNorm²)
+ else
+ ω = sqrt(α) * sqrt(ρ)
+ rNorm = sqrt(abs(rNorm + ω)) * sqrt(abs(rNorm - ω))
+ rNorm² = rNorm * rNorm # rNorm² = rNorm² - α * ρ
+ end
+ history && push!(rNorms, rNorm)
+ mul!(Ar, A, r)
+ ArNorm = @knrm2(n, Ar)
+ history && push!(ArNorms, ArNorm)
+
+ iter = iter + 1
+ if kdisplay(iter, verbose)
+ m = m - α * pr + α^2 * pAp / 2
+ @printf(iostream, "%5d %8.1e %8.1e %8.1e %.2fs\n", iter, xNorm, rNorm, m, ktimer(start_time))
+ end
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- resid_decrease = resid_decrease_lim || resid_decrease_mach
- solved = resid_decrease || npcurv || on_boundary
- tired = iter ≥ itmax
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ resid_decrease = resid_decrease_lim || resid_decrease_mach
+ solved = resid_decrease || npcurv || on_boundary
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+
+ (solved || tired || user_requested_exit || overtimed) && continue
+ ρbar = ρ
+ ρ = @kdotr(n, r, Ar)
+ β = ρ / ρbar # step for the direction computation
+ @kaxpby!(n, one(FC), r, β, p)
+ @kaxpby!(n, one(FC), Ar, β, q)
+
+ pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm²
+ if pNorm² > sqrt(eps(T))
+ pNorm = sqrt(pNorm²)
+ elseif abs(pNorm²) ≤ sqrt(eps(T))
+ pNorm = zero(T)
+ else
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "solver encountered numerical issues"
+ solver.warm_start = false
+ return solver
+ end
+ pr = rNorm² + β * pr - β * α * pAp # pᴴr
+ abspr = abs(pr)
+ pAp = ρ + β^2 * pAp # pᴴq
+ abspAp = abs(pAp)
+ descent = pr > 0
- (solved || tired || user_requested_exit) && continue
- ρbar = ρ
- ρ = @kdotr(n, r, Ar)
- β = ρ / ρbar # step for the direction computation
- @kaxpby!(n, one(FC), r, β, p)
- @kaxpby!(n, one(FC), Ar, β, q)
-
- pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm²
- if pNorm² > sqrt(eps(T))
- pNorm = sqrt(pNorm²)
- elseif abs(pNorm²) ≤ sqrt(eps(T))
- pNorm = zero(T)
- else
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = "solver encountered numerical issues"
- solver.warm_start = false
- return solver
end
- pr = rNorm² + β * pr - β * α * pAp # pᵀr
- abspr = abs(pr)
- pAp = ρ + β^2 * pAp # pᵀq
- abspAp = abs(pAp)
- descent = pr > 0
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ on_boundary && (status = "on trust-region boundary")
+ npcurv && (status = "nonpositive curvature")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+    solver.warm_start = false
+
+    # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- on_boundary && (status = "on trust-region boundary")
- npcurv && (status = "nonpositive curvature")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
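Because every rewritten docstring in this diff documents the same `callback(solver)` contract, a functor-style example may help; the stagnation rule itself is illustrative:

    using Krylov, LinearAlgebra

    # stop when the iterates stagnate (illustrative stopping rule)
    mutable struct Stagnation
        xprev::Vector{Float64}
        tol::Float64
    end

    function (cb::Stagnation)(solver)
        stalled = norm(solver.x - cb.xprev) ≤ cb.tol * norm(solver.x)
        copyto!(cb.xprev, solver.x)
        return stalled       # true triggers status "user-requested exit"
    end

    n = 30
    B = randn(n, n)
    A = B' * B + I
    b = randn(n)
    x, stats = cr(A, b; callback = Stagnation(zeros(n), 1e-10))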
diff --git a/src/craig.jl b/src/craig.jl
index 20597ea02..46e8f93e5 100644
--- a/src/craig.jl
+++ b/src/craig.jl
@@ -11,7 +11,7 @@
# and is equivalent to applying the conjugate gradient method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method, sometimes known under the name CRAIG, is the
# Golub-Kahan implementation of CGNE, and is described in
@@ -32,13 +32,15 @@
export craig, craig!
-
"""
(x, y, stats) = craig(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T),
- btol::T=√eps(T), rtol::T=√eps(T), conlim::T=1/√eps(T), itmax::Int=0,
- verbose::Int=0, transfer_to_lsqr::Bool=false, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ transfer_to_lsqr::Bool=false, sqd::Bool=false,
+ λ::T=zero(T), btol::T=√eps(T),
+ conlim::T=1/√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -47,19 +49,19 @@ Find the least-norm solution of the consistent linear system
Ax + λ²y = b
-using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a
+of size m × n using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a
regularization parameter. This method is equivalent to CGNE but is more
stable.
For a system in the form Ax = b, Craig's method is equivalent to applying
-CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange
+CG to AAᴴy = b and recovering x = Aᴴy. Note that y is the vector of Lagrange
multipliers of the least-norm problem
minimize ‖x‖ s.t. Ax = b.
If `λ > 0`, CRAIG solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -70,12 +72,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-CRAIG is then equivalent to applying CG to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+CRAIG is then equivalent to applying CG to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, CRAIG solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -86,8 +88,35 @@ In this case, `M` can still be specified and indicates the weighted norm in whic
In this implementation, both the x and y-parts of the solution are returned.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `transfer_to_lsqr`: transfer from the Craig point to the LSQR point, when it exists. The transfer is based on the residual norm;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -96,12 +125,6 @@ and `false` otherwise.
"""
function craig end
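A short sketch of the two-output API documented above, on an illustrative underdetermined system; the identity x = Aᴴy is the relation stated earlier in this docstring:

    using Krylov, LinearAlgebra

    m, n = 20, 60
    A = randn(m, n)          # full row rank with probability one
    b = randn(m)

    x, y, stats = craig(A, b; atol = 1e-12, rtol = 1e-10)

    norm(A * x - b)          # small: the system is consistent
    norm(x - A' * y)         # small: x = Aᴴy up to the stopping tolerance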
-function craig(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CraigSolver(A, b)
- craig!(solver, A, b; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = craig!(solver::CraigSolver, A, b; kwargs...)
@@ -111,192 +134,130 @@ See [`CraigSolver`](@ref) for more details about the `solver`.
"""
function craig! end
-function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
- btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0,
- verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRAIG: system of %d equations in %d variables\n", m, n)
-
- # Check sqd and λ parameters
- sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
- sqd && (λ = one(T))
-
- # Tests M = Iₘ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :u , S, m)
- allocate_if(!NisI, solver, :v , S, n)
- allocate_if(λ > 0, solver, :w2, S, n)
- x, Nv, Aᵀu, y, w = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w
- Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats
- rNorms = stats.residuals
- reset!(stats)
- u = MisI ? Mu : solver.u
- v = NisI ? Nv : solver.v
-
- x .= zero(FC)
- y .= zero(FC)
-
- Mu .= b
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β₁ = sqrt(@kdotr(m, u, Mu))
- rNorm = β₁
- history && push!(rNorms, rNorm)
- if β₁ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- return solver
+def_args_craig = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_craig = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; transfer_to_lsqr::Bool = false),
+ :(; sqd::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; btol::T = √eps(T) ),
+ :(; conlim::T = 1/√eps(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_craig = mapreduce(extract_parameters, vcat, def_kwargs_craig)
+
+args_craig = (:A, :b)
+kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function craig($(def_args_craig...); $(def_kwargs_craig...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CraigSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ craig!(solver, $(args_craig...); $(kwargs_craig...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- β₁² = β₁^2
- β = β₁
- θ = β₁ # θ will differ from β when there is regularization (λ > 0).
- ξ = -one(T) # Most recent component of x in Range(V).
- δ = λ
- ρ_prev = one(T)
-
- # Initialize Golub-Kahan process.
- # β₁Mu₁ = b.
- @kscal!(m, one(FC) / β₁, u)
- MisI || @kscal!(m, one(FC) / β₁, Mu)
-
- Nv .= zero(FC)
- w .= zero(FC) # Used to update y.
-
- λ > 0 && (w2 .= zero(FC))
-
- Anorm² = zero(T) # Estimate of ‖A‖²_F.
- Anorm = zero(T)
- Dnorm² = zero(T) # Estimate of ‖(AᵀA)⁻¹‖².
- Acond = zero(T) # Estimate of cond(A).
- xNorm² = zero(T) # Estimate of ‖x‖².
- xNorm = zero(T)
-
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
- ɛ_i = atol # Stopping tolerance for inconsistent systems.
- ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators.
- (verbose > 0) && @printf("%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond)
-
- bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²)
-
- status = "unknown"
-
- solved_lim = bkwerr ≤ btol
- solved_mach = one(T) + bkwerr ≤ one(T)
- solved_resid_tol = rNorm ≤ ɛ_c
- solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁
- solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim
-
- ill_cond = ill_cond_mach = ill_cond_lim = false
-
- inconsistent = false
- tired = iter ≥ itmax
- user_requested_exit = false
-
- while ! (solved || inconsistent || ill_cond || tired || user_requested_exit)
- # Generate the next Golub-Kahan vectors
- # 1. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
- if α == 0
- inconsistent = true
- continue
- end
- @kscal!(n, one(FC) / α, v)
- NisI || @kscal!(n, one(FC) / α, Nv)
-
- Anorm² += α * α + λ * λ
-
- if λ > 0
- # Givens rotation to zero out the δ in position (k, 2k):
- # k-1 k 2k k 2k k-1 k 2k
- # k [ θ α δ ] [ c₁ s₁ ] = [ θ ρ ]
- # k+1 [ β ] [ s₁ -c₁ ] [ θ+ γ ]
- (c₁, s₁, ρ) = sym_givens(α, δ)
- else
- ρ = α
- end
- ξ = -θ / ρ * ξ
-
- if λ > 0
- # w1 = c₁ * v + s₁ * w2
- # w2 = s₁ * v - c₁ * w2
- # x = x + ξ * w1
- @kaxpy!(n, ξ * c₁, v, x)
- @kaxpy!(n, ξ * s₁, w2, x)
- @kaxpby!(n, s₁, v, -c₁, w2)
- else
- @kaxpy!(n, ξ, v, x) # x = x + ξ * v
- end
+ function craig!(solver :: CraigSolver{T,FC,S}, $(def_args_craig...); $(def_kwargs_craig...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CRAIG: system of %d equations in %d variables\n", m, n)
+
+ # Check sqd and λ parameters
+    sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0!")
+ sqd && (λ = one(T))
- # Recur y.
- @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w
- @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w
+ # Tests M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
- Dnorm² += @knrm2(m, w)
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
- # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
- mul!(Av, A, v)
- @kaxpby!(m, one(FC), Av, -α, Mu)
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :u , S, m)
+ allocate_if(!NisI, solver, :v , S, n)
+ allocate_if(λ > 0, solver, :w2, S, n)
+ x, Nv, Aᴴu, y, w = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w
+ Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats
+ rNorms = stats.residuals
+ reset!(stats)
+ u = MisI ? Mu : solver.u
+ v = NisI ? Nv : solver.v
+
+ x .= zero(FC)
+ y .= zero(FC)
+
+ Mu .= b
MisI || mulorldiv!(u, M, Mu, ldiv)
- β = sqrt(@kdotr(m, u, Mu))
- if β ≠ 0
- @kscal!(m, one(FC) / β, u)
- MisI || @kscal!(m, one(FC) / β, Mu)
+ β₁ = sqrt(@kdotr(m, u, Mu))
+ rNorm = β₁
+ history && push!(rNorms, rNorm)
+ if β₁ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ return solver
end
+ β₁² = β₁^2
+ β = β₁
+ θ = β₁ # θ will differ from β when there is regularization (λ > 0).
+ ξ = -one(T) # Most recent component of x in Range(V).
+ δ = λ
+ ρ_prev = one(T)
- # Finish updates from the first Givens rotation.
- if λ > 0
- θ = β * c₁
- γ = β * s₁
- else
- θ = β
- end
+ # Initialize Golub-Kahan process.
+ # β₁Mu₁ = b.
+ @kscal!(m, one(FC) / β₁, u)
+ MisI || @kscal!(m, one(FC) / β₁, Mu)
- if λ > 0
- # Givens rotation to zero out the γ in position (k+1, 2k)
- # 2k 2k+1 2k 2k+1 2k 2k+1
- # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ]
- # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ]
- c₂, s₂, δ = sym_givens(λ, γ)
- @kscal!(n, s₂, w2)
- end
+ Nv .= zero(FC)
+ w .= zero(FC) # Used to update y.
- Anorm² += β * β
- Anorm = sqrt(Anorm²)
- Acond = Anorm * sqrt(Dnorm²)
- xNorm² += ξ * ξ
- xNorm = sqrt(xNorm²)
- rNorm = β * abs(ξ) # r = - β * ξ * u
- λ > 0 && (rNorm *= abs(c₁)) # r = -c₁ * β * ξ * u when λ > 0.
- history && push!(rNorms, rNorm)
- iter = iter + 1
+ λ > 0 && (w2 .= zero(FC))
+
+ Anorm² = zero(T) # Estimate of ‖A‖²_F.
+ Anorm = zero(T)
+ Dnorm² = zero(T) # Estimate of ‖(AᴴA)⁻¹‖².
+ Acond = zero(T) # Estimate of cond(A).
+ xNorm² = zero(T) # Estimate of ‖x‖².
+ xNorm = zero(T)
- bkwerr = rNorm / sqrt(β₁² + Anorm² * xNorm²)
+ iter = 0
+ itmax == 0 && (itmax = m + n)
- ρ_prev = ρ # Only differs from α if λ > 0.
+ ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
+ ɛ_i = atol # Stopping tolerance for inconsistent systems.
+ ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators.
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %8s %8s %7s %5s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8s %7s %.2fs\n", iter, rNorm, xNorm, Anorm, Acond, " ✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time))
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β)
+ bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²)
+
+ status = "unknown"
solved_lim = bkwerr ≤ btol
solved_mach = one(T) + bkwerr ≤ one(T)
@@ -304,34 +265,141 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁
solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim
- ill_cond_mach = one(T) + one(T) / Acond ≤ one(T)
- ill_cond_lim = 1 / Acond ≤ ctol
- ill_cond = ill_cond_mach | ill_cond_lim
+ ill_cond = ill_cond_mach = ill_cond_lim = false
- user_requested_exit = callback(solver) :: Bool
inconsistent = false
tired = iter ≥ itmax
- end
- (verbose > 0) && @printf("\n")
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || inconsistent || ill_cond || tired || user_requested_exit || overtimed)
+ # Generate the next Golub-Kahan vectors
+ # 1. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+ if α == 0
+ inconsistent = true
+ continue
+ end
+ @kscal!(n, one(FC) / α, v)
+ NisI || @kscal!(n, one(FC) / α, Nv)
+
+ Anorm² += α * α + λ * λ
+
+ if λ > 0
+ # Givens rotation to zero out the δ in position (k, 2k):
+ # k-1 k 2k k 2k k-1 k 2k
+ # k [ θ α δ ] [ c₁ s₁ ] = [ θ ρ ]
+ # k+1 [ β ] [ s₁ -c₁ ] [ θ+ γ ]
+ (c₁, s₁, ρ) = sym_givens(α, δ)
+ else
+ ρ = α
+ end
+
+ ξ = -θ / ρ * ξ
+
+ if λ > 0
+ # w1 = c₁ * v + s₁ * w2
+ # w2 = s₁ * v - c₁ * w2
+ # x = x + ξ * w1
+ @kaxpy!(n, ξ * c₁, v, x)
+ @kaxpy!(n, ξ * s₁, w2, x)
+ @kaxpby!(n, s₁, v, -c₁, w2)
+ else
+ @kaxpy!(n, ξ, v, x) # x = x + ξ * v
+ end
+
+ # Recur y.
+ @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w
+ @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w
+
+ Dnorm² += @knrm2(m, w)
+
+ # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
+ mul!(Av, A, v)
+ @kaxpby!(m, one(FC), Av, -α, Mu)
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β = sqrt(@kdotr(m, u, Mu))
+ if β ≠ 0
+ @kscal!(m, one(FC) / β, u)
+ MisI || @kscal!(m, one(FC) / β, Mu)
+ end
+
+ # Finish updates from the first Givens rotation.
+ if λ > 0
+ θ = β * c₁
+ γ = β * s₁
+ else
+ θ = β
+ end
+
+ if λ > 0
+ # Givens rotation to zero out the γ in position (k+1, 2k)
+ # 2k 2k+1 2k 2k+1 2k 2k+1
+ # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ]
+ # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ]
+ c₂, s₂, δ = sym_givens(λ, γ)
+ @kscal!(n, s₂, w2)
+ end
+
+ Anorm² += β * β
+ Anorm = sqrt(Anorm²)
+ Acond = Anorm * sqrt(Dnorm²)
+ xNorm² += ξ * ξ
+ xNorm = sqrt(xNorm²)
+ rNorm = β * abs(ξ) # r = - β * ξ * u
+ λ > 0 && (rNorm *= abs(c₁)) # r = -c₁ * β * ξ * u when λ > 0.
+ history && push!(rNorms, rNorm)
+ iter = iter + 1
+
+ bkwerr = rNorm / sqrt(β₁² + Anorm² * xNorm²)
+
+ ρ_prev = ρ # Only differs from α if λ > 0.
+
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e %.2fs\n", iter, rNorm, xNorm, Anorm, Acond, α, β, ktimer(start_time))
+
+ solved_lim = bkwerr ≤ btol
+ solved_mach = one(T) + bkwerr ≤ one(T)
+ solved_resid_tol = rNorm ≤ ɛ_c
+ solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁
+ solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim
+
+ ill_cond_mach = one(T) + one(T) / Acond ≤ one(T)
+ ill_cond_lim = 1 / Acond ≤ ctol
+ ill_cond = ill_cond_mach | ill_cond_lim
+
+ user_requested_exit = callback(solver) :: Bool
+ inconsistent = false
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
- # transfer to LSQR point if requested
- if λ > 0 && transfer_to_lsqr
- ξ *= -θ / δ
- @kaxpy!(n, ξ, w2, x)
- # TODO: update y
- end
+ # transfer to LSQR point if requested
+ if λ > 0 && transfer_to_lsqr
+ ξ *= -θ / δ
+ @kaxpy!(n, ξ, w2, x)
+ # TODO: update y
+ end
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough for the tolerances given")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- ill_cond_lim && (status = "condition number exceeds tolerance")
- inconsistent && (status = "system may be inconsistent")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough for the tolerances given")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ ill_cond_lim && (status = "condition number exceeds tolerance")
+ inconsistent && (status = "system may be inconsistent")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
+ end
end
diff --git a/src/craigmr.jl b/src/craigmr.jl
index e08bb9c36..5f05aa2ae 100644
--- a/src/craigmr.jl
+++ b/src/craigmr.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate residual method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is equivalent to CRMR, and is described in
#
@@ -26,12 +26,13 @@
export craigmr, craigmr!
-
"""
(x, y, stats) = craigmr(A, b::AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
- rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -40,11 +41,11 @@ Solve the consistent linear system
Ax + λ²y = b
-using the CRAIGMR method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the CRAIGMR method, where λ ≥ 0 is a regularization parameter.
This method is equivalent to applying the Conjugate Residuals method
to the normal equations of the second kind
- (AAᵀ + λ²I) y = b
+ (AAᴴ + λ²I) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -52,7 +53,7 @@ but is more stable. When λ = 0, this method solves the minimum-norm problem
If `λ > 0`, CRAIGMR solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -63,12 +64,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, CRAIGMR solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -82,8 +83,32 @@ It is formally equivalent to CRMR, though can be slightly more accurate,
and more intricate to implement. Both the x- and y-parts of the solution are
returned.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
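+
+#### Example
+
+A minimal sketch with hypothetical data, assuming a consistent system:
+
+    using Krylov, SparseArrays
+    A = sprand(100, 200, 0.2)  # under-determined operator
+    b = A * ones(200)          # consistent right-hand side
+    (x, y, stats) = craigmr(A, b, itmax=100, verbose=1)
+    stats.solved               # true if the tolerances were met
+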
#### References
@@ -92,12 +117,6 @@ and `false` otherwise.
"""
function craigmr end
-function craigmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CraigmrSolver(A, b)
- craigmr!(solver, A, b; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = craigmr!(solver::CraigmrSolver, A, b; kwargs...)
@@ -107,230 +126,274 @@ See [`CraigmrSolver`](@ref) for more details about the `solver`.
"""
function craigmr! end
-function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
- rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRAIGMR: system of %d equations in %d variables\n", m, n)
-
- # Check sqd and λ parameters
- sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
- sqd && (λ = one(T))
-
- # Tests M = Iₘ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :u, S, m)
- allocate_if(!NisI, solver, :v, S, n)
- allocate_if(λ > 0, solver, :q, S, n)
- x, Nv, Aᵀu, d, y, Mu = solver.x, solver.Nv, solver.Aᵀu, solver.d, solver.y, solver.Mu
- w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- u = MisI ? Mu : solver.u
- v = NisI ? Nv : solver.v
-
- # Compute y such that AAᵀy = b. Then recover x = Aᵀy.
- x .= zero(FC)
- y .= zero(FC)
- Mu .= b
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β = sqrt(@kdotr(m, u, Mu))
- if β == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- history && push!(rNorms, β)
- history && push!(ArNorms, zero(T))
- stats.status = "x = 0 is a zero-residual solution"
- return solver
+def_args_craigmr = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_craigmr = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; sqd::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_craigmr = mapreduce(extract_parameters, vcat, def_kwargs_craigmr)
+
+args_craigmr = (:A, :b)
+kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function craigmr($(def_args_craigmr...); $(def_kwargs_craigmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CraigmrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ craigmr!(solver, $(args_craigmr...); $(kwargs_craigmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # Initialize Golub-Kahan process.
- # β₁Mu₁ = b.
- @kscal!(m, one(FC)/β, u)
- MisI || @kscal!(m, one(FC)/β, Mu)
- # α₁Nv₁ = Aᵀu₁.
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
- Anorm² = α * α
-
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²)
-
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
- if α == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- history && push!(rNorms, β)
- history && push!(ArNorms, zero(T))
- stats.status = "x = 0 is a minimum least-squares solution"
- return solver
- end
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
-
- # Regularization.
- λₖ = λ # λ₁ = λ
- cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ
- cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁
- λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0
-
- if λ > 0
- (cpₖ, spₖ, αhat) = sym_givens(α, λₖ)
- @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁
- else
- αhat = α
- end
-
- # Initialize other constants.
- ζbar = β
- ρbar = αhat
- θ = zero(T)
- rNorm = ζbar
- history && push!(rNorms, rNorm)
- ArNorm = α
- history && push!(ArNorms, ArNorm)
-
- ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
- ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.
-
- wbar .= u
- @kscal!(m, one(FC)/αhat, wbar)
- w .= zero(FC)
- d .= zero(FC)
-
- status = "unknown"
- solved = rNorm ≤ ɛ_c
- inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i)
- tired = iter ≥ itmax
- user_requested_exit = false
-
- while ! (solved || inconsistent || tired || user_requested_exit)
- iter = iter + 1
-
- # Generate next Golub-Kahan vectors.
- # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
- mul!(Av, A, v)
- @kaxpby!(m, one(FC), Av, -α, Mu)
+ function craigmr!(solver :: CraigmrSolver{T,FC,S}, $(def_args_craigmr...); $(def_kwargs_craigmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CRAIGMR: system of %d equations in %d variables\n", m, n)
+
+ # Check sqd and λ parameters
+ sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
+ sqd && (λ = one(T))
+
+ # Tests M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :u, S, m)
+ allocate_if(!NisI, solver, :v, S, n)
+ allocate_if(λ > 0, solver, :q, S, n)
+ x, Nv, Aᴴu, d, y, Mu = solver.x, solver.Nv, solver.Aᴴu, solver.d, solver.y, solver.Mu
+ w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ u = MisI ? Mu : solver.u
+ v = NisI ? Nv : solver.v
+
+ # Compute y such that AAᴴy = b. Then recover x = Aᴴy.
+ x .= zero(FC)
+ y .= zero(FC)
+ Mu .= b
MisI || mulorldiv!(u, M, Mu, ldiv)
β = sqrt(@kdotr(m, u, Mu))
- if β ≠ 0
- @kscal!(m, one(FC)/β, u)
- MisI || @kscal!(m, one(FC)/β, Mu)
+ if β == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ history && push!(rNorms, β)
+ history && push!(ArNorms, zero(T))
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ return solver
+ end
+
+ # Initialize Golub-Kahan process.
+ # β₁Mu₁ = b.
+ @kscal!(m, one(FC)/β, u)
+ MisI || @kscal!(m, one(FC)/β, Mu)
+ # α₁Nv₁ = Aᴴu₁.
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+ Anorm² = α * α
+
+ iter = 0
+ itmax == 0 && (itmax = m + n)
+
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, β, α, β, α, 0, 1, Anorm², ktimer(start_time))
+
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
+ if α == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ history && push!(rNorms, β)
+ history && push!(ArNorms, zero(T))
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a minimum least-squares solution"
+ return solver
end
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
- Anorm² = Anorm² + β * β # = ‖B_{k-1}‖²
+ # Regularization.
+ λₖ = λ # λ₁ = λ
+ cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ
+ cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁
+ λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0
if λ > 0
- βhat = cpₖ * β
- λₐᵤₓ = spₖ * β
+ (cpₖ, spₖ, αhat) = sym_givens(α, λₖ)
+ @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁
else
- βhat = β
+ αhat = α
end
- # Continue QR factorization
- #
- # Q [ Lₖ  β₁ e₁ ] = [ Rₖ  zₖ   ] :
- #   [ β   0     ]   [ 0   ζbar ]
- #
- #        k   k+1       k     k+1        k   k+1
- # k    [ c    s ]   [ ρbar      ]  =  [ ρ   θ⁺    ]
- # k+1  [ s   -c ]   [ β      α⁺ ]     [     ρbar⁺ ]
- #
- # so that we obtain
- #
- # [ c    s ] [ ζbar ] = [ ζ     ]
- # [ s   -c ] [ 0    ]   [ ζbar⁺ ]
- (c, s, ρ) = sym_givens(ρbar, βhat)
- ζ = c * ζbar
- ζbar = s * ζbar
- rNorm = abs(ζbar)
+ # Initialize other constants.
+ ζbar = β
+ ρbar = αhat
+ θ = zero(T)
+ rNorm = ζbar
history && push!(rNorms, rNorm)
+ ArNorm = α
+ history && push!(ArNorms, ArNorm)
- @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ
- @kaxpy!(m, ζ, w, y) # y = y + ζ * w
+ ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
+ ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.
- if λ > 0
- # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁
- if iter == 1
- @kaxpy!(n, one(FC)/ρ, cpₖ * v, d)
+ wbar .= u
+ @kscal!(m, one(FC)/αhat, wbar)
+ w .= zero(FC)
+ d .= zero(FC)
+
+ status = "unknown"
+ solved = rNorm ≤ ɛ_c
+ inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i)
+ tired = iter ≥ itmax
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || inconsistent || tired || user_requested_exit || overtimed)
+ iter = iter + 1
+
+ # Generate next Golub-Kahan vectors.
+ # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
+ mul!(Av, A, v)
+ @kaxpby!(m, one(FC), Av, -α, Mu)
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β = sqrt(@kdotr(m, u, Mu))
+ if β ≠ 0
+ @kscal!(m, one(FC)/β, u)
+ MisI || @kscal!(m, one(FC)/β, Mu)
+ end
+
+ Anorm² = Anorm² + β * β # = ‖B_{k-1}‖²
+
+ if λ > 0
+ βhat = cpₖ * β
+ λₐᵤₓ = spₖ * β
else
- @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d)
- @kaxpy!(n, one(FC)/ρ, spₖ * q, d)
- @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁
+ βhat = β
end
- else
- # DₖRₖ = Vₖ
- if iter == 1
- @kaxpy!(n, one(FC)/ρ, v, d)
+
+ # Continue QR factorization
+ #
+ # Q [ Lₖ  β₁ e₁ ] = [ Rₖ  zₖ   ] :
+ #   [ β   0     ]   [ 0   ζbar ]
+ #
+ #        k   k+1       k     k+1        k   k+1
+ # k    [ c    s ]   [ ρbar      ]  =  [ ρ   θ⁺    ]
+ # k+1  [ s   -c ]   [ β      α⁺ ]     [     ρbar⁺ ]
+ #
+ # so that we obtain
+ #
+ # [ c    s ] [ ζbar ] = [ ζ     ]
+ # [ s   -c ] [ 0    ]   [ ζbar⁺ ]
+ (c, s, ρ) = sym_givens(ρbar, βhat)
+ ζ = c * ζbar
+ ζbar = s * ζbar
+ rNorm = abs(ζbar)
+ history && push!(rNorms, rNorm)
+
+ @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ
+ @kaxpy!(m, ζ, w, y) # y = y + ζ * w
+
+ if λ > 0
+ # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁
+ if iter == 1
+ @kaxpy!(n, one(FC)/ρ, cpₖ * v, d)
+ else
+ @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d)
+ @kaxpy!(n, one(FC)/ρ, spₖ * q, d)
+ @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁
+ end
else
- @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d)
+ # DₖRₖ = Vₖ
+ if iter == 1
+ @kaxpy!(n, one(FC)/ρ, v, d)
+ else
+ @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d)
+ end
end
- end
- # xₖ = Dₖzₖ
- @kaxpy!(n, ζ, d, x)
+ # xₖ = Dₖzₖ
+ @kaxpy!(n, ζ, d, x)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
- Anorm² = Anorm² + α * α # = ‖Lₖ‖
- ArNorm = α * β * abs(ζ/ρ)
- history && push!(ArNorms, ArNorm)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+ Anorm² = Anorm² + α * α # = ‖Lₖ‖
+ ArNorm = α * β * abs(ζ/ρ)
+ history && push!(ArNorms, ArNorm)
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², ktimer(start_time))
- if λ > 0
- (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ)
- @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ
- (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁)
- else
- αhat = α
- end
+ if λ > 0
+ (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ)
+ @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ
+ (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁)
+ else
+ αhat = α
+ end
- if α ≠ 0
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
- @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha
+ if α ≠ 0
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
+ @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha
+ end
+ θ = s * αhat
+ ρbar = -c * αhat
+
+ user_requested_exit = callback(solver) :: Bool
+ solved = rNorm ≤ ɛ_c
+ inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i)
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
end
- θ = s * αhat
- ρbar = -c * αhat
-
- user_requested_exit = callback(solver) :: Bool
- solved = rNorm ≤ ɛ_c
- inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i)
- tired = iter ≥ itmax
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "found approximate minimum-norm solution")
+ !tired && !solved && (status = "found approximate minimum least-squares solution")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "found approximate minimum-norm solution")
- !tired && !solved && (status = "found approximate minimum least-squares solution")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
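The `def_args_*`/`def_kwargs_*` tuples and `@eval` blocks in this diff generate the out-of-place and in-place methods from a single keyword specification. Below is a standalone sketch of the splicing idiom with toy names; `Expr(:kw, ...)` stands in for whatever normalization the package's `extract_parameters` helper performs on the `:(; kw = default)` entries.

```julia
# Keyword specifications written once, as AST nodes (name, default).
def_kwargs = [Expr(:kw, :(atol::Float64), 1.0e-8),
              Expr(:kw, :(itmax::Int), 0)]

# @eval splices the shared specification into each generated method.
@eval function toy_solver(b; $(def_kwargs...))
  (atol, itmax)  # available as ordinary keyword arguments
end

toy_solver(zeros(3))             # (1.0e-8, 0)
toy_solver(zeros(3), itmax = 5)  # (1.0e-8, 5)
```

Note also that the out-of-place wrappers subtract the workspace-allocation time from `timemax` before calling the in-place method, so the time limit applies to the whole call, and add that time back into `stats.timer` afterwards.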
diff --git a/src/crls.jl b/src/crls.jl
index 6410fb836..bf43fa79b 100644
--- a/src/crls.jl
+++ b/src/crls.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the linear system
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# This implementation follows the formulation given in
#
@@ -20,12 +20,13 @@
export crls, crls!
-
"""
(x, stats) = crls(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
+ itmax::Int=0, timemax::Float64=Inf,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -34,19 +35,41 @@ Solve the linear least-squares problem
minimize ‖b - Ax‖₂² + λ‖x‖₂²
-using the Conjugate Residuals (CR) method. This method is equivalent to
-applying MINRES to the normal equations
+of size m × n using the Conjugate Residuals (CR) method.
+This method is equivalent to applying MINRES to the normal equations
- (AᵀA + λI) x = Aᵀb.
+ (AᴴA + λI) x = Aᴴb.
This implementation recurs the residual r := b - Ax.
-CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSMR, though it can be substantially less accurate;
it is, however, simpler to implement.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
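+
+#### Example
+
+A minimal sketch with hypothetical data:
+
+    using Krylov
+    A = randn(50, 20)  # over-determined least-squares problem
+    b = randn(50)
+    (x, stats) = crls(A, b, λ=1.0e-3, radius=10.0)
+    stats.status
+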
#### Reference
@@ -54,12 +77,6 @@ and `false` otherwise.
"""
function crls end
-function crls(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CrlsSolver(A, b)
- crls!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = crls!(solver::CrlsSolver, A, b; kwargs...)
@@ -69,143 +86,185 @@ See [`CrlsSolver`](@ref) for more details about the `solver`.
"""
function crls! end
-function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRLS: system of %d equations in %d variables\n", m, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :Ms, S, m)
- x, p, Ar, q = solver.x, solver.p, solver.Ar, solver.q
- r, Ap, s, stats = solver.r, solver.Ap, solver.s, solver.stats
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- Ms = MisI ? s : solver.Ms
- Mr = MisI ? r : solver.Ms
- MAp = MisI ? Ap : solver.Ms
-
- x .= zero(FC)
- r .= b
- bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
- rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
- history && push!(rNorms, rNorm)
- if bNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(ArNorms, zero(T))
- return solver
+def_args_crls = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_crls = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; radius::T = zero(T) ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_crls = mapreduce(extract_parameters, vcat, def_kwargs_crls)
+
+args_crls = (:A, :b)
+kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function crls($(def_args_crls...); $(def_kwargs_crls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CrlsSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ crls!(solver, $(args_crls...); $(kwargs_crls...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(Ar, Aᵀ, Mr) # - λ * x0 if x0 ≠ 0.
- mul!(s, A, Ar)
- MisI || mulorldiv!(Ms, M, s, ldiv)
-
- p .= Ar
- Ap .= s
- mul!(q, Aᵀ, Ms) # Ap
- λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
- γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms)
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar)
- λ > 0 && (γ += λ * ArNorm * ArNorm)
- history && push!(ArNorms, ArNorm)
- ε = atol + rtol * ArNorm
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
-
- status = "unknown"
- on_boundary = false
- solved = ArNorm ≤ ε
- tired = iter ≥ itmax
- psd = false
- user_requested_exit = false
-
- while ! (solved || tired || user_requested_exit)
- qNorm² = @kdotr(n, q, q) # dot(q, q)
- α = γ / qNorm²
-
- # if a trust-region constraint is give, compute step to the boundary
- # (note that α > 0 in CRLS)
- if radius > 0
- pNorm = @knrm2(n, p)
- if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p
- psd = true # det(AᵀA) = 0
- p = Ar # p = Aᵀr
- pNorm² = ArNorm * ArNorm
- mul!(q, Aᵀ, s)
- α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᵀr for α = ‖Ar‖²/γ
- else
- pNorm² = pNorm * pNorm
- σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))
- if α ≥ σ
- α = σ
- on_boundary = true
- end
- end
+ function crls!(solver :: CrlsSolver{T,FC,S}, $(def_args_crls...); $(def_kwargs_crls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CRLS: system of %d equations in %d variables\n", m, n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :Ms, S, m)
+ x, p, Ar, q = solver.x, solver.p, solver.Ar, solver.q
+ r, Ap, s, stats = solver.r, solver.Ap, solver.s, solver.stats
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ Ms = MisI ? s : solver.Ms
+ Mr = MisI ? r : solver.Ms
+ MAp = MisI ? Ap : solver.Ms
+
+ x .= zero(FC)
+ r .= b
+ bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
+ rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
+ history && push!(rNorms, rNorm)
+ if bNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(ArNorms, zero(T))
+ return solver
end
- @kaxpy!(n, α, p, x) # Faster than x = x + α * p
- @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q
- ArNorm = @knrm2(n, Ar)
- solved = psd || on_boundary
- solved && continue
- @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap
+ MisI || mulorldiv!(Mr, M, r, ldiv)
+ mul!(Ar, Aᴴ, Mr) # - λ * x0 if x0 ≠ 0.
mul!(s, A, Ar)
MisI || mulorldiv!(Ms, M, s, ldiv)
- γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, s)
- λ > 0 && (γ_next += λ * ArNorm * ArNorm)
- β = γ_next / γ
-
- @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p
- @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap
- MisI || mulorldiv!(MAp, M, Ap, ldiv)
- mul!(q, Aᵀ, MAp)
+
+ p .= Ar
+ Ap .= s
+ mul!(q, Aᴴ, Ms) # Ap
λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
+ γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms)
+ iter = 0
+ itmax == 0 && (itmax = m + n)
- γ = γ_next
- if λ > 0
- rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x))
- else
- rNorm = @knrm2(m, r) # norm(r)
- end
- history && push!(rNorms, rNorm)
+ ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar)
+ λ > 0 && (γ += λ * ArNorm * ArNorm)
history && push!(ArNorms, ArNorm)
- iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
- user_requested_exit = callback(solver) :: Bool
- solved = (ArNorm ≤ ε) || on_boundary
+ ε = atol + rtol * ArNorm
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time))
+
+ status = "unknown"
+ on_boundary = false
+ solved = ArNorm ≤ ε
tired = iter ≥ itmax
+ psd = false
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || user_requested_exit || overtimed)
+ qNorm² = @kdotr(n, q, q) # dot(q, q)
+ α = γ / qNorm²
+
+ # if a trust-region constraint is given, compute the step to the boundary
+ # (note that α > 0 in CRLS)
+ if radius > 0
+ pNorm = @knrm2(n, p)
+ if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p
+ psd = true # det(AᴴA) = 0
+ p = Ar # p = Aᴴr
+ pNorm² = ArNorm * ArNorm
+ mul!(q, Aᴴ, s)
+ α = min(ArNorm^2 / γ, maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ
+ else
+ pNorm² = pNorm * pNorm
+ σ = maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))
+ if α ≥ σ
+ α = σ
+ on_boundary = true
+ end
+ end
+ end
+
+ @kaxpy!(n, α, p, x) # Faster than x = x + α * p
+ @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q
+ ArNorm = @knrm2(n, Ar)
+ solved = psd || on_boundary
+ solved && continue
+ @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap
+ mul!(s, A, Ar)
+ MisI || mulorldiv!(Ms, M, s, ldiv)
+ γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, Ms)
+ λ > 0 && (γ_next += λ * ArNorm * ArNorm)
+ β = γ_next / γ
+
+ @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p
+ @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap
+ MisI || mulorldiv!(MAp, M, Ap, ldiv)
+ mul!(q, Aᴴ, MAp)
+ λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
+
+ γ = γ_next
+ if λ > 0
+ rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x))
+ else
+ rNorm = @knrm2(m, r) # norm(r)
+ end
+ history && push!(rNorms, rNorm)
+ history && push!(ArNorms, ArNorm)
+ iter = iter + 1
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time))
+ user_requested_exit = callback(solver) :: Bool
+ solved = (ArNorm ≤ ε) || on_boundary
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ psd && (status = "zero-curvature encountered")
+ on_boundary && (status = "on trust-region boundary")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- psd && (status = "zero-curvature encountered")
- on_boundary && (status = "on trust-region boundary")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
diff --git a/src/crmr.jl b/src/crmr.jl
index deb5cf79f..db333856c 100644
--- a/src/crmr.jl
+++ b/src/crmr.jl
@@ -10,9 +10,9 @@
# and is equivalent to applying the conjugate residual method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
-# This method is equivalent to Craig-MR, described in
+# This method is equivalent to CRAIGMR, described in
#
# D. Orban and M. Arioli. Iterative Solution of Symmetric Quasi-Definite Linear Systems,
# Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017.
@@ -26,12 +26,13 @@
export crmr, crmr!
-
"""
(x, stats) = crmr(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T),
- rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ N=I, ldiv::Bool=false,
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -40,11 +41,11 @@ Solve the consistent linear system
Ax + √λs = b
-using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization
+of size m × n using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CR to the normal equations
of the second kind
- (AAᵀ + λI) y = b
+ (AAᴴ + λI) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -58,10 +59,29 @@ CRMR produces monotonic residuals ‖r‖₂.
It is formally equivalent to CRAIGMR, though it can be slightly less accurate;
it is, however, simpler to implement. Only the x-part of the solution is returned.
-A preconditioner M may be provided.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
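+
+#### Example
+
+A minimal sketch with hypothetical data, assuming a consistent system:
+
+    using Krylov
+    A = randn(30, 80)  # under-determined operator
+    b = A * randn(80)  # consistent right-hand side
+    (x, stats) = crmr(A, b, atol=1.0e-8, rtol=1.0e-8)
+    (stats.niter, stats.status)
+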
#### References
@@ -70,12 +90,6 @@ and `false` otherwise.
"""
function crmr end
-function crmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = CrmrSolver(A, b)
- crmr!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = crmr!(solver::CrmrSolver, A, b; kwargs...)
@@ -85,107 +99,148 @@ See [`CrmrSolver`](@ref) for more details about the `solver`.
"""
function crmr! end
-function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T),
- rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRMR: system of %d equations in %d variables\n", m, n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :Mq, S, m)
- allocate_if(λ > 0, solver, :s , S, m)
- x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r
- q, s, stats = solver.q, solver.s, solver.stats
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- Mq = MisI ? q : solver.Mq
-
- x .= zero(FC) # initial estimation x = 0
- mulorldiv!(r, M, b, ldiv) # initial residual r = M * (b - Ax) = M * b
- bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
- rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
- history && push!(rNorms, rNorm)
- if bNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(ArNorms, zero(T))
- return solver
+def_args_crmr = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_crmr = (:(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_crmr = mapreduce(extract_parameters, vcat, def_kwargs_crmr)
+
+args_crmr = (:A, :b)
+kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function crmr($(def_args_crmr...); $(def_kwargs_crmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = CrmrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ crmr!(solver, $(args_crmr...); $(kwargs_crmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- λ > 0 && (s .= r)
- mul!(Aᵀr, Aᵀ, r) # - λ * x0 if x0 ≠ 0.
- p .= Aᵀr
- γ = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr)
- λ > 0 && (γ += λ * rNorm * rNorm)
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- ArNorm = sqrt(γ)
- history && push!(ArNorms, ArNorm)
- ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
- ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
-
- status = "unknown"
- solved = rNorm ≤ ɛ_c
- inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i)
- tired = iter ≥ itmax
- user_requested_exit = false
-
- while ! (solved || inconsistent || tired || user_requested_exit)
- mul!(q, A, p)
- λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s
- MisI || mulorldiv!(Mq, M, q, ldiv)
- α = γ / @kdotr(m, q, Mq) # Compute qᵗ * M * q
- @kaxpy!(n, α, p, x) # Faster than x = x + α * p
- @kaxpy!(m, -α, Mq, r) # Faster than r = r - α * Mq
- rNorm = @knrm2(m, r) # norm(r)
- mul!(Aᵀr, Aᵀ, r)
- γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr)
- λ > 0 && (γ_next += λ * rNorm * rNorm)
- β = γ_next / γ
-
- @kaxpby!(n, one(FC), Aᵀr, β, p) # Faster than p = Aᵀr + β * p
- if λ > 0
- @kaxpby!(m, one(FC), r, β, s) # s = r + β * s
+
+ function crmr!(solver :: CrmrSolver{T,FC,S}, $(def_args_crmr...); $(def_kwargs_crmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "CRMR: system of %d equations in %d variables\n", m, n)
+
+ # Tests N = Iₙ
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!NisI, solver, :Nq, S, m)
+ allocate_if(λ > 0, solver, :s , S, m)
+ x, p, Aᴴr, r = solver.x, solver.p, solver.Aᴴr, solver.r
+ q, s, stats = solver.q, solver.s, solver.stats
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ Nq = NisI ? q : solver.Nq
+
+ x .= zero(FC) # initial estimation x = 0
+ mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b
+ bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
+ rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
+ history && push!(rNorms, rNorm)
+ if bNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(ArNorms, zero(T))
+ return solver
end
+ λ > 0 && (s .= r)
+ mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0.
+ p .= Aᴴr
+ γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr)
+ λ > 0 && (γ += λ * rNorm * rNorm)
+ iter = 0
+ itmax == 0 && (itmax = m + n)
- γ = γ_next
ArNorm = sqrt(γ)
- history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
- iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
- user_requested_exit = callback(solver) :: Bool
+ ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
+ ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.
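+ # The loop below declares the system inconsistent when the residual
+ # stagnates well above ɛ_c while ‖Aᴴr‖ falls below ɛ_i, i.e. a
+ # minimum least-squares solution has been reached.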
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time))
+
+ status = "unknown"
solved = rNorm ≤ ɛ_c
inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i)
tired = iter ≥ itmax
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || inconsistent || tired || user_requested_exit || overtimed)
+ mul!(q, A, p)
+ λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s
+ NisI || mulorldiv!(Nq, N, q, ldiv)
+ α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q
+ @kaxpy!(n, α, p, x) # Faster than x = x + α * p
+ @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq
+ rNorm = @knrm2(m, r) # norm(r)
+ mul!(Aᴴr, Aᴴ, r)
+ γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr)
+ λ > 0 && (γ_next += λ * rNorm * rNorm)
+ β = γ_next / γ
+
+ @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p
+ if λ > 0
+ @kaxpby!(m, one(FC), r, β, s) # s = r + β * s
+ end
+
+ γ = γ_next
+ ArNorm = sqrt(γ)
+ history && push!(rNorms, rNorm)
+ history && push!(ArNorms, ArNorm)
+ iter = iter + 1
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time))
+ user_requested_exit = callback(solver) :: Bool
+ solved = rNorm ≤ ɛ_c
+ inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i)
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ inconsistent && (status = "system probably inconsistent but least squares/norm solution found")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- inconsistent && (status = "system probably inconsistent but least squares/norm solution found")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
diff --git a/src/diom.jl b/src/diom.jl
index 9c6b9767b..72ce462f6 100644
--- a/src/diom.jl
+++ b/src/diom.jl
@@ -11,40 +11,59 @@
export diom, diom!
"""
- (x, stats) = diom(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = diom(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ reorthogonalization::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using direct incomplete orthogonalization method.
+ (x, stats) = diom(A, b, x0::AbstractVector; kwargs...)
+
+DIOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the consistent linear system Ax = b of size n using DIOM.
DIOM only orthogonalizes the new vectors of the Krylov basis against the `memory` most recent vectors.
If CG is well defined on `Ax = b` and `memory = 2`, DIOM is theoretically equivalent to CG.
If `k ≤ memory` where `k` is the number of iterations, DIOM is theoretically equivalent to FOM.
Otherwise, DIOM interpolates between CG and FOM and is similar to CG with partial reorthogonalization.
-Partial reorthogonalization is available with the `reorthogonalization` option.
-
-An advantage of DIOM is that nonsymmetric or symmetric indefinite or both nonsymmetric
+An advantage of DIOM is that non-Hermitian systems, Hermitian indefinite systems, or both non-Hermitian
and indefinite systems of linear equations can be handled by this single algorithm.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-DIOM can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = diom(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
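+
+#### Example
+
+A minimal sketch with hypothetical data, including a warm start:
+
+    using Krylov, LinearAlgebra
+    n = 100
+    A = I + 0.1 * randn(n, n)  # square non-Hermitian system
+    b = randn(n)
+    (x, stats) = diom(A, b, memory=10, reorthogonalization=true)
+    (x, stats) = diom(A, b, x)  # warm start from the previous solution
+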
#### Reference
@@ -52,18 +71,6 @@ and `false` otherwise.
"""
function diom end
-function diom(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = DiomSolver(A, b, memory)
- diom!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function diom(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = DiomSolver(A, b, memory)
- diom!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = diom!(solver::DiomSolver, A, b; kwargs...)
solver = diom!(solver::DiomSolver, A, b, x0; kwargs...)
@@ -77,198 +84,256 @@ See [`DiomSolver`](@ref) for more details about the `solver`.
"""
function diom! end
-function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- diom!(solver, A, b; kwargs...)
- return solver
-end
-
-function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("DIOM: system of size %d\n", n)
-
- # Check M = Iₙ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :w, S, n)
- allocate_if(!NisI, solver, :z, S, n)
- Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V
- L, H, stats = solver.L, solver.H, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- w = MisI ? t : solver.w
- r₀ = MisI ? t : solver.w
-
- # Initial solution x₀ and residual r₀.
- x .= zero(FC) # x₀
- if warm_start
- mul!(t, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), t)
- else
- t .= b
+def_args_diom = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_diom = (:(x0::AbstractVector),)
+
+def_kwargs_diom = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; reorthogonalization::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_diom = mapreduce(extract_parameters, vcat, def_kwargs_diom)
+
+args_diom = (:A, :b)
+optargs_diom = (:x0,)
+kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function diom($(def_args_diom...), $(def_optargs_diom...); memory :: Int=20, $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = DiomSolver(A, b, memory)
+ warm_start!(solver, $(optargs_diom...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ diom!(solver, $(args_diom...); $(kwargs_diom...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀)
- rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+
+ function diom($(def_args_diom...); memory :: Int=20, $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = DiomSolver(A, b, memory)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ diom!(solver, $(args_diom...); $(kwargs_diom...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2*n)
+ function diom!(solver :: DiomSolver{T,FC,S}, $(def_args_diom...); $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "DIOM: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :w, S, n)
+ allocate_if(!NisI, solver, :z, S, n)
+ Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V
+ L, H, stats = solver.L, solver.H, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ w = MisI ? t : solver.w
+ r₀ = MisI ? t : solver.w
+
+ # Initial solution x₀ and residual r₀.
+ x .= zero(FC) # x₀
+ if warm_start
+ mul!(t, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), t)
+ else
+ t .= b
+ end
+ MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
+ rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
+ history && push!(rNorms, rNorm)
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
- mem = length(L) # Memory
- for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
- end
- H .= zero(FC) # Last column of the band hessenberg matrix Hₘ = LₘUₘ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
- # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
- # In addition of that, the last column of Uₘ is stored in H.
- L .= zero(FC) # Last mem pivots of Lₘ.
-
- # Initial ξ₁ and V₁.
- ξ = rNorm
- @. V[1] = r₀ / rNorm
-
- # Stopping criterion.
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || user_requested_exit)
-
- # Update iteration index.
- iter = iter + 1
-
- # Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V.
-
- # Incomplete Arnoldi procedure.
- z = NisI ? V[pos] : solver.z
- NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ
- mul!(t, A, z) # AN⁻¹vₘ
- MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
- for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
- diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ mem = length(V) # Memory
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ end
+ for i = 1 : mem-1
+ P[i] .= zero(FC) # Directions Pₖ = NVₖ(Uₖ)⁻¹.
end
+ H .= zero(FC) # Last column of the band Hessenberg matrix Hₖ = LₖUₖ.
+ # Each column has at most mem + 1 nonzero elements.
+ # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+ # k-i+1 is the index of the diagonal where hᵢ.ₖ is located.
+ # In addition, the last column of Uₖ is stored in H.
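+ # For example, with mem = 4 and k = 7: h₇.₇ ↦ H[1], h₆.₇ ↦ H[2], h₅.₇ ↦ H[3], h₄.₇ ↦ H[4].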
+ L .= zero(FC) # Last mem-1 pivots of Lₖ.
+
+ # Initial ξ₁ and V₁.
+ ξ = rNorm
+ V[1] .= r₀ ./ rNorm
+
+ # Stopping criterion.
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || user_requested_exit || overtimed)
+
+ # Update iteration index.
+ iter = iter + 1
- # Partial reorthogonalization of the Krylov basis.
- if reorthogonalization
+ # Set position in circular stacks.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to vₖ in the circular stack V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
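+ # For example, with mem = 4 and iter = 5: pos = 1 and next_pos = 2, so v₅ lives in the slot that held v₁.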
+
+ # Incomplete Arnoldi procedure.
+ z = NisI ? V[pos] : solver.z
+ NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ
+ mul!(t, A, z) # ANvₖ
+ MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1
- diag = iter - i + 2
- Htmp = @kdot(n, w, V[ipos])
- H[diag] += Htmp
- @kaxpy!(n, -Htmp, V[ipos], w)
+ ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
+ diag = iter - i + 1
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
- end
- # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
- if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ
- end
- # It's possible that uₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
- if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0
- end
+ # Partial reorthogonalization of the Krylov basis.
+ if reorthogonalization
+ for i = max(1, iter-mem+1) : iter
+ ipos = mod(i-1, mem) + 1
+ diag = iter - i + 1
+ Htmp = @kdot(n, w, V[ipos])
+ H[diag] += Htmp
+ @kaxpy!(n, -Htmp, V[ipos], w)
+ end
+ end
- # Update the LU factorization with partial pivoting of H.
- # Compute the last column of Uₘ.
- if iter ≥ 2
- for i = max(2,iter-mem+1) : iter
- lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
- diag = iter - i + 2
- next_diag = diag + 1
- # uᵢ.ₘ ← hᵢ.ₘ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₘ
- H[diag] = H[diag] - L[lpos] * H[next_diag]
+ # Compute hₖ₊₁.ₖ and vₖ₊₁.
+ Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ
end
- # Compute ξₘ the last component of zₘ = β(Lₘ)⁻¹e₁.
- # ξₘ = -lₘ.ₘ₋₁ * ξₘ₋₁
- ξ = - L[pos] * ξ
- end
- # Compute next pivot lₘ₊₁.ₘ = hₘ₊₁.ₘ / uₘ.ₘ
- L[next_pos] = H[1] / H[2]
-
- # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
- for i = max(1,iter-mem) : iter-1
- ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
- diag = iter - i + 2
- if ipos == pos
- # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ
- @kscal!(n, -H[diag], P[pos])
- else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ
- @kaxpy!(n, -H[diag], P[ipos], P[pos])
+
+ # Update the LU factorization of Hₖ.
+ # Compute the last column of Uₖ.
+ if iter ≥ 2
+ # u₁.ₖ ← h₁.ₖ if iter ≤ mem
+ # uₖ₋ₘₑₘ₊₁.ₖ ← hₖ₋ₘₑₘ₊₁.ₖ if iter ≥ mem + 1
+ for i = max(2,iter-mem+2) : iter
+ lpos = mod(i-1, mem-1) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
+ diag = iter - i + 1
+ next_diag = diag + 1
+ # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ
+ H[diag] = H[diag] - L[lpos] * H[next_diag]
+ if i == iter
+ # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁.
+ # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁
+ ξ = - L[lpos] * ξ
+ end
+ end
end
+ # Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ
+ next_lpos = mod(iter, mem-1) + 1
+ L[next_lpos] = Haux / H[1]
+
+ ppos = mod(iter-1, mem-1) + 1 # Position corresponding to pₖ in the circular stack P.
+
+ # Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹.
+ # u₁.ₖp₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≤ mem
+ # uₖ₋ₘₑₘ₊₁.ₖpₖ₋ₘₑₘ₊₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≥ mem + 1
+ for i = max(1,iter-mem+1) : iter-1
+ ipos = mod(i-1, mem-1) + 1 # Position corresponding to pᵢ in the circular stack P.
+ diag = iter - i + 1
+ if ipos == ppos
+ # pₖ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁
+ @kscal!(n, -H[diag], P[ppos])
+ else
+ # pₖ ← pₖ - uᵢ.ₖ * pᵢ
+ @kaxpy!(n, -H[diag], P[ipos], P[ppos])
+ end
+ end
+ # pₐᵤₓ ← pₐᵤₓ + Nvₖ
+ @kaxpy!(n, one(FC), z, P[ppos])
+ # pₖ = pₐᵤₓ / uₖ.ₖ
+ P[ppos] .= P[ppos] ./ H[1]
+
+ # Update solution xₖ.
+ # xₖ = xₖ₋₁ + ξₖ * pₖ
+ @kaxpy!(n, ξ, P[ppos], x)
+
+ # Compute residual norm.
+ # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ|
+ rNorm = Haux * abs(ξ / H[1])
+ history && push!(rNorms, rNorm)
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
end
- # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ
- @kaxpy!(n, one(FC), z, P[pos])
- # pₘ = pₐᵤₓ / uₘ.ₘ
- @. P[pos] = P[pos] / H[2]
-
- # Update solution xₘ.
- # xₘ = xₘ₋₁ + ξₘ * pₘ
- @kaxpy!(n, ξ, P[pos], x)
-
- # Compute residual norm.
- # ‖ M⁻¹(b - Axₘ) ‖₂ = hₘ₊₁.ₘ * |ξₘ / uₘ.ₘ|
- rNorm = real(H[1]) * abs(ξ / H[2])
- history && push!(rNorms, rNorm)
+ (verbose > 0) && @printf(iostream, "\n")
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- solved = resid_decrease_lim || resid_decrease_mach
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
diff --git a/src/dqgmres.jl b/src/dqgmres.jl
index ab7c490a6..4c1e52b37 100644
--- a/src/dqgmres.jl
+++ b/src/dqgmres.jl
@@ -11,16 +11,21 @@
export dqgmres, dqgmres!
"""
- (x, stats) = dqgmres(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = dqgmres(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ reorthogonalization::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using DQGMRES method.
+ (x, stats) = dqgmres(A, b, x0::AbstractVector; kwargs...)
+
+DQGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the consistent linear system Ax = b of size n using DQGMRES.
DQGMRES algorithm is based on the incomplete Arnoldi orthogonalization process
and computes a sequence of approximate solutions with the quasi-minimal residual property.
@@ -30,21 +35,35 @@ If MINRES is well defined on `Ax = b` and `memory = 2`, DQGMRES is theoretically
If `k ≤ memory` where `k` is the number of iterations, DQGMRES is theoretically equivalent to GMRES.
Otherwise, DQGMRES interpolates between MINRES and GMRES and is similar to MINRES with partial reorthogonalization.
-Partial reorthogonalization is available with the `reorthogonalization` option.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+#### Optional argument
-DQGMRES can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = dqgmres(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
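+
+#### Example
+
+A minimal sketch on a hypothetical sparse test problem; any square linear operator works:
+
+    using Krylov, SparseArrays, LinearAlgebra
+
+    A = sprandn(100, 100, 0.05) + 100 * I
+    b = rand(100)
+    x, stats = dqgmres(A, b, memory=30)
+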
#### Reference
@@ -52,18 +71,6 @@ and `false` otherwise.
"""
function dqgmres end
-function dqgmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = DqgmresSolver(A, b, memory)
- dqgmres!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function dqgmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = DqgmresSolver(A, b, memory)
- dqgmres!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = dqgmres!(solver::DqgmresSolver, A, b; kwargs...)
solver = dqgmres!(solver::DqgmresSolver, A, b, x0; kwargs...)
@@ -77,206 +84,258 @@ See [`DqgmresSolver`](@ref) for more details about the `solver`.
"""
function dqgmres! end
-function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- dqgmres!(solver, A, b; kwargs...)
- return solver
-end
-
-function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("DQGMRES: system of size %d\n", n)
-
- # Check M = Iₙ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :w, S, n)
- allocate_if(!NisI, solver, :z, S, n)
- Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V
- c, s, H, stats = solver.c, solver.s, solver.H, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- w = MisI ? t : solver.w
- r₀ = MisI ? t : solver.w
-
- # Initial solution x₀ and residual r₀.
- x .= zero(FC) # x₀
- if warm_start
- mul!(t, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), t)
- else
- t .= b
+def_args_dqgmres = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_dqgmres = (:(x0::AbstractVector),)
+
+def_kwargs_dqgmres = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; reorthogonalization::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_dqgmres = mapreduce(extract_parameters, vcat, def_kwargs_dqgmres)
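+
+# `def_args_dqgmres` and `def_kwargs_dqgmres` hold quoted argument and keyword declarations.
+# They are spliced with `$(...)` into the `@eval` block below, so the positional and keyword
+# signatures of `dqgmres` and `dqgmres!` are generated from a single definition.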
+
+args_dqgmres = (:A, :b)
+optargs_dqgmres = (:x0,)
+kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function dqgmres($(def_args_dqgmres...), $(def_optargs_dqgmres...); memory :: Int=20, $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = DqgmresSolver(A, b, memory)
+ warm_start!(solver, $(optargs_dqgmres...))
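+ # The setup time (solver allocation and warm start) is charged against the time limit
+ # and added back to the timer reported in the statistics.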
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ dqgmres!(solver, $(args_dqgmres...); $(kwargs_dqgmres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀)
- rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+
+ function dqgmres($(def_args_dqgmres...); memory :: Int=20, $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = DqgmresSolver(A, b, memory)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ dqgmres!(solver, $(args_dqgmres...); $(kwargs_dqgmres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2*n)
+ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, $(def_args_dqgmres...); $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "DQGMRES: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :w, S, n)
+ allocate_if(!NisI, solver, :z, S, n)
+ Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V
+ c, s, H, stats = solver.c, solver.s, solver.H, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ w = MisI ? t : solver.w
+ r₀ = MisI ? t : solver.w
+
+ # Initial solution x₀ and residual r₀.
+ x .= zero(FC) # x₀
+ if warm_start
+ mul!(t, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), t)
+ else
+ t .= b
+ end
+ MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
+ rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
+ history && push!(rNorms, rNorm)
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
- # Set up workspace.
- mem = length(c) # Memory.
- for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Rₘ)⁻¹.
- end
- c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ.
- s .= zero(FC) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ.
- H .= zero(FC) # Last column of the band hessenberg matrix Hₘ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
- # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
- # In addition of that, the last column of Rₘ is also stored in H.
-
- # Initial γ₁ and V₁.
- γₘ = rNorm # γₘ and γₘ₊₁ are the last components of gₘ, right-hand of the least squares problem min ‖ Hₘyₘ - gₘ ‖₂.
- @. V[1] = r₀ / rNorm
-
- # The following stopping criterion compensates for the lag in the
- # residual, but usually increases the number of iterations.
- # solved = sqrt(max(1, iter-mem+1)) * |γₘ₊₁| ≤ ε
- solved = rNorm ≤ ε # less accurate, but acceptable.
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || user_requested_exit)
-
- # Update iteration index.
- iter = iter + 1
-
- # Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V.
-
- # Incomplete Arnoldi procedure.
- z = NisI ? V[pos] : solver.z
- NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ
- mul!(t, A, z) # AN⁻¹vₘ
- MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
- for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
- diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ # Set up workspace.
+ mem = length(V) # Memory.
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹.
end
+ c .= zero(T) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ.
+ s .= zero(FC) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ.
+ H .= zero(FC) # Last column of the band Hessenberg matrix Hₖ.
+ # Each column has at most mem + 1 nonzero elements.
+ # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+ # k-i+1 is the index of the diagonal where hᵢ.ₖ is located.
+ # In addition, the last column of Rₖ is also stored in H.
+
+ # Initial γ₁ and V₁.
+ γₖ = rNorm # γₖ and γₖ₊₁ are the last components of gₖ, the right-hand side of the least-squares problem min ‖ Hₖyₖ - gₖ ‖₂.
+ V[1] .= r₀ ./ rNorm
+
+ # The following stopping criterion compensates for the lag in the
+ # residual, but usually increases the number of iterations.
+ # solved = sqrt(max(1, iter-mem+1)) * |γₖ₊₁| ≤ ε
+ solved = rNorm ≤ ε # less accurate, but acceptable.
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || user_requested_exit || overtimed)
+
+ # Update iteration index.
+ iter = iter + 1
- # Partial reorthogonalization of the Krylov basis.
- if reorthogonalization
+ # Set position in circular stacks.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
+
+ # Incomplete Arnoldi procedure.
+ z = NisI ? V[pos] : solver.z
+ NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ
+ mul!(t, A, z) # ANvₖ
+ MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1
- diag = iter - i + 2
- Htmp = @kdot(n, w, V[ipos])
- H[diag] += Htmp
- @kaxpy!(n, -Htmp, V[ipos], w)
+ ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
+ diag = iter - i + 1
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
- end
- # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
- if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ
- end
- # rₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
- if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0
- end
+ # Partial reorthogonalization of the Krylov basis.
+ if reorthogonalization
+ for i = max(1, iter-mem+1) : iter
+ ipos = mod(i-1, mem) + 1
+ diag = iter - i + 1
+ Htmp = @kdot(n, w, V[ipos])
+ H[diag] += Htmp
+ @kaxpy!(n, -Htmp, V[ipos], w)
+ end
+ end
- # Update the QR factorization of H.
- # Apply mem previous Givens reflections Ωᵢ.
- for i = max(1,iter-mem) : iter-1
- irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s.
- diag = iter - i + 1
- next_diag = diag + 1
- H_aux = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag]
- H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag]
- H[next_diag] = H_aux
- end
+ # Compute hₖ₊₁.ₖ and vₖ₊₁.
+ Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ
+ end
+ # rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
+ # We don't want to use rₖ₋₁₋ₘₑₘ.ₖ₋₁ when we compute rₖ₋ₘₑₘ.ₖ
+ if iter ≥ mem + 2
+ H[mem+1] = zero(FC) # rₖ₋ₘₑₘ.ₖ = 0
+ end
+
+ # Update the QR factorization of Hₖ.
+ # Apply mem previous Givens reflections Ωᵢ.
+ for i = max(1,iter-mem) : iter-1
+ irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s.
+ diag = iter - i
+ next_diag = diag + 1
+ Htmp = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag]
+ H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag]
+ H[next_diag] = Htmp
+ end
- # Compute and apply current Givens reflection Ωₘ.
- # [cₘ sₘ] [ hₘ.ₘ ] = [ρₘ]
- # [sₘ -cₘ] [hₘ₊₁.ₘ] [0 ]
- (c[pos], s[pos], H[2]) = sym_givens(H[2], H[1])
- γₘ₊₁ = conj(s[pos]) * γₘ
- γₘ = c[pos] * γₘ
-
- # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Rₘ)⁻¹.
- for i = max(1,iter-mem) : iter-1
- ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
- diag = iter - i + 2
- if ipos == pos
- # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ
- @kscal!(n, -H[diag], P[pos])
- else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ
- @kaxpy!(n, -H[diag], P[ipos], P[pos])
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ]
+ # [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ]
+ (c[pos], s[pos], H[1]) = sym_givens(H[1], Haux)
+ γₖ₊₁ = conj(s[pos]) * γₖ
+ γₖ = c[pos] * γₖ
+
+ # Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹.
+ for i = max(1,iter-mem) : iter-1
+ ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
+ diag = iter - i + 1
+ if ipos == pos
+ # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ
+ @kscal!(n, -H[diag], P[pos])
+ else
+ # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ
+ @kaxpy!(n, -H[diag], P[ipos], P[pos])
+ end
end
+ # pₐᵤₓ ← pₐᵤₓ + Nvₖ
+ @kaxpy!(n, one(FC), z, P[pos])
+ # pₖ = pₐᵤₓ / hₖ.ₖ
+ P[pos] .= P[pos] ./ H[1]
+
+ # Compute solution xₖ.
+ # xₖ ← xₖ₋₁ + γₖ * pₖ
+ @kaxpy!(n, γₖ, P[pos], x)
+
+ # Update residual norm estimate.
+ # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁|
+ rNorm = abs(γₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Update γₖ.
+ γₖ = γₖ₊₁
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
end
- # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ
- @kaxpy!(n, one(FC), z, P[pos])
- # pₘ = pₐᵤₓ / hₘ.ₘ
- @. P[pos] = P[pos] / H[2]
-
- # Compute solution xₘ.
- # xₘ ← xₘ₋₁ + γₘ * pₘ
- @kaxpy!(n, γₘ, P[pos], x)
-
- # Update residual norm estimate.
- # ‖ M⁻¹(b - Axₘ) ‖₂ ≈ |γₘ₊₁|
- rNorm = abs(γₘ₊₁)
- history && push!(rNorms, rNorm)
+ (verbose > 0) && @printf(iostream, "\n")
- # Update γₘ.
- γₘ = γₘ₊₁
+ # Termination status
+ solved && (status = "solution good enough given atol and rtol")
+ tired && (status = "maximum number of iterations exceeded")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- solved = resid_decrease_lim || resid_decrease_mach
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
- solved && (status = "solution good enough given atol and rtol")
- tired && (status = "maximum number of iterations exceeded")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
diff --git a/src/fgmres.jl b/src/fgmres.jl
new file mode 100644
index 000000000..1a68aac6c
--- /dev/null
+++ b/src/fgmres.jl
@@ -0,0 +1,391 @@
+# An implementation of FGMRES for the solution of the square linear system Ax = b.
+#
+# This method is described in
+#
+# Y. Saad, A Flexible Inner-Outer Preconditioned GMRES Algorithm.
+# SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993.
+#
+# Alexis Montoison,
+# Montreal, September 2022.
+
+export fgmres, fgmres!
+
+"""
+ (x, stats) = fgmres(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ restart::Bool=false, reorthogonalization::Bool=false,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
+
+`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
+`FC` is `T` or `Complex{T}`.
+
+ (x, stats) = fgmres(A, b, x0::AbstractVector; kwargs...)
+
+FGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the linear system Ax = b of size n using FGMRES.
+
+FGMRES computes a sequence of approximate solutions with minimum residual.
+FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration.
+
+This implementation allows a left preconditioner M and a flexible right preconditioner N.
+A situation in which the preconditioner is "not constant" is when a relaxation-type method,
+a Chebyshev iteration or another Krylov subspace method is used as a preconditioner.
+Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles.
+Thus, GMRES is recommended if the right preconditioner N is constant.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
+
+#### Keyword arguments
+
+* `memory`: if `restart = true`, the restarted version FGMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `restart`: restart the method after `memory` iterations;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
+
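+#### Example
+
+A minimal sketch of a variable right preconditioner: each application of `N` runs a few CG
+iterations, so `N` is a different (inexact) operator at every outer iteration, which is
+precisely the setting FGMRES supports. The test problem below is hypothetical:
+
+    using Krylov, SparseArrays, LinearAlgebra
+
+    struct CgPreconditioner{SM}
+        A::SM
+    end
+
+    # A right preconditioner is applied with `mul!` when `ldiv = false`.
+    LinearAlgebra.mul!(y, P::CgPreconditioner, x) = copyto!(y, cg(P.A, x, itmax=5)[1])
+
+    B = sprandn(100, 100, 0.05)
+    A = B' * B + 10 * I  # square symmetric positive definite matrix
+    b = rand(100)
+    x, stats = fgmres(A, b, N=CgPreconditioner(A))
+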
+#### Reference
+
+* Y. Saad, [*A Flexible Inner-Outer Preconditioned GMRES Algorithm*](https://doi.org/10.1137/0914028), SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993.
+"""
+function fgmres end
+
+"""
+ solver = fgmres!(solver::FgmresSolver, A, b; kwargs...)
+ solver = fgmres!(solver::FgmresSolver, A, b, x0; kwargs...)
+
+where `kwargs` are keyword arguments of [`fgmres`](@ref).
+
+Note that the `memory` keyword argument is the only exception.
+It's required to create an `FgmresSolver` and can't be changed later.
+
+See [`FgmresSolver`](@ref) for more details about the `solver`.
+"""
+function fgmres! end
+
+def_args_fgmres = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_fgmres = (:(x0::AbstractVector),)
+
+def_kwargs_fgmres = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; restart::Bool = false ),
+ :(; reorthogonalization::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_fgmres = mapreduce(extract_parameters, vcat, def_kwargs_fgmres)
+
+args_fgmres = (:A, :b)
+optargs_fgmres = (:x0,)
+kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function fgmres($(def_args_fgmres...), $(def_optargs_fgmres...); memory :: Int=20, $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = FgmresSolver(A, b, memory)
+ warm_start!(solver, $(optargs_fgmres...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ fgmres!(solver, $(args_fgmres...); $(kwargs_fgmres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
+ end
+
+ function fgmres($(def_args_fgmres...); memory :: Int=20, $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = FgmresSolver(A, b, memory)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ fgmres!(solver, $(args_fgmres...); $(kwargs_fgmres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
+ end
+
+ function fgmres!(solver :: FgmresSolver{T,FC,S}, $(def_args_fgmres...); $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "FGMRES: system of size %d\n", n)
+
+ # Check M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI , solver, :q , S, n)
+ allocate_if(restart, solver, :Δx, S, n)
+ Δx, x, w, V, Z = solver.Δx, solver.x, solver.w, solver.V, solver.Z
+ z, c, s, R, stats = solver.z, solver.c, solver.s, solver.R, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ q = MisI ? w : solver.q
+ r₀ = MisI ? w : solver.q
+ xr = restart ? Δx : x
+
+ # Initial solution x₀.
+ x .= zero(FC)
+
+ # Initial residual r₀.
+ if warm_start
+ mul!(w, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ restart && @kaxpy!(n, one(FC), Δx, x)
+ else
+ w .= b
+ end
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
+ β = @knrm2(n, r₀) # β = ‖r₀‖₂
+
+ rNorm = β
+ history && push!(rNorms, β)
+ ε = atol + rtol * rNorm
+
+ if β == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ mem = length(c) # Memory
+ npass = 0 # Number of passes
+
+ iter = 0 # Cumulative number of iterations
+ inner_iter = 0 # Number of iterations in a pass
+
+ itmax == 0 && (itmax = 2*n)
+ inner_itmax = itmax
+
+ (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time))
+
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
+
+ # Stopping criterion
+ breakdown = false
+ inconsistent = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ inner_tired = inner_iter ≥ inner_itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+
+ # Initialize workspace.
+ nr = 0 # Number of coefficients stored in Rₖ.
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}.
+ Z[i] .= zero(FC) # Zₖ = [N₁v₁, ..., Nₖvₖ]
+ end
+ s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ R .= zero(FC) # Upper triangular matrix Rₖ.
+ z .= zero(FC) # Right-hand side of the least-squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂.
+
+ if restart
+ xr .= zero(FC) # xr === Δx when restart is set to true
+ if npass ≥ 1
+ mul!(w, A, x)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ MisI || mulorldiv!(r₀, M, w, ldiv)
+ end
+ end
+
+ # Initial ζ₁ and V₁
+ β = @knrm2(n, r₀)
+ z[1] = β
+ @. V[1] = r₀ / rNorm
+
+ npass = npass + 1
+ solver.inner_iter = 0
+ inner_tired = false
+
+ while !(solved || inner_tired || breakdown || user_requested_exit || overtimed)
+
+ # Update iteration index
+ solver.inner_iter = solver.inner_iter + 1
+ inner_iter = solver.inner_iter
+
+ # Update workspace if more storage is required and restart is set to false
+ if !restart && (inner_iter > mem)
+ for i = 1 : inner_iter
+ push!(R, zero(FC))
+ end
+ push!(s, zero(FC))
+ push!(c, zero(T))
+ push!(Z, S(undef, n))
+ end
+
+ # Continue the process.
+ # MAZₖ = Vₖ₊₁Hₖ₊₁.ₖ
+ mulorldiv!(Z[inner_iter], N, V[inner_iter], ldiv) # zₖ ← Nₖvₖ
+ mul!(w, A, Z[inner_iter]) # w ← Azₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ
+ for i = 1 : inner_iter
+ R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
+ @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
+ end
+
+ # Reorthogonalization of the basis.
+ if reorthogonalization
+ for i = 1 : inner_iter
+ Htmp = @kdot(n, V[i], q)
+ R[nr+i] += Htmp
+ @kaxpy!(n, -Htmp, V[i], q)
+ end
+ end
+
+ # Compute hₖ₊₁.ₖ
+ Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+
+ # Update the QR factorization of Hₖ₊₁.ₖ.
+ # Apply previous Givens reflections Ωᵢ.
+ # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ]
+ # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ]
+ for i = 1 : inner_iter-1
+ Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1]
+ R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1]
+ R[nr+i] = Rtmp
+ end
+
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ]
+ # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
+ (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
+
+ # Update zₖ = (Qₖ)ᴴβe₁
+ ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
+ z[inner_iter] = c[inner_iter] * z[inner_iter]
+
+ # Update residual norm estimate.
+ # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁|
+ rNorm = abs(ζₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Update the number of coefficients in Rₖ
+ nr = nr + inner_iter
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = Hbis ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time))
+
+ # Compute vₖ₊₁
+ if !(solved || inner_tired || breakdown || user_requested_exit || overtimed)
+ if !restart && (inner_iter ≥ mem)
+ push!(V, S(undef, n))
+ push!(z, zero(FC))
+ end
+ @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q
+ z[inner_iter+1] = ζₖ₊₁
+ end
+ end
+
+ # Compute y by solving Ry = z with backward substitution.
+ y = z # yᵢ = ζᵢ
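+ # Rₖ is stored column by column in the vector R: rᵢ.ⱼ occupies position (j-1)j/2 + i.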
+ for i = inner_iter : -1 : 1
+ pos = nr + i - inner_iter # position of rᵢ.ₖ
+ for j = inner_iter : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ btol
+ y[i] = zero(FC)
+ inconsistent = true
+ else
+ y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ end
+ end
+
+ # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ
+ for i = 1 : inner_iter
+ @kaxpy!(n, y[i], Z[i], xr)
+ end
+ restart && @kaxpy!(n, one(FC), xr, x)
+
+ # Update inner_itmax, iter, tired and overtimed variables.
+ inner_itmax = inner_itmax - inner_iter
+ iter = iter + inner_iter
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ inconsistent && (status = "found approximate least-squares solution")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
+ end
+end
diff --git a/src/fom.jl b/src/fom.jl
index fcae5cf62..351fb246f 100644
--- a/src/fom.jl
+++ b/src/fom.jl
@@ -11,38 +11,54 @@
export fom, fom!
"""
- (x, stats) = fom(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
- restart::Bool=false, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = fom(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ restart::Bool=false, reorthogonalization::Bool=false,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using FOM method.
+ (x, stats) = fom(A, b, x0::AbstractVector; kwargs...)
+
+FOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the linear system Ax = b of size n using FOM.
FOM algorithm is based on the Arnoldi process and a Galerkin condition.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
-Full reorthogonalization is available with the `reorthogonalization` option.
+#### Optional argument
-If `restart = true`, the restarted version FOM(k) is used with `k = memory`.
-If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-FOM can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = fom(A, b, x0; kwargs...)
+* `memory`: if `restart = true`, the restarted version FOM(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `restart`: restart the method after `memory` iterations;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
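+
+#### Example
+
+A minimal sketch of the restarted variant FOM(30) on a hypothetical sparse test problem:
+
+    using Krylov, SparseArrays, LinearAlgebra
+
+    A = sprandn(200, 200, 0.05) + 20 * I
+    b = rand(200)
+    x, stats = fom(A, b, memory=30, restart=true)
+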
#### Reference
@@ -50,18 +66,6 @@ and `false` otherwise.
"""
function fom end
-function fom(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = FomSolver(A, b, memory)
- fom!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function fom(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = FomSolver(A, b, memory)
- fom!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = fom!(solver::FomSolver, A, b; kwargs...)
solver = fom!(solver::FomSolver, A, b, x0; kwargs...)
@@ -75,241 +79,293 @@ See [`FomSolver`](@ref) for more details about the `solver`.
"""
function fom! end
-function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- fom!(solver, A, b; kwargs...)
- return solver
-end
-
-function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
- restart :: Bool=false, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("FOM: system of size %d\n", n)
-
- # Check M = Iₙ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI , solver, :q , S, n)
- allocate_if(!NisI , solver, :p , S, n)
- allocate_if(restart, solver, :Δx, S, n)
- Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z
- l, U, stats = solver.l, solver.U, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- q = MisI ? w : solver.q
- r₀ = MisI ? w : solver.q
- xr = restart ? Δx : x
-
- # Initial solution x₀.
- x .= zero(FC)
-
- # Initial residual r₀.
- if warm_start
- mul!(w, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), w)
- restart && @kaxpy!(n, one(FC), Δx, x)
- else
- w .= b
+def_args_fom = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_fom = (:(x0::AbstractVector),)
+
+def_kwargs_fom = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; restart::Bool = false ),
+ :(; reorthogonalization::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_fom = mapreduce(extract_parameters, vcat, def_kwargs_fom)
+
+args_fom = (:A, :b)
+optargs_fom = (:x0,)
+kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function fom($(def_args_fom...), $(def_optargs_fom...); memory :: Int=20, $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = FomSolver(A, b, memory)
+ warm_start!(solver, $(optargs_fom...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ fom!(solver, $(args_fom...); $(kwargs_fom...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
- β = @knrm2(n, r₀) # β = ‖r₀‖₂
-
- rNorm = β
- history && push!(rNorms, β)
- ε = atol + rtol * rNorm
- if β == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function fom($(def_args_fom...); memory :: Int=20, $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = FomSolver(A, b, memory)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ fom!(solver, $(args_fom...); $(kwargs_fom...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- mem = length(l) # Memory
- npass = 0 # Number of pass
+ function fom!(solver :: FomSolver{T,FC,S}, $(def_args_fom...); $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "FOM: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI , solver, :q , S, n)
+ allocate_if(!NisI , solver, :p , S, n)
+ allocate_if(restart, solver, :Δx, S, n)
+ Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z
+ l, U, stats = solver.l, solver.U, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ q = MisI ? w : solver.q
+ r₀ = MisI ? w : solver.q
+ xr = restart ? Δx : x
+
+ # Initial solution x₀.
+ x .= zero(FC)
+
+ # Initial residual r₀.
+ if warm_start
+ mul!(w, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ restart && @kaxpy!(n, one(FC), Δx, x)
+ else
+ w .= b
+ end
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
+ β = @knrm2(n, r₀) # β = ‖r₀‖₂
+
+ rNorm = β
+ history && push!(rNorms, β)
+ ε = atol + rtol * rNorm
+
+ if β == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
- iter = 0 # Cumulative number of iterations
- inner_iter = 0 # Number of iterations in a pass
+ mem = length(l) # Memory
+ npass = 0 # Number of passes
- itmax == 0 && (itmax = 2*n)
- inner_itmax = itmax
+ iter = 0 # Cumulative number of iterations
+ inner_iter = 0 # Number of iterations in a pass
- (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
- kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
+ itmax == 0 && (itmax = 2*n)
+ inner_itmax = itmax
- # Tolerance for breakdown detection.
- btol = eps(T)^(3/4)
+ (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time))
- # Stopping criterion
- breakdown = false
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- inner_tired = inner_iter ≥ inner_itmax
- status = "unknown"
- user_requested_exit = false
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
- while !(solved || tired || breakdown || user_requested_exit)
+ # Stopping criterion
+ breakdown = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ inner_tired = inner_iter ≥ inner_itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
- # Initialize workspace.
- nr = 0 # Number of coefficients stored in Uₖ.
- for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀).
- end
- l .= zero(FC) # Lower unit triangular matrix Lₖ.
- U .= zero(FC) # Upper triangular matrix Uₖ.
- z .= zero(FC) # Solution of Lₖzₖ = βe₁.
-
- if restart
- xr .= zero(FC) # xr === Δx when restart is set to true
- if npass ≥ 1
- mul!(w, A, x)
- @kaxpby!(n, one(FC), b, -one(FC), w)
- MisI || mulorldiv!(r₀, M, w, ldiv)
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+
+ # Initialize workspace.
+ nr = 0 # Number of coefficients stored in Uₖ.
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ end
+ l .= zero(FC) # Lower unit triangular matrix Lₖ.
+ U .= zero(FC) # Upper triangular matrix Uₖ.
+ z .= zero(FC) # Solution of Lₖzₖ = βe₁.
+
+ if restart
+ xr .= zero(FC) # xr === Δx when restart is set to true
+ if npass ≥ 1
+ mul!(w, A, x)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ MisI || mulorldiv!(r₀, M, w, ldiv)
+ end
end
- end
- # Initial ζ₁ and V₁
- β = @knrm2(n, r₀)
- z[1] = β
- @. V[1] = r₀ / rNorm
+ # Initial ζ₁ and V₁
+ β = @knrm2(n, r₀)
+ z[1] = β
+ @. V[1] = r₀ / rNorm
- npass = npass + 1
- inner_iter = 0
- inner_tired = false
+ npass = npass + 1
+ inner_iter = 0
+ inner_tired = false
- while !(solved || inner_tired || breakdown)
+ while !(solved || inner_tired || breakdown)
- # Update iteration index
- inner_iter = inner_iter + 1
+ # Update iteration index
+ inner_iter = inner_iter + 1
- # Update workspace if more storage is required and restart is set to false
- if !restart && (inner_iter > mem)
- for i = 1 : inner_iter
- push!(U, zero(FC))
+ # Update workspace if more storage is required and restart is set to false
+ if !restart && (inner_iter > mem)
+ for i = 1 : inner_iter
+ push!(U, zero(FC))
+ end
+ push!(l, zero(FC))
+ push!(z, zero(FC))
end
- push!(l, zero(FC))
- push!(z, zero(FC))
- end
-
- # Continue the Arnoldi process.
- p = NisI ? V[inner_iter] : solver.p
- NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ
- mul!(w, A, p) # w ← AN⁻¹vₖ
- MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
- for i = 1 : inner_iter
- U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ
- @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
- end
- # Reorthogonalization of the Krylov basis.
- if reorthogonalization
+ # Continue the Arnoldi process.
+ p = NisI ? V[inner_iter] : solver.p
+ NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ
+ mul!(w, A, p) # w ← ANvₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ
for i = 1 : inner_iter
- Htmp = @kdot(n, V[i], q)
- U[nr+i] += Htmp
- @kaxpy!(n, -Htmp, V[i], q)
+ U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
+ @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
end
- end
- # Compute hₖ₊₁.ₖ
- Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ # Reorthogonalization of the Krylov basis.
+ if reorthogonalization
+ for i = 1 : inner_iter
+ Htmp = @kdot(n, V[i], q)
+ U[nr+i] += Htmp
+ @kaxpy!(n, -Htmp, V[i], q)
+ end
+ end
- # Update the LU factorization of Hₖ.
- if inner_iter ≥ 2
- for i = 2 : inner_iter
- # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ
- U[nr+i] = U[nr+i] - l[i-1] * U[nr+i-1]
+ # Compute hₖ₊₁.ₖ
+ Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+
+ # Update the LU factorization of Hₖ.
+ if inner_iter ≥ 2
+ for i = 2 : inner_iter
+ # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ
+ U[nr+i] = U[nr+i] - l[i-1] * U[nr+i-1]
+ end
+ # ζₖ = -lₖ.ₖ₋₁ * ζₖ₋₁
+ z[inner_iter] = - l[inner_iter-1] * z[inner_iter-1]
+ end
+ # lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ
+ l[inner_iter] = Hbis / U[nr+inner_iter]
+
+ # Update residual norm estimate.
+ # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ|
+ rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter])
+ history && push!(rNorms, rNorm)
+
+ # Update the number of coefficients in Uₖ
+ nr = nr + inner_iter
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = Hbis ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time))
+
+ # Compute vₖ₊₁.
+ if !(solved || inner_tired || breakdown || user_requested_exit || overtimed)
+ if !restart && (inner_iter ≥ mem)
+ push!(V, S(undef, n))
+ end
+ @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q
end
- # ζₖ = -lₖ.ₖ₋₁ * ζₖ₋₁
- z[inner_iter] = - l[inner_iter-1] * z[inner_iter-1]
end
- # lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ
- l[inner_iter] = Hbis / U[nr+inner_iter]
-
- # Update residual norm estimate.
- # ‖ M⁻¹(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ|
- rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter])
- history && push!(rNorms, rNorm)
-
- # Update the number of coefficients in Uₖ
- nr = nr + inner_iter
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- breakdown = Hbis ≤ btol
- solved = resid_decrease_lim || resid_decrease_mach
- inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
- kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
-
- # Compute vₖ₊₁.
- if !(solved || inner_tired || breakdown)
- if !restart && (inner_iter ≥ mem)
- push!(V, S(undef, n))
+
+ # Hₖyₖ = βe₁ ⟺ LₖUₖyₖ = βe₁ ⟺ Uₖyₖ = zₖ.
+ # Compute yₖ by solving Uₖyₖ = zₖ with backward substitution.
+ y = z # yᵢ = zᵢ
+ for i = inner_iter : -1 : 1
+        pos = nr + i - inner_iter      # position of uᵢ.ₖ
+ for j = inner_iter : -1 : i+1
+ y[i] = y[i] - U[pos] * y[j] # yᵢ ← yᵢ - uᵢⱼyⱼ
+          pos = pos - j + 1            # position of uᵢ.ⱼ₋₁
end
- @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q
+        y[i] = y[i] / U[pos]           # yᵢ ← yᵢ / uᵢᵢ
end
- end
- # Hₖyₖ = βe₁ ⟺ LₖUₖyₖ = βe₁ ⟺ Uₖyₖ = zₖ.
- # Compute yₖ by solving Uₖyₖ = zₖ with backward substitution.
- y = z # yᵢ = zᵢ
- for i = inner_iter : -1 : 1
- pos = nr + i - inner_iter # position of rᵢ.ₖ
- for j = inner_iter : -1 : i+1
- y[i] = y[i] - U[pos] * y[j] # yᵢ ← yᵢ - uᵢⱼyⱼ
- pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ # Form xₖ = NVₖyₖ
+ for i = 1 : inner_iter
+ @kaxpy!(n, y[i], V[i], xr)
+ end
+ if !NisI
+ solver.p .= xr
+ mulorldiv!(xr, N, solver.p, ldiv)
end
- y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ
+ restart && @kaxpy!(n, one(FC), xr, x)
+
+ # Update inner_itmax, iter, tired and overtimed variables.
+ inner_itmax = inner_itmax - inner_iter
+ iter = iter + inner_iter
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Form xₖ = N⁻¹Vₖyₖ
- for i = 1 : inner_iter
- @kaxpy!(n, y[i], V[i], xr)
- end
- if !NisI
- solver.p .= xr
- mulorldiv!(xr, N, solver.p, ldiv)
- end
- restart && @kaxpy!(n, one(FC), xr, x)
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "inconsistent linear system")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Update inner_itmax, iter and tired variables.
- inner_itmax = inner_itmax - inner_iter
- iter = iter + inner_iter
- tired = iter ≥ itmax
+ # Update x
+ warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !solved && breakdown
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "inconsistent linear system")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !solved && breakdown
- stats.status = status
- return solver
end
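The packed backward substitution above solves Uₖyₖ = zₖ by walking a flat vector that stores the upper triangular factor column by column, so entry (i, j) lives at position (j-1)j÷2 + i. The following minimal, self-contained sketch reproduces the same index arithmetic; the name `packed_backsolve!` and the dense test matrix are illustrative, not part of the package:

    using LinearAlgebra

    # Solve R y = z where the upper triangle of R is packed column-major in P.
    function packed_backsolve!(y, P, k)
        nr = k * (k + 1) ÷ 2            # number of packed coefficients
        for i = k : -1 : 1
            pos = nr + i - k            # position of R[i,k]
            for j = k : -1 : i+1
                y[i] -= P[pos] * y[j]   # yᵢ ← yᵢ - rᵢⱼ yⱼ
                pos = pos - j + 1       # move to R[i,j-1]
            end
            y[i] /= P[pos]              # yᵢ ← yᵢ / rᵢᵢ
        end
        return y
    end

    k = 4
    R = triu(rand(k, k) + k * I)                # illustrative test matrix
    P = [R[i, j] for j = 1 : k for i = 1 : j]   # pack the upper triangle
    z = rand(k)
    y = packed_backsolve!(copy(z), P, k)
    @assert R * y ≈ z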
diff --git a/src/gmres.jl b/src/gmres.jl
index 388a4ab96..7ee6e2341 100644
--- a/src/gmres.jl
+++ b/src/gmres.jl
@@ -11,38 +11,54 @@
export gmres, gmres!
"""
- (x, stats) = gmres(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
- restart::Bool=false, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = gmres(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ restart::Bool=false, reorthogonalization::Bool=false,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using GMRES method.
+ (x, stats) = gmres(A, b, x0::AbstractVector; kwargs...)
-GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimal residual property.
+GMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+Solve the linear system Ax = b of size n using GMRES.
-Full reorthogonalization is available with the `reorthogonalization` option.
+GMRES is based on the Arnoldi process and computes a sequence of approximate solutions with the minimum residual property.
-If `restart = true`, the restarted version GMRES(k) is used with `k = memory`.
-If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+#### Input arguments
-GMRES can be warm-started from an initial guess `x0` with the method
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
- (x, stats) = gmres(A, b, x0; kwargs...)
+#### Optional argument
-where `kwargs` are the same keyword arguments as above.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Keyword arguments
+
+* `memory`: if `restart = true`, the restarted version GMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `restart`: restart the method after `memory` iterations;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -50,18 +66,6 @@ and `false` otherwise.
"""
function gmres end
-function gmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = GmresSolver(A, b, memory)
- gmres!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function gmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = GmresSolver(A, b, memory)
- gmres!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = gmres!(solver::GmresSolver, A, b; kwargs...)
solver = gmres!(solver::GmresSolver, A, b, x0; kwargs...)
@@ -75,260 +79,310 @@ See [`GmresSolver`](@ref) for more details about the `solver`.
"""
function gmres! end
-function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- gmres!(solver, A, b; kwargs...)
- return solver
-end
-
-function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
- restart :: Bool=false, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("GMRES: system of size %d\n", n)
-
- # Check M = Iₙ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI , solver, :q , S, n)
- allocate_if(!NisI , solver, :p , S, n)
- allocate_if(restart, solver, :Δx, S, n)
- Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z
- c, s, R, stats = solver.c, solver.s, solver.R, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- q = MisI ? w : solver.q
- r₀ = MisI ? w : solver.q
- xr = restart ? Δx : x
-
- # Initial solution x₀.
- x .= zero(FC)
-
- # Initial residual r₀.
- if warm_start
- mul!(w, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), w)
- restart && @kaxpy!(n, one(FC), Δx, x)
- else
- w .= b
+def_args_gmres = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_gmres = (:(x0::AbstractVector),)
+
+def_kwargs_gmres = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; restart::Bool = false ),
+ :(; reorthogonalization::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_gmres = mapreduce(extract_parameters, vcat, def_kwargs_gmres)
+
+args_gmres = (:A, :b)
+optargs_gmres = (:x0,)
+kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function gmres($(def_args_gmres...), $(def_optargs_gmres...); memory :: Int=20, $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = GmresSolver(A, b, memory)
+ warm_start!(solver, $(optargs_gmres...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ gmres!(solver, $(args_gmres...); $(kwargs_gmres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
- β = @knrm2(n, r₀) # β = ‖r₀‖₂
-
- rNorm = β
- history && push!(rNorms, β)
- ε = atol + rtol * rNorm
- if β == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function gmres($(def_args_gmres...); memory :: Int=20, $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = GmresSolver(A, b, memory)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ gmres!(solver, $(args_gmres...); $(kwargs_gmres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- mem = length(c) # Memory
- npass = 0 # Number of pass
+ function gmres!(solver :: GmresSolver{T,FC,S}, $(def_args_gmres...); $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "GMRES: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI , solver, :q , S, n)
+ allocate_if(!NisI , solver, :p , S, n)
+ allocate_if(restart, solver, :Δx, S, n)
+ Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z
+ c, s, R, stats = solver.c, solver.s, solver.R, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ q = MisI ? w : solver.q
+ r₀ = MisI ? w : solver.q
+ xr = restart ? Δx : x
+
+ # Initial solution x₀.
+ x .= zero(FC)
+
+ # Initial residual r₀.
+ if warm_start
+ mul!(w, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ restart && @kaxpy!(n, one(FC), Δx, x)
+ else
+ w .= b
+ end
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
+ β = @knrm2(n, r₀) # β = ‖r₀‖₂
+
+ rNorm = β
+ history && push!(rNorms, β)
+ ε = atol + rtol * rNorm
+
+ if β == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ mem = length(c) # Memory
+    npass = 0           # Number of passes
- iter = 0 # Cumulative number of iterations
- inner_iter = 0 # Number of iterations in a pass
+ iter = 0 # Cumulative number of iterations
+ inner_iter = 0 # Number of iterations in a pass
- itmax == 0 && (itmax = 2*n)
- inner_itmax = itmax
+ itmax == 0 && (itmax = 2*n)
+ inner_itmax = itmax
- (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
- kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
+ (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time))
- # Tolerance for breakdown detection.
- btol = eps(T)^(3/4)
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
- # Stopping criterion
- breakdown = false
- inconsistent = false
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- inner_tired = inner_iter ≥ inner_itmax
- status = "unknown"
- user_requested_exit = false
+ # Stopping criterion
+ breakdown = false
+ inconsistent = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ inner_tired = inner_iter ≥ inner_itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
- while !(solved || tired || breakdown || user_requested_exit)
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
- # Initialize workspace.
- nr = 0 # Number of coefficients stored in Rₖ.
- for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀).
- end
- s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
- c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
- R .= zero(FC) # Upper triangular matrix Rₖ.
- z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂.
-
- if restart
- xr .= zero(FC) # xr === Δx when restart is set to true
- if npass ≥ 1
- mul!(w, A, x)
- @kaxpby!(n, one(FC), b, -one(FC), w)
- MisI || mulorldiv!(r₀, M, w, ldiv)
+ # Initialize workspace.
+ nr = 0 # Number of coefficients stored in Rₖ.
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ end
+ s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ R .= zero(FC) # Upper triangular matrix Rₖ.
+      z .= zero(FC)  # Right-hand side of the least-squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂.
+
+ if restart
+ xr .= zero(FC) # xr === Δx when restart is set to true
+ if npass ≥ 1
+ mul!(w, A, x)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ MisI || mulorldiv!(r₀, M, w, ldiv)
+ end
end
- end
- # Initial ζ₁ and V₁
- β = @knrm2(n, r₀)
- z[1] = β
- @. V[1] = r₀ / rNorm
+ # Initial ζ₁ and V₁
+ β = @knrm2(n, r₀)
+ z[1] = β
+ @. V[1] = r₀ / rNorm
- npass = npass + 1
- solver.inner_iter = 0
- inner_tired = false
+ npass = npass + 1
+ solver.inner_iter = 0
+ inner_tired = false
- while !(solved || inner_tired || breakdown || user_requested_exit)
+ while !(solved || inner_tired || breakdown || user_requested_exit || overtimed)
- # Update iteration index
- solver.inner_iter = solver.inner_iter + 1
- inner_iter = solver.inner_iter
+ # Update iteration index
+ solver.inner_iter = solver.inner_iter + 1
+ inner_iter = solver.inner_iter
- # Update workspace if more storage is required and restart is set to false
- if !restart && (inner_iter > mem)
+ # Update workspace if more storage is required and restart is set to false
+ if !restart && (inner_iter > mem)
+ for i = 1 : inner_iter
+ push!(R, zero(FC))
+ end
+ push!(s, zero(FC))
+ push!(c, zero(T))
+ end
+
+ # Continue the Arnoldi process.
+ p = NisI ? V[inner_iter] : solver.p
+ NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ
+ mul!(w, A, p) # w ← ANvₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ
for i = 1 : inner_iter
- push!(R, zero(FC))
+ R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
+ @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
end
- push!(s, zero(FC))
- push!(c, zero(T))
- end
- # Continue the Arnoldi process.
- p = NisI ? V[inner_iter] : solver.p
- NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ
- mul!(w, A, p) # w ← AN⁻¹vₖ
- MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
- for i = 1 : inner_iter
- R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ
- @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
- end
+ # Reorthogonalization of the Krylov basis.
+ if reorthogonalization
+ for i = 1 : inner_iter
+ Htmp = @kdot(n, V[i], q)
+ R[nr+i] += Htmp
+ @kaxpy!(n, -Htmp, V[i], q)
+ end
+ end
- # Reorthogonalization of the Krylov basis.
- if reorthogonalization
- for i = 1 : inner_iter
- Htmp = @kdot(n, V[i], q)
- R[nr+i] += Htmp
- @kaxpy!(n, -Htmp, V[i], q)
+ # Compute hₖ₊₁.ₖ
+        Hbis = @knrm2(n, q)  # hₖ₊₁.ₖ = ‖q‖₂
+
+ # Update the QR factorization of Hₖ₊₁.ₖ.
+ # Apply previous Givens reflections Ωᵢ.
+ # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ]
+ # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ]
+ for i = 1 : inner_iter-1
+ Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1]
+ R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1]
+ R[nr+i] = Rtmp
end
- end
- # Compute hₖ₊₁.ₖ
- Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
-
- # Update the QR factorization of Hₖ₊₁.ₖ.
- # Apply previous Givens reflections Ωᵢ.
- # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ]
- # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ]
- for i = 1 : inner_iter-1
- Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1]
- R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1]
- R[nr+i] = Rtmp
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ]
+ # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
+ (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
+
+ # Update zₖ = (Qₖ)ᴴβe₁
+ ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
+ z[inner_iter] = c[inner_iter] * z[inner_iter]
+
+ # Update residual norm estimate.
+ # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁|
+ rNorm = abs(ζₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Update the number of coefficients in Rₖ
+ nr = nr + inner_iter
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = Hbis ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time))
+
+ # Compute vₖ₊₁.
+ if !(solved || inner_tired || breakdown || user_requested_exit || overtimed)
+ if !restart && (inner_iter ≥ mem)
+ push!(V, S(undef, n))
+ push!(z, zero(FC))
+ end
+ @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q
+ z[inner_iter+1] = ζₖ₊₁
+ end
end
- # Compute and apply current Givens reflection Ωₖ.
- # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ]
- # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
- (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
-
- # Update zₖ = (Qₖ)ᵀβe₁
- ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
- z[inner_iter] = c[inner_iter] * z[inner_iter]
-
- # Update residual norm estimate.
- # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁|
- rNorm = abs(ζₖ₊₁)
- history && push!(rNorms, rNorm)
-
- # Update the number of coefficients in Rₖ
- nr = nr + inner_iter
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- resid_decrease_lim = rNorm ≤ ε
- breakdown = Hbis ≤ btol
- solved = resid_decrease_lim || resid_decrease_mach
- inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
- solver.inner_iter = inner_iter
- kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
-
- # Compute vₖ₊₁
- if !(solved || inner_tired || breakdown)
- if !restart && (inner_iter ≥ mem)
- push!(V, S(undef, n))
- push!(z, zero(FC))
+ # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
+ y = z # yᵢ = zᵢ
+ for i = inner_iter : -1 : 1
+ pos = nr + i - inner_iter # position of rᵢ.ₖ
+ for j = inner_iter : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ btol
+ y[i] = zero(FC)
+ inconsistent = true
+ else
+ y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
end
- @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q
- z[inner_iter+1] = ζₖ₊₁
end
- user_requested_exit = callback(solver) :: Bool
- end
-
- # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
- y = z # yᵢ = zᵢ
- for i = inner_iter : -1 : 1
- pos = nr + i - inner_iter # position of rᵢ.ₖ
- for j = inner_iter : -1 : i+1
- y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
- pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ # Form xₖ = NVₖyₖ
+ for i = 1 : inner_iter
+ @kaxpy!(n, y[i], V[i], xr)
end
- # Rₖ can be singular if the system is inconsistent
- if abs(R[pos]) ≤ btol
- y[i] = zero(FC)
- inconsistent = true
- else
- y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ if !NisI
+ solver.p .= xr
+ mulorldiv!(xr, N, solver.p, ldiv)
end
+ restart && @kaxpy!(n, one(FC), xr, x)
+
+ # Update inner_itmax, iter, tired and overtimed variables.
+ inner_itmax = inner_itmax - inner_iter
+ iter = iter + inner_iter
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Form xₖ = N⁻¹Vₖyₖ
- for i = 1 : inner_iter
- @kaxpy!(n, y[i], V[i], xr)
- end
- if !NisI
- solver.p .= xr
- mulorldiv!(xr, N, solver.p, ldiv)
- end
- restart && @kaxpy!(n, one(FC), xr, x)
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ inconsistent && (status = "found approximate least-squares solution")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Update inner_itmax, iter and tired variables.
- inner_itmax = inner_itmax - inner_iter
- iter = iter + inner_iter
- tired = iter ≥ itmax
+ # Update x
+ warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- inconsistent && (status = "found approximate least-squares solution")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
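The updated docstring above introduces the `timemax` and `iostream` keywords alongside the existing `restart` and `memory` options. A short usage sketch, assuming only the exported `gmres` API; the tridiagonal test matrix is illustrative:

    using Krylov, SparseArrays

    n = 100
    A = spdiagm(-1 => -ones(n-1), 0 => 4 * ones(n), 1 => -ones(n-1))  # illustrative
    b = A * ones(n)

    # Restarted GMRES(20) with a 10-second time limit and residual history.
    x, stats = gmres(A, b; memory=20, restart=true, timemax=10.0, history=true)

    # Warm start a second solve from the previous solution.
    x, stats = gmres(A, b, x; itmax=5)
    stats.timer    # elapsed seconds, recorded by the new timer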
diff --git a/src/gpmr.jl b/src/gpmr.jl
index b10942995..1049c3b50 100644
--- a/src/gpmr.jl
+++ b/src/gpmr.jl
@@ -3,8 +3,8 @@
# This method is described in
#
# A. Montoison and D. Orban
-# GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems
-# Cahier du GERAD G-2021-62.
+# GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems.
+# SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023.
#
# Alexis Montoison,
# Montréal, August 2021.
@@ -12,23 +12,30 @@
export gpmr, gpmr!
"""
- (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; memory::Int=20,
- C=I, D=I, E=I, F=I, atol::T=√eps(T), rtol::T=√eps(T),
- gsp::Bool=false, reorthogonalization::Bool=false,
- itmax::Int=0, λ::FC=one(FC), μ::FC=one(FC),
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC};
+ memory::Int=20, C=I, D=I, E=I, F=I,
+ ldiv::Bool=false, gsp::Bool=false,
+ λ::FC=one(FC), μ::FC=one(FC),
+ reorthogonalization::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-GPMR solves the unsymmetric partitioned linear system
+ (x, y, stats) = gpmr(A, B, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
- [ λI A ] [ x ] = [ b ]
- [ B μI ] [ y ] [ c ],
+GPMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
-where λ and μ are real or complex numbers.
-`A` can have any shape and `B` has the shape of `Aᵀ`.
+Given matrices `A` of dimension m × n and `B` of dimension n × m,
+GPMR solves the non-Hermitian partitioned linear system
+
+ [ λIₘ A ] [ x ] = [ b ]
+ [ B μIₙ ] [ y ] [ c ],
+
+of size (n+m) × (n+m) where λ and μ are real or complex numbers.
+`A` can have any shape and `B` has the shape of `Aᴴ`.
`A`, `B`, `b` and `c` must be all nonzero.
This implementation allows left and right block diagonal preconditioners
@@ -44,8 +51,6 @@ and can solve
when `CE = M⁻¹` and `DF = N⁻¹`.
By default, GPMR solves unsymmetric linear systems with `λ = 1` and `μ = 1`.
-If `gsp = true`, `λ = 1`, `μ = 0` and the associated generalized saddle point system is solved.
-`λ` and `μ` are also keyword arguments that can be directly modified for more specific problems.
GPMR is based on the orthogonal Hessenberg reduction process and its relations with the block-Arnoldi process.
The residual norm ‖rₖ‖ is monotonically decreasing in GPMR.
@@ -53,38 +58,50 @@ The residual norm ‖rₖ‖ is monotonically decreasing in GPMR.
GPMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`.
`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Full reorthogonalization is available with the `reorthogonalization` option.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `B`: a linear operator that models a matrix of dimension n × m;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Optional arguments
-GPMR can be warm-started from initial guesses `x0` and `y0` with the method
+* `x0`: a vector of length m that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
- (x, y, stats) = gpmr(A, B, b, c, x0, y0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `memory`: the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `C`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal left preconditioner;
+* `D`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal left preconditioner;
+* `E`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal right preconditioner;
+* `F`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal right preconditioner;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `gsp`: if `true`, set `λ = 1` and `μ = 0` for generalized saddle-point systems;
+* `λ` and `μ`: diagonal scaling factors of the partitioned linear system;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length m;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
-* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://dx.doi.org/10.13140/RG.2.2.24069.68326), Cahier du GERAD G-2021-62, GERAD, Montréal, 2021.
+* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://doi.org/10.1137/21M1459265), SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023.
"""
function gpmr end
-function gpmr(A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = GpmrSolver(A, b, memory)
- gpmr!(solver, A, B, b, c, x0, y0; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
-function gpmr(A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
- solver = GpmrSolver(A, b, memory)
- gpmr!(solver, A, B, b, c; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = gpmr!(solver::GpmrSolver, A, B, b, c; kwargs...)
solver = gpmr!(solver::GpmrSolver, A, B, b, c, x0, y0; kwargs...)
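For repeated solves, the in-place `gpmr!` documented above reuses a preallocated `GpmrSolver` workspace. A hedged sketch of the pattern; the operators and sizes below are illustrative:

    using Krylov, SparseArrays

    m, n = 60, 40
    A = sprand(m, n, 0.3)                  # illustrative operators
    B = copy(A')                           # B must have the shape of Aᴴ
    b, c = rand(m), rand(n)

    solver = GpmrSolver(A, b, 20)          # workspace with memory = 20
    gpmr!(solver, A, B, b, c; gsp=true)    # λ = 1, μ = 0: generalized saddle point
    x, y, stats = solver.x, solver.y, solver.stats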
@@ -98,382 +115,436 @@ See [`GpmrSolver`](@ref) for more details about the `solver`.
"""
function gpmr! end
-function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0, y0)
- gpmr!(solver, A, B, b, c; kwargs...)
- return solver
-end
+def_args_gpmr = (:(A ),
+ :(B ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_gpmr = (:(x0 :: AbstractVector),
+ :(y0 :: AbstractVector))
+
+def_kwargs_gpmr = (:(; C = I ),
+ :(; D = I ),
+ :(; E = I ),
+ :(; F = I ),
+ :(; ldiv::Bool = false ),
+ :(; gsp::Bool = false ),
+ :(; λ::FC = one(FC) ),
+ :(; μ::FC = one(FC) ),
+ :(; reorthogonalization::Bool = false),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_gpmr = mapreduce(extract_parameters, vcat, def_kwargs_gpmr)
+
+args_gpmr = (:A, :B, :b, :c)
+optargs_gpmr = (:x0, :y0)
+kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function gpmr($(def_args_gpmr...), $(def_optargs_gpmr...); memory :: Int=20, $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = GpmrSolver(A, b, memory)
+ warm_start!(solver, $(optargs_gpmr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ gpmr!(solver, $(args_gpmr...); $(kwargs_gpmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
+ end
-function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- C=I, D=I, E=I, F=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- gsp :: Bool=false, reorthogonalization :: Bool=false,
- itmax :: Int=0, λ :: FC=one(FC), μ :: FC=one(FC),
- verbose :: Int=0, history::Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- s, t = size(B)
- m == t || error("Inconsistent problem size")
- s == n || error("Inconsistent problem size")
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("GPMR: system of %d equations in %d variables\n", m+n, m+n)
-
- # Check C = E = Iₘ and D = F = Iₙ
- CisI = (C === I)
- DisI = (D === I)
- EisI = (E === I)
- FisI = (F === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- eltype(B) == FC || error("eltype(B) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Determine λ and μ associated to generalized saddle point systems.
- gsp && (λ = one(FC) ; μ = zero(FC))
-
- warm_start = solver.warm_start
- warm_start && (λ ≠ 0) && !EisI && error("Warm-start with right preconditioners is not supported.")
- warm_start && (μ ≠ 0) && !FisI && error("Warm-start with right preconditioners is not supported.")
-
- # Set up workspace.
- allocate_if(!CisI, solver, :q , S, m)
- allocate_if(!DisI, solver, :p , S, n)
- allocate_if(!EisI, solver, :wB, S, m)
- allocate_if(!FisI, solver, :wA, S, n)
- wA, wB, dA, dB, Δx, Δy = solver.wA, solver.wB, solver.dA, solver.dB, solver.Δx, solver.Δy
- x, y, V, U, gs, gc = solver.x, solver.y, solver.V, solver.U, solver.gs, solver.gc
- zt, R, stats = solver.zt, solver.R, solver.stats
- rNorms = stats.residuals
- reset!(stats)
- b₀ = warm_start ? dA : b
- c₀ = warm_start ? dB : c
- q = CisI ? dA : solver.q
- p = DisI ? dB : solver.p
-
- # Initial solutions x₀ and y₀.
- x .= zero(FC)
- y .= zero(FC)
-
- iter = 0
- itmax == 0 && (itmax = m+n)
-
- # Initialize workspace.
- nr = 0 # Number of coefficients stored in Rₖ
- mem = length(V) # Memory
- ωₖ = zero(FC) # Auxiliary variable to store fₖₖ
- for i = 1 : mem
- V[i] .= zero(FC)
- U[i] .= zero(FC)
+ function gpmr($(def_args_gpmr...); memory :: Int=20, $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = GpmrSolver(A, b, memory)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ gpmr!(solver, $(args_gpmr...); $(kwargs_gpmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
- gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
- R .= zero(FC) # Upper triangular matrix Rₖ.
- zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᵀ(βe₁ + γe₂).
-
- # Warm-start
- # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ
- # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ
- # Compute C(b - AΔy) - λΔx
- warm_start && mul!(b₀, A, Δy)
- warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀)
- !CisI && mulorldiv!(q, C, b₀, ldiv)
- !CisI && (b₀ = q)
- warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀)
-
- # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ
- # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₘ
- # Compute D(c - BΔx) - μΔy
- warm_start && mul!(c₀, B, Δx)
- warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀)
- !DisI && mulorldiv!(p, D, c₀, ldiv)
- !DisI && (c₀ = p)
- warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀)
-
- # Initialize the orthogonal Hessenberg reduction process.
- # βv₁ = Cb
- β = @knrm2(m, b₀)
- β ≠ 0 || error("b must be nonzero")
- @. V[1] = b₀ / β
-
- # γu₁ = Dc
- γ = @knrm2(n, c₀)
- γ ≠ 0 || error("c must be nonzero")
- @. U[1] = c₀ / γ
-
- # Compute ‖r₀‖² = γ² + β²
- rNorm = sqrt(γ^2 + β^2)
- history && push!(rNorms, rNorm)
- ε = atol + rtol * rNorm
-
- # Initialize t̄₀
- zt[1] = β
- zt[2] = γ
-
- (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗")
-
- # Tolerance for breakdown detection.
- btol = eps(T)^(3/4)
-
- # Stopping criterion.
- breakdown = false
- inconsistent = false
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || breakdown || user_requested_exit)
-
- # Update iteration index.
- iter = iter + 1
- k = iter
- nr₂ₖ₋₁ = nr # Position of the column 2k-1 in Rₖ.
- nr₂ₖ = nr + 2k-1 # Position of the column 2k in Rₖ.
-
- # Update workspace if more storage is required
- if iter > mem
- for i = 1 : 4k-1
- push!(R, zero(FC))
- end
- for i = 1 : 4
- push!(gs, zero(FC))
- push!(gc, zero(T))
- end
+
+ function gpmr!(solver :: GpmrSolver{T,FC,S}, $(def_args_gpmr...); $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ s, t = size(B)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == t || error("Inconsistent problem size")
+ s == n || error("Inconsistent problem size")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "GPMR: system of %d equations in %d variables\n", m+n, m+n)
+
+ # Check C = E = Iₘ and D = F = Iₙ
+ CisI = (C === I)
+ DisI = (D === I)
+ EisI = (E === I)
+ FisI = (F === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ eltype(B) == FC || @warn "eltype(B) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Determine λ and μ associated to generalized saddle point systems.
+ gsp && (λ = one(FC) ; μ = zero(FC))
+
+ warm_start = solver.warm_start
+ warm_start && (λ ≠ 0) && !EisI && error("Warm-start with right preconditioners is not supported.")
+ warm_start && (μ ≠ 0) && !FisI && error("Warm-start with right preconditioners is not supported.")
+
+ # Set up workspace.
+ allocate_if(!CisI, solver, :q , S, m)
+ allocate_if(!DisI, solver, :p , S, n)
+ allocate_if(!EisI, solver, :wB, S, m)
+ allocate_if(!FisI, solver, :wA, S, n)
+ wA, wB, dA, dB, Δx, Δy = solver.wA, solver.wB, solver.dA, solver.dB, solver.Δx, solver.Δy
+ x, y, V, U, gs, gc = solver.x, solver.y, solver.V, solver.U, solver.gs, solver.gc
+ zt, R, stats = solver.zt, solver.R, solver.stats
+ rNorms = stats.residuals
+ reset!(stats)
+ b₀ = warm_start ? dA : b
+ c₀ = warm_start ? dB : c
+ q = CisI ? dA : solver.q
+ p = DisI ? dB : solver.p
+
+ # Initial solutions x₀ and y₀.
+ x .= zero(FC)
+ y .= zero(FC)
+
+ iter = 0
+ itmax == 0 && (itmax = m+n)
+
+ # Initialize workspace.
+ nr = 0 # Number of coefficients stored in Rₖ
+ mem = length(V) # Memory
+ ωₖ = zero(FC) # Auxiliary variable to store fₖₖ
+ for i = 1 : mem
+ V[i] .= zero(FC)
+ U[i] .= zero(FC)
end
+ gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
+ gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
+ R .= zero(FC) # Upper triangular matrix Rₖ.
+ zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂).
+
+ # Warm-start
+ # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ
+ # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ
+ # Compute C(b - AΔy) - λΔx
+ warm_start && mul!(b₀, A, Δy)
+ warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀)
+ !CisI && mulorldiv!(q, C, b₀, ldiv)
+ !CisI && (b₀ = q)
+ warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀)
+
+ # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ
+    # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₙ
+ # Compute D(c - BΔx) - μΔy
+ warm_start && mul!(c₀, B, Δx)
+ warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀)
+ !DisI && mulorldiv!(p, D, c₀, ldiv)
+ !DisI && (c₀ = p)
+ warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀)
+
+ # Initialize the orthogonal Hessenberg reduction process.
+ # βv₁ = Cb
+ β = @knrm2(m, b₀)
+ β ≠ 0 || error("b must be nonzero")
+ @. V[1] = b₀ / β
+
+ # γu₁ = Dc
+ γ = @knrm2(n, c₀)
+ γ ≠ 0 || error("c must be nonzero")
+ @. U[1] = c₀ / γ
+
+ # Compute ‖r₀‖² = γ² + β²
+ rNorm = sqrt(γ^2 + β^2)
+ history && push!(rNorms, rNorm)
+ ε = atol + rtol * rNorm
- # Continue the orthogonal Hessenberg reduction process.
- # CAFUₖ = VₖHₖ + hₖ₊₁.ₖ * vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Hₖ₊₁.ₖ
- # DBEVₖ = UₖFₖ + fₖ₊₁.ₖ * uₖ₊₁(eₖ)ᵀ = Uₖ₊₁Fₖ₊₁.ₖ
- wA = FisI ? U[iter] : solver.wA
- wB = EisI ? V[iter] : solver.wB
- FisI || mulorldiv!(wA, F, U[iter], ldiv) # wA = Fuₖ
- EisI || mulorldiv!(wB, E, V[iter], ldiv) # wB = Evₖ
- mul!(dA, A, wA) # dA = AFuₖ
- mul!(dB, B, wB) # dB = BEvₖ
- CisI || mulorldiv!(q, C, dA, ldiv) # q = CAFuₖ
- DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ
+ # Initialize t̄₀
+ zt[1] = β
+ zt[2] = γ
- for i = 1 : iter
- hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = vᵢAuₖ
- fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = uᵢBvₖ
- @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ
- @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ
- R[nr₂ₖ + 2i-1] = hᵢₖ
- (i < iter) ? R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ
- end
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time))
- # Reorthogonalization of the Krylov basis.
- if reorthogonalization
- for i = 1 : iter
- Htmp = @kdot(m, V[i], q) # hₜₘₚ = qᵀvᵢ
- Ftmp = @kdot(n, U[i], p) # fₜₘₚ = pᵀuᵢ
- @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ
- @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ
- R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ
- (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ
- end
- end
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
- Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂
- Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂
-
- # Add regularization terms.
- R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ
- R[nr₂ₖ + 2k] = μ # S₂ₖ.₂ₖ = μ
-
- # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ]
- # [0 u₁ ••• 0 uₖ]
- #
- # rₖ = [ b ] - [ λI A ] [ xₖ ] = [ b ] - [ λI A ] Wₖzₖ
- # [ c ] [ B μI ] [ yₖ ] [ c ] [ B μI ]
- #
- # block-Arnoldi formulation : [ λI A ] Wₖ = Wₖ₊₁Sₖ₊₁.ₖ
- # [ B μI ]
- #
- # GPMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - βe₁ - γe₂ ‖
- #
- # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ].
- # [ Oᵀ ]
- #
- # Apply previous givens reflections when k ≥ 2
- # [ 1 ][ 1 ][ c₂.ᵢ s₂.ᵢ ][ c₁.ᵢ s₁.ᵢ ] [ r̄₂ᵢ₋₁.₂ₖ₋₁ r̄₂ᵢ₋₁.₂ₖ ] [ r₂ᵢ₋₁.₂ₖ₋₁ r₂ᵢ₋₁.₂ₖ ]
- # [ c₄.ᵢ s₄.ᵢ ][ c₃.ᵢ s₃.ᵢ ][ s̄₂.ᵢ -c₂.ᵢ ][ 1 ] [ r̄₂ᵢ.₂ₖ₋₁ r̄₂ᵢ.₂ₖ ] = [ r₂ᵢ.₂ₖ₋₁ r₂ᵢ.₂ₖ ]
- # [ s̄₄.ᵢ -c₄.ᵢ ][ 1 ][ 1 ][ 1 ] [ ρ hᵢ₊₁.ₖ ] [ r̄₂ᵢ₊₁.₂ₖ₋₁ r̄₂ᵢ₊₁.₂ₖ ]
- # [ 1 ][ s̄₃.ᵢ -c₃.ᵢ ][ 1 ][ s̄₁.ᵢ -c₁.ᵢ ] [ fᵢ₊₁.ₖ δ ] [ r̄₂ᵢ₊₂.₂ₖ₋₁ r̄₂ᵢ₊₂.₂ₖ ]
- #
- # r̄₁.₂ₖ₋₁ = 0, r̄₁.₂ₖ = h₁.ₖ, r̄₂.₂ₖ₋₁ = f₁.ₖ and r̄₂.₂ₖ = 0.
- # (ρ, δ) = (λ, μ) if i == k-1, (ρ, δ) = (0, 0) otherwise.
- for i = 1 : iter-1
- for nrcol ∈ (nr₂ₖ₋₁, nr₂ₖ)
- flag = (i == iter-1 && nrcol == nr₂ₖ₋₁)
- αₖ = flag ? ωₖ : R[nrcol + 2i+2]
-
- c₁ᵢ = gc[4i-3]
- s₁ᵢ = gs[4i-3]
- rtmp = c₁ᵢ * R[nrcol + 2i-1] + s₁ᵢ * αₖ
- αₖ = conj(s₁ᵢ) * R[nrcol + 2i-1] - c₁ᵢ * αₖ
- R[nrcol + 2i-1] = rtmp
-
- c₂ᵢ = gc[4i-2]
- s₂ᵢ = gs[4i-2]
- rtmp = c₂ᵢ * R[nrcol + 2i-1] + s₂ᵢ * R[nrcol + 2i]
- R[nrcol + 2i] = conj(s₂ᵢ) * R[nrcol + 2i-1] - c₂ᵢ * R[nrcol + 2i]
- R[nrcol + 2i-1] = rtmp
-
- c₃ᵢ = gc[4i-1]
- s₃ᵢ = gs[4i-1]
- rtmp = c₃ᵢ * R[nrcol + 2i] + s₃ᵢ * αₖ
- αₖ = conj(s₃ᵢ) * R[nrcol + 2i] - c₃ᵢ * αₖ
- R[nrcol + 2i] = rtmp
-
- c₄ᵢ = gc[4i]
- s₄ᵢ = gs[4i]
- rtmp = c₄ᵢ * R[nrcol + 2i] + s₄ᵢ * R[nrcol + 2i+1]
- R[nrcol + 2i+1] = conj(s₄ᵢ) * R[nrcol + 2i] - c₄ᵢ * R[nrcol + 2i+1]
- R[nrcol + 2i] = rtmp
-
- flag ? ωₖ = αₖ : R[nrcol + 2i+2] = αₖ
+ # Stopping criterion.
+ breakdown = false
+ inconsistent = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+
+ # Update iteration index.
+ iter = iter + 1
+ k = iter
+ nr₂ₖ₋₁ = nr # Position of the column 2k-1 in Rₖ.
+ nr₂ₖ = nr + 2k-1 # Position of the column 2k in Rₖ.
+
+ # Update workspace if more storage is required
+ if iter > mem
+ for i = 1 : 4k-1
+ push!(R, zero(FC))
+ end
+ for i = 1 : 4
+ push!(gs, zero(FC))
+ push!(gc, zero(T))
+ end
end
- end
- # Compute and apply current givens reflections
- # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ r̄₂ₖ₋₁.₂ₖ₋₁ r̄₂ₖ₋₁.₂ₖ ] [ r₂ₖ₋₁.₂ₖ₋₁ r₂ₖ₋₁.₂ₖ ]
- # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ r̄₂ₖ.₂ₖ₋₁ r̄₂ₖ.₂ₖ ] = [ r₂ₖ.₂ₖ ]
- # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ hₖ₊₁.ₖ ] [ ]
- # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ fₖ₊₁.ₖ ] [ ]
- (c₁ₖ, s₁ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], Faux) # annihilate fₖ₊₁.ₖ
- θₖ = conj(s₁ₖ) * R[nr₂ₖ + 2k-1]
- R[nr₂ₖ + 2k-1] = c₁ₖ * R[nr₂ₖ + 2k-1]
-
- (c₂ₖ, s₂ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], ωₖ) # annihilate ωₖ = r̄₂ₖ.₂ₖ₋₁
- rtmp = c₂ₖ * R[nr₂ₖ + 2k-1] + s₂ₖ * R[nr₂ₖ + 2k]
- R[nr₂ₖ + 2k] = conj(s₂ₖ) * R[nr₂ₖ + 2k-1] - c₂ₖ * R[nr₂ₖ + 2k]
- R[nr₂ₖ + 2k-1] = rtmp
-
- (c₃ₖ, s₃ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], θₖ) # annihilate Θₖ = r̄₂ₖ₊₂.₂ₖ
-
- (c₄ₖ, s₄ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], Haux) # annihilate hₖ₊₁.ₖ
-
- # Update t̄ₖ = (τ₁, ..., τ₂ₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂).
- #
- # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ τbar₂ₖ₋₁ ] [ τ₂ₖ₋₁ ]
- # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ τbar₂ₖ ] = [ τ₂ₖ ]
- # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ ] [ τbar₂ₖ₊₁ ]
- # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ ] [ τbar₂ₖ₊₂ ]
- τbar₂ₖ₊₂ = conj(s₁ₖ) * zt[2k-1]
- zt[2k-1] = c₁ₖ * zt[2k-1]
-
- τtmp = c₂ₖ * zt[2k-1] + s₂ₖ * zt[2k]
- zt[2k] = conj(s₂ₖ) * zt[2k-1] - c₂ₖ * zt[2k]
- zt[2k-1] = τtmp
-
- τtmp = c₃ₖ * zt[2k] + s₃ₖ * τbar₂ₖ₊₂
- τbar₂ₖ₊₂ = conj(s₃ₖ) * zt[2k] - c₃ₖ * τbar₂ₖ₊₂
- zt[2k] = τtmp
-
- τbar₂ₖ₊₁ = conj(s₄ₖ) * zt[2k]
- zt[2k] = c₄ₖ * zt[2k]
-
- # Update gc and gs vectors
- gc[4k-3], gc[4k-2], gc[4k-1], gc[4k] = c₁ₖ, c₂ₖ, c₃ₖ, c₄ₖ
- gs[4k-3], gs[4k-2], gs[4k-1], gs[4k] = s₁ₖ, s₂ₖ, s₃ₖ, s₄ₖ
-
- # Compute ‖rₖ‖² = |τbar₂ₖ₊₁|² + |τbar₂ₖ₊₂|²
- rNorm = sqrt(abs2(τbar₂ₖ₊₁) + abs2(τbar₂ₖ₊₂))
- history && push!(rNorms, rNorm)
+ # Continue the orthogonal Hessenberg reduction process.
+ # CAFUₖ = VₖHₖ + hₖ₊₁.ₖ * vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Hₖ₊₁.ₖ
+ # DBEVₖ = UₖFₖ + fₖ₊₁.ₖ * uₖ₊₁(eₖ)ᵀ = Uₖ₊₁Fₖ₊₁.ₖ
+ wA = FisI ? U[iter] : solver.wA
+ wB = EisI ? V[iter] : solver.wB
+ FisI || mulorldiv!(wA, F, U[iter], ldiv) # wA = Fuₖ
+ EisI || mulorldiv!(wB, E, V[iter], ldiv) # wB = Evₖ
+ mul!(dA, A, wA) # dA = AFuₖ
+ mul!(dB, B, wB) # dB = BEvₖ
+ CisI || mulorldiv!(q, C, dA, ldiv) # q = CAFuₖ
+ DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ
- # Update the number of coefficients in Rₖ.
- nr = nr + 4k-1
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ for i = 1 : iter
+ hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq
+ fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp
+ @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ
+ @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ
+ R[nr₂ₖ + 2i-1] = hᵢₖ
+ (i < iter) ? R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ
+ end
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- breakdown = Faux ≤ btol && Haux ≤ btol
- solved = resid_decrease_lim || resid_decrease_mach
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux)
-
- # Compute vₖ₊₁ and uₖ₊₁
- if !(solved || tired || breakdown || user_requested_exit)
- if iter ≥ mem
- push!(V, S(undef, m))
- push!(U, S(undef, n))
- push!(zt, zero(FC), zero(FC))
+ # Reorthogonalization of the Krylov basis.
+ if reorthogonalization
+ for i = 1 : iter
+ Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq
+ Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp
+ @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ
+ @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ
+ R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ
+ (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ
+ end
end
- # hₖ₊₁.ₖ ≠ 0
- if Haux > btol
- @. V[k+1] = q / Haux # hₖ₊₁.ₖvₖ₊₁ = q
- else
- # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ}
- V[k+1] .= zero(FC) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ}
+ Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂
+ Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂
+
+ # Add regularization terms.
+ R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ
+ R[nr₂ₖ + 2k] = μ # S₂ₖ.₂ₖ = μ
+
+ # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ]
+ # [0 u₁ ••• 0 uₖ]
+ #
+ # rₖ = [ b ] - [ λI A ] [ xₖ ] = [ b ] - [ λI A ] Wₖzₖ
+ # [ c ] [ B μI ] [ yₖ ] [ c ] [ B μI ]
+ #
+ # block-Arnoldi formulation : [ λI A ] Wₖ = Wₖ₊₁Sₖ₊₁.ₖ
+ # [ B μI ]
+ #
+ # GPMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - βe₁ - γe₂ ‖
+ #
+ # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ].
+ # [ Oᵀ ]
+ #
+      # Apply previous Givens reflections when k ≥ 2
+ # [ 1 ][ 1 ][ c₂.ᵢ s₂.ᵢ ][ c₁.ᵢ s₁.ᵢ ] [ r̄₂ᵢ₋₁.₂ₖ₋₁ r̄₂ᵢ₋₁.₂ₖ ] [ r₂ᵢ₋₁.₂ₖ₋₁ r₂ᵢ₋₁.₂ₖ ]
+ # [ c₄.ᵢ s₄.ᵢ ][ c₃.ᵢ s₃.ᵢ ][ s̄₂.ᵢ -c₂.ᵢ ][ 1 ] [ r̄₂ᵢ.₂ₖ₋₁ r̄₂ᵢ.₂ₖ ] = [ r₂ᵢ.₂ₖ₋₁ r₂ᵢ.₂ₖ ]
+ # [ s̄₄.ᵢ -c₄.ᵢ ][ 1 ][ 1 ][ 1 ] [ ρ hᵢ₊₁.ₖ ] [ r̄₂ᵢ₊₁.₂ₖ₋₁ r̄₂ᵢ₊₁.₂ₖ ]
+ # [ 1 ][ s̄₃.ᵢ -c₃.ᵢ ][ 1 ][ s̄₁.ᵢ -c₁.ᵢ ] [ fᵢ₊₁.ₖ δ ] [ r̄₂ᵢ₊₂.₂ₖ₋₁ r̄₂ᵢ₊₂.₂ₖ ]
+ #
+ # r̄₁.₂ₖ₋₁ = 0, r̄₁.₂ₖ = h₁.ₖ, r̄₂.₂ₖ₋₁ = f₁.ₖ and r̄₂.₂ₖ = 0.
+ # (ρ, δ) = (λ, μ) if i == k-1, (ρ, δ) = (0, 0) otherwise.
+ for i = 1 : iter-1
+ for nrcol ∈ (nr₂ₖ₋₁, nr₂ₖ)
+ flag = (i == iter-1 && nrcol == nr₂ₖ₋₁)
+ αₖ = flag ? ωₖ : R[nrcol + 2i+2]
+
+ c₁ᵢ = gc[4i-3]
+ s₁ᵢ = gs[4i-3]
+ rtmp = c₁ᵢ * R[nrcol + 2i-1] + s₁ᵢ * αₖ
+ αₖ = conj(s₁ᵢ) * R[nrcol + 2i-1] - c₁ᵢ * αₖ
+ R[nrcol + 2i-1] = rtmp
+
+ c₂ᵢ = gc[4i-2]
+ s₂ᵢ = gs[4i-2]
+ rtmp = c₂ᵢ * R[nrcol + 2i-1] + s₂ᵢ * R[nrcol + 2i]
+ R[nrcol + 2i] = conj(s₂ᵢ) * R[nrcol + 2i-1] - c₂ᵢ * R[nrcol + 2i]
+ R[nrcol + 2i-1] = rtmp
+
+ c₃ᵢ = gc[4i-1]
+ s₃ᵢ = gs[4i-1]
+ rtmp = c₃ᵢ * R[nrcol + 2i] + s₃ᵢ * αₖ
+ αₖ = conj(s₃ᵢ) * R[nrcol + 2i] - c₃ᵢ * αₖ
+ R[nrcol + 2i] = rtmp
+
+ c₄ᵢ = gc[4i]
+ s₄ᵢ = gs[4i]
+ rtmp = c₄ᵢ * R[nrcol + 2i] + s₄ᵢ * R[nrcol + 2i+1]
+ R[nrcol + 2i+1] = conj(s₄ᵢ) * R[nrcol + 2i] - c₄ᵢ * R[nrcol + 2i+1]
+ R[nrcol + 2i] = rtmp
+
+ flag ? ωₖ = αₖ : R[nrcol + 2i+2] = αₖ
+ end
end
- # fₖ₊₁.ₖ ≠ 0
- if Faux > btol
- @. U[k+1] = p / Faux # fₖ₊₁.ₖuₖ₊₁ = p
+      # Compute and apply current Givens reflections
+ # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ r̄₂ₖ₋₁.₂ₖ₋₁ r̄₂ₖ₋₁.₂ₖ ] [ r₂ₖ₋₁.₂ₖ₋₁ r₂ₖ₋₁.₂ₖ ]
+ # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ r̄₂ₖ.₂ₖ₋₁ r̄₂ₖ.₂ₖ ] = [ r₂ₖ.₂ₖ ]
+ # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ hₖ₊₁.ₖ ] [ ]
+ # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ fₖ₊₁.ₖ ] [ ]
+ (c₁ₖ, s₁ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], Faux) # annihilate fₖ₊₁.ₖ
+ θₖ = conj(s₁ₖ) * R[nr₂ₖ + 2k-1]
+ R[nr₂ₖ + 2k-1] = c₁ₖ * R[nr₂ₖ + 2k-1]
+
+ (c₂ₖ, s₂ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], ωₖ) # annihilate ωₖ = r̄₂ₖ.₂ₖ₋₁
+ rtmp = c₂ₖ * R[nr₂ₖ + 2k-1] + s₂ₖ * R[nr₂ₖ + 2k]
+ R[nr₂ₖ + 2k] = conj(s₂ₖ) * R[nr₂ₖ + 2k-1] - c₂ₖ * R[nr₂ₖ + 2k]
+ R[nr₂ₖ + 2k-1] = rtmp
+
+      (c₃ₖ, s₃ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], θₖ)    # annihilate θₖ = r̄₂ₖ₊₂.₂ₖ
+
+ (c₄ₖ, s₄ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], Haux) # annihilate hₖ₊₁.ₖ
+
+ # Update t̄ₖ = (τ₁, ..., τ₂ₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂).
+ #
+ # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ τbar₂ₖ₋₁ ] [ τ₂ₖ₋₁ ]
+ # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ τbar₂ₖ ] = [ τ₂ₖ ]
+ # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ ] [ τbar₂ₖ₊₁ ]
+ # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ ] [ τbar₂ₖ₊₂ ]
+ τbar₂ₖ₊₂ = conj(s₁ₖ) * zt[2k-1]
+ zt[2k-1] = c₁ₖ * zt[2k-1]
+
+ τtmp = c₂ₖ * zt[2k-1] + s₂ₖ * zt[2k]
+ zt[2k] = conj(s₂ₖ) * zt[2k-1] - c₂ₖ * zt[2k]
+ zt[2k-1] = τtmp
+
+ τtmp = c₃ₖ * zt[2k] + s₃ₖ * τbar₂ₖ₊₂
+ τbar₂ₖ₊₂ = conj(s₃ₖ) * zt[2k] - c₃ₖ * τbar₂ₖ₊₂
+ zt[2k] = τtmp
+
+ τbar₂ₖ₊₁ = conj(s₄ₖ) * zt[2k]
+ zt[2k] = c₄ₖ * zt[2k]
+
+ # Update gc and gs vectors
+ gc[4k-3], gc[4k-2], gc[4k-1], gc[4k] = c₁ₖ, c₂ₖ, c₃ₖ, c₄ₖ
+ gs[4k-3], gs[4k-2], gs[4k-1], gs[4k] = s₁ₖ, s₂ₖ, s₃ₖ, s₄ₖ
+
+ # Compute ‖rₖ‖² = |τbar₂ₖ₊₁|² + |τbar₂ₖ₊₂|²
+ rNorm = sqrt(abs2(τbar₂ₖ₊₁) + abs2(τbar₂ₖ₊₂))
+ history && push!(rNorms, rNorm)
+
+ # Update the number of coefficients in Rₖ.
+ nr = nr + 4k-1
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = Faux ≤ btol && Haux ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, Haux, Faux, ktimer(start_time))
+
+ # Compute vₖ₊₁ and uₖ₊₁
+ if !(solved || tired || breakdown || user_requested_exit || overtimed)
+ if iter ≥ mem
+ push!(V, S(undef, m))
+ push!(U, S(undef, n))
+ push!(zt, zero(FC), zero(FC))
+ end
+
+ # hₖ₊₁.ₖ ≠ 0
+ if Haux > btol
+ @. V[k+1] = q / Haux # hₖ₊₁.ₖvₖ₊₁ = q
+ else
+ # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ}
+ V[k+1] .= zero(FC) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ}
+ end
+
+ # fₖ₊₁.ₖ ≠ 0
+ if Faux > btol
+ @. U[k+1] = p / Faux # fₖ₊₁.ₖuₖ₊₁ = p
+ else
+ # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ}
+ U[k+1] .= zero(FC) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ}
+ end
+
+ zt[2k+1] = τbar₂ₖ₊₁
+ zt[2k+2] = τbar₂ₖ₊₂
+ end
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution.
+ for i = 2iter : -1 : 1
+ pos = nr + i - 2iter # position of rᵢ.ₖ
+ for j = 2iter : -1 : i+1
+ zt[i] = zt[i] - R[pos] * zt[j] # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ btol
+ zt[i] = zero(FC)
+ inconsistent = true
else
- # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ}
- U[k+1] .= zero(FC) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ}
+ zt[i] = zt[i] / R[pos] # ζᵢ ← ζᵢ / rᵢ.ᵢ
end
+ end
- zt[2k+1] = τbar₂ₖ₊₁
- zt[2k+2] = τbar₂ₖ₊₂
+ # Compute xₖ and yₖ
+ for i = 1 : iter
+ @kaxpy!(m, zt[2i-1], V[i], x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ
+        @kaxpy!(n, zt[2i] , U[i], y)  # yₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ
end
- end
- (verbose > 0) && @printf("\n")
-
- # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution.
- for i = 2iter : -1 : 1
- pos = nr + i - 2iter # position of rᵢ.ₖ
- for j = 2iter : -1 : i+1
- zt[i] = zt[i] - R[pos] * zt[j] # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ
- pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ if !EisI
+ wB .= x
+ mulorldiv!(x, E, wB, ldiv)
end
- # Rₖ can be singular if the system is inconsistent
- if abs(R[pos]) ≤ btol
- zt[i] = zero(FC)
- inconsistent = true
- else
- zt[i] = zt[i] / R[pos] # ζᵢ ← ζᵢ / rᵢ.ᵢ
+ if !FisI
+ wA .= y
+ mulorldiv!(y, F, wA, ldiv)
end
+ warm_start && @kaxpy!(m, one(FC), Δx, x)
+ warm_start && @kaxpy!(n, one(FC), Δy, y)
+ solver.warm_start = false
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ inconsistent && (status = "found approximate least-squares solution")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
-
- # Compute xₖ and yₖ
- for i = 1 : iter
- @kaxpy!(m, zt[2i-1], V[i], x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ
- @kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ
- end
- if !EisI
- wB .= x
- mulorldiv!(x, E, wB, ldiv)
- end
- if !FisI
- wA .= y
- mulorldiv!(y, F, wA, ldiv)
- end
- warm_start && @kaxpy!(m, one(FC), Δx, x)
- warm_start && @kaxpy!(n, one(FC), Δy, y)
- solver.warm_start = false
-
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- inconsistent && (status = "found approximate least-squares solution")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
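
The backward-substitution loop above solves Rₖzₖ = tₖ with the upper triangular Rₖ packed column by column in a flat vector. A self-contained sketch of the same indexing scheme, with hypothetical names and a dense packed factor (not part of this diff):

    # Solve R z = t, where the p×p upper triangular R is packed column by
    # column in a flat vector: column j contributes r₁.ⱼ, ..., rⱼ.ⱼ.
    function packed_backsolve(Rpacked::Vector{Float64}, t::Vector{Float64})
      p = length(t)
      z = copy(t)
      nr = p * (p + 1) ÷ 2               # number of stored coefficients
      for i = p : -1 : 1
        pos = nr + i - p                 # position of rᵢ.ₚ in the last column
        for j = p : -1 : i+1
          z[i] -= Rpacked[pos] * z[j]    # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ
          pos -= j - 1                   # position of rᵢ.ⱼ₋₁
        end
        z[i] /= Rpacked[pos]             # ζᵢ ← ζᵢ / rᵢ.ᵢ
      end
      return z
    end

    # R = [2 1; 0 3] packs to [2, 1, 3]; R * [1, 1] = [3, 3].
    packed_backsolve([2.0, 1.0, 3.0], [3.0, 3.0])  # ≈ [1.0, 1.0]

The singular-Rₖ branch in the diff (zeroing ζᵢ when |rᵢ.ᵢ| ≤ btol) is what lets GPMR return an approximate least-squares solution on inconsistent systems.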
diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl
new file mode 100644
index 000000000..5c9cad24d
--- /dev/null
+++ b/src/krylov_processes.jl
@@ -0,0 +1,439 @@
+export hermitian_lanczos, nonhermitian_lanczos, arnoldi, golub_kahan, saunders_simon_yip, montoison_orban
+
+"""
+ V, T = hermitian_lanczos(A, b, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n;
+* `k`: the number of iterations of the Hermitian Lanczos process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `T`: a sparse (k+1) × k tridiagonal matrix.
+
+#### Reference
+
+* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 255--282, 1950.
+"""
+function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ R = real(FC)
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+1)
+ rowval = zeros(Int, 3k-1)
+ nzval = zeros(R, 3k-1)
+
+ colptr[1] = 1
+ rowval[1] = 1
+ rowval[2] = 2
+ for i = 1:k
+ colptr[i+1] = 3i
+ if i ≥ 2
+ pos = colptr[i]
+ rowval[pos] = i-1
+ rowval[pos+1] = i
+ rowval[pos+2] = i+1
+ end
+ end
+
+ V = M(undef, n, k+1)
+ T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval)
+
+ pαᵢ = 1 # Position of αᵢ in the vector `nzval`
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ if i == 1
+ βᵢ = @knrm2(n, b)
+ vᵢ .= b ./ βᵢ
+ end
+ mul!(q, A, vᵢ)
+ αᵢ = @kdotr(n, vᵢ, q)
+ nzval[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ
+ @kaxpy!(n, -αᵢ, vᵢ, q)
+ if i ≥ 2
+ vᵢ₋₁ = view(V,:,i-1)
+ βᵢ = nzval[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁
+ nzval[pαᵢ-1] = βᵢ # Tᵢ₋₁.ᵢ = βᵢ
+ @kaxpy!(n, -βᵢ, vᵢ₋₁, q)
+ end
+ βᵢ₊₁ = @knrm2(n, q)
+ nzval[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁
+ vᵢ₊₁ .= q ./ βᵢ₊₁
+ pαᵢ = pαᵢ + 3
+ end
+ return V, T
+end
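
A minimal usage sketch for the process above (assuming the exports of this new file; the random test matrix and residual check are illustrative only):

    using LinearAlgebra, SparseArrays, Krylov

    n, k = 100, 20
    A = sprandn(n, n, 0.05)
    A = A + A'                          # Hermitian test matrix
    b = rand(n)
    V, T = hermitian_lanczos(A, b, k)   # V is n × (k+1), T is (k+1) × k
    norm(A * V[:, 1:k] - V * T)         # ≈ 0: A Vₖ = Vₖ₊₁ Tₖ₊₁.ₖ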
+
+"""
+ V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a square matrix of dimension n;
+* `b`: a vector of length n;
+* `c`: a vector of length n;
+* `k`: the number of iterations of the non-Hermitian Lanczos process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `T`: a sparse (k+1) × k tridiagonal matrix;
+* `U`: a dense n × (k+1) matrix;
+* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix.
+
+#### Reference
+
+* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 255--282, 1950.
+"""
+function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ Aᴴ = A'
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+1)
+ rowval = zeros(Int, 3k-1)
+ nzval_T = zeros(FC, 3k-1)
+ nzval_Tᴴ = zeros(FC, 3k-1)
+
+ colptr[1] = 1
+ rowval[1] = 1
+ rowval[2] = 2
+ for i = 1:k
+ colptr[i+1] = 3i
+ if i ≥ 2
+ pos = colptr[i]
+ rowval[pos] = i-1
+ rowval[pos+1] = i
+ rowval[pos+2] = i+1
+ end
+ end
+
+ V = M(undef, n, k+1)
+ U = M(undef, n, k+1)
+ T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T)
+ Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ)
+
+ pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ`
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ uᵢ = view(U,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ uᵢ₊₁ = p = view(U,:,i+1)
+ if i == 1
+ cᴴb = @kdot(n, c, b)
+ βᵢ = √(abs(cᴴb))
+ γᵢ = cᴴb / βᵢ
+ vᵢ .= b ./ βᵢ
+ uᵢ .= c ./ conj(γᵢ)
+ end
+ mul!(q, A , vᵢ)
+ mul!(p, Aᴴ, uᵢ)
+ if i ≥ 2
+ vᵢ₋₁ = view(V,:,i-1)
+ uᵢ₋₁ = view(U,:,i-1)
+ βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁
+ γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ
+ @kaxpy!(n, - γᵢ , vᵢ₋₁, q)
+ @kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p)
+ end
+ αᵢ = @kdot(n, uᵢ, q)
+ nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ
+ nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ
+ @kaxpy!(m, - αᵢ , vᵢ, q)
+ @kaxpy!(n, -conj(αᵢ), uᵢ, p)
+ pᴴq = @kdot(n, p, q)
+ βᵢ₊₁ = √(abs(pᴴq))
+ γᵢ₊₁ = pᴴq / βᵢ₊₁
+ vᵢ₊₁ .= q ./ βᵢ₊₁
+ uᵢ₊₁ .= p ./ conj(γᵢ₊₁)
+ nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁
+ nzval_Tᴴ[pαᵢ+1] = conj(γᵢ₊₁) # Tᴴᵢ₊₁.ᵢ = γ̄ᵢ₊₁
+ if i ≤ k-1
+ nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁
+ nzval_Tᴴ[pαᵢ+2] = conj(βᵢ₊₁) # Tᴴᵢ.ᵢ₊₁ = β̄ᵢ₊₁
+ end
+ pαᵢ = pαᵢ + 3
+ end
+ return V, T, U, Tᴴ
+end
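
A hedged sketch of the two-sided recurrence above (illustrative data; the residual identities follow from the recurrences coded in the loop):

    using LinearAlgebra, SparseArrays, Krylov

    n, k = 100, 20
    A = sprandn(n, n, 0.05) + I         # square, non-Hermitian
    b, c = rand(n), rand(n)
    V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
    norm(A * V[:, 1:k] - V * T)         # ≈ 0: A Vₖ = Vₖ₊₁ Tₖ₊₁.ₖ
    norm(A' * U[:, 1:k] - U * Tᴴ)       # ≈ 0: Aᴴ Uₖ = Uₖ₊₁ (Tᴴ)ₖ₊₁.ₖ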
+
+"""
+ V, H = arnoldi(A, b, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a square matrix of dimension n;
+* `b`: a vector of length n;
+* `k`: the number of iterations of the Arnoldi process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `H`: a dense (k+1) × k upper Hessenberg matrix.
+
+#### Reference
+
+* W. E. Arnoldi, [*The principle of minimized iterations in the solution of the matrix eigenvalue problem*](https://doi.org/10.1090/qam/42792), Quarterly of Applied Mathematics, 9, pp. 17--29, 1951.
+"""
+function arnoldi(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ V = M(undef, n, k+1)
+ H = zeros(FC, k+1, k)
+
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ if i == 1
+ β = @knrm2(n, b)
+ vᵢ .= b ./ β
+ end
+ mul!(q, A, vᵢ)
+ for j = 1:i
+ vⱼ = view(V,:,j)
+ H[j,i] = @kdot(n, vⱼ, q)
+ @kaxpy!(n, -H[j,i], vⱼ, q)
+ end
+ H[i+1,i] = @knrm2(n, q)
+ vᵢ₊₁ .= q ./ H[i+1,i]
+ end
+ return V, H
+end
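
A minimal sketch of the Arnoldi relation built by the loop above (illustrative data):

    using LinearAlgebra, SparseArrays, Krylov

    n, k = 100, 20
    A = sprandn(n, n, 0.05) + I
    b = rand(n)
    V, H = arnoldi(A, b, k)             # V is n × (k+1), H is (k+1) × k
    norm(A * V[:, 1:k] - V * H)         # ≈ 0: A Vₖ = Vₖ₊₁ Hₖ₊₁.ₖ
    norm(V' * V - I)                    # ≈ 0: V has orthonormal columns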
+
+"""
+ V, U, L = golub_kahan(A, b, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `k`: the number of iterations of the Golub-Kahan process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `U`: a dense m × (k+1) matrix;
+* `L`: a sparse (k+1) × (k+1) lower bidiagonal matrix.
+
+#### References
+
+* G. H. Golub and W. Kahan, [*Calculating the Singular Values and Pseudo-Inverse of a Matrix*](https://doi.org/10.1137/0702016), SIAM Journal on Numerical Analysis, 2(2), pp. 205--224, 1965.
+* C. C. Paige, [*Bidiagonalization of Matrices and Solution of Linear Equations*](https://doi.org/10.1137/0711019), SIAM Journal on Numerical Analysis, 11(1), pp. 197--209, 1974.
+"""
+function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ R = real(FC)
+ Aᴴ = A'
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+2)
+ rowval = zeros(Int, 2k+1)
+ nzval = zeros(R, 2k+1)
+
+ colptr[1] = 1
+ for i = 1:k
+ pos = colptr[i]
+ colptr[i+1] = pos+2
+ rowval[pos] = i
+ rowval[pos+1] = i+1
+ end
+ rowval[2k+1] = k+1
+ colptr[k+2] = 2k+2
+
+ V = M(undef, n, k+1)
+ U = M(undef, m, k+1)
+ L = SparseMatrixCSC(k+1, k+1, colptr, rowval, nzval)
+
+ pαᵢ = 1 # Position of αᵢ in the vector `nzval`
+ for i = 1:k
+ uᵢ = view(U,:,i)
+ vᵢ = view(V,:,i)
+ uᵢ₊₁ = q = view(U,:,i+1)
+ vᵢ₊₁ = p = view(V,:,i+1)
+ if i == 1
+ wᵢ = vᵢ
+ βᵢ = @knrm2(m, b)
+ uᵢ .= b ./ βᵢ
+ mul!(wᵢ, Aᴴ, uᵢ)
+ αᵢ = @knrm2(n, wᵢ)
+ nzval[pαᵢ] = αᵢ # Lᵢ.ᵢ = αᵢ
+ vᵢ .= wᵢ ./ αᵢ
+ end
+ mul!(q, A, vᵢ)
+ αᵢ = nzval[pαᵢ] # αᵢ = Lᵢ.ᵢ
+ @kaxpy!(m, -αᵢ, uᵢ, q)
+ βᵢ₊₁ = @knrm2(m, q)
+ uᵢ₊₁ .= q ./ βᵢ₊₁
+ mul!(p, Aᴴ, uᵢ₊₁)
+ @kaxpy!(n, -βᵢ₊₁, vᵢ, p)
+ αᵢ₊₁ = @knrm2(n, p)
+ vᵢ₊₁ .= p ./ αᵢ₊₁
+ nzval[pαᵢ+1] = βᵢ₊₁ # Lᵢ₊₁.ᵢ = βᵢ₊₁
+ nzval[pαᵢ+2] = αᵢ₊₁ # Lᵢ₊₁.ᵢ₊₁ = αᵢ₊₁
+ pαᵢ = pαᵢ + 2
+ end
+ return V, U, L
+end
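
A hedged sketch of the bidiagonal relations produced above (rectangular test data is illustrative):

    using LinearAlgebra, SparseArrays, Krylov

    m, n, k = 120, 80, 20
    A = sprandn(m, n, 0.05)
    b = rand(m)
    V, U, L = golub_kahan(A, b, k)        # V is n × (k+1), U is m × (k+1)
    norm(A * V[:, 1:k] - U * L[:, 1:k])   # ≈ 0: A Vₖ = Uₖ₊₁ Lₖ₊₁.ₖ
    norm(A' * U - V * L')                 # ≈ 0: Aᴴ Uₖ₊₁ = Vₖ₊₁ Lₖ₊₁ᴴ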
+
+"""
+ V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n;
+* `k`: the number of iterations of the Saunders-Simon-Yip process.
+
+#### Output arguments
+
+* `V`: a dense m × (k+1) matrix;
+* `T`: a sparse (k+1) × k tridiagonal matrix;
+* `U`: a dense n × (k+1) matrix;
+* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix.
+
+#### Reference
+
+* M. A. Saunders, H. D. Simon, and E. L. Yip, [*Two Conjugate-Gradient-Type Methods for Unsymmetric Linear Equations*](https://doi.org/10.1137/0725052), SIAM Journal on Numerical Analysis, 25(4), pp. 927--940, 1988.
+"""
+function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ Aᴴ = A'
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+1)
+ rowval = zeros(Int, 3k-1)
+ nzval_T = zeros(FC, 3k-1)
+ nzval_Tᴴ = zeros(FC, 3k-1)
+
+ colptr[1] = 1
+ rowval[1] = 1
+ rowval[2] = 2
+ for i = 1:k
+ colptr[i+1] = 3i
+ if i ≥ 2
+ pos = colptr[i]
+ rowval[pos] = i-1
+ rowval[pos+1] = i
+ rowval[pos+2] = i+1
+ end
+ end
+
+ V = M(undef, m, k+1)
+ U = M(undef, n, k+1)
+ T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T)
+ Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ)
+
+ pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ`
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ uᵢ = view(U,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ uᵢ₊₁ = p = view(U,:,i+1)
+ if i == 1
+ β = @knrm2(m, b)
+ γ = @knrm2(n, c)
+ vᵢ .= b ./ β
+ uᵢ .= c ./ γ
+ end
+ mul!(q, A , uᵢ)
+ mul!(p, Aᴴ, vᵢ)
+ if i ≥ 2
+ vᵢ₋₁ = view(V,:,i-1)
+ uᵢ₋₁ = view(U,:,i-1)
+ βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁
+ γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ
+ @kaxpy!(m, -γᵢ, vᵢ₋₁, q)
+ @kaxpy!(n, -βᵢ, uᵢ₋₁, p)
+ end
+ αᵢ = @kdot(m, vᵢ, q)
+ nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ
+ nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ
+ @kaxpy!(m, - αᵢ , vᵢ, q)
+ @kaxpy!(n, -conj(αᵢ), uᵢ, p)
+ βᵢ₊₁ = @knrm2(m, q)
+ γᵢ₊₁ = @knrm2(n, p)
+ vᵢ₊₁ .= q ./ βᵢ₊₁
+ uᵢ₊₁ .= p ./ γᵢ₊₁
+ nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁
+ nzval_Tᴴ[pαᵢ+1] = γᵢ₊₁ # Tᴴᵢ₊₁.ᵢ = γᵢ₊₁
+ if i ≤ k-1
+ nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁
+ nzval_Tᴴ[pαᵢ+2] = βᵢ₊₁ # Tᴴᵢ.ᵢ₊₁ = βᵢ₊₁
+ end
+ pαᵢ = pαᵢ + 3
+ end
+ return V, T, U, Tᴴ
+end
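
A minimal sketch of the coupled relations built above (illustrative rectangular data):

    using LinearAlgebra, SparseArrays, Krylov

    m, n, k = 120, 80, 20
    A = sprandn(m, n, 0.05)
    b, c = rand(m), rand(n)
    V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
    norm(A * U[:, 1:k] - V * T)         # ≈ 0: A Uₖ = Vₖ₊₁ Tₖ₊₁.ₖ
    norm(A' * V[:, 1:k] - U * Tᴴ)       # ≈ 0: Aᴴ Vₖ = Uₖ₊₁ (Tᴴ)ₖ₊₁.ₖ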
+
+"""
+ V, H, U, F = montoison_orban(A, B, b, c, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `B`: a linear operator that models a matrix of dimension n × m;
+* `b`: a vector of length m;
+* `c`: a vector of length n;
+* `k`: the number of iterations of the Montoison-Orban process.
+
+#### Output arguments
+
+* `V`: a dense m × (k+1) matrix;
+* `H`: a dense (k+1) × k upper Hessenberg matrix;
+* `U`: a dense n × (k+1) matrix;
+* `F`: a dense (k+1) × k upper Hessenberg matrix.
+
+#### Reference
+
+* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://doi.org/10.1137/21M1459265), SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023.
+"""
+function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ V = M(undef, m, k+1)
+ U = M(undef, n, k+1)
+ H = zeros(FC, k+1, k)
+ F = zeros(FC, k+1, k)
+
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ uᵢ = view(U,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ uᵢ₊₁ = p = view(U,:,i+1)
+ if i == 1
+ β = @knrm2(m, b)
+ γ = @knrm2(n, c)
+ vᵢ .= b ./ β
+ uᵢ .= c ./ γ
+ end
+ mul!(q, A, uᵢ)
+ mul!(p, B, vᵢ)
+ for j = 1:i
+ vⱼ = view(V,:,j)
+ uⱼ = view(U,:,j)
+ H[j,i] = @kdot(m, vⱼ, q)
+ @kaxpy!(n, -H[j,i], vⱼ, q)
+ F[j,i] = @kdot(n, uⱼ, p)
+ @kaxpy!(m, -F[j,i], uⱼ, p)
+ end
+ H[i+1,i] = @knrm2(m, q)
+ vᵢ₊₁ .= q ./ H[i+1,i]
+ F[i+1,i] = @knrm2(n, p)
+ uᵢ₊₁ .= p ./ F[i+1,i]
+ end
+ return V, H, U, F
+end
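
A hedged sketch of the paired Hessenberg relations above (illustrative operators; note that `B` need not be `Aᴴ`):

    using LinearAlgebra, SparseArrays, Krylov

    m, n, k = 120, 80, 20
    A = sprandn(m, n, 0.05)
    B = sprandn(n, m, 0.05)
    b, c = rand(m), rand(n)
    V, H, U, F = montoison_orban(A, B, b, c, k)
    norm(A * U[:, 1:k] - V * H)         # ≈ 0: A Uₖ = Vₖ₊₁ Hₖ₊₁.ₖ
    norm(B * V[:, 1:k] - U * F)         # ≈ 0: B Vₖ = Uₖ₊₁ Fₖ₊₁.ₖ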
diff --git a/src/krylov_solve.jl b/src/krylov_solve.jl
new file mode 100644
index 000000000..30a463dfa
--- /dev/null
+++ b/src/krylov_solve.jl
@@ -0,0 +1,60 @@
+"""
+ solve!(solver, args...; kwargs...)
+
+Use the in-place Krylov method associated with `solver`.
+"""
+function solve! end
+
+for (KS, fun, args, def_args, optargs, def_optargs, kwargs, def_kwargs) in [
+ (:LsmrSolver , :lsmr! , args_lsmr , def_args_lsmr , () , () , kwargs_lsmr , def_kwargs_lsmr )
+ (:CgsSolver , :cgs! , args_cgs , def_args_cgs , optargs_cgs , def_optargs_cgs , kwargs_cgs , def_kwargs_cgs )
+ (:UsymlqSolver , :usymlq! , args_usymlq , def_args_usymlq , optargs_usymlq , def_optargs_usymlq , kwargs_usymlq , def_kwargs_usymlq )
+ (:LnlqSolver , :lnlq! , args_lnlq , def_args_lnlq , () , () , kwargs_lnlq , def_kwargs_lnlq )
+ (:BicgstabSolver , :bicgstab! , args_bicgstab , def_args_bicgstab , optargs_bicgstab , def_optargs_bicgstab , kwargs_bicgstab , def_kwargs_bicgstab )
+ (:CrlsSolver , :crls! , args_crls , def_args_crls , () , () , kwargs_crls , def_kwargs_crls )
+ (:LsqrSolver , :lsqr! , args_lsqr , def_args_lsqr , () , () , kwargs_lsqr , def_kwargs_lsqr )
+ (:MinresSolver , :minres! , args_minres , def_args_minres , optargs_minres , def_optargs_minres , kwargs_minres , def_kwargs_minres )
+ (:CgneSolver , :cgne! , args_cgne , def_args_cgne , () , () , kwargs_cgne , def_kwargs_cgne )
+ (:DqgmresSolver , :dqgmres! , args_dqgmres , def_args_dqgmres , optargs_dqgmres , def_optargs_dqgmres , kwargs_dqgmres , def_kwargs_dqgmres )
+ (:SymmlqSolver , :symmlq! , args_symmlq , def_args_symmlq , optargs_symmlq , def_optargs_symmlq , kwargs_symmlq , def_kwargs_symmlq )
+ (:TrimrSolver , :trimr! , args_trimr , def_args_trimr , optargs_trimr , def_optargs_trimr , kwargs_trimr , def_kwargs_trimr )
+ (:UsymqrSolver , :usymqr! , args_usymqr , def_args_usymqr , optargs_usymqr , def_optargs_usymqr , kwargs_usymqr , def_kwargs_usymqr )
+ (:BilqrSolver , :bilqr! , args_bilqr , def_args_bilqr , optargs_bilqr , def_optargs_bilqr , kwargs_bilqr , def_kwargs_bilqr )
+ (:CrSolver , :cr! , args_cr , def_args_cr , optargs_cr , def_optargs_cr , kwargs_cr , def_kwargs_cr )
+ (:CraigmrSolver , :craigmr! , args_craigmr , def_args_craigmr , () , () , kwargs_craigmr , def_kwargs_craigmr )
+ (:TricgSolver , :tricg! , args_tricg , def_args_tricg , optargs_tricg , def_optargs_tricg , kwargs_tricg , def_kwargs_tricg )
+ (:CraigSolver , :craig! , args_craig , def_args_craig , () , () , kwargs_craig , def_kwargs_craig )
+ (:DiomSolver , :diom! , args_diom , def_args_diom , optargs_diom , def_optargs_diom , kwargs_diom , def_kwargs_diom )
+ (:LslqSolver , :lslq! , args_lslq , def_args_lslq , () , () , kwargs_lslq , def_kwargs_lslq )
+ (:TrilqrSolver , :trilqr! , args_trilqr , def_args_trilqr , optargs_trilqr , def_optargs_trilqr , kwargs_trilqr , def_kwargs_trilqr )
+ (:CrmrSolver , :crmr! , args_crmr , def_args_crmr , () , () , kwargs_crmr , def_kwargs_crmr )
+ (:CgSolver , :cg! , args_cg , def_args_cg , optargs_cg , def_optargs_cg , kwargs_cg , def_kwargs_cg )
+ (:CgLanczosShiftSolver, :cg_lanczos_shift!, args_cg_lanczos_shift, def_args_cg_lanczos_shift, () , () , kwargs_cg_lanczos_shift, def_kwargs_cg_lanczos_shift)
+ (:CglsSolver , :cgls! , args_cgls , def_args_cgls , () , () , kwargs_cgls , def_kwargs_cgls )
+ (:CgLanczosSolver , :cg_lanczos! , args_cg_lanczos , def_args_cg_lanczos , optargs_cg_lanczos, def_optargs_cg_lanczos, kwargs_cg_lanczos , def_kwargs_cg_lanczos )
+ (:BilqSolver , :bilq! , args_bilq , def_args_bilq , optargs_bilq , def_optargs_bilq , kwargs_bilq , def_kwargs_bilq )
+ (:MinresQlpSolver , :minres_qlp! , args_minres_qlp , def_args_minres_qlp , optargs_minres_qlp, def_optargs_minres_qlp, kwargs_minres_qlp , def_kwargs_minres_qlp )
+ (:QmrSolver , :qmr! , args_qmr , def_args_qmr , optargs_qmr , def_optargs_qmr , kwargs_qmr , def_kwargs_qmr )
+ (:GmresSolver , :gmres! , args_gmres , def_args_gmres , optargs_gmres , def_optargs_gmres , kwargs_gmres , def_kwargs_gmres )
+ (:FgmresSolver , :fgmres! , args_fgmres , def_args_fgmres , optargs_fgmres , def_optargs_fgmres , kwargs_fgmres , def_kwargs_fgmres )
+ (:FomSolver , :fom! , args_fom , def_args_fom , optargs_fom , def_optargs_fom , kwargs_fom , def_kwargs_fom )
+ (:GpmrSolver , :gpmr! , args_gpmr , def_args_gpmr , optargs_gpmr , def_optargs_gpmr , kwargs_gpmr , def_kwargs_gpmr )
+]
+ @eval begin
+ solve!(solver :: $KS{T,FC,S}, $(def_args...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} = $(fun)(solver, $(args...); $(kwargs...))
+
+ if !isempty($optargs)
+ function $(fun)(solver :: $KS{T,FC,S}, $(def_args...), $(def_optargs...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+ start_time = time_ns()
+ warm_start!(solver, $(optargs...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ $(fun)(solver, $(args...); $(kwargs...))
+ solver.stats.timer += elapsed_time
+ return solver
+ end
+
+ solve!(solver :: $KS{T,FC,S}, $(def_args...), $(def_optargs...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} = $(fun)(solver, $(args...), $(optargs...); $(kwargs...))
+ end
+ end
+end
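
The `@eval` loop above generates one `solve!` method per workspace type, so a single generic entry point dispatches to the right in-place method, and warm-started variants forward the optional arguments. A hedged sketch of the resulting API (names taken from the exports in this diff; the SPD test problem is illustrative):

    using LinearAlgebra, Krylov

    n = 50
    A = rand(n, n); A = A' * A + I      # symmetric positive definite
    b = rand(n)
    solver = CgSolver(A, b)             # allocate the workspace once
    solve!(solver, A, b)                # dispatches to cg!(solver, A, b)
    solver.stats.solved                 # true on convergence
    solve!(solver, A, 2 .* b)           # re-use the same workspace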
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index 8a109a2be..0e905e807 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -3,11 +3,13 @@ CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver,
UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver,
BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver,
LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver,
-GmresSolver, FomSolver, GpmrSolver
+GmresSolver, FomSolver, GpmrSolver, FgmresSolver
export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual,
niterations, Aprod, Atprod, Bprod, warm_start!
+import Base.size, Base.sizeof, Base.format_bytes
+
const KRYLOV_SOLVERS = Dict(
:cg => :CgSolver ,
:cr => :CrSolver ,
@@ -20,6 +22,7 @@ const KRYLOV_SOLVERS = Dict(
:fom => :FomSolver ,
:dqgmres => :DqgmresSolver ,
:gmres => :GmresSolver ,
+ :fgmres => :FgmresSolver ,
:gpmr => :GpmrSolver ,
:usymlq => :UsymlqSolver ,
:usymqr => :UsymqrSolver ,
@@ -51,12 +54,14 @@ Type for storing the vectors required by the in-place version of MINRES.
The outer constructors
- solver = MinresSolver(n, m, S; window :: Int=5)
+ solver = MinresSolver(m, n, S; window :: Int=5)
solver = MinresSolver(A, b; window :: Int=5)
may be used in order to create these vectors.
"""
mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r1 :: S
@@ -68,29 +73,29 @@ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
err_vec :: Vector{T}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function MinresSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r1 = S(undef, n)
- r2 = S(undef, n)
- w1 = S(undef, n)
- w2 = S(undef, n)
- y = S(undef, n)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats)
- return solver
- end
+function MinresSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r1 = S(undef, n)
+ r2 = S(undef, n)
+ w1 = S(undef, n)
+ w2 = S(undef, n)
+ y = S(undef, n)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = MinresSolver{T,FC,S}(m, n, Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats)
+ return solver
+end
- function MinresSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- MinresSolver(n, m, S, window=window)
- end
+function MinresSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ MinresSolver(m, n, S; window)
end
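
Note that the diff swaps the first two constructor arguments from `(n, m, S)` to `(m, n, S)` throughout, so they now read rows first, then columns, matching `size(A)`. A hedged sketch of the updated call, with illustrative dimensions:

    using Krylov

    m = n = 100                                   # MINRES expects a square system
    solver = MinresSolver(m, n, Vector{Float64})  # new (m, n, S) argument order
    # or, deducing m, n and S from a concrete operator and right-hand side:
    # solver = MinresSolver(A, b)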
"""
@@ -98,12 +103,14 @@ Type for storing the vectors required by the in-place version of CG.
The outer constructors
- solver = CgSolver(n, m, S)
+ solver = CgSolver(m, n, S)
solver = CgSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -112,26 +119,26 @@ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
z :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CgSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- Ap = S(undef, n)
- z = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, Ap, z, false, stats)
- return solver
- end
+function CgSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ Ap = S(undef, n)
+ z = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CgSolver{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats)
+ return solver
+end
- function CgSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgSolver(n, m, S)
- end
+function CgSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgSolver(m, n, S)
end
"""
@@ -139,12 +146,14 @@ Type for storing the vectors required by the in-place version of CR.
The outer constructors
- solver = CrSolver(n, m, S)
+ solver = CrSolver(m, n, S)
solver = CrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -154,27 +163,27 @@ mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
Mq :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- q = S(undef, n)
- Ar = S(undef, n)
- Mq = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats)
- return solver
- end
+function CrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ q = S(undef, n)
+ Ar = S(undef, n)
+ Mq = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CrSolver{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats)
+ return solver
+end
- function CrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrSolver(n, m, S)
- end
+function CrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CrSolver(m, n, S)
end
"""
@@ -182,12 +191,14 @@ Type for storing the vectors required by the in-place version of SYMMLQ.
The outer constructors
- solver = SymmlqSolver(n, m, S)
+ solver = SymmlqSolver(m, n, S)
solver = SymmlqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
Mvold :: S
@@ -200,30 +211,30 @@ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
sprod :: Vector{T}
warm_start :: Bool
stats :: SymmlqStats{T}
+end
- function SymmlqSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- Mvold = S(undef, n)
- Mv = S(undef, n)
- Mv_next = S(undef, n)
- w̅ = S(undef, n)
- v = S(undef, 0)
- clist = zeros(T, window)
- zlist = zeros(T, window)
- sprod = ones(T, window)
- stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats)
- return solver
- end
+function SymmlqSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Mvold = S(undef, n)
+ Mv = S(undef, n)
+ Mv_next = S(undef, n)
+ w̅ = S(undef, n)
+ v = S(undef, 0)
+ clist = zeros(T, window)
+ zlist = zeros(T, window)
+ sprod = ones(T, window)
+ stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), 0.0, "unknown")
+ solver = SymmlqSolver{T,FC,S}(m, n, Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats)
+ return solver
+end
- function SymmlqSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- SymmlqSolver(n, m, S, window=window)
- end
+function SymmlqSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ SymmlqSolver(m, n, S; window)
end
"""
@@ -231,12 +242,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS.
The outer constructors
- solver = CgLanczosSolver(n, m, S)
+ solver = CgLanczosSolver(m, n, S)
solver = CgLanczosSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
Mv :: S
@@ -246,27 +259,27 @@ mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
warm_start :: Bool
stats :: LanczosStats{T}
+end
- function CgLanczosSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- Mv = S(undef, n)
- Mv_prev = S(undef, n)
- p = S(undef, n)
- Mv_next = S(undef, n)
- v = S(undef, 0)
- stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats)
- return solver
- end
+function CgLanczosSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Mv = S(undef, n)
+ Mv_prev = S(undef, n)
+ p = S(undef, n)
+ Mv_next = S(undef, n)
+ v = S(undef, 0)
+ stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), 0.0, "unknown")
+ solver = CgLanczosSolver{T,FC,S}(m, n, Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats)
+ return solver
+end
- function CgLanczosSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgLanczosSolver(n, m, S)
- end
+function CgLanczosSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgLanczosSolver(m, n, S)
end
"""
@@ -274,12 +287,15 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIF
The outer constructors
- solver = CgLanczosShiftSolver(n, m, nshifts, S)
+ solver = CgLanczosShiftSolver(m, n, nshifts, S)
solver = CgLanczosShiftSolver(A, b, nshifts)
may be used in order to create these vectors.
"""
mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
+ nshifts :: Int
Mv :: S
Mv_prev :: S
Mv_next :: S
@@ -294,34 +310,34 @@ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S}
converged :: BitVector
not_cv :: BitVector
stats :: LanczosShiftStats{T}
+end
- function CgLanczosShiftSolver(n, m, nshifts, S)
- FC = eltype(S)
- T = real(FC)
- Mv = S(undef, n)
- Mv_prev = S(undef, n)
- Mv_next = S(undef, n)
- v = S(undef, 0)
- x = [S(undef, n) for i = 1 : nshifts]
- p = [S(undef, n) for i = 1 : nshifts]
- σ = Vector{T}(undef, nshifts)
- δhat = Vector{T}(undef, nshifts)
- ω = Vector{T}(undef, nshifts)
- γ = Vector{T}(undef, nshifts)
- rNorms = Vector{T}(undef, nshifts)
- indefinite = BitVector(undef, nshifts)
- converged = BitVector(undef, nshifts)
- not_cv = BitVector(undef, nshifts)
- stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats)
- return solver
- end
+function CgLanczosShiftSolver(m, n, nshifts, S)
+ FC = eltype(S)
+ T = real(FC)
+ Mv = S(undef, n)
+ Mv_prev = S(undef, n)
+ Mv_next = S(undef, n)
+ v = S(undef, 0)
+ x = S[S(undef, n) for i = 1 : nshifts]
+ p = S[S(undef, n) for i = 1 : nshifts]
+ σ = Vector{T}(undef, nshifts)
+ δhat = Vector{T}(undef, nshifts)
+ ω = Vector{T}(undef, nshifts)
+ γ = Vector{T}(undef, nshifts)
+ rNorms = Vector{T}(undef, nshifts)
+ indefinite = BitVector(undef, nshifts)
+ converged = BitVector(undef, nshifts)
+ not_cv = BitVector(undef, nshifts)
+ stats = LanczosShiftStats(0, false, Vector{T}[T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), 0.0, "unknown")
+ solver = CgLanczosShiftSolver{T,FC,S}(m, n, nshifts, Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats)
+ return solver
+end
- function CgLanczosShiftSolver(A, b, nshifts)
- n, m = size(A)
- S = ktypeof(b)
- CgLanczosShiftSolver(n, m, nshifts, S)
- end
+function CgLanczosShiftSolver(A, b, nshifts)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgLanczosShiftSolver(m, n, nshifts, S)
end
"""
@@ -329,12 +345,14 @@ Type for storing the vectors required by the in-place version of MINRES-QLP.
The outer constructors
- solver = MinresQlpSolver(n, m, S)
+ solver = MinresQlpSolver(m, n, S)
solver = MinresQlpSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
wₖ₋₁ :: S
wₖ :: S
@@ -345,28 +363,28 @@ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function MinresQlpSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- wₖ₋₁ = S(undef, n)
- wₖ = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- x = S(undef, n)
- p = S(undef, n)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats)
- return solver
- end
+function MinresQlpSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ wₖ₋₁ = S(undef, n)
+ wₖ = S(undef, n)
+ M⁻¹vₖ₋₁ = S(undef, n)
+ M⁻¹vₖ = S(undef, n)
+ x = S(undef, n)
+ p = S(undef, n)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = MinresQlpSolver{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats)
+ return solver
+end
- function MinresQlpSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- MinresQlpSolver(n, m, S)
- end
+function MinresQlpSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ MinresQlpSolver(m, n, S)
end
"""
@@ -374,13 +392,15 @@ Type for storing the vectors required by the in-place version of DQGMRES.
The outer constructors
- solver = DqgmresSolver(n, m, memory, S)
+ solver = DqgmresSolver(m, n, memory, S)
solver = DqgmresSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
t :: S
@@ -393,31 +413,31 @@ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
H :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function DqgmresSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- t = S(undef, n)
- z = S(undef, 0)
- w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
- V = [S(undef, n) for i = 1 : memory]
- c = Vector{T}(undef, memory)
- s = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats)
- return solver
- end
+function DqgmresSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ t = S(undef, n)
+ z = S(undef, 0)
+ w = S(undef, 0)
+ P = S[S(undef, n) for i = 1 : memory]
+ V = S[S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ H = Vector{FC}(undef, memory+1)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = DqgmresSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats)
+ return solver
+end
- function DqgmresSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- DqgmresSolver(n, m, memory, S)
- end
+function DqgmresSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ DqgmresSolver(m, n, memory, S)
end
"""
@@ -425,13 +445,15 @@ Type for storing the vectors required by the in-place version of DIOM.
The outer constructors
- solver = DiomSolver(n, m, memory, S)
+ solver = DiomSolver(m, n, memory, S)
solver = DiomSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
t :: S
@@ -443,30 +465,30 @@ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
H :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function DiomSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- t = S(undef, n)
- z = S(undef, 0)
- w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
- V = [S(undef, n) for i = 1 : memory]
- L = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats)
- return solver
- end
+function DiomSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ t = S(undef, n)
+ z = S(undef, 0)
+ w = S(undef, 0)
+ P = S[S(undef, n) for i = 1 : memory-1]
+ V = S[S(undef, n) for i = 1 : memory]
+ L = Vector{FC}(undef, memory-1)
+ H = Vector{FC}(undef, memory)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = DiomSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats)
+ return solver
+end
- function DiomSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- DiomSolver(n, m, memory, S)
- end
+function DiomSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ DiomSolver(m, n, memory, S)
end
"""
@@ -474,12 +496,14 @@ Type for storing the vectors required by the in-place version of USYMLQ.
The outer constructors
- solver = UsymlqSolver(n, m, S)
+ solver = UsymlqSolver(m, n, S)
solver = UsymlqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
p :: S
@@ -491,29 +515,29 @@ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
q :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function UsymlqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- Δx = S(undef, 0)
- x = S(undef, m)
- d̅ = S(undef, m)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats)
- return solver
- end
+function UsymlqSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ d̅ = S(undef, n)
+ vₖ₋₁ = S(undef, m)
+ vₖ = S(undef, m)
+ q = S(undef, m)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = UsymlqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats)
+ return solver
+end
- function UsymlqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- UsymlqSolver(n, m, S)
- end
+function UsymlqSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ UsymlqSolver(m, n, S)
end
"""
@@ -521,12 +545,14 @@ Type for storing the vectors required by the in-place version of USYMQR.
The outer constructors
- solver = UsymqrSolver(n, m, S)
+ solver = UsymqrSolver(m, n, S)
solver = UsymqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
vₖ₋₁ :: S
vₖ :: S
q :: S
@@ -539,30 +565,30 @@ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
p :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function UsymqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, m)
- wₖ₋₂ = S(undef, m)
- wₖ₋₁ = S(undef, m)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats)
- return solver
- end
+function UsymqrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ vₖ₋₁ = S(undef, m)
+ vₖ = S(undef, m)
+ q = S(undef, m)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ wₖ₋₁ = S(undef, n)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ p = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = UsymqrSolver{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats)
+ return solver
+end
- function UsymqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- UsymqrSolver(n, m, S)
- end
+function UsymqrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ UsymqrSolver(m, n, S)
end
"""
@@ -570,12 +596,14 @@ Type for storing the vectors required by the in-place version of TRICG.
The outer constructors
- solver = TricgSolver(n, m, S)
+ solver = TricgSolver(m, n, S)
solver = TricgSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
y :: S
N⁻¹uₖ₋₁ :: S
N⁻¹uₖ :: S
@@ -594,36 +622,36 @@ mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function TricgSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- y = S(undef, m)
- N⁻¹uₖ₋₁ = S(undef, m)
- N⁻¹uₖ = S(undef, m)
- p = S(undef, m)
- gy₂ₖ₋₁ = S(undef, m)
- gy₂ₖ = S(undef, m)
- x = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- q = S(undef, n)
- gx₂ₖ₋₁ = S(undef, n)
- gx₂ₖ = S(undef, n)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- uₖ = S(undef, 0)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
- return solver
- end
+function TricgSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ y = S(undef, n)
+ N⁻¹uₖ₋₁ = S(undef, n)
+ N⁻¹uₖ = S(undef, n)
+ p = S(undef, n)
+ gy₂ₖ₋₁ = S(undef, n)
+ gy₂ₖ = S(undef, n)
+ x = S(undef, m)
+ M⁻¹vₖ₋₁ = S(undef, m)
+ M⁻¹vₖ = S(undef, m)
+ q = S(undef, m)
+ gx₂ₖ₋₁ = S(undef, m)
+ gx₂ₖ = S(undef, m)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ uₖ = S(undef, 0)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = TricgSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
+ return solver
+end
- function TricgSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TricgSolver(n, m, S)
- end
+function TricgSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ TricgSolver(m, n, S)
end
"""
@@ -631,12 +659,14 @@ Type for storing the vectors required by the in-place version of TRIMR.
The outer constructors
- solver = TrimrSolver(n, m, S)
+ solver = TrimrSolver(m, n, S)
solver = TrimrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
y :: S
N⁻¹uₖ₋₁ :: S
N⁻¹uₖ :: S
@@ -659,40 +689,40 @@ mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function TrimrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- y = S(undef, m)
- N⁻¹uₖ₋₁ = S(undef, m)
- N⁻¹uₖ = S(undef, m)
- p = S(undef, m)
- gy₂ₖ₋₃ = S(undef, m)
- gy₂ₖ₋₂ = S(undef, m)
- gy₂ₖ₋₁ = S(undef, m)
- gy₂ₖ = S(undef, m)
- x = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- q = S(undef, n)
- gx₂ₖ₋₃ = S(undef, n)
- gx₂ₖ₋₂ = S(undef, n)
- gx₂ₖ₋₁ = S(undef, n)
- gx₂ₖ = S(undef, n)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- uₖ = S(undef, 0)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
- return solver
- end
+function TrimrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ y = S(undef, n)
+ N⁻¹uₖ₋₁ = S(undef, n)
+ N⁻¹uₖ = S(undef, n)
+ p = S(undef, n)
+ gy₂ₖ₋₃ = S(undef, n)
+ gy₂ₖ₋₂ = S(undef, n)
+ gy₂ₖ₋₁ = S(undef, n)
+ gy₂ₖ = S(undef, n)
+ x = S(undef, m)
+ M⁻¹vₖ₋₁ = S(undef, m)
+ M⁻¹vₖ = S(undef, m)
+ q = S(undef, m)
+ gx₂ₖ₋₃ = S(undef, m)
+ gx₂ₖ₋₂ = S(undef, m)
+ gx₂ₖ₋₁ = S(undef, m)
+ gx₂ₖ = S(undef, m)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ uₖ = S(undef, 0)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = TrimrSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
+ return solver
+end
- function TrimrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TrimrSolver(n, m, S)
- end
+function TrimrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ TrimrSolver(m, n, S)
end
"""
@@ -700,12 +730,14 @@ Type for storing the vectors required by the in-place version of TRILQR.
The outer constructors
- solver = TrilqrSolver(n, m, S)
+ solver = TrilqrSolver(m, n, S)
solver = TrilqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
p :: S
@@ -721,33 +753,33 @@ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₂ :: S
warm_start :: Bool
stats :: AdjointStats{T}
+end
- function TrilqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- d̅ = S(undef, m)
- Δx = S(undef, 0)
- x = S(undef, m)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- Δy = S(undef, 0)
- y = S(undef, n)
- wₖ₋₃ = S(undef, n)
- wₖ₋₂ = S(undef, n)
- stats = AdjointStats(0, false, false, T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats)
- return solver
- end
+function TrilqrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ p = S(undef, n)
+ d̅ = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ vₖ₋₁ = S(undef, m)
+ vₖ = S(undef, m)
+ q = S(undef, m)
+ Δy = S(undef, 0)
+ y = S(undef, m)
+ wₖ₋₃ = S(undef, m)
+ wₖ₋₂ = S(undef, m)
+ stats = AdjointStats(0, false, false, T[], T[], 0.0, "unknown")
+ solver = TrilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats)
+ return solver
+end
- function TrilqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TrilqrSolver(n, m, S)
- end
+function TrilqrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ TrilqrSolver(m, n, S)
end
"""
@@ -755,12 +787,14 @@ Type for storing the vectors required by the in-place version of CGS.
The outer constructors
- solver = CgsSolver(n, m, S)
+ solver = CgsSolver(m, n, S)
solver = CgsSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -772,29 +806,29 @@ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vw :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CgsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- u = S(undef, n)
- p = S(undef, n)
- q = S(undef, n)
- ts = S(undef, n)
- yz = S(undef, 0)
- vw = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats)
- return solver
- end
+function CgsSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ u = S(undef, n)
+ p = S(undef, n)
+ q = S(undef, n)
+ ts = S(undef, n)
+ yz = S(undef, 0)
+ vw = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CgsSolver{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats)
+ return solver
+end
- function CgsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgsSolver(n, m, S)
- end
+function CgsSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgsSolver(m, n, S)
end
"""
@@ -802,12 +836,14 @@ Type for storing the vectors required by the in-place version of BICGSTAB.
The outer constructors
- solver = BicgstabSolver(n, m, S)
+ solver = BicgstabSolver(m, n, S)
solver = BicgstabSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -819,29 +855,29 @@ mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S}
t :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function BicgstabSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- v = S(undef, n)
- s = S(undef, n)
- qd = S(undef, n)
- yz = S(undef, 0)
- t = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats)
- return solver
- end
+function BicgstabSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ v = S(undef, n)
+ s = S(undef, n)
+ qd = S(undef, n)
+ yz = S(undef, 0)
+ t = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = BicgstabSolver{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats)
+ return solver
+end
- function BicgstabSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BicgstabSolver(n, m, S)
- end
+function BicgstabSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ BicgstabSolver(m, n, S)
end
"""
@@ -849,12 +885,14 @@ Type for storing the vectors required by the in-place version of BILQ.
The outer constructors
- solver = BilqSolver(n, m, S)
+ solver = BilqSolver(m, n, S)
solver = BilqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
q :: S
@@ -866,29 +904,29 @@ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
d̅ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function BilqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- d̅ = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats)
- return solver
- end
+function BilqSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ d̅ = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = BilqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats)
+ return solver
+end
- function BilqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BilqSolver(n, m, S)
- end
+function BilqSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ BilqSolver(m, n, S)
end
"""
@@ -896,12 +934,14 @@ Type for storing the vectors required by the in-place version of QMR.
The outer constructors
- solver = QmrSolver(n, m, S)
+ solver = QmrSolver(m, n, S)
solver = QmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
q :: S
@@ -914,30 +954,30 @@ mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₁ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function QmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- wₖ₋₂ = S(undef, n)
- wₖ₋₁ = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats)
- return solver
- end
+function QmrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ wₖ₋₁ = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = QmrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats)
+ return solver
+end
- function QmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- QmrSolver(n, m, S)
- end
+function QmrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ QmrSolver(m, n, S)
end
"""
@@ -945,12 +985,14 @@ Type for storing the vectors required by the in-place version of BILQR.
The outer constructors
- solver = BilqrSolver(n, m, S)
+ solver = BilqrSolver(m, n, S)
solver = BilqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
q :: S
@@ -966,33 +1008,33 @@ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₂ :: S
warm_start :: Bool
stats :: AdjointStats{T}
+end
- function BilqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- Δy = S(undef, 0)
- y = S(undef, n)
- d̅ = S(undef, n)
- wₖ₋₃ = S(undef, n)
- wₖ₋₂ = S(undef, n)
- stats = AdjointStats(0, false, false, T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats)
- return solver
- end
+function BilqrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Δy = S(undef, 0)
+ y = S(undef, n)
+ d̅ = S(undef, n)
+ wₖ₋₃ = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ stats = AdjointStats(0, false, false, T[], T[], 0.0, "unknown")
+ solver = BilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats)
+ return solver
+end
- function BilqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BilqrSolver(n, m, S)
- end
+function BilqrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ BilqrSolver(m, n, S)
end
"""
@@ -1000,12 +1042,14 @@ Type for storing the vectors required by the in-place version of CGLS.
The outer constructors
- solver = CglsSolver(n, m, S)
+ solver = CglsSolver(m, n, S)
solver = CglsSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
s :: S
@@ -1013,26 +1057,26 @@ mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
q :: S
Mr :: S
stats :: SimpleStats{T}
+end
- function CglsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- s = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- Mr = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, s, r, q, Mr, stats)
- return solver
- end
+function CglsSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ s = S(undef, n)
+ r = S(undef, m)
+ q = S(undef, m)
+ Mr = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CglsSolver{T,FC,S}(m, n, x, p, s, r, q, Mr, stats)
+ return solver
+end
- function CglsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CglsSolver(n, m, S)
- end
+function CglsSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CglsSolver(m, n, S)
end
"""
@@ -1040,12 +1084,14 @@ Type for storing the vectors required by the in-place version of CRLS.
The outer constructors
- solver = CrlsSolver(n, m, S)
+ solver = CrlsSolver(m, n, S)
solver = CrlsSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
Ar :: S
@@ -1055,28 +1101,28 @@ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
s :: S
Ms :: S
stats :: SimpleStats{T}
+end
- function CrlsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Ar = S(undef, m)
- q = S(undef, m)
- r = S(undef, n)
- Ap = S(undef, n)
- s = S(undef, n)
- Ms = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats)
- return solver
- end
+function CrlsSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ Ar = S(undef, n)
+ q = S(undef, n)
+ r = S(undef, m)
+ Ap = S(undef, m)
+ s = S(undef, m)
+ Ms = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CrlsSolver{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats)
+ return solver
+end
- function CrlsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrlsSolver(n, m, S)
- end
+function CrlsSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CrlsSolver(m, n, S)
end
"""
@@ -1084,41 +1130,43 @@ Type for storing the vectors required by the in-place version of CGNE.
The outer constructors
- solver = CgneSolver(n, m, S)
+ solver = CgneSolver(m, n, S)
solver = CgneSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
- Aᵀz :: S
+ Aᴴz :: S
r :: S
q :: S
s :: S
z :: S
stats :: SimpleStats{T}
+end
- function CgneSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Aᵀz = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- s = S(undef, 0)
- z = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats)
- return solver
- end
+function CgneSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ Aᴴz = S(undef, n)
+ r = S(undef, m)
+ q = S(undef, m)
+ s = S(undef, 0)
+ z = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CgneSolver{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats)
+ return solver
+end
- function CgneSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgneSolver(n, m, S)
- end
+function CgneSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgneSolver(m, n, S)
end
"""
@@ -1126,41 +1174,43 @@ Type for storing the vectors required by the in-place version of CRMR.
The outer constructors
- solver = CrmrSolver(n, m, S)
+ solver = CrmrSolver(m, n, S)
solver = CrmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
- Aᵀr :: S
+ Aᴴr :: S
r :: S
q :: S
- Mq :: S
+ Nq :: S
s :: S
stats :: SimpleStats{T}
+end
- function CrmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Aᵀr = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- Mq = S(undef, 0)
- s = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats)
- return solver
- end
+function CrmrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ Aᴴr = S(undef, n)
+ r = S(undef, m)
+ q = S(undef, m)
+ Nq = S(undef, 0)
+ s = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CrmrSolver{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats)
+ return solver
+end
- function CrmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrmrSolver(n, m, S)
- end
+function CrmrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CrmrSolver(m, n, S)
end
"""
@@ -1168,15 +1218,17 @@ Type for storing the vectors required by the in-place version of LSLQ.
The outer constructors
- solver = LslqSolver(n, m, S)
+ solver = LslqSolver(m, n, S)
solver = LslqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
w̄ :: S
Mu :: S
Av :: S
@@ -1184,29 +1236,29 @@ mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: LSLQStats{T}
+end
- function LslqSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- w̄ = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LslqSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ w̄ = S(undef, n)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], 0.0, "unknown")
+ solver = LslqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w̄, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LslqSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LslqSolver(n, m, S, window=window)
- end
+function LslqSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ LslqSolver(m, n, S; window)
end
"""
@@ -1214,15 +1266,17 @@ Type for storing the vectors required by the in-place version of LSQR.
The outer constructors
- solver = LsqrSolver(n, m, S)
+ solver = LsqrSolver(m, n, S)
solver = LsqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
w :: S
Mu :: S
Av :: S
@@ -1230,29 +1284,29 @@ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: SimpleStats{T}
+end
- function LsqrSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- w = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LsqrSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ w = S(undef, n)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = LsqrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LsqrSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LsqrSolver(n, m, S, window=window)
- end
+function LsqrSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ LsqrSolver(m, n, S; window)
end
"""
@@ -1260,15 +1314,17 @@ Type for storing the vectors required by the in-place version of LSMR.
The outer constructors
- solver = LsmrSolver(n, m, S)
+ solver = LsmrSolver(m, n, S)
solver = LsmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
h :: S
hbar :: S
Mu :: S
@@ -1277,30 +1333,30 @@ mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: LsmrStats{T}
+end
- function LsmrSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- h = S(undef, m)
- hbar = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LsmrSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ h = S(undef, n)
+ hbar = S(undef, n)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), 0.0, "unknown")
+ solver = LsmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, h, hbar, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LsmrSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LsmrSolver(n, m, S, window=window)
- end
+function LsmrSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ LsmrSolver(m, n, S; window)
end
"""
@@ -1308,15 +1364,17 @@ Type for storing the vectors required by the in-place version of LNLQ.
The outer constructors
- solver = LnlqSolver(n, m, S)
+ solver = LnlqSolver(m, n, S)
solver = LnlqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
y :: S
w̄ :: S
Mu :: S
@@ -1325,30 +1383,30 @@ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
q :: S
stats :: LNLQStats{T}
+end
- function LnlqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- y = S(undef, n)
- w̄ = S(undef, n)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- q = S(undef, 0)
- stats = LNLQStats(0, false, T[], false, T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats)
- return solver
- end
+function LnlqSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ y = S(undef, m)
+ w̄ = S(undef, m)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ q = S(undef, 0)
+ stats = LNLQStats(0, false, T[], false, T[], T[], 0.0, "unknown")
+ solver = LnlqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w̄, Mu, Av, u, v, q, stats)
+ return solver
+end
- function LnlqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- LnlqSolver(n, m, S)
- end
+function LnlqSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ LnlqSolver(m, n, S)
end
"""
@@ -1356,15 +1414,17 @@ Type for storing the vectors required by the in-place version of CRAIG.
The outer constructors
- solver = CraigSolver(n, m, S)
+ solver = CraigSolver(m, n, S)
solver = CraigSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
y :: S
w :: S
Mu :: S
@@ -1373,30 +1433,30 @@ mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
w2 :: S
stats :: SimpleStats{T}
+end
- function CraigSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- y = S(undef, n)
- w = S(undef, n)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- w2 = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats)
- return solver
- end
+function CraigSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ y = S(undef, m)
+ w = S(undef, m)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ w2 = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CraigSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats)
+ return solver
+end
- function CraigSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CraigSolver(n, m, S)
- end
+function CraigSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CraigSolver(m, n, S)
end
"""
@@ -1404,15 +1464,17 @@ Type for storing the vectors required by the in-place version of CRAIGMR.
The outer constructors
- solver = CraigmrSolver(n, m, S)
+ solver = CraigmrSolver(m, n, S)
solver = CraigmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
d :: S
y :: S
Mu :: S
@@ -1423,32 +1485,32 @@ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
q :: S
stats :: SimpleStats{T}
+end
- function CraigmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- d = S(undef, m)
- y = S(undef, n)
- Mu = S(undef, n)
- w = S(undef, n)
- wbar = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- q = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats)
- return solver
- end
+function CraigmrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ d = S(undef, n)
+ y = S(undef, m)
+ Mu = S(undef, m)
+ w = S(undef, m)
+ wbar = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ q = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = CraigmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats)
+ return solver
+end
- function CraigmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CraigmrSolver(n, m, S)
- end
+function CraigmrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CraigmrSolver(m, n, S)
end
"""
@@ -1456,13 +1518,15 @@ Type for storing the vectors required by the in-place version of GMRES.
The outer constructors
- solver = GmresSolver(n, m, memory, S)
+ solver = GmresSolver(m, n, memory, S)
solver = GmresSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
w :: S
@@ -1476,31 +1540,85 @@ mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
warm_start :: Bool
inner_iter :: Int
stats :: SimpleStats{T}
+end
- function GmresSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- w = S(undef, n)
- p = S(undef, 0)
- q = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- c = Vector{T}(undef, memory)
- s = Vector{FC}(undef, memory)
- z = Vector{FC}(undef, memory)
- R = Vector{FC}(undef, div(memory * (memory+1), 2))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats)
- return solver
- end
+function GmresSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ p = S(undef, 0)
+ q = S(undef, 0)
+ V = S[S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ R = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = GmresSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats)
+ return solver
+end
- function GmresSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- GmresSolver(n, m, memory, S)
- end
+function GmresSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ GmresSolver(m, n, memory, S)
+end
+
+"""
+Type for storing the vectors required by the in-place version of FGMRES.
+
+The outer constructors
+
+ solver = FgmresSolver(m, n, memory, S)
+ solver = FgmresSolver(A, b, memory = 20)
+
+may be used in order to create these vectors.
+`memory` is set to `n` if the value given is larger than `n`.
+"""
+mutable struct FgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
+ Δx :: S
+ x :: S
+ w :: S
+ q :: S
+ V :: Vector{S}
+ Z :: Vector{S}
+ c :: Vector{T}
+ s :: Vector{FC}
+ z :: Vector{FC}
+ R :: Vector{FC}
+ warm_start :: Bool
+ inner_iter :: Int
+ stats :: SimpleStats{T}
+end
+
+function FgmresSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ q = S(undef, 0)
+ V = S[S(undef, n) for i = 1 : memory]
+ Z = S[S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ R = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = FgmresSolver{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats)
+ return solver
+end
+
+function FgmresSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ FgmresSolver(m, n, memory, S)
end
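
FGMRES is new in this diff and follows the same workspace pattern as GMRES. A hedged sketch of allocating the workspace once and reusing it with the in-place method (assuming `fgmres!`, registered in the solver table below, takes the usual `(solver, A, b)` arguments):

```julia
using Krylov

A = rand(8, 8)
b = rand(8)

# Workspace with a restart memory of 5 (capped at min(m, memory)).
solver = FgmresSolver(8, 8, 5, Vector{Float64})

# Reuse the same workspace across solves; no further allocation.
fgmres!(solver, A, b)
solver.stats.solved
```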
"""
@@ -1508,13 +1626,15 @@ Type for storing the vectors required by the in-place version of FOM.
The outer constructors
- solver = FomSolver(n, m, memory, S)
+ solver = FomSolver(m, n, memory, S)
solver = FomSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
w :: S
@@ -1526,30 +1646,30 @@ mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
U :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function FomSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- w = S(undef, n)
- p = S(undef, 0)
- q = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- l = Vector{FC}(undef, memory)
- z = Vector{FC}(undef, memory)
- U = Vector{FC}(undef, div(memory * (memory+1), 2))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats)
- return solver
- end
+function FomSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ p = S(undef, 0)
+ q = S(undef, 0)
+ V = S[S(undef, n) for i = 1 : memory]
+ l = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ U = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = FomSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats)
+ return solver
+end
- function FomSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- FomSolver(n, m, memory, S)
- end
+function FomSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ FomSolver(m, n, memory, S)
end
"""
@@ -1557,13 +1677,15 @@ Type for storing the vectors required by the in-place version of GPMR.
The outer constructors
- solver = GpmrSolver(n, m, memory, S)
+ solver = GpmrSolver(m, n, memory, S)
solver = GpmrSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n + m` if the value given is larger than `n + m`.
"""
mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
wA :: S
wB :: S
dA :: S
@@ -1582,45 +1704,38 @@ mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
R :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
-
- function GpmrSolver(n, m, memory, S)
- memory = min(n + m, memory)
- FC = eltype(S)
- T = real(FC)
- wA = S(undef, 0)
- wB = S(undef, 0)
- dA = S(undef, n)
- dB = S(undef, m)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- x = S(undef, n)
- y = S(undef, m)
- q = S(undef, 0)
- p = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- U = [S(undef, m) for i = 1 : memory]
- gs = Vector{FC}(undef, 4 * memory)
- gc = Vector{T}(undef, 4 * memory)
- zt = Vector{FC}(undef, 2 * memory)
- R = Vector{FC}(undef, memory * (2memory + 1))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats)
- return solver
- end
-
- function GpmrSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- GpmrSolver(n, m, memory, S)
- end
end
-"""
- solve!(solver, args...; kwargs...)
+function GpmrSolver(m, n, memory, S)
+ memory = min(n + m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ wA = S(undef, 0)
+ wB = S(undef, 0)
+ dA = S(undef, m)
+ dB = S(undef, n)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ x = S(undef, m)
+ y = S(undef, n)
+ q = S(undef, 0)
+ p = S(undef, 0)
+ V = S[S(undef, m) for i = 1 : memory]
+ U = S[S(undef, n) for i = 1 : memory]
+ gs = Vector{FC}(undef, 4 * memory)
+ gc = Vector{T}(undef, 4 * memory)
+ zt = Vector{FC}(undef, 2 * memory)
+ R = Vector{FC}(undef, memory * (2 * memory + 1))
+ stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown")
+ solver = GpmrSolver{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats)
+ return solver
+end
-Use the in-place Krylov method associated to `solver`.
-"""
-function solve! end
+function GpmrSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ GpmrSolver(m, n, memory, S)
+end
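
GPMR keeps two solution vectors, `x` of length `m` and `y` of length `n`, which is why `nsol = 2` for it in the table below. A small sketch with random data (purely illustrative):

```julia
using Krylov

A = rand(5, 3)   # m × n
B = rand(3, 5)   # n × m
b = rand(5)
c = rand(3)

solver = GpmrSolver(A, b, 4)    # memory is capped at m + n
gpmr!(solver, A, B, b, c)
x, y = Krylov.solution(solver)  # GPMR returns both parts of the solution
```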
"""
solution(solver)
@@ -1674,59 +1789,64 @@ Return the number of operator-vector products with `A'` performed by the Krylov
function Atprod end
for (KS, fun, nsol, nA, nAt, warm_start) in [
- (LsmrSolver , :lsmr! , 1, 1, 1, false)
- (CgsSolver , :cgs! , 1, 2, 0, true )
- (UsymlqSolver , :usymlq! , 1, 1, 1, true )
- (LnlqSolver , :lnlq! , 2, 1, 1, false)
- (BicgstabSolver , :bicgstab! , 1, 2, 0, true )
- (CrlsSolver , :crls! , 1, 1, 1, false)
- (LsqrSolver , :lsqr! , 1, 1, 1, false)
- (MinresSolver , :minres! , 1, 1, 0, true )
- (CgneSolver , :cgne! , 1, 1, 1, false)
- (DqgmresSolver , :dqgmres! , 1, 1, 0, true )
- (SymmlqSolver , :symmlq! , 1, 1, 0, true )
- (TrimrSolver , :trimr! , 2, 1, 1, true )
- (UsymqrSolver , :usymqr! , 1, 1, 1, true )
- (BilqrSolver , :bilqr! , 2, 1, 1, true )
- (CrSolver , :cr! , 1, 1, 0, true )
- (CraigmrSolver , :craigmr! , 2, 1, 1, false)
- (TricgSolver , :tricg! , 2, 1, 1, true )
- (CraigSolver , :craig! , 2, 1, 1, false)
- (DiomSolver , :diom! , 1, 1, 0, true )
- (LslqSolver , :lslq! , 1, 1, 1, false)
- (TrilqrSolver , :trilqr! , 2, 1, 1, true )
- (CrmrSolver , :crmr! , 1, 1, 1, false)
- (CgSolver , :cg! , 1, 1, 0, true )
- (CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false)
- (CglsSolver , :cgls! , 1, 1, 1, false)
- (CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true )
- (BilqSolver , :bilq! , 1, 1, 1, true )
- (MinresQlpSolver , :minres_qlp! , 1, 1, 0, true )
- (QmrSolver , :qmr! , 1, 1, 1, true )
- (GmresSolver , :gmres! , 1, 1, 0, true )
- (FomSolver , :fom! , 1, 1, 0, true )
- (GpmrSolver , :gpmr! , 2, 1, 0, true )
+ (:LsmrSolver , :lsmr! , 1, 1, 1, false)
+ (:CgsSolver , :cgs! , 1, 2, 0, true )
+ (:UsymlqSolver , :usymlq! , 1, 1, 1, true )
+ (:LnlqSolver , :lnlq! , 2, 1, 1, false)
+ (:BicgstabSolver , :bicgstab! , 1, 2, 0, true )
+ (:CrlsSolver , :crls! , 1, 1, 1, false)
+ (:LsqrSolver , :lsqr! , 1, 1, 1, false)
+ (:MinresSolver , :minres! , 1, 1, 0, true )
+ (:CgneSolver , :cgne! , 1, 1, 1, false)
+ (:DqgmresSolver , :dqgmres! , 1, 1, 0, true )
+ (:SymmlqSolver , :symmlq! , 1, 1, 0, true )
+ (:TrimrSolver , :trimr! , 2, 1, 1, true )
+ (:UsymqrSolver , :usymqr! , 1, 1, 1, true )
+ (:BilqrSolver , :bilqr! , 2, 1, 1, true )
+ (:CrSolver , :cr! , 1, 1, 0, true )
+ (:CraigmrSolver , :craigmr! , 2, 1, 1, false)
+ (:TricgSolver , :tricg! , 2, 1, 1, true )
+ (:CraigSolver , :craig! , 2, 1, 1, false)
+ (:DiomSolver , :diom! , 1, 1, 0, true )
+ (:LslqSolver , :lslq! , 1, 1, 1, false)
+ (:TrilqrSolver , :trilqr! , 2, 1, 1, true )
+ (:CrmrSolver , :crmr! , 1, 1, 1, false)
+ (:CgSolver , :cg! , 1, 1, 0, true )
+ (:CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false)
+ (:CglsSolver , :cgls! , 1, 1, 1, false)
+ (:CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true )
+ (:BilqSolver , :bilq! , 1, 1, 1, true )
+ (:MinresQlpSolver , :minres_qlp! , 1, 1, 0, true )
+ (:QmrSolver , :qmr! , 1, 1, 1, true )
+ (:GmresSolver , :gmres! , 1, 1, 0, true )
+ (:FgmresSolver , :fgmres! , 1, 1, 0, true )
+ (:FomSolver , :fom! , 1, 1, 0, true )
+ (:GpmrSolver , :gpmr! , 2, 1, 0, true )
]
@eval begin
- @inline solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...)
- @inline statistics(solver :: $KS) = solver.stats
- @inline niterations(solver :: $KS) = solver.stats.niter
- @inline Aprod(solver :: $KS) = $nA * solver.stats.niter
- @inline Atprod(solver :: $KS) = $nAt * solver.stats.niter
+ size(solver :: $KS) = solver.m, solver.n
+ statistics(solver :: $KS) = solver.stats
+ niterations(solver :: $KS) = solver.stats.niter
+ Aprod(solver :: $KS) = $nA * solver.stats.niter
+ Atprod(solver :: $KS) = $nAt * solver.stats.niter
if $KS == GpmrSolver
- @inline Bprod(solver :: $KS) = solver.stats.niter
+ Bprod(solver :: $KS) = solver.stats.niter
+ end
+ nsolution(solver :: $KS) = $nsol
+ if $nsol == 1
+ solution(solver :: $KS) = solver.x
+ solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.")
+ end
+ if $nsol == 2
+ solution(solver :: $KS) = solver.x, solver.y
+ solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.")
end
- @inline nsolution(solver :: $KS) = $nsol
- ($nsol == 1) && @inline solution(solver :: $KS) = solver.x
- ($nsol == 2) && @inline solution(solver :: $KS) = solver.x, solver.y
- ($nsol == 1) && @inline solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.")
- ($nsol == 2) && @inline solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.")
if $KS ∈ (BilqrSolver, TrilqrSolver)
- @inline issolved_primal(solver :: $KS) = solver.stats.solved_primal
- @inline issolved_dual(solver :: $KS) = solver.stats.solved_dual
- @inline issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver)
+ issolved_primal(solver :: $KS) = solver.stats.solved_primal
+ issolved_dual(solver :: $KS) = solver.stats.solved_dual
+ issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver)
else
- @inline issolved(solver :: $KS) = solver.stats.solved
+ issolved(solver :: $KS) = solver.stats.solved
end
if $warm_start
if $KS in (BilqrSolver, TrilqrSolver, TricgSolver, TrimrSolver, GpmrSolver)
@@ -1758,45 +1878,70 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [
end
end
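
The `@eval` loop above now also generates `size(solver)` from the new `m` and `n` fields, next to the existing accessors. A sketch of the generated API (names are qualified with `Krylov.` in case the helpers are not exported):

```julia
using Krylov, LinearAlgebra

A = rand(6, 6); A = A'A + I   # positive definite, so CG applies
b = rand(6)
solver = CgSolver(A, b)
cg!(solver, A, b)

Krylov.size(solver)           # (6, 6), from the new m and n fields
Krylov.niterations(solver)    # solver.stats.niter
Krylov.Aprod(solver)          # operator-vector products with A
Krylov.issolved(solver)
Krylov.solution(solver)       # the approximate solution x
```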
+function ksizeof(attribute)
+ if isa(attribute, Vector{<:AbstractVector}) && !isempty(attribute)
+ # A vector of vectors is a vector of pointers in Julia.
+ # All vectors inside a vector have the same size in Krylov.jl
+ size_attribute = sizeof(attribute) + length(attribute) * ksizeof(attribute[1])
+ else
+ size_attribute = sizeof(attribute)
+ end
+ return size_attribute
+end
+
+function sizeof(stats_solver :: Union{KrylovStats, KrylovSolver})
+ type = typeof(stats_solver)
+ nfields = fieldcount(type)
+ storage = 0
+ for i = 1:nfields
+ field_i = getfield(stats_solver, i)
+ size_i = ksizeof(field_i)
+ storage += size_i
+ end
+ return storage
+end
+
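With `ksizeof` handling vectors of vectors, `sizeof` reports the full memory footprint of a workspace or stats object. A sketch (the call is qualified since the method is defined inside the module):

```julia
using Krylov

solver = GmresSolver(100, 100, 20, Vector{Float64})

# Total bytes, including the 20 basis vectors referenced by solver.V.
nbytes = Krylov.sizeof(solver)
Base.format_bytes(nbytes)   # human-readable string, as used by show below
```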
"""
show(io, solver; show_stats=true)
Statistics of `solver` are displayed if `show_stats` is set to true.
"""
-function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
workspace = typeof(solver)
- name_solver = workspace.name.wrapper
- l1 = max(length(string(name_solver)), 10) # length("warm_start") = 10
- l2 = length(string(S)) + 8 # length("Vector{}") = 8
+ name_solver = string(workspace.name.name)
+ name_stats = string(typeof(solver.stats).name.name)
+ nbytes = sizeof(solver)
+ storage = format_bytes(nbytes)
architecture = S <: Vector ? "CPU" : "GPU"
- format = Printf.Format("│%$(l1)s│%$(l2)s│%18s│\n")
- format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%18s│\n")
- @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^18)
- Printf.format(io, format, name_solver, "Precision: $FC", "Architecture: $architecture")
- @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18)
+ l1 = max(length(name_solver), length(string(FC)) + 11) # length("Precision: ") = 11
+ nchar = workspace <: Union{CgLanczosShiftSolver, FomSolver, DiomSolver, DqgmresSolver, GmresSolver, FgmresSolver, GpmrSolver} ? 8 : 0 # length("Vector{}") = 8
+ l2 = max(ndigits(solver.m) + 7, length(architecture) + 14, length(string(S)) + nchar) # length("nrows: ") = 7 and length("Architecture: ") = 14
+ l2 = max(l2, length(name_stats) + 2 + length(string(T))) # length("{}") = 2
+ l3 = max(ndigits(solver.n) + 7, length(storage) + 9) # length("Storage: ") = 9 and length("ncols: ") = 7
+ format = Printf.Format("│%$(l1)s│%$(l2)s│%$(l3)s│\n")
+ format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%$(l3)s│\n")
+ @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^l3)
+ Printf.format(io, format, "$(name_solver)", "nrows: $(solver.m)", "ncols: $(solver.n)")
+ @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3)
+ Printf.format(io, format, "Precision: $FC", "Architecture: $architecture","Storage: $storage")
+ @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3)
Printf.format(io, format, "Attribute", "Type", "Size")
- @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18)
- for i=1:fieldcount(workspace)-1 # show stats seperately
- type_i = fieldtype(workspace, i)
+ @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3)
+ for i=1:fieldcount(workspace)
name_i = fieldname(workspace, i)
- len = if type_i <: AbstractVector
- field_i = getfield(solver, name_i)
- ni = length(field_i)
- if eltype(type_i) <: AbstractVector
- "$(ni) x $(length(field_i[1]))"
- else
- length(field_i)
- end
- else
- 0
- end
- if (name_i in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV")
- Printf.format(io, format2, string(name_i), type_i, len)
+ type_i = fieldtype(workspace, i)
+ field_i = getfield(solver, name_i)
+ size_i = ksizeof(field_i)
+ if (name_i::Symbol in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV")
+ (size_i ≠ 0) && Printf.format(io, format2, string(name_i), type_i, format_bytes(size_i))
else
- Printf.format(io, format, string(name_i), type_i, len)
+ (size_i ≠ 0) && Printf.format(io, format, string(name_i), type_i, format_bytes(size_i))
end
end
- @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^18)
- show_stats && show(io, solver.stats)
+ @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^l3)
+ if show_stats
+ @printf(io, "\n")
+ show(io, solver.stats)
+ end
return nothing
end
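
The rewritten `show` adds the dimensions, storage and stats type to the header and prints per-field byte counts instead of lengths. A usage sketch:

```julia
using Krylov

solver = LsmrSolver(10, 6, Vector{Float64})

# Summary table only; show_stats=true (the default) also prints the stats block.
show(stdout, solver, show_stats=false)
```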
diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl
index a662fa0a0..ba217a597 100644
--- a/src/krylov_stats.jl
+++ b/src/krylov_stats.jl
@@ -1,3 +1,6 @@
+export KrylovStats, SimpleStats, LsmrStats, LanczosStats, LanczosShiftStats,
+SymmlqStats, AdjointStats, LNLQStats, LSLQStats
+
"Abstract type for statistics returned by a solver"
abstract type KrylovStats{T} end
@@ -9,6 +12,7 @@ Type for statistics returned by the majority of Krylov solvers, the attributes a
- residuals
- Aresiduals
- Acond
+- timer
- status
"""
mutable struct SimpleStats{T} <: KrylovStats{T}
@@ -18,9 +22,16 @@ mutable struct SimpleStats{T} <: KrylovStats{T}
residuals :: Vector{T}
Aresiduals :: Vector{T}
Acond :: Vector{T}
+ timer :: Float64
status :: String
end
+function reset!(stats :: SimpleStats)
+ empty!(stats.residuals)
+ empty!(stats.Aresiduals)
+ empty!(stats.Acond)
+end
+
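Each stats structure gains a `timer` field and a hand-written `reset!` that replaces the old generated one. A sketch, assuming the solvers fill `timer` the same way `lnlq!` does later in this diff:

```julia
using Krylov

A = rand(6, 6)
b = rand(6)
x, stats = gmres(A, b, history=true)

stats.timer            # elapsed seconds, stored in the new field
Krylov.reset!(stats)   # empties residuals, Aresiduals and Acond in place
```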
"""
Type for statistics returned by LSMR. The attributes are:
- niter
@@ -31,6 +42,7 @@ Type for statistics returned by LSMR. The attributes are:
- Acond
- Anorm
- xNorm
+- timer
- status
"""
mutable struct LsmrStats{T} <: KrylovStats{T}
@@ -44,9 +56,15 @@ mutable struct LsmrStats{T} <: KrylovStats{T}
Acond :: T
Anorm :: T
xNorm :: T
+ timer :: Float64
status :: String
end
+function reset!(stats :: LsmrStats)
+ empty!(stats.residuals)
+ empty!(stats.Aresiduals)
+end
+
"""
Type for statistics returned by CG-LANCZOS, the attributes are:
- niter
@@ -55,6 +73,7 @@ Type for statistics returned by CG-LANCZOS, the attributes are:
- indefinite
- Anorm
- Acond
+- timer
- status
"""
mutable struct LanczosStats{T} <: KrylovStats{T}
@@ -64,9 +83,14 @@ mutable struct LanczosStats{T} <: KrylovStats{T}
indefinite :: Bool
Anorm :: T
Acond :: T
+ timer :: Float64
status :: String
end
+function reset!(stats :: LanczosStats)
+ empty!(stats.residuals)
+end
+
"""
Type for statistics returned by CG-LANCZOS with shifts, the attributes are:
- niter
@@ -75,6 +99,7 @@ Type for statistics returned by CG-LANCZOS with shifts, the attributes are:
- indefinite
- Anorm
- Acond
+- timer
- status
"""
mutable struct LanczosShiftStats{T} <: KrylovStats{T}
@@ -84,6 +109,7 @@ mutable struct LanczosShiftStats{T} <: KrylovStats{T}
indefinite :: BitVector
Anorm :: T
Acond :: T
+ timer :: Float64
status :: String
end
@@ -103,6 +129,7 @@ Type for statistics returned by SYMMLQ, the attributes are:
- errorscg
- Anorm
- Acond
+- timer
- status
"""
mutable struct SymmlqStats{T} <: KrylovStats{T}
@@ -114,9 +141,17 @@ mutable struct SymmlqStats{T} <: KrylovStats{T}
errorscg :: Vector{Union{T, Missing}}
Anorm :: T
Acond :: T
+ timer :: Float64
status :: String
end
+function reset!(stats :: SymmlqStats)
+ empty!(stats.residuals)
+ empty!(stats.residualscg)
+ empty!(stats.errors)
+ empty!(stats.errorscg)
+end
+
"""
Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the attributes are:
- niter
@@ -124,6 +159,7 @@ Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the at
- solved_dual
- residuals_primal
- residuals_dual
+- timer
- status
"""
mutable struct AdjointStats{T} <: KrylovStats{T}
@@ -132,9 +168,15 @@ mutable struct AdjointStats{T} <: KrylovStats{T}
solved_dual :: Bool
residuals_primal :: Vector{T}
residuals_dual :: Vector{T}
+ timer :: Float64
status :: String
end
+function reset!(stats :: AdjointStats)
+ empty!(stats.residuals_primal)
+ empty!(stats.residuals_dual)
+end
+
"""
Type for statistics returned by the LNLQ method, the attributes are:
- niter
@@ -143,6 +185,7 @@ Type for statistics returned by the LNLQ method, the attributes are:
- error_with_bnd
- error_bnd_x
- error_bnd_y
+- timer
- status
"""
mutable struct LNLQStats{T} <: KrylovStats{T}
@@ -152,9 +195,16 @@ mutable struct LNLQStats{T} <: KrylovStats{T}
error_with_bnd :: Bool
error_bnd_x :: Vector{T}
error_bnd_y :: Vector{T}
+ timer :: Float64
status :: String
end
+function reset!(stats :: LNLQStats)
+ empty!(stats.residuals)
+ empty!(stats.error_bnd_x)
+ empty!(stats.error_bnd_y)
+end
+
"""
Type for statistics returned by the LSLQ method, the attributes are:
- niter
@@ -166,6 +216,7 @@ Type for statistics returned by the LSLQ method, the attributes are:
- error_with_bnd
- err_ubnds_lq
- err_ubnds_cg
+- timer
- status
"""
mutable struct LSLQStats{T} <: KrylovStats{T}
@@ -178,9 +229,18 @@ mutable struct LSLQStats{T} <: KrylovStats{T}
error_with_bnd :: Bool
err_ubnds_lq :: Vector{T}
err_ubnds_cg :: Vector{T}
+ timer :: Float64
status :: String
end
+function reset!(stats :: LSLQStats)
+ empty!(stats.residuals)
+ empty!(stats.Aresiduals)
+ empty!(stats.err_lbnds)
+ empty!(stats.err_ubnds_lq)
+ empty!(stats.err_ubnds_cg)
+end
+
import Base.show
special_fields = Dict(
@@ -192,45 +252,28 @@ special_fields = Dict(
:err_ubnds_cg => "error bound CG",
)
-for f in ["Simple", "Lsmr", "Adjoint", "LNLQ", "LSLQ", "Lanczos", "Symmlq"]
- T = Meta.parse("Krylov." * f * "Stats{S}")
-
- @eval function empty_field!(stats :: $T, i, ::Type{Vector{Si}}) where {S, Si}
- statfield = getfield(stats, i)
- empty!(statfield)
- end
- @eval empty_field!(stats :: $T, i, type) where S = stats
-
- @eval function reset!(stats :: $T) where S
- nfield = length($T.types)
- for i = 1 : nfield
- type = fieldtype($T, i)
- empty_field!(stats, i, type)
+function show(io :: IO, stats :: KrylovStats)
+ kst = typeof(stats)
+ s = string(kst.name.name) * "\n"
+ nfield = fieldcount(kst)
+ for i = 1 : nfield
+ field = fieldname(kst, i)
+ field_name = if field ∈ keys(special_fields)
+ special_fields[field]
+ else
+ replace(string(field), "_" => " ")
end
- end
-end
-
-for f in ["Simple", "Lsmr", "Lanczos", "LanczosShift", "Symmlq", "Adjoint", "LNLQ", "LSLQ"]
- T = Meta.parse("Krylov." * f * "Stats{S}")
-
- @eval function show(io :: IO, stats :: $T) where S
- s = $f * " stats\n"
- nfield = length($T.types)
- for i = 1 : nfield
- field = fieldname($T, i)
- field_name = if field ∈ keys(special_fields)
- special_fields[field]
- else
- replace(string(field), "_" => " ")
- end
- s *= " " * field_name * ":"
- statfield = getfield(stats, field)
- if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat}
- s *= @sprintf " %s\n" vec2str(statfield)
- else
- s *= @sprintf " %s\n" statfield
- end
+ s *= " " * field_name * ":"
+ statfield = getfield(stats, field)
+ if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat}
+ s *= @sprintf " %s\n" vec2str(statfield)
+ elseif field_name == "timer"
+ (statfield < 1e-3) && (s *= @sprintf " %.2fμs\n" 1e6*statfield)
+ (1e-3 ≤ statfield < 1.00) && (s *= @sprintf " %.2fms\n" 1e3*statfield)
+ (statfield ≥ 1.00) && (s *= @sprintf " %.2fs\n" statfield)
+ else
+ s *= @sprintf " %s\n" statfield
end
- print(io, s)
end
+ print(io, s)
end
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index 6f0c1c382..fb554395e 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -1,3 +1,8 @@
+export kstdout
+
+"Default I/O stream for all Krylov methods."
+const kstdout = Core.stdout
+
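`kstdout` becomes the default sink for all solver logging, and any `IO` can be substituted through the new `iostream` keyword. A sketch using `lnlq`, whose updated signature appears later in this diff:

```julia
using Krylov

A = rand(4, 6)
b = rand(4)

io = IOBuffer()
lnlq(A, b, verbose=1, iostream=io)      # log goes to io instead of kstdout
first(split(String(take!(io)), '\n'))   # "LNLQ: system of 4 equations in 6 variables"
```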
"""
FloatOrComplex{T}
Union type of `T` and `Complex{T}` where T is an `AbstractFloat`.
@@ -92,8 +97,8 @@ function sym_givens(a :: Complex{T}, b :: Complex{T}) where T <: AbstractFloat
return (c, s, ρ)
end
-@inline sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b))
-@inline sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b)
+sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b))
+sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b)
"""
roots = roots_quadratic(q₂, q₁, q₀; nitref)
@@ -111,79 +116,97 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T;
# Case where q(x) is linear.
if q₂ == zero(T)
if q₁ == zero(T)
- root = [zero(T)]
- q₀ == zero(T) || (root = T[])
+ q₀ == zero(T) || error("The quadratic `q` doesn't have real roots.")
+ root = zero(T)
else
- root = [-q₀ / q₁]
+ root = -q₀ / q₁
end
- return root
+ return (root, root)
end
# Case where q(x) is indeed quadratic.
rhs = √eps(T) * q₁ * q₁
if abs(q₀ * q₂) > rhs
ρ = q₁ * q₁ - 4 * q₂ * q₀
- ρ < 0 && return T[]
+ ρ < 0 && return error("The quadratic `q` doesn't have real roots.")
d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2
- roots = [d / q₂, q₀ / d]
+ root1 = d / q₂
+ root2 = q₀ / d
else
# Ill-conditioned quadratic.
- roots = [-q₁ / q₂, zero(T)]
+ root1 = -q₁ / q₂
+ root2 = zero(T)
end
# Perform a few Newton iterations to improve accuracy.
- for k = 1 : 2
- root = roots[k]
- for it = 1 : nitref
- q = (q₂ * root + q₁) * root + q₀
- dq = 2 * q₂ * root + q₁
- dq == zero(T) && continue
- root = root - q / dq
- end
- roots[k] = root
+ for it = 1 : nitref
+ q = (q₂ * root1 + q₁) * root1 + q₀
+ dq = 2 * q₂ * root1 + q₁
+ dq == zero(T) && continue
+ root1 = root1 - q / dq
end
- return roots
-end
+ for it = 1 : nitref
+ q = (q₂ * root2 + q₁) * root2 + q₀
+ dq = 2 * q₂ * root2 + q₁
+ dq == zero(T) && continue
+ root2 = root2 - q / dq
+ end
+ return (root1, root2)
+end
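
`roots_quadratic` now returns a 2-tuple instead of allocating a `Vector`, and raises an error when no real root exists. A quick check of the new contract:

```julia
using Krylov

# x² - 3x + 2 = (x - 1)(x - 2): both roots, no allocation.
Krylov.roots_quadratic(1.0, -3.0, 2.0)   # (2.0, 1.0)

# A linear q(x) = 2x - 4 returns its single root twice.
Krylov.roots_quadratic(0.0, 2.0, -4.0)   # (2.0, 2.0)
```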
"""
- roots = to_boundary(x, d, radius; flip, xNorm2, dNorm2)
-
-Given a trust-region radius `radius`, a vector `x` lying inside the
-trust-region and a direction `d`, return `σ1` and `σ2` such that
-
- ‖x + σi d‖ = radius, i = 1, 2
+ s = vec2str(x; ndisp)
-in the Euclidean norm. If known, ‖x‖² may be supplied in `xNorm2`.
+Display an array in the form
-If `flip` is set to `true`, `σ1` and `σ2` are computed such that
+ [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ]
- ‖x - σi d‖ = radius, i = 1, 2.
+with (ndisp - 1)/2 elements on each side.
"""
-function to_boundary(x :: Vector{T}, d :: Vector{T},
- radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number
- radius > 0 || error("radius must be positive")
-
- # ‖d‖² σ² + 2 xᵀd σ + (‖x‖² - radius²).
- xd = dot(x, d)
- flip && (xd = -xd)
- dNorm2 == zero(T) && (dNorm2 = dot(d, d))
- dNorm2 == zero(T) && error("zero direction")
- xNorm2 == zero(T) && (xNorm2 = dot(x, x))
- (xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius))
- roots = roots_quadratic(dNorm2, 2 * xd, xNorm2 - radius * radius)
- return roots # `σ1` and `σ2`
+function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing}
+ n = length(x)
+ if n ≤ ndisp
+ ndisp = n
+ nside = n
+ else
+ nside = max(1, div(ndisp - 1, 2))
+ end
+ s = "["
+ i = 1
+ while i ≤ nside
+ if x[i] !== missing
+ s *= @sprintf("%8.1e ", x[i])
+ else
+ s *= " ✗✗✗✗ "
+ end
+ i += 1
+ end
+ if i ≤ div(n, 2)
+ s *= "... "
+ end
+ i = max(i, n - nside + 1)
+ while i ≤ n
+ if x[i] !== missing
+ s *= @sprintf("%8.1e ", x[i])
+ else
+ s *= " ✗✗✗✗ "
+ end
+ i += 1
+ end
+ s *= "]"
+ return s
end
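
`vec2str` simply moved within the file; its behaviour is unchanged. For reference, a sketch of its output (spacing approximate):

```julia
using Krylov

Krylov.vec2str([0.1, 0.2, 0.3])
# "[ 1.0e-01  2.0e-01  3.0e-01 ]"

Krylov.vec2str(collect(1.0:10.0), ndisp=5)
# "[ 1.0e+00  2.0e+00 ...  9.0e+00  1.0e+01 ]"
```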
"""
S = ktypeof(v)
-Return a dense storage type `S` based on the type of `v`.
+Return the most relevant storage type `S` based on the type of `v`.
"""
function ktypeof end
-function ktypeof(v::S) where S <: DenseVector
- return S
+function ktypeof(v::S) where S <: AbstractVector
+ return S
end
function ktypeof(v::S) where S <: SparseVector
@@ -195,90 +218,128 @@ function ktypeof(v::S) where S <: AbstractSparseVector
return S.types[2] # return `CuVector` for a `CuSparseVector`
end
-function ktypeof(v::S) where S <: AbstractVector
- T = eltype(S)
- return Vector{T} # BlockArrays, FillArrays, etc...
+function ktypeof(v::S) where S <: SubArray
+ vp = v.parent
+ if isa(vp, DenseMatrix)
+ M = typeof(vp)
+ return matrix_to_vector(M) # view of a row or a column of a matrix
+ else
+ return ktypeof(vp) # view of a vector
+ end
end
-function ktypeof(v::S) where S <: SubArray
- return ktypeof(v.parent)
+"""
+ M = vector_to_matrix(S)
+
+Return the dense matrix storage type `M` related to the dense vector storage type `S`.
+"""
+function vector_to_matrix(::Type{S}) where S <: DenseVector
+ T = hasproperty(S, :body) ? S.body : S
+ par = T.parameters
+ npar = length(par)
+ (2 ≤ npar ≤ 3) || error("Type $S is not supported.")
+ if npar == 2
+ M = T.name.wrapper{par[1], 2}
+ else
+ M = T.name.wrapper{par[1], 2, par[3]}
+ end
+ return M
+end
+
+"""
+ S = matrix_to_vector(M)
+
+Return the dense vector storage type `S` related to the dense matrix storage type `M`.
+"""
+function matrix_to_vector(::Type{M}) where M <: DenseMatrix
+ T = hasproperty(M, :body) ? M.body : M
+ par = T.parameters
+ npar = length(par)
+ (2 ≤ npar ≤ 3) || error("Type $M is not supported.")
+ if npar == 2
+ S = T.name.wrapper{par[1], 1}
+ else
+ S = T.name.wrapper{par[1], 1, par[3]}
+ end
+ return S
end
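
`vector_to_matrix` and `matrix_to_vector` map between dense vector and matrix storage types by rebuilding the type with a different rank; `ktypeof` uses the latter for views of matrix rows and columns. A CPU-only sketch (on GPU the same mapping relates e.g. `CuVector` and `CuMatrix`):

```julia
using Krylov

Krylov.vector_to_matrix(Vector{Float64})      # Matrix{Float64}
Krylov.matrix_to_vector(Matrix{ComplexF32})   # Vector{ComplexF32}
```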
"""
v = kzeros(S, n)
-Create an AbstractVector of storage type `S` of length `n` only composed of zero.
+Create a vector of storage type `S` of length `n` only composed of zero.
"""
-@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
+kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
"""
v = kones(S, n)
-Create an AbstractVector of storage type `S` of length `n` only composed of one.
+Create a vector of storage type `S` of length `n` only composed of one.
"""
-@inline kones(S, n) = fill!(S(undef, n), one(eltype(S)))
+kones(S, n) = fill!(S(undef, n), one(eltype(S)))
-@inline allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n))
+allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)::S) && (solver.:($v)::S = S(undef, n))
-@inline kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
+kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
-@inline mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x)
+ktimer(start_time::UInt64) = (time_ns() - start_time) / 1e9
-@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy)
-@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy)
-@inline krylov_dot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = dot(x, y)
+mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x)
-@inline krylov_dotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = krylov_dot(n, x, dx, y, dy)
-@inline krylov_dotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(krylov_dot(n, x, dx, y, dy))
+kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy)
+kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy)
+kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y)
-@inline krylov_norm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx)
-@inline krylov_norm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: Number = norm(x)
+kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy)
+kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy))
-@inline krylov_scal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx)
-@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: Number = (x .*= s)
-@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = krylov_scal!(n, Complex{T}(s), x, dx)
+knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx)
+knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpy!(s, x, y)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpy!(n, Complex{T}(s), x, dx, y, dy)
+kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx)
+kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s)
+kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpby!(s, x, t, y)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, t, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, s, x, dx, Complex{T}(t), y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy)
+kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy)
+kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y)
+kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy)
-@inline krylov_copy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy)
-@inline krylov_copy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = copyto!(y, x)
+kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy)
+kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, Complex{T}(t), y, dy)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy)
-# the macros are just for readability, so we don't have to write the increments (always equal to 1)
+kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy)
+kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x)
+# the macros are just for readability, so we don't have to write the increments (always equal to 1)
macro kdot(n, x, y)
- return esc(:(krylov_dot($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kdot($n, $x, 1, $y, 1)))
end
macro kdotr(n, x, y)
- return esc(:(krylov_dotr($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kdotr($n, $x, 1, $y, 1)))
end
macro knrm2(n, x)
- return esc(:(krylov_norm2($n, $x, 1)))
+ return esc(:(Krylov.knrm2($n, $x, 1)))
end
macro kscal!(n, s, x)
- return esc(:(krylov_scal!($n, $s, $x, 1)))
+ return esc(:(Krylov.kscal!($n, $s, $x, 1)))
end
macro kaxpy!(n, s, x, y)
- return esc(:(krylov_axpy!($n, $s, $x, 1, $y, 1)))
+ return esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1)))
end
macro kaxpby!(n, s, x, t, y)
- return esc(:(krylov_axpby!($n, $s, $x, 1, $t, $y, 1)))
+ return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1)))
end
macro kcopy!(n, x, y)
- return esc(:(krylov_copy!($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1)))
end
macro kswap(x, y)
@@ -294,44 +355,48 @@ macro kref!(n, x, y, c, s)
end
"""
- s = vec2str(x; ndisp)
+ roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2)
-Display an array in the form
+Given a trust-region radius `radius`, a vector `x` lying inside the
+trust-region and a direction `d`, return `σ1` and `σ2` such that
- [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ]
+ ‖x + σi d‖ = radius, i = 1, 2
-with (ndisp - 1)/2 elements on each side.
+in the Euclidean norm.
+`n` is the length of vectors `x` and `d`.
+If known, ‖x‖² and ‖d‖² may be supplied with `xNorm2` and `dNorm2`.
+
+If `flip` is set to `true`, `σ1` and `σ2` are computed such that
+
+ ‖x - σi d‖ = radius, i = 1, 2.
"""
-function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing}
- n = length(x)
- if n ≤ ndisp
- ndisp = n
- nside = n
- else
- nside = max(1, div(ndisp - 1, 2))
- end
- s = "["
- i = 1
- while i ≤ nside
- if x[i] !== missing
- s *= @sprintf("%8.1e ", x[i])
- else
- s *= " ✗✗✗✗ "
- end
- i += 1
- end
- if i ≤ div(n, 2)
- s *= "... "
- end
- i = max(i, n - nside + 1)
- while i ≤ n
- if x[i] !== missing
- s *= @sprintf("%8.1e ", x[i])
- else
- s *= " ✗✗✗✗ "
- end
- i += 1
- end
- s *= "]"
- return s
+function to_boundary(n :: Int, x :: AbstractVector{FC}, d :: AbstractVector{FC}, radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ radius > 0 || error("radius must be positive")
+
+ # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²).
+ rxd = @kdotr(n, x, d)
+ flip && (rxd = -rxd)
+ dNorm2 == zero(T) && (dNorm2 = @kdotr(n, d, d))
+ dNorm2 == zero(T) && error("zero direction")
+ xNorm2 == zero(T) && (xNorm2 = @kdotr(n, x, x))
+ radius2 = radius * radius
+ (xNorm2 ≤ radius2) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius2))
+
+ # q₂ = ‖d‖², q₁ = xᴴd + dᴴx, q₀ = ‖x‖² - Δ²
+ # ‖x‖² ≤ Δ² ⟹ (q₁)² - 4 * q₂ * q₀ ≥ 0
+ roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius2)
+ return roots # `σ1` and `σ2`
+end
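
`to_boundary` now takes the vector length `n` explicitly and uses `@kdotr`, so it also works for complex vectors. A real-valued sketch:

```julia
using Krylov

x = [0.5, 0.0]   # inside the unit ball
d = [1.0, 0.0]

# σ such that ‖x + σd‖ = 1: the two roots are σ = -1.5 and σ = 0.5.
Krylov.to_boundary(2, x, d, 1.0)   # (-1.5, 0.5)
```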
+
+"""
+ arguments = extract_parameters(ex::Expr)
+
+Extract the arguments of an expression that is a keyword parameter tuple.
+Implementation suggested by Mitchell J. O'Sullivan (@mosullivan93).
+"""
+function extract_parameters(ex::Expr)
+ Meta.isexpr(ex, :tuple, 1) &&
+ Meta.isexpr((@inbounds p = ex.args[1]), :parameters) &&
+ all(Base.Docs.validcall, p.args) || throw(ArgumentError("Given expression is not a kw parameter tuple [e.g. :(; x)]: $ex"))
+ return p.args
end
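
`extract_parameters` unwraps the `Expr(:parameters, ...)` layer of a keyword tuple such as `:(; x)`; the solver files below use it to build argument lists once and splat them into several method signatures. A sketch:

```julia
using Krylov

ex = :(; atol::T = √eps(T))
Krylov.extract_parameters(ex)   # one-element vector holding the `atol` kw expression
```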
diff --git a/src/lnlq.jl b/src/lnlq.jl
index a1f890de2..f59f5daf4 100644
--- a/src/lnlq.jl
+++ b/src/lnlq.jl
@@ -9,9 +9,9 @@
# and is equivalent to applying the SYMMLQ method
# to the linear system
#
-# AAᵀy = b with x = Aᵀy and can be reformulated as
+# AAᴴy = b with x = Aᴴy and can be reformulated as
#
-# [ -I Aᵀ ][ x ] = [ 0 ]
+# [ -I Aᴴ ][ x ] = [ 0 ]
# [ A ][ y ] [ b ].
#
# This method is based on the Golub-Kahan bidiagonalization process and is described in
@@ -26,10 +26,14 @@ export lnlq, lnlq!
"""
(x, y, stats) = lnlq(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T), σ::T=zero(T),
- atol::T=√eps(T), rtol::T=√eps(T), etolx::T=√eps(T), etoly::T=√eps(T), itmax::Int=0,
- transfer_to_craig::Bool=true, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ transfer_to_craig::Bool=true,
+ sqd::Bool=false, λ::T=zero(T),
+ σ::T=zero(T), utolx::T=√eps(T),
+ utoly::T=√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -38,17 +42,17 @@ Find the least-norm solution of the consistent linear system
Ax + λ²y = b
-using the LNLQ method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LNLQ method, where λ ≥ 0 is a regularization parameter.
For a system in the form Ax = b, the LNLQ method is equivalent to applying
-SYMMLQ to AAᵀy = b and recovering x = Aᵀy but is more stable.
+SYMMLQ to AAᴴy = b and recovering x = Aᴴy but is more stable.
Note that y are the Lagrange multipliers of the least-norm problem
minimize ‖x‖ s.t. Ax = b.
If `λ > 0`, LNLQ solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -59,12 +63,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, LNLQ solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -75,12 +79,40 @@ In this case, `M` can still be specified and indicates the weighted norm in whic
In this implementation, both the x and y-parts of the solution are returned.
-`etolx` and `etoly` are tolerances on the upper bound of the distance to the solution ‖x-xₛ‖ and ‖y-yₛ‖, respectively.
+`utolx` and `utoly` are tolerances on the upper bound of the distance to the solution ‖x-x*‖ and ‖y-y*‖, respectively.
The bound is valid if λ>0 or σ>0 where σ should be strictly smaller than the smallest positive singular value.
For instance σ:=(1-1e-7)σₘᵢₙ.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `transfer_to_craig`: transfer from the LNLQ point to the CRAIG point, when it exists. The transfer is based on the residual norm;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`;
+* `utolx`: tolerance on the upper bound on the distance to the solution `‖x-x*‖`;
+* `utoly`: tolerance on the upper bound on the distance to the solution `‖y-y*‖`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in a [`LNLQStats`](@ref) structure.
#### Reference
@@ -88,12 +120,6 @@ and `false` otherwise.
"""
function lnlq end
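
A hedged usage sketch of the updated signature (random data; `etolx`/`etoly` are renamed `utolx`/`utoly`, and `timemax` bounds the wall-clock time):

```julia
using Krylov

A = rand(4, 6)   # m = 4, n = 6: an underdetermined, consistent system
b = rand(4)

x, y, stats = lnlq(A, b, utolx=1e-8, utoly=1e-8, timemax=10.0, history=true)
stats.timer      # filled in by the new timer machinery
```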
-function lnlq(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = LnlqSolver(A, b)
- lnlq!(solver, A, b; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = lnlq!(solver::LnlqSolver, A, b; kwargs...)
@@ -103,389 +129,432 @@ See [`LnlqSolver`](@ref) for more details about the `solver`.
"""
function lnlq! end
-function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), σ :: T=zero(T),
- atol :: T=√eps(T), rtol :: T=√eps(T), etolx :: T=√eps(T), etoly :: T=√eps(T), itmax :: Int=0,
- transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LNLQ: system of %d equations in %d variables\n", m, n)
-
- # Check sqd and λ parameters
- sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
- sqd && (λ = one(T))
-
- # Tests M = Iₘ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :u, S, m)
- allocate_if(!NisI, solver, :v, S, n)
- allocate_if(λ > 0, solver, :q, S, n)
- x, Nv, Aᵀu, y, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w̄
- Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats
- rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y
- reset!(stats)
- u = MisI ? Mu : solver.u
- v = NisI ? Nv : solver.v
-
- # Set up parameter σₑₛₜ for the error estimate on x and y
- σₑₛₜ = √(σ^2 + λ^2)
- complex_error_bnd = false
-
- # Initial solutions (x₀, y₀) and residual norm ‖r₀‖.
- x .= zero(FC)
- y .= zero(FC)
-
- bNorm = @knrm2(m, b)
- if bNorm == 0
- stats.niter = 0
- stats.solved = true
- stats.error_with_bnd = false
- history && push!(rNorms, bNorm)
- stats.status = "x = 0 is a zero-residual solution"
- return solver
+def_args_lnlq = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_lnlq = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; transfer_to_craig::Bool = true),
+ :(; sqd::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; σ::T = zero(T) ),
+ :(; utolx::T = √eps(T) ),
+ :(; utoly::T = √eps(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
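+# extract_parameters unwraps each :(; kw = default) expression above so the
+# keyword definitions can be splatted into the method signatures below.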
+def_kwargs_lnlq = mapreduce(extract_parameters, vcat, def_kwargs_lnlq)
+
+args_lnlq = (:A, :b)
+kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
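+# The argument lists are declared once and spliced with @eval into both the
+# out-of-place method below and the in-place lnlq!, keeping their signatures in sync.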
+@eval begin
+ function lnlq($(def_args_lnlq...); $(def_kwargs_lnlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = LnlqSolver(A, b)
+ elapsed_time = ktimer(start_time)
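+ # Deduct the workspace allocation time from the time budget and credit it to the final timer.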
+ timemax -= elapsed_time
+ lnlq!(solver, $(args_lnlq...); $(kwargs_lnlq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- history && push!(rNorms, bNorm)
- ε = atol + rtol * bNorm
-
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
-
- # Update iteration index
- iter = iter + 1
-
- # Initialize generalized Golub-Kahan bidiagonalization.
- # β₁Mu₁ = b.
- Mu .= b
- MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁
- βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M
- if βₖ ≠ 0
- @kscal!(m, one(FC) / βₖ, u)
- MisI || @kscal!(m, one(FC) / βₖ, Mu)
- end
+ function lnlq!(solver :: LnlqSolver{T,FC,S}, $(def_args_lnlq...); $(def_kwargs_lnlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "LNLQ: system of %d equations in %d variables\n", m, n)
+
+ # Check sqd and λ parameters
+ sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0!")
+ sqd && (λ = one(T))
+
+ # Tests M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :u, S, m)
+ allocate_if(!NisI, solver, :v, S, n)
+ allocate_if(λ > 0, solver, :q, S, n)
+ x, Nv, Aᴴu, y, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w̄
+ Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats
+ rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y
+ reset!(stats)
+ u = MisI ? Mu : solver.u
+ v = NisI ? Nv : solver.v
+
+ # Set up parameter σₑₛₜ for the error estimate on x and y
+ σₑₛₜ = √(σ^2 + λ^2)
+ complex_error_bnd = false
+
+ # Initial solutions (x₀, y₀) and residual norm ‖r₀‖.
+ x .= zero(FC)
+ y .= zero(FC)
+
+ bNorm = @knrm2(m, b)
+ if bNorm == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.error_with_bnd = false
+ history && push!(rNorms, bNorm)
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ return solver
+ end
- # α₁Nv₁ = Aᵀu₁.
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
- NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁
- αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N
- if αₖ ≠ 0
- @kscal!(n, one(FC) / αₖ, v)
- NisI || @kscal!(n, one(FC) / αₖ, Nv)
- end
+ history && push!(rNorms, bNorm)
+ ε = atol + rtol * bNorm
- w̄ .= u # Direction w̄₁
- cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᵀ
- sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᵀ
- ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ
- ηₖ = zero(FC) # Coefficient of M̅ₖ
-
- # Variable used for the regularization.
- λₖ = λ # λ₁ = λ
- cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ
- cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁
- λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0
-
- # Initialize the regularization.
- if λ > 0
- # k 2k k 2k k 2k
- # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ]
- # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ]
- (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ)
-
- # q̄₁ = sp₁ * v₁
- @kscal!(n, spₖ, q)
- else
- αhatₖ = αₖ
- end
+ iter = 0
+ itmax == 0 && (itmax = m + n)
- # Begin the LQ factorization of (Lₖ)ᵀ = M̅ₖQₖ.
- # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ]
- # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ]
- # [ • • • • • • ] [ 0 • • • • ]
- # [ • • • • • • ] = [ • • • • • • ] Qₖ
- # [ • • • • 0 ] [ • • • • • • ]
- # [ • • • βₖ] [ • • • • 0 ]
- # [ 0 • • • • 0 αₖ] [ 0 • • • 0 ηₖ ϵbarₖ]
-
- ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁
-
- # Hₖ = Bₖ(Lₖ)ᵀ = [ Lₖ(Lₖ)ᵀ ] ⟹ (Hₖ₋₁)ᵀ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ
- # [ αₖβₖ₊₁(eₖ)ᵀ ]
- #
- # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ
- # tₖ = (τ₁, •••, τₖ)
- # z̅ₖ = (zₖ₋₁, ζbarₖ) = (ζ₁, •••, ζₖ₋₁, ζbarₖ)
-
- τₖ = βₖ / αhatₖ # τ₁ = β₁ / αhat₁
- ζbarₖ = τₖ / ϵbarₖ # ζbar₁ = τ₁ / ϵbar₁
-
- # Stopping criterion.
- solved_lq = solved_cg = false
- tired = false
- status = "unknown"
- user_requested_exit = false
-
- if σₑₛₜ > 0
- τtildeₖ = βₖ / σₑₛₜ
- ζtildeₖ = τtildeₖ / σₑₛₜ
- err_x = τtildeₖ
- err_y = ζtildeₖ
-
- solved_lq = err_x ≤ etolx || err_y ≤ etoly
- history && push!(xNorms, err_x)
- history && push!(yNorms, err_y)
-
- ρbar = -σₑₛₜ
- csig = -one(T)
- end
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time))
- while !(solved_lq || solved_cg || tired || user_requested_exit)
+ # Update iteration index
+ iter = iter + 1
- # Update of (xᵃᵘˣ)ₖ = Vₖtₖ
- if λ > 0
- # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁)
- @kaxpy!(n, τₖ * cpₖ, v, x)
- if iter ≥ 2
- @kaxpy!(n, τₖ * spₖ, q, x)
- # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁
- @kaxpby!(n, spₖ, v, -cpₖ, q)
- end
- else
- # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ
- @kaxpy!(n, τₖ, v, x)
+ # Initialize generalized Golub-Kahan bidiagonalization.
+ # β₁Mu₁ = b.
+ Mu .= b
+ MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁
+ βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M
+ if βₖ ≠ 0
+ @kscal!(m, one(FC) / βₖ, u)
+ MisI || @kscal!(m, one(FC) / βₖ, Mu)
end
- # Continue the generalized Golub-Kahan bidiagonalization.
- # AVₖ = MUₖ₊₁Bₖ
- # AᵀUₖ₊₁ = NVₖ(Bₖ)ᵀ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᵀ = NVₖ₊₁(Lₖ₊₁)ᵀ
- #
- # [ α₁ 0 • • • • 0 ]
- # [ β₂ α₂ • • ]
- # [ 0 • • • • ]
- # Lₖ = [ • • • • • • ]
- # [ • • • • • • ]
- # [ • • • • 0 ]
- # [ 0 • • • 0 βₖ αₖ]
- #
- # Bₖ = [ Lₖ ]
- # [ βₖ₊₁(eₖ)ᵀ ]
-
- # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
- mul!(Av, A, v)
- @kaxpby!(m, one(FC), Av, -αₖ, Mu)
- MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁
- βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M
- if βₖ₊₁ ≠ 0
- @kscal!(m, one(FC) / βₖ₊₁, u)
- MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu)
+ # α₁Nv₁ = Aᴴu₁.
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
+ NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁
+ αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N
+ if αₖ ≠ 0
+ @kscal!(n, one(FC) / αₖ, v)
+ NisI || @kscal!(n, one(FC) / αₖ, Nv)
end
- # αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -βₖ₊₁, Nv)
- NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁
- αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N
- if αₖ₊₁ ≠ 0
- @kscal!(n, one(FC) / αₖ₊₁, v)
- NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv)
- end
+ w̄ .= u # Direction w̄₁
+ cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ
+ sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ
+ ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ
+ ηₖ = zero(FC) # Coefficient of M̅ₖ
+
+ # Variable used for the regularization.
+ λₖ = λ # λ₁ = λ
+ cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ
+ cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁
+ λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0
- # Continue the regularization.
+ # Initialize the regularization.
if λ > 0
# k 2k k 2k k 2k
# k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ]
# k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ]
- βhatₖ₊₁ = cpₖ * βₖ₊₁
- θₖ₊₁ = spₖ * βₖ₊₁
+ (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ)
- # 2k 2k+1 2k 2k+1 2k 2k+1
- # k [ 0 0 ] [ -cdₖ sdₖ ] = [ 0 0 ]
- # k+1 [ θₖ₊₁ λ ] [ sdₖ cdₖ ] [ 0 λₖ₊₁ ]
- (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁)
-
- # qₖ ← sdₖ * q̄ₖ
- @kscal!(n, sdₖ, q)
-
- # k+1 2k+1 k+1 2k+1 k+1 2k+1
- # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ]
- # k+2 [ βₖ₊₂ 0 ] [ spₖ₊₁ -cpₖ₊₁ ] [ γₖ₊₂ θₖ₊₂ ]
- (cpₖ₊₁, spₖ₊₁, αhatₖ₊₁) = sym_givens(αₖ₊₁, λₖ₊₁)
+ # q̄₁ = sp₁ * v₁
+ @kscal!(n, spₖ, q)
else
- βhatₖ₊₁ = βₖ₊₁
- αhatₖ₊₁ = αₖ₊₁
+ αhatₖ = αₖ
end
- if σₑₛₜ > 0 && !complex_error_bnd
- μbar = -csig * αhatₖ
- ρ = √(ρbar^2 + αhatₖ^2)
- csig = ρbar / ρ
- ssig = αhatₖ / ρ
- ρbar = ssig * μbar + csig * σₑₛₜ
- μbar = -csig * βhatₖ₊₁
- θ = βhatₖ₊₁ * csig / ρbar
- ωdisc = σₑₛₜ^2 - σₑₛₜ * βhatₖ₊₁ * θ
- if ωdisc < 0
- complex_error_bnd = true
- else
- ω = √ωdisc
- τtildeₖ = - τₖ * βhatₖ₊₁ / ω
- end
+ # Begin the LQ factorization of (Lₖ)ᴴ = M̅ₖQₖ.
+ # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ]
+ # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ]
+ # [ • • • • • • ] [ 0 • • • • ]
+ # [ • • • • • • ] = [ • • • • • • ] Qₖ
+ # [ • • • • 0 ] [ • • • • • • ]
+ # [ • • • βₖ] [ • • • • 0 ]
+ # [ 0 • • • • 0 αₖ] [ 0 • • • 0 ηₖ ϵbarₖ]
- ρ = √(ρbar^2 + βhatₖ₊₁^2)
- csig = ρbar / ρ
- ssig = βhatₖ₊₁ / ρ
- ρbar = ssig * μbar + csig * σₑₛₜ
- end
+ ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁
- # Continue the LQ factorization of (Lₖ₊₁)ᵀ.
- # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ]
- # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁]
- # [0 sₖ₊₁ -cₖ₊₁]
+ # Hₖ = Bₖ(Lₖ)ᴴ = [ Lₖ(Lₖ)ᴴ ] ⟹ (Hₖ₋₁)ᴴ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ
+ # [ αₖβₖ₊₁(eₖ)ᵀ ]
+ #
+ # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ
+ # tₖ = (τ₁, •••, τₖ)
+ # z̅ₖ = (zₖ₋₁, ζbarₖ) = (ζ₁, •••, ζₖ₋₁, ζbarₖ)
- (cₖ₊₁, sₖ₊₁, ϵₖ) = sym_givens(ϵbarₖ, βhatₖ₊₁)
- ηₖ₊₁ = αhatₖ₊₁ * sₖ₊₁
- ϵbarₖ₊₁ = - αhatₖ₊₁ * cₖ₊₁
+ τₖ = βₖ / αhatₖ # τ₁ = β₁ / αhat₁
+ ζbarₖ = τₖ / ϵbarₖ # ζbar₁ = τ₁ / ϵbar₁
- # Update solutions of Lₖ₊₁tₖ₊₁ = β₁e₁ and M̅ₖ₊₁z̅ₖ₊₁ = tₖ₊₁.
- τₖ₊₁ = - βhatₖ₊₁ * τₖ / αhatₖ₊₁
- ζₖ = cₖ₊₁ * ζbarₖ
- ζbarₖ₊₁ = (τₖ₊₁ - ηₖ₊₁ * ζₖ) / ϵbarₖ₊₁
+ # Stopping criterion.
+ solved_lq = solved_cg = false
+ tired = false
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
- # Relations for the directions wₖ and w̄ₖ₊₁
- # [w̄ₖ uₖ₊₁] [cₖ₊₁ sₖ₊₁] = [wₖ w̄ₖ₊₁] → wₖ = cₖ₊₁ * w̄ₖ + sₖ₊₁ * uₖ₊₁
- # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁
+ if σₑₛₜ > 0
+ τtildeₖ = βₖ / σₑₛₜ
+ ζtildeₖ = τtildeₖ / σₑₛₜ
+ err_x = τtildeₖ
+ err_y = ζtildeₖ
- # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ
- @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y)
- @kaxpy!(m, ζₖ * sₖ₊₁, u, y)
+ solved_lq = err_x ≤ utolx || err_y ≤ utoly
+ history && push!(xNorms, err_x)
+ history && push!(yNorms, err_y)
- # Compute w̄ₖ₊₁
- @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄)
+ ρbar = -σₑₛₜ
+ csig = -one(T)
+ end
- if σₑₛₜ > 0 && !complex_error_bnd
- if transfer_to_craig
- disc_x = τtildeₖ^2 - τₖ₊₁^2
- disc_x < 0 ? complex_error_bnd = true : err_x = √disc_x
+ while !(solved_lq || solved_cg || tired || user_requested_exit || overtimed)
+
+ # Update of (xᵃᵘˣ)ₖ = Vₖtₖ
+ if λ > 0
+ # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁)
+ @kaxpy!(n, τₖ * cpₖ, v, x)
+ if iter ≥ 2
+ @kaxpy!(n, τₖ * spₖ, q, x)
+ # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁
+ @kaxpby!(n, spₖ, v, -cpₖ, q)
+ end
else
- disc_xL = τtildeₖ^2 - τₖ₊₁^2 + (τₖ₊₁ - ηₖ₊₁ * ζₖ)^2
- disc_xL < 0 ? complex_error_bnd = true : err_x = √disc_xL
+ # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ
+ @kaxpy!(n, τₖ, v, x)
end
- ηtildeₖ = ω * sₖ₊₁
- ϵtildeₖ = -ω * cₖ₊₁
- ζtildeₖ = (τtildeₖ - ηtildeₖ * ζₖ) / ϵtildeₖ
-
- if transfer_to_craig
- disc_y = ζtildeₖ^2 - ζbarₖ₊₁^2
- disc_y < 0 ? complex_error_bnd = true : err_y = √disc_y
- else
- err_y = abs(ζtildeₖ)
+
+ # Continue the generalized Golub-Kahan bidiagonalization.
+ # AVₖ = MUₖ₊₁Bₖ
+ # AᴴUₖ₊₁ = NVₖ(Bₖ)ᴴ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᴴ = NVₖ₊₁(Lₖ₊₁)ᴴ
+ #
+ # [ α₁ 0 • • • • 0 ]
+ # [ β₂ α₂ • • ]
+ # [ 0 • • • • ]
+ # Lₖ = [ • • • • • • ]
+ # [ • • • • • • ]
+ # [ • • • • 0 ]
+ # [ 0 • • • 0 βₖ αₖ]
+ #
+ # Bₖ = [ Lₖ ]
+ # [ βₖ₊₁(eₖ)ᵀ ]
+
+ # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
+ mul!(Av, A, v)
+ @kaxpby!(m, one(FC), Av, -αₖ, Mu)
+ MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁
+ βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M
+ if βₖ₊₁ ≠ 0
+ @kscal!(m, one(FC) / βₖ₊₁, u)
+ MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu)
end
- history && push!(xNorms, err_x)
- history && push!(yNorms, err_y)
- end
+ # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv)
+ NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁
+ αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N
+ if αₖ₊₁ ≠ 0
+ @kscal!(n, one(FC) / αₖ₊₁, v)
+ NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv)
+ end
- # Compute residual norm ‖(rᴸ)ₖ‖ = |αₖ| * √(|ϵbarₖζbarₖ|² + |βₖ₊₁sₖζₖ₋₁|²)
- if iter == 1
- rNorm_lq = bNorm
- else
- rNorm_lq = abs(αhatₖ) * √(abs2(ϵbarₖ * ζbarₖ) + abs2(βhatₖ₊₁ * sₖ * ζₖ₋₁))
- end
- history && push!(rNorms, rNorm_lq)
+ # Continue the regularization.
+ if λ > 0
+ # k 2k k 2k k 2k
+ # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ]
+ # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ]
+ βhatₖ₊₁ = cpₖ * βₖ₊₁
+ θₖ₊₁ = spₖ * βₖ₊₁
+
+ # 2k 2k+1 2k 2k+1 2k 2k+1
+ # k [ 0 0 ] [ -cdₖ sdₖ ] = [ 0 0 ]
+ # k+1 [ θₖ₊₁ λ ] [ sdₖ cdₖ ] [ 0 λₖ₊₁ ]
+ (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁)
+
+ # qₖ ← sdₖ * q̄ₖ
+ @kscal!(n, sdₖ, q)
+
+ # k+1 2k+1 k+1 2k+1 k+1 2k+1
+ # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ]
+ # k+2 [ βₖ₊₂ 0 ] [ spₖ₊₁ -cpₖ₊₁ ] [ γₖ₊₂ θₖ₊₂ ]
+ (cpₖ₊₁, spₖ₊₁, αhatₖ₊₁) = sym_givens(αₖ₊₁, λₖ₊₁)
+ else
+ βhatₖ₊₁ = βₖ₊₁
+ αhatₖ₊₁ = αₖ₊₁
+ end
- # Compute residual norm ‖(rᶜ)ₖ‖ = |βₖ₊₁ * τₖ|
- if transfer_to_craig
- rNorm_cg = abs(βhatₖ₊₁ * τₖ)
- end
+ if σₑₛₜ > 0 && !complex_error_bnd
+ μbar = -csig * αhatₖ
+ ρ = √(ρbar^2 + αhatₖ^2)
+ csig = ρbar / ρ
+ ssig = αhatₖ / ρ
+ ρbar = ssig * μbar + csig * σₑₛₜ
+ μbar = -csig * βhatₖ₊₁
+ θ = βhatₖ₊₁ * csig / ρbar
+ ωdisc = σₑₛₜ^2 - σₑₛₜ * βhatₖ₊₁ * θ
+ if ωdisc < 0
+ complex_error_bnd = true
+ else
+ ω = √ωdisc
+ τtildeₖ = - τₖ * βhatₖ₊₁ / ω
+ end
+
+ ρ = √(ρbar^2 + βhatₖ₊₁^2)
+ csig = ρbar / ρ
+ ssig = βhatₖ₊₁ / ρ
+ ρbar = ssig * μbar + csig * σₑₛₜ
+ end
- # Update sₖ, cₖ, αₖ, βₖ, ηₖ, ϵbarₖ, τₖ, ζₖ₋₁ and ζbarₖ.
- cₖ = cₖ₊₁
- sₖ = sₖ₊₁
- αₖ = αₖ₊₁
- αhatₖ = αhatₖ₊₁
- βₖ = βₖ₊₁
- ηₖ = ηₖ₊₁
- ϵbarₖ = ϵbarₖ₊₁
- τₖ = τₖ₊₁
- ζₖ₋₁ = ζₖ
- ζbarₖ = ζbarₖ₊₁
-
- # Update regularization variables.
- if λ > 0
- cpₖ = cpₖ₊₁
- spₖ = spₖ₊₁
- end
+ # Continue the LQ factorization of (Lₖ₊₁)ᴴ.
+ # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ]
+ # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁]
+ # [0 sₖ₊₁ -cₖ₊₁]
+
+ (cₖ₊₁, sₖ₊₁, ϵₖ) = sym_givens(ϵbarₖ, βhatₖ₊₁)
+ ηₖ₊₁ = αhatₖ₊₁ * sₖ₊₁
+ ϵbarₖ₊₁ = - αhatₖ₊₁ * cₖ₊₁
+
+ # Update solutions of Lₖ₊₁tₖ₊₁ = β₁e₁ and M̅ₖ₊₁z̅ₖ₊₁ = tₖ₊₁.
+ τₖ₊₁ = - βhatₖ₊₁ * τₖ / αhatₖ₊₁
+ ζₖ = cₖ₊₁ * ζbarₖ
+ ζbarₖ₊₁ = (τₖ₊₁ - ηₖ₊₁ * ζₖ) / ϵbarₖ₊₁
+
+ # Relations for the directions wₖ and w̄ₖ₊₁
+ # [w̄ₖ uₖ₊₁] [cₖ₊₁ sₖ₊₁] = [wₖ w̄ₖ₊₁] → wₖ = cₖ₊₁ * w̄ₖ + sₖ₊₁ * uₖ₊₁
+ # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁
+
+ # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ
+ @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y)
+ @kaxpy!(m, ζₖ * sₖ₊₁, u, y)
+
+ # Compute w̄ₖ₊₁
+ @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄)
+
+ if σₑₛₜ > 0 && !complex_error_bnd
+ if transfer_to_craig
+ disc_x = τtildeₖ^2 - τₖ₊₁^2
+ disc_x < 0 ? complex_error_bnd = true : err_x = √disc_x
+ else
+ disc_xL = τtildeₖ^2 - τₖ₊₁^2 + (τₖ₊₁ - ηₖ₊₁ * ζₖ)^2
+ disc_xL < 0 ? complex_error_bnd = true : err_x = √disc_xL
+ end
+ ηtildeₖ = ω * sₖ₊₁
+ ϵtildeₖ = -ω * cₖ₊₁
+ ζtildeₖ = (τtildeₖ - ηtildeₖ * ζₖ) / ϵtildeₖ
+
+ if transfer_to_craig
+ disc_y = ζtildeₖ^2 - ζbarₖ₊₁^2
+ disc_y < 0 ? complex_error_bnd = true : err_y = √disc_y
+ else
+ err_y = abs(ζtildeₖ)
+ end
+
+ history && push!(xNorms, err_x)
+ history && push!(yNorms, err_y)
+ end
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- tired = iter ≥ itmax
- solved_lq = rNorm_lq ≤ ε
- solved_cg = transfer_to_craig && rNorm_cg ≤ ε
- if σₑₛₜ > 0
- if transfer_to_craig
- solved_cg = solved_cg || err_x ≤ etolx || err_y ≤ etoly
+ # Compute residual norm ‖(rᴸ)ₖ‖ = |αₖ| * √(|ϵbarₖζbarₖ|² + |βₖ₊₁sₖζₖ₋₁|²)
+ if iter == 1
+ rNorm_lq = bNorm
else
- solved_lq = solved_lq || err_x ≤ etolx || err_y ≤ etoly
+ rNorm_lq = abs(αhatₖ) * √(abs2(ϵbarₖ * ζbarₖ) + abs2(βhatₖ₊₁ * sₖ * ζₖ₋₁))
end
- end
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
+ history && push!(rNorms, rNorm_lq)
- # Update iteration index.
- iter = iter + 1
- end
- (verbose > 0) && @printf("\n")
+ # Compute residual norm ‖(rᶜ)ₖ‖ = |βₖ₊₁ * τₖ|
+ if transfer_to_craig
+ rNorm_cg = abs(βhatₖ₊₁ * τₖ)
+ end
- if solved_cg
- if λ > 0
- # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁)
- @kaxpy!(n, τₖ * cpₖ, v, x)
- if iter ≥ 2
- @kaxpy!(n, τₖ * spₖ, q, x)
+ # Update sₖ, cₖ, αₖ, βₖ, ηₖ, ϵbarₖ, τₖ, ζₖ₋₁ and ζbarₖ.
+ cₖ = cₖ₊₁
+ sₖ = sₖ₊₁
+ αₖ = αₖ₊₁
+ αhatₖ = αhatₖ₊₁
+ βₖ = βₖ₊₁
+ ηₖ = ηₖ₊₁
+ ϵbarₖ = ϵbarₖ₊₁
+ τₖ = τₖ₊₁
+ ζₖ₋₁ = ζₖ
+ ζbarₖ = ζbarₖ₊₁
+
+ # Update regularization variables.
+ if λ > 0
+ cpₖ = cpₖ₊₁
+ spₖ = spₖ₊₁
end
- else
- # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ
- @kaxpy!(n, τₖ, v, x)
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ tired = iter ≥ itmax
+ solved_lq = rNorm_lq ≤ ε
+ solved_cg = transfer_to_craig && rNorm_cg ≤ ε
+ if σₑₛₜ > 0
+ solved_lq = solved_lq || err_x ≤ utolx || err_y ≤ utoly
+ solved_cg = transfer_to_craig && (solved_cg || err_x ≤ utolx || err_y ≤ utoly)
+ end
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time))
+
+ # Update iteration index.
+ iter = iter + 1
end
- # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ
- @kaxpy!(m, ζbarₖ, w̄, y)
- else
- if λ > 0
- # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁)
- @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x)
- if iter ≥ 2
- @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x)
+ (verbose > 0) && @printf(iostream, "\n")
+
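+ # Finalize the solution: move to the CRAIG point (xᶜ, yᶜ) when it triggered
+ # convergence, otherwise complete the update of the LNLQ point (xᴸ, yᴸ).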
+ if solved_cg
+ if λ > 0
+ # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁)
+ @kaxpy!(n, τₖ * cpₖ, v, x)
+ if iter ≥ 2
+ @kaxpy!(n, τₖ * spₖ, q, x)
+ end
+ else
+ # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ
+ @kaxpy!(n, τₖ, v, x)
end
+ # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ
+ @kaxpy!(m, ζbarₖ, w̄, y)
else
- # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ
- @kaxpy!(n, ηₖ * ζₖ₋₁, v, x)
+ if λ > 0
+ # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁)
+ @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x)
+ if iter ≥ 2
+ @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x)
+ end
+ else
+ # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ
+ @kaxpy!(n, ηₖ * ζₖ₋₁, v, x)
+ end
end
- end
- tired && (status = "maximum number of iterations exceeded")
- solved_lq && (status = "solutions (xᴸ, yᴸ) good enough for the tolerances given")
- solved_cg && (status = "solutions (xᶜ, yᶜ) good enough for the tolerances given")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved_lq || solved_cg
- stats.error_with_bnd = complex_error_bnd
- stats.status = status
- return solver
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved_lq && (status = "solutions (xᴸ, yᴸ) good enough for the tolerances given")
+ solved_cg && (status = "solutions (xᶜ, yᶜ) good enough for the tolerances given")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved_lq || solved_cg
+ stats.error_with_bnd = complex_error_bnd
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
+ end
end
diff --git a/src/lslq.jl b/src/lslq.jl
index 908de19c5..3a549207e 100644
--- a/src/lslq.jl
+++ b/src/lslq.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSLQ is formally equivalent to applying SYMMLQ to the normal equations
# but should be more stable.
@@ -21,15 +21,17 @@
export lslq, lslq!
-
"""
(x, stats) = lslq(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T),
- atol::T=√eps(T), btol::T=√eps(T), etol::T=√eps(T),
- window::Int=5, utol::T=√eps(T), itmax::Int=0,
- σ::T=zero(T), transfer_to_lsqr::Bool=false,
- conlim::T=1/√eps(T), verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ window::Int=5, transfer_to_lsqr::Bool=false,
+ sqd::Bool=false, λ::T=zero(T),
+ σ::T=zero(T), etol::T=√eps(T),
+ utol::T=√eps(T), btol::T=√eps(T),
+ conlim::T=1/√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -38,31 +40,17 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ²‖x‖₂²
-using the LSLQ method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LSLQ method, where λ ≥ 0 is a regularization parameter.
LSLQ is formally equivalent to applying SYMMLQ to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
but is more stable.
-#### Main features
-
-* the solution estimate is updated along orthogonal directions
-* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing
-* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing
-* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error)
-* if `A` is rank deficient, identify the minimum least-squares solution
-
-#### Optional arguments
-
-* `M`: a symmetric and positive definite dual preconditioner
-* `N`: a symmetric and positive definite primal preconditioner
-* `sqd` indicates that we are solving a symmetric and quasi-definite system with `λ=1`
-
If `λ > 0`, we solve the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -72,39 +60,61 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSLQ is then equivalent to applying SYMMLQ to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSLQ is then equivalent to applying SYMMLQ to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
-* `λ` is a regularization parameter (see the problem statement above)
-* `σ` is an underestimate of the smallest nonzero singular value of `A`---setting `σ` too large will result in an error in the course of the iterations
-* `atol` is a stopping tolerance based on the residual
-* `btol` is a stopping tolerance used to detect zero-residual problems
-* `etol` is a stopping tolerance based on the lower bound on the error
-* `window` is the number of iterations used to accumulate a lower bound on the error
-* `utol` is a stopping tolerance based on the upper bound on the error
-* `transfer_to_lsqr` return the CG solution estimate (i.e., the LSQR point) instead of the LQ estimate
-* `itmax` is the maximum number of iterations (0 means no imposed limit)
-* `conlim` is the limit on the estimated condition number of `A` beyond which the solution will be abandoned
-* `verbose` determines verbosity.
-
-#### Return values
+#### Main features
-`lslq` returns the tuple `(x, stats)` where
+* the solution estimate is updated along orthogonal directions
+* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing
+* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing
+* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error)
+* if `A` is rank deficient, it identifies the minimum least-squares solution
-* `x` is the LQ solution estimate
-* `stats` collects other statistics on the run in a LSLQStats
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: determines whether the preconditioners use `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `utol`: stopping tolerance based on the upper bound on the error;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`LSLQStats`](@ref) structure.
* `stats.err_lbnds` is a vector of lower bounds on the LQ error---the vector is empty if `window` is set to zero
* `stats.err_ubnds_lq` is a vector of upper bounds on the LQ error---the vector is empty if `σ` is left at zero
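+
+A minimal usage sketch (the random data below are illustrative only, not part of the package):
+
+    using Krylov
+    A = rand(10, 4)          # overdetermined least-squares problem
+    b = rand(10)
+    (x, stats) = lslq(A, b)  # x is the LQ solution estimate
+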
@@ -116,8 +126,8 @@ In this case, `N` can still be specified and indicates the weighted norm in whic
The iterations stop as soon as one of the following conditions holds true:
* the optimality residual is sufficiently small (`stats.status = "found approximate minimum least-squares solution"`) in the sense that either
- * ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ atol, or
- * 1 + ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ 1
+ * ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ atol, or
+ * 1 + ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ 1
* an approximate zero-residual solution has been found (`stats.status = "found approximate zero-residual solution"`) in the sense that either
* ‖r‖ / ‖b‖ ≤ btol + atol ‖A‖ * ‖xᴸ‖ / ‖b‖, or
* 1 + ‖r‖ / ‖b‖ ≤ 1
@@ -127,9 +137,6 @@ The iterations stop as soon as one of the following conditions holds true:
* the lower bound on the LQ forward error is less than etol * ‖xᴸ‖
* the upper bound on the CG forward error is less than utol * ‖xᶜ‖
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
-
#### References
* R. Estrin, D. Orban and M. A. Saunders, [*Euclidean-norm error bounds for SYMMLQ and CG*](https://doi.org/10.1137/16M1094816), SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019.
@@ -137,12 +144,6 @@ and `false` otherwise.
"""
function lslq end
-function lslq(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = LslqSolver(A, b, window=window)
- lslq!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = lslq!(solver::LslqSolver, A, b; kwargs...)
@@ -152,315 +153,363 @@ See [`LslqSolver`](@ref) for more details about the `solver`.
"""
function lslq! end
-function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T),
- atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T),
- utol :: T=√eps(T), itmax :: Int=0, σ :: T=zero(T),
- transfer_to_lsqr :: Bool=false, conlim :: T=1/√eps(T),
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LSLQ: system of %d equations in %d variables\n", m, n)
-
- # Check sqd and λ parameters
- sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
- sqd && (λ = one(T))
-
- # Tests M = Iₙ and N = Iₘ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :u, S, m)
- allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.w̄
- Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
- rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds
- err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg
- reset!(stats)
- u = MisI ? Mu : solver.u
- v = NisI ? Nv : solver.v
-
- λ² = λ * λ
- ctol = conlim > 0 ? 1/conlim : zero(T)
-
- x .= zero(FC) # LSLQ point
-
- # Initialize Golub-Kahan process.
- # β₁ M u₁ = b.
- Mu .= b
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β₁ = sqrt(@kdotr(m, u, Mu))
- if β₁ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.error_with_bnd = false
- history && push!(rNorms, zero(T))
- history && push!(ArNorms, zero(T))
- stats.status = "x = 0 is a zero-residual solution"
- return solver
- end
- β = β₁
-
- @kscal!(m, one(FC)/β₁, u)
- MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv)) # = α₁
-
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
- if α == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.error_with_bnd = false
- history && push!(rNorms, β₁)
- history && push!(ArNorms, zero(T))
- stats.status = "x = 0 is a minimum least-squares solution"
- return solver
+def_args_lslq = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_lslq = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; transfer_to_lsqr::Bool = false),
+ :(; sqd::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; σ::T = zero(T) ),
+ :(; etol::T = √eps(T) ),
+ :(; utol::T = √eps(T) ),
+ :(; btol::T = √eps(T) ),
+ :(; conlim::T = 1/√eps(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_lslq = mapreduce(extract_parameters, vcat, def_kwargs_lslq)
+
+args_lslq = (:A, :b)
+kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
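+ # window is kept outside def_kwargs_lslq because it sizes the error window
+ # of the LslqSolver workspace at construction time.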
+ function lslq($(def_args_lslq...); window :: Int=5, $(def_kwargs_lslq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = LslqSolver(A, b; window)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ lslq!(solver, $(args_lslq...); $(kwargs_lslq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
-
- Anorm = α
- Anorm² = α * α
-
- # condition number estimate
- σmax = zero(T)
- σmin = Inf
- Acond = zero(T)
-
- xlqNorm = zero(T)
- xlqNorm² = zero(T)
- xcgNorm = zero(T)
- xcgNorm² = zero(T)
-
- w̄ .= v # w̄₁ = v₁
-
- err_lbnd = zero(T)
- window = length(err_vec)
- err_vec .= zero(T)
- complex_error_bnd = false
-
- # Initialize other constants.
- αL = α
- βL = β
- ρ̄ = -σ
- γ̄ = α
- ψ = β₁
- c = -one(T)
- s = zero(T)
- δ = -one(T)
- τ = α * β₁
- ζ = zero(T)
- ζ̄ = zero(T)
- ζ̃ = zero(T)
- csig = -one(T)
-
- rNorm = β₁
- history && push!(rNorms, rNorm)
- ArNorm = α * β
- history && push!(ArNorms, ArNorm)
-
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm)
-
- status = "unknown"
- solved = solved_mach = solved_lim = (rNorm ≤ atol)
- tired = iter ≥ itmax
- ill_cond = ill_cond_mach = ill_cond_lim = false
- zero_resid = zero_resid_mach = zero_resid_lim = false
- fwd_err_lbnd = false
- fwd_err_ubnd = false
- user_requested_exit = false
-
- while ! (solved || tired || ill_cond || user_requested_exit)
-
- # Generate next Golub-Kahan vectors.
- # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
- mul!(Av, A, v)
- @kaxpby!(m, one(FC), Av, -α, Mu)
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β = sqrt(@kdotr(m, u, Mu))
- if β ≠ 0
- @kscal!(m, one(FC)/β, u)
- MisI || @kscal!(m, one(FC)/β, Mu)
-
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
- if α ≠ 0
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
- end
- # rotate out regularization term if present
- αL = α
- βL = β
- if λ ≠ 0
- (cL, sL, βL) = sym_givens(β, λ)
- αL = cL * α
+ function lslq!(solver :: LslqSolver{T,FC,S}, $(def_args_lslq...); $(def_kwargs_lslq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
- # the rotation updates the next regularization parameter
- λ = sqrt(λ² + (sL * α)^2)
- end
- Anorm² = Anorm² + αL * αL + βL * βL # = ‖Lₖ‖²
- Anorm = sqrt(Anorm²)
- end
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "LSLQ: system of %d equations in %d variables\n", m, n)
+
+ # Check sqd and λ parameters
+ sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0!")
+ sqd && (λ = one(T))
+
+ # Tests M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :u, S, m)
+ allocate_if(!NisI, solver, :v, S, n)
+ x, Nv, Aᴴu, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.w̄
+ Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
+ rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds
+ err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg
+ reset!(stats)
+ u = MisI ? Mu : solver.u
+ v = NisI ? Nv : solver.v
- # Continue QR factorization of Bₖ
- #
- # k k+1 k k+1 k k+1
- # k [ c' s' ] [ γ̄ ] = [ γ δ ]
- # k+1 [ s' -c' ] [ β α⁺ ] [ γ̄ ]
- (cp, sp, γ) = sym_givens(γ̄, βL)
- τ = -τ * δ / γ # forward substitution for t
- δ = sp * αL
- γ̄ = -cp * αL
-
- if σ > 0 && !complex_error_bnd
- # Continue QR factorization for error estimate
- μ̄ = -csig * γ
- (csig, ssig, ρ) = sym_givens(ρ̄, γ)
- ρ̄ = ssig * μ̄ + csig * σ
- μ̄ = -csig * δ
-
- # determine component of eigenvector and Gauss-Radau parameter
- h = δ * csig / ρ̄
- disc = σ * (σ - δ * h)
- disc < 0 ? complex_error_bnd = true : ω = sqrt(disc)
- (csig, ssig, ρ) = sym_givens(ρ̄, δ)
- ρ̄ = ssig * μ̄ + csig * σ
+ λ² = λ * λ
+ ctol = conlim > 0 ? 1/conlim : zero(T)
+
+ x .= zero(FC) # LSLQ point
+
+ # Initialize Golub-Kahan process.
+ # β₁ M u₁ = b.
+ Mu .= b
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β₁ = sqrt(@kdotr(m, u, Mu))
+ if β₁ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.error_with_bnd = false
+ history && push!(rNorms, zero(T))
+ history && push!(ArNorms, zero(T))
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ return solver
end
+ β = β₁
+
+ @kscal!(m, one(FC)/β₁, u)
+ MisI || @kscal!(m, one(FC)/β₁, Mu)
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv)) # = α₁
+
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
+ if α == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.error_with_bnd = false
+ history && push!(rNorms, β₁)
+ history && push!(ArNorms, zero(T))
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a minimum least-squares solution"
+ return solver
+ end
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
- # Continue LQ factorization of Rₖ
- ϵ̄ = -γ * c
- η = γ * s
- (c, s, ϵ) = sym_givens(ϵ̄, δ)
+ Anorm = α
+ Anorm² = α * α
# condition number estimate
- # the QLP factorization suggests that the diagonal of M̄ approximates
- # the singular values of B.
- σmax = max(σmax, ϵ, abs(ϵ̄))
- σmin = min(σmin, ϵ, abs(ϵ̄))
- Acond = σmax / σmin
-
- # forward substitution for z, ζ̄
- ζold = ζ
- ζ = (τ - ζ * η) / ϵ
- ζ̄ = ζ / c
-
- # residual norm estimate
- rNorm = sqrt((ψ * cp - ζold * η)^2 + (ψ * sp)^2)
+ σmax = zero(T)
+ σmin = Inf
+ Acond = zero(T)
+
+ xlqNorm = zero(T)
+ xlqNorm² = zero(T)
+ xcgNorm = zero(T)
+ xcgNorm² = zero(T)
+
+ w̄ .= v # w̄₁ = v₁
+
+ err_lbnd = zero(T)
+ window = length(err_vec)
+ err_vec .= zero(T)
+ complex_error_bnd = false
+
+ # Initialize other constants.
+ αL = α
+ βL = β
+ ρ̄ = -σ
+ γ̄ = α
+ ψ = β₁
+ c = -one(T)
+ s = zero(T)
+ δ = -one(T)
+ τ = α * β₁
+ ζ = zero(T)
+ ζ̄ = zero(T)
+ ζ̃ = zero(T)
+ csig = -one(T)
+
+ rNorm = β₁
history && push!(rNorms, rNorm)
-
- ArNorm = sqrt((γ * ϵ * ζ)^2 + (δ * η * ζold)^2)
+ ArNorm = α * β
history && push!(ArNorms, ArNorm)
- # Compute ψₖ
- ψ = ψ * sp
+ iter = 0
+ itmax == 0 && (itmax = m + n)
- # Compute ‖x_cg‖₂
- xcgNorm² = xlqNorm² + ζ̄ * ζ̄
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm, ktimer(start_time))
- if σ > 0 && iter > 0 && !complex_error_bnd
- disc = ζ̃ * ζ̃ - ζ̄ * ζ̄
- if disc < 0
- complex_error_bnd = true
- else
- err_ubnd_cg = sqrt(disc)
- history && push!(err_ubnds_cg, err_ubnd_cg)
- fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²)
+ status = "unknown"
+ ε = atol + rtol * β₁
+ solved = solved_mach = solved_lim = (rNorm ≤ ε)
+ tired = iter ≥ itmax
+ ill_cond = ill_cond_mach = ill_cond_lim = false
+ zero_resid = zero_resid_mach = zero_resid_lim = false
+ fwd_err_lbnd = false
+ fwd_err_ubnd = false
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || ill_cond || user_requested_exit || overtimed)
+
+ # Generate next Golub-Kahan vectors.
+ # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
+ mul!(Av, A, v)
+ @kaxpby!(m, one(FC), Av, -α, Mu)
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β = sqrt(@kdotr(m, u, Mu))
+ if β ≠ 0
+ @kscal!(m, one(FC)/β, u)
+ MisI || @kscal!(m, one(FC)/β, Mu)
+
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+ if α ≠ 0
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
+ end
+
+ # rotate out regularization term if present
+ αL = α
+ βL = β
+ if λ ≠ 0
+ (cL, sL, βL) = sym_givens(β, λ)
+ αL = cL * α
+
+ # the rotation updates the next regularization parameter
+ λ = sqrt(λ² + (sL * α)^2)
+ end
+ Anorm² = Anorm² + αL * αL + βL * βL # = ‖Lₖ‖²
+ Anorm = sqrt(Anorm²)
end
- end
- test1 = rNorm / β₁
- test2 = ArNorm / (Anorm * rNorm)
- test3 = 1 / Acond
- t1 = test1 / (one(T) + Anorm * xlqNorm / β₁)
- rtol = btol + atol * Anorm * xlqNorm / β₁
+ # Continue QR factorization of Bₖ
+ #
+ # k k+1 k k+1 k k+1
+ # k [ c' s' ] [ γ̄ ] = [ γ δ ]
+ # k+1 [ s' -c' ] [ β α⁺ ] [ γ̄ ]
+ (cp, sp, γ) = sym_givens(γ̄, βL)
+ τ = -τ * δ / γ # forward substitution for t
+ δ = sp * αL
+ γ̄ = -cp * αL
+
+ if σ > 0 && !complex_error_bnd
+ # Continue QR factorization for error estimate
+ μ̄ = -csig * γ
+ (csig, ssig, ρ) = sym_givens(ρ̄, γ)
+ ρ̄ = ssig * μ̄ + csig * σ
+ μ̄ = -csig * δ
+
+ # determine component of eigenvector and Gauss-Radau parameter
+ h = δ * csig / ρ̄
+ disc = σ * (σ - δ * h)
+ disc < 0 ? complex_error_bnd = true : ω = sqrt(disc)
+ (csig, ssig, ρ) = sym_givens(ρ̄, δ)
+ ρ̄ = ssig * μ̄ + csig * σ
+ end
- # update LSLQ point for next iteration
- @kaxpy!(n, c * ζ, w̄, x)
- @kaxpy!(n, s * ζ, v, x)
+ # Continue LQ factorization of Rₖ
+ ϵ̄ = -γ * c
+ η = γ * s
+ (c, s, ϵ) = sym_givens(ϵ̄, δ)
+
+ # condition number estimate
+ # the QLP factorization suggests that the diagonal of M̄ approximates
+ # the singular values of B.
+ σmax = max(σmax, ϵ, abs(ϵ̄))
+ σmin = min(σmin, ϵ, abs(ϵ̄))
+ Acond = σmax / σmin
+
+ # forward substitution for z, ζ̄
+ ζold = ζ
+ ζ = (τ - ζ * η) / ϵ
+ ζ̄ = ζ / c
+
+ # residual norm estimate
+ rNorm = sqrt((ψ * cp - ζold * η)^2 + (ψ * sp)^2)
+ history && push!(rNorms, rNorm)
+
+ ArNorm = sqrt((γ * ϵ * ζ)^2 + (δ * η * ζold)^2)
+ history && push!(ArNorms, ArNorm)
+
+ # Compute ψₖ
+ ψ = ψ * sp
+
+ # Compute ‖x_cg‖₂
+ xcgNorm² = xlqNorm² + ζ̄ * ζ̄
+
+ if σ > 0 && iter > 0 && !complex_error_bnd
+ disc = ζ̃ * ζ̃ - ζ̄ * ζ̄
+ if disc < 0
+ complex_error_bnd = true
+ else
+ err_ubnd_cg = sqrt(disc)
+ history && push!(err_ubnds_cg, err_ubnd_cg)
+ fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²)
+ end
+ end
- # compute w̄
- @kaxpby!(n, -c, v, s, w̄)
+ test1 = rNorm
+ test2 = ArNorm / (Anorm * rNorm)
+ test3 = 1 / Acond
+ t1 = test1 / (one(T) + Anorm * xlqNorm)
+ tol = btol + atol * Anorm * xlqNorm / β₁
- xlqNorm² += ζ * ζ
- xlqNorm = sqrt(xlqNorm²)
+ # update LSLQ point for next iteration
+ @kaxpy!(n, c * ζ, w̄, x)
+ @kaxpy!(n, s * ζ, v, x)
- # check stopping condition based on forward error lower bound
- err_vec[mod(iter, window) + 1] = ζ
- if iter ≥ window
- err_lbnd = norm(err_vec)
- history && push!(err_lbnds, err_lbnd)
- fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm
- end
+ # compute w̄
+ @kaxpby!(n, -c, v, s, w̄)
- # compute LQ forward error upper bound
- if σ > 0 && !complex_error_bnd
- η̃ = ω * s
- ϵ̃ = -ω * c
- τ̃ = -τ * δ / ω
- ζ̃ = (τ̃ - ζ * η̃) / ϵ̃
- history && push!(err_ubnds_lq, abs(ζ̃ ))
- end
+ xlqNorm² += ζ * ζ
+ xlqNorm = sqrt(xlqNorm²)
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- ill_cond_mach = (one(T) + test3 ≤ one(T))
- solved_mach = (one(T) + test2 ≤ one(T))
- zero_resid_mach = (one(T) + t1 ≤ one(T))
+ # check stopping condition based on forward error lower bound
+ err_vec[mod(iter, window) + 1] = ζ
+ if iter ≥ window
+ err_lbnd = @knrm2(window, err_vec)
+ history && push!(err_lbnds, err_lbnd)
+ fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm
+ end
- # Stopping conditions based on user-provided tolerances.
- user_requested_exit = callback(solver) :: Bool
- tired = iter ≥ itmax
- ill_cond_lim = (test3 ≤ ctol)
- solved_lim = (test2 ≤ atol)
- zero_resid_lim = (test1 ≤ rtol)
+ # compute LQ forward error upper bound
+ if σ > 0 && !complex_error_bnd
+ η̃ = ω * s
+ ϵ̃ = -ω * c
+ τ̃ = -τ * δ / ω
+ ζ̃ = (τ̃ - ζ * η̃) / ϵ̃
+ history && push!(err_ubnds_lq, abs(ζ̃ ))
+ end
- ill_cond = ill_cond_mach || ill_cond_lim
- zero_resid = zero_resid_mach || zero_resid_lim
- solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ ill_cond_mach = (one(T) + test3 ≤ one(T))
+ solved_mach = (one(T) + test2 ≤ one(T))
+ zero_resid_mach = (one(T) + t1 ≤ one(T))
+
+ # Stopping conditions based on user-provided tolerances.
+ user_requested_exit = callback(solver) :: Bool
+ tired = iter ≥ itmax
+ ill_cond_lim = (test3 ≤ ctol)
+ solved_lim = (test2 ≤ atol)
+ zero_resid_lim = (test1 ≤ ε)
+
+ ill_cond = ill_cond_mach || ill_cond_lim
+ zero_resid = zero_resid_mach || zero_resid_lim
+ solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+
+ iter = iter + 1
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm, ktimer(start_time))
+ end
+ (verbose > 0) && @printf(iostream, "\n")
- iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm)
- end
- (verbose > 0) && @printf("\n")
+ if transfer_to_lsqr # compute LSQR point
+ @kaxpy!(n, ζ̄ , w̄, x)
+ end
- if transfer_to_lsqr # compute LSQR point
- @kaxpy!(n, ζ̄ , w̄, x)
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ ill_cond_lim && (status = "condition number exceeds tolerance")
+ solved && (status = "found approximate minimum least-squares solution")
+ zero_resid && (status = "found approximate zero-residual solution")
+ fwd_err_lbnd && (status = "forward error lower bound small enough")
+ fwd_err_ubnd && (status = "forward error upper bound small enough")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !zero_resid
+ stats.error_with_bnd = complex_error_bnd
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
-
- tired && (status = "maximum number of iterations exceeded")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- ill_cond_lim && (status = "condition number exceeds tolerance")
- solved && (status = "found approximate minimum least-squares solution")
- zero_resid && (status = "found approximate zero-residual solution")
- fwd_err_lbnd && (status = "forward error lower bound small enough")
- fwd_err_ubnd && (status = "forward error upper bound small enough")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !zero_resid
- stats.error_with_bnd = complex_error_bnd
- stats.status = status
- return solver
end
diff --git a/src/lsmr.jl b/src/lsmr.jl
index f4d8349d1..085d941db 100644
--- a/src/lsmr.jl
+++ b/src/lsmr.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSMR is formally equivalent to applying MINRES to the normal equations
# but should be more stable. It is also formally equivalent to CRLS though
@@ -24,17 +24,16 @@
export lsmr, lsmr!
-
"""
(x, stats) = lsmr(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T),
+ M=I, N=I, ldiv::Bool=false,
+ window::Int=5, sqd::Bool=false, λ::T=zero(T),
+ radius::T=zero(T), etol::T=√eps(T),
axtol::T=√eps(T), btol::T=√eps(T),
- atol::T=zero(T), rtol::T=zero(T),
- etol::T=√eps(T), window::Int=5,
- itmax::Int=0, conlim::T=1/√eps(T),
- radius::T=zero(T), verbose::Int=0,
- history::Bool=false, ldiv::Bool=false,
- callback=solver->false)
+ conlim::T=1/√eps(T), atol::T=zero(T),
+ rtol::T=zero(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -43,24 +42,24 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ²‖x‖₂²
-using the LSMR method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LSMR method, where λ ≥ 0 is a regularization parameter.
LSMR is formally equivalent to applying MINRES to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
(and therefore to CRLS) but is more stable.
-LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to CRLS, though it can be substantially more accurate.
LSMR can be also used to find a null vector of a singular matrix A
-by solving the problem `min ‖Aᵀx - b‖` with any nonzero vector `b`.
-At a minimizer, the residual vector `r = b - Aᵀx` will satisfy `Ar = 0`.
+by solving the problem `min ‖Aᴴx - b‖` with any nonzero vector `b`.
+At a minimizer, the residual vector `r = b - Aᴴx` will satisfy `Ar = 0`.
If `λ > 0`, we solve the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -70,23 +69,52 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSMR is then equivalent to applying MINRES to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSMR is then equivalent to applying MINRES to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: determines whether the preconditioners use `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `axtol`: tolerance on the backward error;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`LsmrStats`](@ref) structure.
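+
+A minimal usage sketch (the random data and the damping value below are illustrative only):
+
+    using Krylov
+    A = rand(10, 4)
+    b = rand(10)
+    (x, stats) = lsmr(A, b, λ = 1.0e-2)  # regularized (damped) least-squares solve
+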
#### Reference
@@ -94,12 +122,6 @@ and `false` otherwise.
"""
function lsmr end
-function lsmr(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = LsmrSolver(A, b, window=window)
- lsmr!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = lsmr!(solver::LsmrSolver, A, b; kwargs...)
@@ -109,274 +131,320 @@ See [`LsmrSolver`](@ref) for more details about the `solver`.
"""
function lsmr! end
-function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T),
- axtol :: T=√eps(T), btol :: T=√eps(T),
- atol :: T=zero(T), rtol :: T=zero(T),
- etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T),
- radius :: T=zero(T), verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LSMR: system of %d equations in %d variables\n", m, n)
-
- # Check sqd and λ parameters
- sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
- sqd && (λ = one(T))
-
- # Tests M = Iₙ and N = Iₘ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :u, S, m)
- allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, h, hbar = solver.x, solver.Nv, solver.Aᵀu, solver.h, solver.hbar
- Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- u = MisI ? Mu : solver.u
- v = NisI ? Nv : solver.v
-
- ctol = conlim > 0 ? 1/conlim : zero(T)
- x .= zero(FC)
-
- # Initialize Golub-Kahan process.
- # β₁ M u₁ = b.
- Mu .= b
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β₁ = sqrt(@kdotr(m, u, Mu))
- if β₁ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(rNorms, zero(T))
- history && push!(ArNorms, zero(T))
- return solver
- end
- β = β₁
-
- @kscal!(m, one(FC)/β₁, u)
- MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
-
- ζbar = α * β
- αbar = α
- ρ = one(T)
- ρbar = one(T)
- cbar = one(T)
- sbar = zero(T)
-
- # Initialize variables for estimation of ‖r‖.
- βdd = β
- βd = zero(T)
- ρdold = one(T)
- τtildeold = zero(T)
- θtilde = zero(T)
- ζ = zero(T)
- d = zero(T)
-
- # Initialize variables for estimation of ‖A‖, cond(A) and xNorm.
- Anorm² = α * α
- maxrbar = zero(T)
- minrbar = min(floatmax(T), T(1.0e+100))
- Acond = maxrbar / minrbar
- Anorm = sqrt(Anorm²)
- xNorm = zero(T)
-
- # Items for use in stopping rules.
- ctol = conlim > 0 ? 1 / conlim : zero(T)
- rNorm = β
- history && push!(rNorms, rNorm)
- ArNorm = ArNorm0 = α * β
- history && push!(ArNorms, ArNorm)
-
- xENorm² = zero(T)
- err_lbnd = zero(T)
- window = length(err_vec)
- err_vec .= zero(T)
-
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²)
-
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
- if α == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a minimum least-squares solution"
- return solver
+def_args_lsmr = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_lsmr = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; sqd::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; radius::T = zero(T) ),
+ :(; etol::T = √eps(T) ),
+ :(; axtol::T = √eps(T) ),
+ :(; btol::T = √eps(T) ),
+ :(; conlim::T = 1/√eps(T) ),
+ :(; atol::T = zero(T) ),
+ :(; rtol::T = zero(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_lsmr = mapreduce(extract_parameters, vcat, def_kwargs_lsmr)
+
+args_lsmr = (:A, :b)
+kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function lsmr($(def_args_lsmr...); window :: Int=5, $(def_kwargs_lsmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = LsmrSolver(A, b; window)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ lsmr!(solver, $(args_lsmr...); $(kwargs_lsmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
-
- h .= v
- hbar .= zero(FC)
-
- status = "unknown"
- on_boundary = false
- solved = solved_mach = solved_lim = (rNorm ≤ axtol)
- tired = iter ≥ itmax
- ill_cond = ill_cond_mach = ill_cond_lim = false
- zero_resid = zero_resid_mach = zero_resid_lim = false
- fwd_err = false
- user_requested_exit = false
-
- while ! (solved || tired || ill_cond || user_requested_exit)
- iter = iter + 1
-
- # Generate next Golub-Kahan vectors.
- # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
- mul!(Av, A, v)
- @kaxpby!(m, one(FC), Av, -α, Mu)
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β = sqrt(@kdotr(m, u, Mu))
- if β ≠ 0
- @kscal!(m, one(FC)/β, u)
- MisI || @kscal!(m, one(FC)/β, Mu)
-
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
- if α ≠ 0
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
- end
- end
- # Continue QR factorization
- (chat, shat, αhat) = sym_givens(αbar, λ)
-
- ρold = ρ
- (c, s, ρ) = sym_givens(αhat, β)
- θnew = s * α
- αbar = c * α
-
- ρbarold = ρbar
- ζold = ζ
- θbar = sbar * ρ
- ρtemp = cbar * ρ
- (cbar, sbar, ρbar) = sym_givens(ρtemp, θnew)
- ζ = cbar * ζbar
- ζbar = -sbar * ζbar
-
- xENorm² = xENorm² + ζ * ζ
- err_vec[mod(iter, window) + 1] = ζ
- iter ≥ window && (err_lbnd = @knrm2(window, err_vec))
-
- # Update h, hbar and x.
- δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁)
- @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁
-
- # if a trust-region constraint is given, compute step to the boundary
- # the step ϕ/ρ is not necessarily positive
- σ = ζ / (ρ * ρbar)
- if radius > 0
- t1, t2 = to_boundary(x, hbar, radius)
- tmax, tmin = max(t1, t2), min(t1, t2)
- on_boundary = σ > tmax || σ < tmin
- σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
- end
+ function lsmr!(solver :: LsmrSolver{T,FC,S}, $(def_args_lsmr...); $(def_kwargs_lsmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
- @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ
- @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
- # Estimate ‖r‖.
- βacute = chat * βdd
- βcheck = -shat * βdd
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "LSMR: system of %d equations in %d variables\n", m, n)
- βhat = c * βacute
- βdd = -s * βacute
+ # Check sqd and λ parameters
+ sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0!")
+ sqd && (λ = one(T))
- θtildeold = θtilde
- (ctildeold, stildeold, ρtildeold) = sym_givens(ρdold, θbar)
- θtilde = stildeold * ρbar
- ρdold = ctildeold * ρbar
- βd = -stildeold * βd + ctildeold * βhat
+ # Tests M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
- τtildeold = (ζold - θtildeold * τtildeold) / ρtildeold
- τd = (ζ - θtilde * τtildeold) / ρdold
- d = d + βcheck * βcheck
- rNorm = sqrt(d + (βd - τd)^2 + βdd * βdd)
- history && push!(rNorms, rNorm)
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
- # Estimate ‖A‖.
- Anorm² += β * β
- Anorm = sqrt(Anorm²)
- Anorm² += α * α
+ # Set up workspace.
+ allocate_if(!MisI, solver, :u, S, m)
+ allocate_if(!NisI, solver, :v, S, n)
+ x, Nv, Aᴴu, h, hbar = solver.x, solver.Nv, solver.Aᴴu, solver.h, solver.hbar
+ Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ u = MisI ? Mu : solver.u
+ v = NisI ? Nv : solver.v
- # Estimate cond(A).
- maxrbar = max(maxrbar, ρbarold)
- iter > 1 && (minrbar = min(minrbar, ρbarold))
- Acond = max(maxrbar, ρtemp) / min(minrbar, ρtemp)
+ ctol = conlim > 0 ? 1/conlim : zero(T)
+ x .= zero(FC)
- # Test for convergence.
- ArNorm = abs(ζbar)
+ # Initialize Golub-Kahan process.
+ # β₁ M u₁ = b.
+ Mu .= b
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β₁ = sqrt(@kdotr(m, u, Mu))
+ if β₁ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(rNorms, zero(T))
+ history && push!(ArNorms, zero(T))
+ return solver
+ end
+ β = β₁
+
+ @kscal!(m, one(FC)/β₁, u)
+ MisI || @kscal!(m, one(FC)/β₁, Mu)
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+
+ ζbar = α * β
+ αbar = α
+ ρ = one(T)
+ ρbar = one(T)
+ cbar = one(T)
+ sbar = zero(T)
+
+ # Initialize variables for estimation of ‖r‖.
+ βdd = β
+ βd = zero(T)
+ ρdold = one(T)
+ τtildeold = zero(T)
+ θtilde = zero(T)
+ ζ = zero(T)
+ d = zero(T)
+
+ # Initialize variables for estimation of ‖A‖, cond(A) and xNorm.
+ Anorm² = α * α
+ maxrbar = zero(T)
+ minrbar = min(floatmax(T), T(1.0e+100))
+ Acond = maxrbar / minrbar
+ Anorm = sqrt(Anorm²)
+ xNorm = zero(T)
+
+ # Items for use in stopping rules.
+ ctol = conlim > 0 ? 1 / conlim : zero(T)
+ rNorm = β
+ history && push!(rNorms, rNorm)
+ ArNorm = ArNorm0 = α * β
history && push!(ArNorms, ArNorm)
- xNorm = @knrm2(n, x)
- test1 = rNorm / β₁
- test2 = ArNorm / (Anorm * rNorm)
- test3 = 1 / Acond
- t1 = test1 / (one(T) + Anorm * xNorm / β₁)
- rNormtol = btol + axtol * Anorm * xNorm / β₁
+ xENorm² = zero(T)
+ err_lbnd = zero(T)
+ window = length(err_vec)
+ err_vec .= zero(T)
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²)
+ iter = 0
+ itmax == 0 && (itmax = m + n)
+
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, β₁, α, β₁, α, 0, 1, Anorm², ktimer(start_time))
+
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
+ if α == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a minimum least-squares solution"
+ return solver
+ end
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- ill_cond_mach = (one(T) + test3 ≤ one(T))
- solved_mach = (one(T) + test2 ≤ one(T))
- zero_resid_mach = (one(T) + t1 ≤ one(T))
+ h .= v
+ hbar .= zero(FC)
- # Stopping conditions based on user-provided tolerances.
- user_requested_exit = callback(solver) :: Bool
+ status = "unknown"
+ on_boundary = false
+ solved = solved_mach = solved_lim = (rNorm ≤ axtol)
tired = iter ≥ itmax
- ill_cond_lim = (test3 ≤ ctol)
- solved_lim = (test2 ≤ axtol)
- solved_opt = ArNorm ≤ atol + rtol * ArNorm0
- zero_resid_lim = (test1 ≤ rNormtol)
- iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
-
- ill_cond = ill_cond_mach | ill_cond_lim
- zero_resid = zero_resid_mach | zero_resid_lim
- solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary
+ ill_cond = ill_cond_mach = ill_cond_lim = false
+ zero_resid = zero_resid_mach = zero_resid_lim = false
+ fwd_err = false
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || ill_cond || user_requested_exit || overtimed)
+ iter = iter + 1
+
+ # Generate next Golub-Kahan vectors.
+ # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
+ mul!(Av, A, v)
+ @kaxpby!(m, one(FC), Av, -α, Mu)
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β = sqrt(@kdotr(m, u, Mu))
+ if β ≠ 0
+ @kscal!(m, one(FC)/β, u)
+ MisI || @kscal!(m, one(FC)/β, Mu)
+
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+ if α ≠ 0
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
+ end
+ end
+
+ # Continue QR factorization
+ (chat, shat, αhat) = sym_givens(αbar, λ)
+
+ ρold = ρ
+ (c, s, ρ) = sym_givens(αhat, β)
+ θnew = s * α
+ αbar = c * α
+
+ ρbarold = ρbar
+ ζold = ζ
+ θbar = sbar * ρ
+ ρtemp = cbar * ρ
+ (cbar, sbar, ρbar) = sym_givens(ρtemp, θnew)
+ ζ = cbar * ζbar
+ ζbar = -sbar * ζbar
+
+ xENorm² = xENorm² + ζ * ζ
+ err_vec[mod(iter, window) + 1] = ζ
+ iter ≥ window && (err_lbnd = @knrm2(window, err_vec))
+
+ # Update h, hbar and x.
+ δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁)
+ @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁
+
+ # if a trust-region constraint is given, compute step to the boundary
+ # the step ϕ/ρ is not necessarily positive
+ σ = ζ / (ρ * ρbar)
+ if radius > 0
+ t1, t2 = to_boundary(n, x, hbar, radius)
+ tmax, tmin = max(t1, t2), min(t1, t2)
+ on_boundary = σ > tmax || σ < tmin
+ σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
+ end
+
+ @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ
+ @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ
+
+ # Estimate ‖r‖.
+ βacute = chat * βdd
+ βcheck = -shat * βdd
+
+ βhat = c * βacute
+ βdd = -s * βacute
+
+ θtildeold = θtilde
+ (ctildeold, stildeold, ρtildeold) = sym_givens(ρdold, θbar)
+ θtilde = stildeold * ρbar
+ ρdold = ctildeold * ρbar
+ βd = -stildeold * βd + ctildeold * βhat
+
+ τtildeold = (ζold - θtildeold * τtildeold) / ρtildeold
+ τd = (ζ - θtilde * τtildeold) / ρdold
+ d = d + βcheck * βcheck
+ rNorm = sqrt(d + (βd - τd)^2 + βdd * βdd)
+ history && push!(rNorms, rNorm)
+
+ # Estimate ‖A‖.
+ Anorm² += β * β
+ Anorm = sqrt(Anorm²)
+ Anorm² += α * α
+
+ # Estimate cond(A).
+ maxrbar = max(maxrbar, ρbarold)
+ iter > 1 && (minrbar = min(minrbar, ρbarold))
+ Acond = max(maxrbar, ρtemp) / min(minrbar, ρtemp)
+
+ # Test for convergence.
+ ArNorm = abs(ζbar)
+ history && push!(ArNorms, ArNorm)
+ xNorm = @knrm2(n, x)
+
+ test1 = rNorm / β₁
+ test2 = ArNorm / (Anorm * rNorm)
+ test3 = 1 / Acond
+ t1 = test1 / (one(T) + Anorm * xNorm / β₁)
+ rNormtol = btol + axtol * Anorm * xNorm / β₁
+
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², ktimer(start_time))
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ ill_cond_mach = (one(T) + test3 ≤ one(T))
+ solved_mach = (one(T) + test2 ≤ one(T))
+ zero_resid_mach = (one(T) + t1 ≤ one(T))
+
+ # Stopping conditions based on user-provided tolerances.
+ user_requested_exit = callback(solver) :: Bool
+ tired = iter ≥ itmax
+ ill_cond_lim = (test3 ≤ ctol)
+ solved_lim = (test2 ≤ axtol)
+ solved_opt = ArNorm ≤ atol + rtol * ArNorm0
+ zero_resid_lim = (test1 ≤ rNormtol)
+ iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
+
+ ill_cond = ill_cond_mach || ill_cond_lim
+ zero_resid = zero_resid_mach || zero_resid_lim
+ solved = solved_mach || solved_lim || solved_opt || zero_resid || fwd_err || on_boundary
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ ill_cond_lim && (status = "condition number exceeds tolerance")
+ solved && (status = "found approximate minimum least-squares solution")
+ zero_resid && (status = "found approximate zero-residual solution")
+ fwd_err && (status = "truncated forward error small enough")
+ on_boundary && (status = "on trust-region boundary")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.residual = rNorm
+ stats.Aresidual = ArNorm
+ stats.Acond = Acond
+ stats.Anorm = Anorm
+ stats.xNorm = xNorm
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !zero_resid
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- ill_cond_lim && (status = "condition number exceeds tolerance")
- solved && (status = "found approximate minimum least-squares solution")
- zero_resid && (status = "found approximate zero-residual solution")
- fwd_err && (status = "truncated forward error small enough")
- on_boundary && (status = "on trust-region boundary")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.residual = rNorm
- stats.Aresidual = ArNorm
- stats.Acond = Acond
- stats.Anorm = Anorm
- stats.xNorm = xNorm
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !zero_resid
- stats.status = status
- return solver
end
diff --git a/src/lsqr.jl b/src/lsqr.jl
index dd3779dce..fe7acc37c 100644
--- a/src/lsqr.jl
+++ b/src/lsqr.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSQR is formally equivalent to applying the conjugate gradient method
# to the normal equations but should be more stable. It is also formally
@@ -24,16 +24,16 @@
export lsqr, lsqr!
-
"""
(x, stats) = lsqr(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T),
+ M=I, N=I, ldiv::Bool=false,
+ window::Int=5, sqd::Bool=false, λ::T=zero(T),
+ radius::T=zero(T), etol::T=√eps(T),
axtol::T=√eps(T), btol::T=√eps(T),
- atol::T=zero(T), rtol::T=zero(T),
- etol::T=√eps(T), window::Int=5,
- itmax::Int=0, conlim::T=1/√eps(T),
- radius::T=zero(T), verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ conlim::T=1/√eps(T), atol::T=zero(T),
+ rtol::T=zero(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -42,20 +42,20 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ²‖x‖₂²
-using the LSQR method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LSQR method, where λ ≥ 0 is a regularization parameter.
LSQR is formally equivalent to applying CG to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
(and therefore to CGLS) but is more stable.
-LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂.
+LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to CGLS, though it can be slightly more accurate.
If `λ > 0`, LSQR solves the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -65,23 +65,52 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSQR is then equivalent to applying CG to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSQR is then equivalent to applying CG to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: whether the preconditioners are applied with `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `axtol`: tolerance on the backward error;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: if `verbose > 0`, additional details are displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run, such as residual norms and Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -89,12 +118,6 @@ and `false` otherwise.
"""
function lsqr end
-function lsqr(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = LsqrSolver(A, b, window=window)
- lsqr!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = lsqr!(solver::LsqrSolver, A, b; kwargs...)
@@ -104,263 +127,309 @@ See [`LsqrSolver`](@ref) for more details about the `solver`.
"""
function lsqr! end
-function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T),
- axtol :: T=√eps(T), btol :: T=√eps(T),
- atol :: T=zero(T), rtol :: T=zero(T),
- etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T),
- radius :: T=zero(T), verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LSQR: system of %d equations in %d variables\n", m, n)
-
- # Check sqd and λ parameters
- sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
- sqd && (λ = one(T))
-
- # Tests M = Iₙ and N = Iₘ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :u, S, m)
- allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, w = solver.x, solver.Nv, solver.Aᵀu, solver.w
- Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
- rNorms, ArNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- u = MisI ? Mu : solver.u
- v = NisI ? Nv : solver.v
-
- λ² = λ * λ
- ctol = conlim > 0 ? 1/conlim : zero(T)
- x .= zero(FC)
-
- # Initialize Golub-Kahan process.
- # β₁ M u₁ = b.
- Mu .= b
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β₁ = sqrt(@kdotr(m, u, Mu))
- if β₁ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(rNorms, zero(T))
- history && push!(ArNorms, zero(T))
- return solver
+def_args_lsqr = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_kwargs_lsqr = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; sqd::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; radius::T = zero(T) ),
+ :(; etol::T = √eps(T) ),
+ :(; axtol::T = √eps(T) ),
+ :(; btol::T = √eps(T) ),
+ :(; conlim::T = 1/√eps(T) ),
+ :(; atol::T = zero(T) ),
+ :(; rtol::T = zero(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_lsqr = mapreduce(extract_parameters, vcat, def_kwargs_lsqr)
+
+args_lsqr = (:A, :b)
+kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function lsqr($(def_args_lsqr...); window :: Int=5, $(def_kwargs_lsqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = LsqrSolver(A, b; window)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ lsqr!(solver, $(args_lsqr...); $(kwargs_lsqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- β = β₁
-
- @kscal!(m, one(FC)/β₁, u)
- MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
- NisI || mulorldiv!(v, N, Nv, ldiv)
- Anorm² = @kdotr(n, v, Nv)
- Anorm = sqrt(Anorm²)
- α = Anorm
- Acond = zero(T)
- xNorm = zero(T)
- xNorm² = zero(T)
- dNorm² = zero(T)
- c2 = -one(T)
- s2 = zero(T)
- z = zero(T)
-
- xENorm² = zero(T)
- err_lbnd = zero(T)
- window = length(err_vec)
- err_vec .= zero(T)
-
- iter = 0
- itmax == 0 && (itmax = m + n)
-
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᵀr‖", "compat", "backwrd", "‖A‖", "κ(A)")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond)
-
- rNorm = β₁
- r1Norm = rNorm
- r2Norm = rNorm
- res2 = zero(T)
- history && push!(rNorms, r2Norm)
- ArNorm = ArNorm0 = α * β
- history && push!(ArNorms, ArNorm)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
- if α == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a minimum least-squares solution"
- return solver
- end
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
- w .= v
-
- # Initialize other constants.
- ϕbar = β₁
- ρbar = α
-
- status = "unknown"
- on_boundary = false
- solved_lim = ArNorm / (Anorm * rNorm) ≤ axtol
- solved_mach = one(T) + ArNorm / (Anorm * rNorm) ≤ one(T)
- solved = solved_mach | solved_lim
- tired = iter ≥ itmax
- ill_cond = ill_cond_mach = ill_cond_lim = false
- zero_resid_lim = rNorm / β₁ ≤ axtol
- zero_resid_mach = one(T) + rNorm / β₁ ≤ one(T)
- zero_resid = zero_resid_mach | zero_resid_lim
- fwd_err = false
- user_requested_exit = false
-
- while ! (solved || tired || ill_cond || user_requested_exit)
- iter = iter + 1
-
- # Generate next Golub-Kahan vectors.
- # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
- mul!(Av, A, v)
- @kaxpby!(m, one(FC), Av, -α, Mu)
- MisI || mulorldiv!(u, M, Mu, ldiv)
- β = sqrt(@kdotr(m, u, Mu))
- if β ≠ 0
- @kscal!(m, one(FC)/β, u)
- MisI || @kscal!(m, one(FC)/β, Mu)
- Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖²
- λ > 0 && (Anorm² += λ²)
-
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
- NisI || mulorldiv!(v, N, Nv, ldiv)
- α = sqrt(@kdotr(n, v, Nv))
- if α ≠ 0
- @kscal!(n, one(FC)/α, v)
- NisI || @kscal!(n, one(FC)/α, Nv)
- end
- end
- # Continue QR factorization
- # 1. Eliminate the regularization parameter.
- (c1, s1, ρbar1) = sym_givens(ρbar, λ)
- ψ = s1 * ϕbar
- ϕbar = c1 * ϕbar
-
- # 2. Eliminate β.
- # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] :
- # [ β 0 ] [ 0 ζbar ]
- #
- # k k+1 k k+1 k k+1
- # k [ c s ] [ ρbar ] = [ ρ θ⁺ ]
- # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ]
- #
- # so that we obtain
- #
- # [ c s ] [ ζbar ] = [ ζ ]
- # [ s -c ] [ 0 ] [ ζbar⁺ ]
- (c, s, ρ) = sym_givens(ρbar1, β)
- ϕ = c * ϕbar
- ϕbar = s * ϕbar
-
- xENorm² = xENorm² + ϕ * ϕ
- err_vec[mod(iter, window) + 1] = ϕ
- iter ≥ window && (err_lbnd = norm(err_vec))
-
- τ = s * ϕ
- θ = s * α
- ρbar = -c * α
- dNorm² += @kdotr(n, w, w) / ρ^2
-
- # if a trust-region constraint is give, compute step to the boundary
- # the step ϕ/ρ is not necessarily positive
- σ = ϕ / ρ
- if radius > 0
- t1, t2 = to_boundary(x, w, radius)
- tmax, tmin = max(t1, t2), min(t1, t2)
- on_boundary = σ > tmax || σ < tmin
- σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
+ function lsqr!(solver :: LsqrSolver{T,FC,S}, $(def_args_lsqr...); $(def_kwargs_lsqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "LSQR: system of %d equations in %d variables\n", m, n)
+
+ # Check sqd and λ parameters
+ sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0!")
+ sqd && (λ = one(T))
+
+ # Tests M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :u, S, m)
+ allocate_if(!NisI, solver, :v, S, n)
+ x, Nv, Aᴴu, w = solver.x, solver.Nv, solver.Aᴴu, solver.w
+ Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
+ rNorms, ArNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ u = MisI ? Mu : solver.u
+ v = NisI ? Nv : solver.v
+
+ λ² = λ * λ
+ ctol = conlim > 0 ? 1/conlim : zero(T)
+ x .= zero(FC)
+
+ # Initialize Golub-Kahan process.
+ # β₁ M u₁ = b.
+ Mu .= b
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β₁ = sqrt(@kdotr(m, u, Mu))
+ if β₁ == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(rNorms, zero(T))
+ history && push!(ArNorms, zero(T))
+ return solver
end
-
- @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w
- @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w
-
- # Use a plane rotation on the right to eliminate the super-diagonal
- # element (θ) of the upper-bidiagonal matrix.
- # Use the result to estimate norm(x).
- δ = s2 * ρ
- γbar = -c2 * ρ
- rhs = ϕ - δ * z
- zbar = rhs / γbar
- xNorm = sqrt(xNorm² + zbar * zbar)
- (c2, s2, γ) = sym_givens(γbar, θ)
- z = rhs / γ
- xNorm² += z * z
-
+ β = β₁
+
+ @kscal!(m, one(FC)/β₁, u)
+ MisI || @kscal!(m, one(FC)/β₁, Mu)
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ Anorm² = @kdotr(n, v, Nv)
Anorm = sqrt(Anorm²)
- Acond = Anorm * sqrt(dNorm²)
- res1 = ϕbar * ϕbar
- res2 += ψ * ψ
- rNorm = sqrt(res1 + res2)
-
- ArNorm = α * abs(τ)
- history && push!(ArNorms, ArNorm)
-
- r1sq = rNorm * rNorm - λ² * xNorm²
- r1Norm = sqrt(abs(r1sq))
- r1sq < 0 && (r1Norm = -r1Norm)
+ α = Anorm
+ Acond = zero(T)
+ xNorm = zero(T)
+ xNorm² = zero(T)
+ dNorm² = zero(T)
+ c2 = -one(T)
+ s2 = zero(T)
+ z = zero(T)
+
+ xENorm² = zero(T)
+ err_lbnd = zero(T)
+ window = length(err_vec)
+ err_vec .= zero(T)
+
+ iter = 0
+ itmax == 0 && (itmax = m + n)
+
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %7s %7s %7s %7s %5s\n", "k", "α", "β", "‖r‖", "‖Aᴴr‖", "compat", "backwrd", "‖A‖", "κ(A)", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond, ktimer(start_time))
+
+ rNorm = β₁
+ r1Norm = rNorm
r2Norm = rNorm
+ res2 = zero(T)
history && push!(rNorms, r2Norm)
-
- test1 = rNorm / β₁
- test2 = ArNorm / (Anorm * rNorm)
- test3 = 1 / Acond
- t1 = test1 / (one(T) + Anorm * xNorm / β₁)
- rNormtol = btol + axtol * Anorm * xNorm / β₁
-
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond)
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- ill_cond_mach = (one(T) + test3 ≤ one(T))
- solved_mach = (one(T) + test2 ≤ one(T))
- zero_resid_mach = (one(T) + t1 ≤ one(T))
-
- # Stopping conditions based on user-provided tolerances.
- user_requested_exit = callback(solver) :: Bool
+ ArNorm = ArNorm0 = α * β
+ history && push!(ArNorms, ArNorm)
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
+ if α == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a minimum least-squares solution"
+ return solver
+ end
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
+ w .= v
+
+ # Initialize other constants.
+ ϕbar = β₁
+ ρbar = α
+
+ status = "unknown"
+ on_boundary = false
+ solved_lim = ArNorm / (Anorm * rNorm) ≤ axtol
+ solved_mach = one(T) + ArNorm / (Anorm * rNorm) ≤ one(T)
+ solved = solved_mach | solved_lim
tired = iter ≥ itmax
- ill_cond_lim = (test3 ≤ ctol)
- solved_lim = (test2 ≤ axtol)
- solved_opt = ArNorm ≤ atol + rtol * ArNorm0
- zero_resid_lim = (test1 ≤ rNormtol)
- iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
-
- ill_cond = ill_cond_mach | ill_cond_lim
+ ill_cond = ill_cond_mach = ill_cond_lim = false
+ zero_resid_lim = rNorm / β₁ ≤ axtol
+ zero_resid_mach = one(T) + rNorm / β₁ ≤ one(T)
zero_resid = zero_resid_mach | zero_resid_lim
- solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary
+ fwd_err = false
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || ill_cond || user_requested_exit || overtimed)
+ iter = iter + 1
+
+ # Generate next Golub-Kahan vectors.
+ # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ
+ mul!(Av, A, v)
+ @kaxpby!(m, one(FC), Av, -α, Mu)
+ MisI || mulorldiv!(u, M, Mu, ldiv)
+ β = sqrt(@kdotr(m, u, Mu))
+ if β ≠ 0
+ @kscal!(m, one(FC)/β, u)
+ MisI || @kscal!(m, one(FC)/β, Mu)
+ Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖²
+ λ > 0 && (Anorm² += λ²)
+
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
+ NisI || mulorldiv!(v, N, Nv, ldiv)
+ α = sqrt(@kdotr(n, v, Nv))
+ if α ≠ 0
+ @kscal!(n, one(FC)/α, v)
+ NisI || @kscal!(n, one(FC)/α, Nv)
+ end
+ end
+
+ # Continue QR factorization
+ # 1. Eliminate the regularization parameter.
+ (c1, s1, ρbar1) = sym_givens(ρbar, λ)
+ ψ = s1 * ϕbar
+ ϕbar = c1 * ϕbar
+
+ # 2. Eliminate β.
+ # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] :
+ # [ β 0 ] [ 0 ζbar ]
+ #
+ # k k+1 k k+1 k k+1
+ # k [ c s ] [ ρbar ] = [ ρ θ⁺ ]
+ # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ]
+ #
+ # so that we obtain
+ #
+ # [ c s ] [ ζbar ] = [ ζ ]
+ # [ s -c ] [ 0 ] [ ζbar⁺ ]
+ (c, s, ρ) = sym_givens(ρbar1, β)
+ ϕ = c * ϕbar
+ ϕbar = s * ϕbar
+
+ xENorm² = xENorm² + ϕ * ϕ
+ err_vec[mod(iter, window) + 1] = ϕ
+ iter ≥ window && (err_lbnd = @knrm2(window, err_vec))
+
+ τ = s * ϕ
+ θ = s * α
+ ρbar = -c * α
+ dNorm² += @kdotr(n, w, w) / ρ^2
+
+ # if a trust-region constraint is given, compute step to the boundary
+ # the step ϕ/ρ is not necessarily positive
+ σ = ϕ / ρ
+ if radius > 0
+ t1, t2 = to_boundary(n, x, w, radius)
+ tmax, tmin = max(t1, t2), min(t1, t2)
+ on_boundary = σ > tmax || σ < tmin
+ σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
+ end
+
+ @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w
+ @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w
+
+ # Use a plane rotation on the right to eliminate the super-diagonal
+ # element (θ) of the upper-bidiagonal matrix.
+ # Use the result to estimate norm(x).
+ δ = s2 * ρ
+ γbar = -c2 * ρ
+ rhs = ϕ - δ * z
+ zbar = rhs / γbar
+ xNorm = sqrt(xNorm² + zbar * zbar)
+ (c2, s2, γ) = sym_givens(γbar, θ)
+ z = rhs / γ
+ xNorm² += z * z
+
+ Anorm = sqrt(Anorm²)
+ Acond = Anorm * sqrt(dNorm²)
+ res1 = ϕbar * ϕbar
+ res2 += ψ * ψ
+ rNorm = sqrt(res1 + res2)
+
+ ArNorm = α * abs(τ)
+ history && push!(ArNorms, ArNorm)
+
+ r1sq = rNorm * rNorm - λ² * xNorm²
+ r1Norm = sqrt(abs(r1sq))
+ r1sq < 0 && (r1Norm = -r1Norm)
+ r2Norm = rNorm
+ history && push!(rNorms, r2Norm)
+
+ test1 = rNorm / β₁
+ test2 = ArNorm / (Anorm * rNorm)
+ test3 = 1 / Acond
+ t1 = test1 / (one(T) + Anorm * xNorm / β₁)
+ rNormtol = btol + axtol * Anorm * xNorm / β₁
+
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond, ktimer(start_time))
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ ill_cond_mach = (one(T) + test3 ≤ one(T))
+ solved_mach = (one(T) + test2 ≤ one(T))
+ zero_resid_mach = (one(T) + t1 ≤ one(T))
+
+ # Stopping conditions based on user-provided tolerances.
+ user_requested_exit = callback(solver) :: Bool
+ tired = iter ≥ itmax
+ ill_cond_lim = (test3 ≤ ctol)
+ solved_lim = (test2 ≤ axtol)
+ solved_opt = ArNorm ≤ atol + rtol * ArNorm0
+ zero_resid_lim = (test1 ≤ rNormtol)
+ iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
+
+ ill_cond = ill_cond_mach || ill_cond_lim
+ zero_resid = zero_resid_mach || zero_resid_lim
+ solved = solved_mach || solved_lim || solved_opt || zero_resid || fwd_err || on_boundary
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ ill_cond_lim && (status = "condition number exceeds tolerance")
+ solved && (status = "found approximate minimum least-squares solution")
+ zero_resid && (status = "found approximate zero-residual solution")
+ fwd_err && (status = "truncated forward error small enough")
+ on_boundary && (status = "on trust-region boundary")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !zero_resid
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- ill_cond_lim && (status = "condition number exceeds tolerance")
- solved && (status = "found approximate minimum least-squares solution")
- zero_resid && (status = "found approximate zero-residual solution")
- fwd_err && (status = "truncated forward error small enough")
- on_boundary && (status = "on trust-region boundary")
- user_requested_exit && (status = "user-requested exit")
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !zero_resid
- stats.status = status
- return solver
end
diff --git a/src/minres.jl b/src/minres.jl
index cbaefee9f..8e6659472 100644
--- a/src/minres.jl
+++ b/src/minres.jl
@@ -3,7 +3,7 @@
#
# minimize ‖Ax - b‖₂
#
-# where A is square and symmetric.
+# where A is Hermitian.
#
# MINRES is formally equivalent to applying the conjugate residuals method
# to Ax = b when A is positive definite, but is more general and also applies
@@ -21,20 +21,22 @@
export minres, minres!
-
"""
(x, stats) = minres(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T)/100,
- rtol::T=√eps(T)/100, ratol :: T=zero(T),
- rrtol :: T=zero(T), etol::T=√eps(T),
- window::Int=5, itmax::Int=0,
- conlim::T=1/√eps(T), verbose::Int=0,
- history::Bool=false, ldiv::Bool=false,
- callback=solver->false)
+ M=I, ldiv::Bool=false, window::Int=5,
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), etol::T=√eps(T),
+ conlim::T=1/√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, stats) = minres(A, b, x0::AbstractVector; kwargs...)
+
+MINRES can be warm-started from an initial guess `x0`, where `kwargs` are the same keyword arguments as above.
+
Solve the shifted linear least-squares problem
minimize ‖b - (A + λI)x‖₂²
@@ -43,26 +45,45 @@ or the shifted linear system
(A + λI) x = b
-using the MINRES method, where λ ≥ 0 is a shift parameter,
-where A is square and symmetric.
+of size n using the MINRES method, where λ ≥ 0 is a shift parameter
+and A is Hermitian.
MINRES is formally equivalent to applying CR to Ax=b when A is positive
definite, but is typically more stable and also applies to the case where
A is indefinite.
-MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
+
+#### Input arguments
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
-MINRES can be warm-started from an initial guess `x0` with the method
+#### Optional argument
- (x, stats) = minres(A, b, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: whether the preconditioner is applied with `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: if `verbose > 0`, additional details are displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run, such as residual norms and Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -70,18 +91,6 @@ and `false` otherwise.
"""
function minres end
-function minres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = MinresSolver(A, b, window=window)
- minres!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function minres(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = MinresSolver(A, b, window=window)
- minres!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = minres!(solver::MinresSolver, A, b; kwargs...)
solver = minres!(solver::MinresSolver, A, b, x0; kwargs...)
@@ -92,257 +101,306 @@ See [`MinresSolver`](@ref) for more details about the `solver`.
"""
function minres! end
-function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- minres!(solver, A, b; kwargs...)
- return solver
-end
-
-function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100,
- ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T),
- itmax :: Int=0, conlim :: T=1/√eps(T), verbose :: Int=0,
- history :: Bool=false, ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("MINRES: system of size %d\n", n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :v, S, n)
- Δx, x, r1, r2, w1, w2, y = solver.Δx, solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.y
- err_vec, stats = solver.err_vec, solver.stats
- warm_start = solver.warm_start
- rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond
- reset!(stats)
- v = MisI ? r2 : solver.v
-
- ϵM = eps(T)
- ctol = conlim > 0 ? 1 / conlim : zero(T)
-
- # Initial solution x₀
- x .= zero(FC)
-
- if warm_start
- mul!(r1, A, Δx)
- (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1)
- @kaxpby!(n, one(FC), b, -one(FC), r1)
- else
- r1 .= b
+def_args_minres = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_minres = (:(x0::AbstractVector),)
+
+def_kwargs_minres = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; etol::T = √eps(T) ),
+ :(; conlim::T = 1/√eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_minres = mapreduce(extract_parameters, vcat, def_kwargs_minres)
+
+args_minres = (:A, :b)
+optargs_minres = (:x0,)
+kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function minres($(def_args_minres...), $(def_optargs_minres...); window :: Int=5, $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = MinresSolver(A, b; window)
+ warm_start!(solver, $(optargs_minres...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ minres!(solver, $(args_minres...); $(kwargs_minres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Initialize Lanczos process.
- # β₁ M v₁ = b.
- r2 .= r1
- MisI || mulorldiv!(v, M, r1, ldiv)
- β₁ = @kdotr(m, r1, v)
- β₁ < 0 && error("Preconditioner is not positive definite")
- if β₁ == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- history && push!(rNorms, β₁)
- history && push!(ArNorms, zero(T))
- history && push!(Aconds, zero(T))
- solver.warm_start = false
- return solver
+ function minres($(def_args_minres...); window :: Int=5, $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = MinresSolver(A, b; window)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ minres!(solver, $(args_minres...); $(kwargs_minres...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- β₁ = sqrt(β₁)
- β = β₁
-
- oldβ = zero(T)
- δbar = zero(T)
- ϵ = zero(T)
- rNorm = β₁
- history && push!(rNorms, β₁)
- ϕbar = β₁
- rhs1 = β₁
- rhs2 = zero(T)
- γmax = zero(T)
- γmin = T(Inf)
- cs = -one(T)
- sn = zero(T)
- w1 .= zero(FC)
- w2 .= zero(FC)
-
- ANorm² = zero(T)
- ANorm = zero(T)
- Acond = zero(T)
- history && push!(Aconds, Acond)
- ArNorm = zero(T)
- history && push!(ArNorms, ArNorm)
- xNorm = zero(T)
-
- xENorm² = zero(T)
- err_lbnd = zero(T)
- window = length(err_vec)
- err_vec .= zero(T)
-
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond)
-
- tol = atol + rtol * β₁
- rNormtol = ratol + rrtol * β₁
- stats.status = "unknown"
- solved = solved_mach = solved_lim = (rNorm ≤ rtol)
- tired = iter ≥ itmax
- ill_cond = ill_cond_mach = ill_cond_lim = false
- zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ tol)
- fwd_err = false
- user_requested_exit = false
-
- while !(solved || tired || ill_cond || user_requested_exit)
- iter = iter + 1
-
- # Generate next Lanczos vector.
- mul!(y, A, v)
- λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v)
- @kscal!(n, one(FC) / β, y)
- iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1)
-
- α = real((@kdot(n, v, y) / β))
- @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2
-
- # Compute w.
- δ = cs * δbar + sn * α
- if iter == 1
- w = w2
- else
- iter ≥ 3 && @kscal!(n, -ϵ, w1)
- w = w1
- @kaxpy!(n, -δ, w2, w)
- end
- @kaxpy!(n, one(FC) / β, v, w)
-
- @. r1 = r2
- @. r2 = y
- MisI || mulorldiv!(v, M, r2, ldiv)
- oldβ = β
- β = @kdotr(n, r2, v)
- β < 0 && error("Preconditioner is not positive definite")
- β = sqrt(β)
- ANorm² = ANorm² + α * α + oldβ * oldβ + β * β
-
- # Apply rotation to obtain
- # [ δₖ ϵₖ₊₁ ] = [ cs sn ] [ δbarₖ 0 ]
- # [ γbar δbarₖ₊₁ ] [ sn -cs ] [ αₖ βₖ₊₁ ]
- γbar = sn * δbar - cs * α
- ϵ = sn * β
- δbar = -cs * β
- root = sqrt(γbar * γbar + δbar * δbar)
- ArNorm = ϕbar * root # = ‖Aᵀrₖ₋₁‖
- history && push!(ArNorms, ArNorm)
-
- # Compute the next plane rotation.
- γ = sqrt(γbar * γbar + β * β)
- γ = max(γ, ϵM)
- cs = γbar / γ
- sn = β / γ
- ϕ = cs * ϕbar
- ϕbar = sn * ϕbar
-
- # Final update of w.
- @kscal!(n, one(FC) / γ, w)
- # Update x.
- @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w
- xENorm² = xENorm² + ϕ * ϕ
+ function minres!(solver :: MinresSolver{T,FC,S}, $(def_args_minres...); $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
- # Update directions for x.
- if iter ≥ 2
- @kswap(w1, w2)
- end
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
- # Compute lower bound on forward error.
- err_vec[mod(iter, window) + 1] = ϕ
- iter ≥ window && (err_lbnd = norm(err_vec))
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "MINRES: system of size %d\n", n)
- γmax = max(γmax, γ)
- γmin = min(γmin, γ)
- ζ = rhs1 / γ
- rhs1 = rhs2 - δ * ζ
- rhs2 = -ϵ * ζ
+ # Tests M = Iₙ
+ MisI = (M === I)
- # Estimate various norms.
- ANorm = sqrt(ANorm²)
- xNorm = @knrm2(n, x)
- ϵA = ANorm * ϵM
- ϵx = ANorm * xNorm * ϵM
- ϵr = ANorm * xNorm * rtol
- d = γbar
- d == 0 && (d = ϵA)
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
- rNorm = ϕbar
+ # Set up workspace.
+ allocate_if(!MisI, solver, :v, S, n)
+ Δx, x, r1, r2, w1, w2, y = solver.Δx, solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.y
+ err_vec, stats = solver.err_vec, solver.stats
+ warm_start = solver.warm_start
+ rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond
+ reset!(stats)
+ v = MisI ? r2 : solver.v
- test1 = rNorm / (ANorm * xNorm)
- test2 = root / ANorm
- history && push!(rNorms, rNorm)
+ ϵM = eps(T)
+ ctol = conlim > 0 ? 1 / conlim : zero(T)
- Acond = γmax / γmin
- history && push!(Aconds, Acond)
+ # Initial solution x₀
+ x .= zero(FC)
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2)
+ if warm_start
+ mul!(r1, A, Δx)
+ (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1)
+ @kaxpby!(n, one(FC), b, -one(FC), r1)
+ else
+ r1 .= b
+ end
- if iter == 1 && β / β₁ ≤ 10 * ϵM
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Initialize Lanczos process.
+ # β₁ M v₁ = b.
+ r2 .= r1
+ MisI || mulorldiv!(v, M, r1, ldiv)
+ β₁ = @kdotr(m, r1, v)
+ β₁ < 0 && error("Preconditioner is not positive definite")
+ if β₁ == 0
stats.niter = 0
- stats.solved, stats.inconsistent = true, true
- stats.status = "x is a minimum least-squares solution"
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ history && push!(rNorms, β₁)
+ history && push!(ArNorms, zero(T))
+ history && push!(Aconds, zero(T))
solver.warm_start = false
return solver
end
+ β₁ = sqrt(β₁)
+ β = β₁
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T))
- solved_mach = (one(T) + test2 ≤ one(T))
- zero_resid_mach = (one(T) + test1 ≤ one(T))
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
- # solved_mach = (ϵx ≥ β₁)
-
- # Stopping conditions based on user-provided tolerances.
- tired = iter ≥ itmax
- ill_cond_lim = (one(T) / Acond ≤ ctol)
- solved_lim = (test2 ≤ tol)
- zero_resid_lim = (test1 ≤ tol)
- resid_decrease_lim = (rNorm ≤ rNormtol)
- iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
-
- user_requested_exit = callback(solver) :: Bool
- zero_resid = zero_resid_mach | zero_resid_lim
- resid_decrease = resid_decrease_mach | resid_decrease_lim
- ill_cond = ill_cond_mach | ill_cond_lim
- solved = solved_mach | solved_lim | zero_resid | fwd_err | resid_decrease
+ oldβ = zero(T)
+ δbar = zero(T)
+ ϵ = zero(T)
+ rNorm = β₁
+ history && push!(rNorms, β₁)
+ ϕbar = β₁
+ rhs1 = β₁
+ rhs2 = zero(T)
+ γmax = zero(T)
+ γmin = T(Inf)
+ cs = -one(T)
+ sn = zero(T)
+ w1 .= zero(FC)
+ w2 .= zero(FC)
+
+ ANorm² = zero(T)
+ ANorm = zero(T)
+ Acond = zero(T)
+ history && push!(Aconds, Acond)
+ ArNorm = zero(T)
+ history && push!(ArNorms, ArNorm)
+ xNorm = zero(T)
+
+ xENorm² = zero(T)
+ err_lbnd = zero(T)
+ window = length(err_vec)
+ err_vec .= zero(T)
+
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7s %7s %.2fs\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time))
+
+ ε = atol + rtol * β₁
+ solved = solved_mach = solved_lim = (rNorm ≤ rtol)
+ tired = iter ≥ itmax
+ ill_cond = ill_cond_mach = ill_cond_lim = false
+ zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ ε)
+ fwd_err = false
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || ill_cond || user_requested_exit || overtimed)
+ iter = iter + 1
+
+ # Generate next Lanczos vector.
+ mul!(y, A, v)
+ λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v)
+ @kscal!(n, one(FC) / β, y)
+ iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1)
+
+ α = real((@kdot(n, v, y) / β))
+ @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2
+
+ # Compute w.
+ δ = cs * δbar + sn * α
+ if iter == 1
+ w = w2
+ else
+ iter ≥ 3 && @kscal!(n, -ϵ, w1)
+ w = w1
+ @kaxpy!(n, -δ, w2, w)
+ end
+ @kaxpy!(n, one(FC) / β, v, w)
+
+ @. r1 = r2
+ @. r2 = y
+ MisI || mulorldiv!(v, M, r2, ldiv)
+ oldβ = β
+ β = @kdotr(n, r2, v)
+ β < 0 && error("Preconditioner is not positive definite")
+ β = sqrt(β)
+ ANorm² = ANorm² + α * α + oldβ * oldβ + β * β
+
+ # Apply rotation to obtain
+ # [ δₖ ϵₖ₊₁ ] = [ cs sn ] [ δbarₖ 0 ]
+ # [ γbar δbarₖ₊₁ ] [ sn -cs ] [ αₖ βₖ₊₁ ]
+ γbar = sn * δbar - cs * α
+ ϵ = sn * β
+ δbar = -cs * β
+ root = sqrt(γbar * γbar + δbar * δbar)
+ ArNorm = ϕbar * root # = ‖Aᴴrₖ₋₁‖
+ history && push!(ArNorms, ArNorm)
+
+ # Compute the next plane rotation.
+ γ = sqrt(γbar * γbar + β * β)
+ γ = max(γ, ϵM)
+ cs = γbar / γ
+ sn = β / γ
+ ϕ = cs * ϕbar
+ ϕbar = sn * ϕbar
+
+ # Final update of w.
+ @kscal!(n, one(FC) / γ, w)
+
+ # Update x.
+ @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w
+ xENorm² = xENorm² + ϕ * ϕ
+
+ # Update directions for x.
+ if iter ≥ 2
+ @kswap(w1, w2)
+ end
+
+ # Compute lower bound on forward error.
+ err_vec[mod(iter, window) + 1] = ϕ
+ iter ≥ window && (err_lbnd = @knrm2(window, err_vec))
+
+ γmax = max(γmax, γ)
+ γmin = min(γmin, γ)
+ ζ = rhs1 / γ
+ rhs1 = rhs2 - δ * ζ
+ rhs2 = -ϵ * ζ
+
+ # Estimate various norms.
+ ANorm = sqrt(ANorm²)
+ xNorm = @knrm2(n, x)
+ ϵA = ANorm * ϵM
+ ϵx = ANorm * xNorm * ϵM
+ ϵr = ANorm * xNorm * rtol
+ d = γbar
+ d == 0 && (d = ϵA)
+
+ rNorm = ϕbar
+
+ test1 = rNorm / (ANorm * xNorm)
+ test2 = root / ANorm
+ history && push!(rNorms, rNorm)
+
+ Acond = γmax / γmin
+ history && push!(Aconds, Acond)
+
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2, ktimer(start_time))
+
+ if iter == 1 && β / β₁ ≤ 10 * ϵM
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
+ stats.niter = 1
+ stats.solved, stats.inconsistent = true, true
+ stats.timer = ktimer(start_time)
+ stats.status = "x is a minimum least-squares solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T))
+ solved_mach = (one(T) + test2 ≤ one(T))
+ zero_resid_mach = (one(T) + test1 ≤ one(T))
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+ # solved_mach = (ϵx ≥ β₁)
+
+ # Stopping conditions based on user-provided tolerances.
+ tired = iter ≥ itmax
+ ill_cond_lim = (one(T) / Acond ≤ ctol)
+ solved_lim = (test2 ≤ ε)
+ zero_resid_lim = MisI && (test1 ≤ eps(T))
+ resid_decrease_lim = (rNorm ≤ ε)
+ iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
+
+ user_requested_exit = callback(solver) :: Bool
+ zero_resid = zero_resid_mach || zero_resid_lim
+ resid_decrease = resid_decrease_mach || resid_decrease_lim
+ ill_cond = ill_cond_mach || ill_cond_lim
+ solved = solved_mach || solved_lim || zero_resid || fwd_err || resid_decrease
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ ill_cond_lim && (status = "condition number exceeds tolerance")
+ solved && (status = "found approximate minimum least-squares solution")
+ zero_resid && (status = "found approximate zero-residual solution")
+ fwd_err && (status = "truncated forward error small enough")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !zero_resid
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- ill_cond_lim && (status = "condition number exceeds tolerance")
- solved && (status = "found approximate minimum least-squares solution")
- zero_resid && (status = "found approximate zero-residual solution")
- fwd_err && (status = "truncated forward error small enough")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !zero_resid
- stats.status = status
- return solver
end
diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl
index bbfbf856b..5bc3399eb 100644
--- a/src/minres_qlp.jl
+++ b/src/minres_qlp.jl
@@ -18,30 +18,53 @@ export minres_qlp, minres_qlp!
"""
(x, stats) = minres_qlp(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T),
- ctol::T=√eps(T), λ::T=zero(T), itmax::Int=0,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, Artol::T=√eps(T),
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, stats) = minres_qlp(A, b, x0::AbstractVector; kwargs...)
+
+MINRES-QLP can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
MINRES-QLP is the only method based on the Lanczos process that returns the minimum-norm
-solution on singular inconsistent systems (A + λI)x = b, where λ is a shift parameter.
+solution on singular inconsistent systems (A + λI)x = b of size n, where λ is a shift parameter.
It is significantly more complex but can be more reliable than MINRES when A is ill-conditioned.
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
M also indicates the weighted norm in which residuals are measured.
-MINRES-QLP can be warm-started from an initial guess `x0` with the method
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
- (x, stats) = minres_qlp(A, b, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `Artol`: relative stopping tolerance based on the Aᴴ-residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, Aᴴ-residual norms, and condition number estimates;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
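+
+#### Example
+
+A minimal usage sketch (illustrative only; the random Hermitian matrix and right-hand side are placeholders, and the package is assumed to be loaded as `Krylov`):
+
+    using Krylov, LinearAlgebra
+    A = Hermitian(rand(100, 100))          # any Hermitian operator of dimension n
+    b = rand(100)
+    x, stats = minres_qlp(A, b, λ=1.0e-2)  # shifted system (A + λI)x = b
+    stats.solved
+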
#### References
@@ -51,18 +74,6 @@ and `false` otherwise.
"""
function minres_qlp end
-function minres_qlp(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = MinresQlpSolver(A, b)
- minres_qlp!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function minres_qlp(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = MinresQlpSolver(A, b)
- minres_qlp!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = minres_qlp!(solver::MinresQlpSolver, A, b; kwargs...)
solver = minres_qlp!(solver::MinresQlpSolver, A, b, x0; kwargs...)
@@ -73,365 +84,414 @@ See [`MinresQlpSolver`](@ref) for more details about the `solver`.
"""
function minres_qlp! end
-function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- minres_qlp!(solver, A, b; kwargs...)
- return solver
-end
-
-function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- ctol :: T=√eps(T), λ ::T=zero(T), itmax :: Int=0,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("MINRES-QLP: system of size %d\n", n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :vₖ, S, n)
- wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ
- Δx, x, p, stats = solver.Δx, solver.x, solver.p, solver.stats
- warm_start = solver.warm_start
- rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond
- reset!(stats)
- vₖ = MisI ? M⁻¹vₖ : solver.vₖ
- vₖ₊₁ = MisI ? p : M⁻¹vₖ₋₁
-
- # Initial solution x₀
- x .= zero(FC)
-
- if warm_start
- mul!(M⁻¹vₖ, A, Δx)
- (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ)
- @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ)
- else
- M⁻¹vₖ .= b
+def_args_minres_qlp = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_minres_qlp = (:(x0::AbstractVector),)
+
+def_kwargs_minres_qlp = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; λ::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; Artol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_minres_qlp = mapreduce(extract_parameters, vcat, def_kwargs_minres_qlp)
+
+args_minres_qlp = (:A, :b)
+optargs_minres_qlp = (:x0,)
+kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
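+# The tuples above drive the @eval block below, which generates both
+# out-of-place `minres_qlp` methods from a single template. `extract_parameters`
+# is assumed to turn each `:(; kw = default)` expression into the keyword
+# definition spliced into the generated signatures.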
+@eval begin
+ function minres_qlp($(def_args_minres_qlp...), $(def_optargs_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = MinresQlpSolver(A, b)
+ warm_start!(solver, $(optargs_minres_qlp...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
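+    # The setup time (solver allocation and warm start) is charged against the
+    # time budget forwarded to minres_qlp!; it is added back to stats.timer below.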
+ minres_qlp!(solver, $(args_minres_qlp...); $(kwargs_minres_qlp...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # β₁v₁ = Mb
- MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv)
- βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ))
- if βₖ ≠ 0
- @kscal!(n, one(FC) / βₖ, M⁻¹vₖ)
- MisI || @kscal!(n, one(FC) / βₖ, vₖ)
+ function minres_qlp($(def_args_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = MinresQlpSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ minres_qlp!(solver, $(args_minres_qlp...); $(kwargs_minres_qlp...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- rNorm = βₖ
- ANorm² = zero(T)
- ANorm = zero(T)
- μmin = zero(T)
- μmax = zero(T)
- Acond = zero(T)
- history && push!(rNorms, rNorm)
- history && push!(Aconds, Acond)
- if rNorm == 0
- stats.niter = 0
- stats.solved, stats.inconsistent = true, false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
- end
-
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- ε = atol + rtol * rNorm
- κ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗")
-
- # Set up workspace.
- M⁻¹vₖ₋₁ .= zero(FC)
- ζbarₖ = βₖ
- ξₖ₋₁ = zero(T)
- τₖ₋₂ = τₖ₋₁ = τₖ = zero(T)
- ψbarₖ₋₂ = zero(T)
- μbisₖ₋₂ = μbarₖ₋₁ = zero(T)
- wₖ₋₁ .= zero(FC)
- wₖ .= zero(FC)
- cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
- sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
-
- # Tolerance for breakdown detection.
- btol = eps(T)^(3/4)
-
- # Stopping criterion.
- breakdown = false
- solved = zero_resid = zero_resid_lim = rNorm ≤ ε
- zero_resid_mach = false
- inconsistent = false
- ill_cond_mach = false
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || inconsistent || ill_cond_mach || breakdown || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the preconditioned Lanczos process.
- # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ
- # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁
-
- mul!(p, A, vₖ) # p ← Avₖ
- if λ ≠ 0
- @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ
- end
-
- if iter ≥ 2
- @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁
- end
-
- αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩
-
- @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ
-
- MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ
-
- βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p))
-
- # βₖ₊₁.ₖ ≠ 0
- if βₖ₊₁ > btol
- @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁)
- MisI || @kscal!(m, one(FC) / βₖ₊₁, p)
- end
-
- ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁
-
- # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
- # [ Oᵀ ]
- #
- # [ α₁ β₂ 0 • • • 0 ] [ λ₁ γ₁ ϵ₁ 0 • • 0 ]
- # [ β₂ α₂ β₃ • • ] [ 0 λ₂ γ₂ • • • ]
- # [ 0 • • • • • ] [ • • λ₃ • • • • ]
- # [ • • • • • • • ] = Qₖ [ • • • • • 0 ]
- # [ • • • • • 0 ] [ • • • • ϵₖ₋₂]
- # [ • • • • βₖ ] [ • • • γₖ₋₁]
- # [ • • βₖ αₖ ] [ 0 • • • • 0 λₖ ]
- # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ]
- #
- # If k = 1, we don't have any previous reflexion.
- # If k = 2, we apply the last reflexion.
- # If k ≥ 3, we only apply the two previous reflexions.
-
- # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁
- if iter ≥ 3
- # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ]
- # [sₖ₋₂ -cₖ₋₂] [βₖ] [γbarₖ₋₁]
- ϵₖ₋₂ = sₖ₋₂ * βₖ
- γbarₖ₋₁ = -cₖ₋₂ * βₖ
- end
- # Apply previous Givens reflections Qₖ₋₁.ₖ
- if iter ≥ 2
- iter == 2 && (γbarₖ₋₁ = βₖ)
- # [cₖ₋₁ sₖ₋₁] [γbarₖ₋₁] = [γₖ₋₁ ]
- # [sₖ₋₁ -cₖ₋₁] [ αₖ ] [λbarₖ]
- γₖ₋₁ = cₖ₋₁ * γbarₖ₋₁ + sₖ₋₁ * αₖ
- λbarₖ = sₖ₋₁ * γbarₖ₋₁ - cₖ₋₁ * αₖ
- end
- iter == 1 && (λbarₖ = αₖ)
-
- # Compute and apply current Givens reflection Qₖ.ₖ₊₁
- # [cₖ sₖ] [λbarₖ] = [λₖ]
- # [sₖ -cₖ] [βₖ₊₁ ] [0 ]
- (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁)
-
- # Compute [ zₖ ] = (Qₖ)ᵀβ₁e₁
- # [ζbarₖ₊₁]
- #
- # [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
- # [sₖ -cₖ] [ 0 ] [ζbarₖ₊₁]
- ζₖ = cₖ * ζbarₖ
- ζbarₖ₊₁ = sₖ * ζbarₖ
-
- # Update the LQ factorization of Rₖ = LₖPₖ.
- # [ λ₁ γ₁ ϵ₁ 0 • • 0 ] [ μ₁ 0 • • • • 0 ]
- # [ 0 λ₂ γ₂ • • • ] [ ψ₁ μ₂ • • ]
- # [ • • λ₃ • • • • ] [ ρ₁ ψ₂ μ₃ • • ]
- # [ • • • • • 0 ] = [ 0 • • • • • ] Pₖ
- # [ • • • • ϵₖ₋₂] [ • • • • μₖ₋₂ • • ]
- # [ • • • γₖ₋₁] [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ]
- # [ 0 • • • • 0 λₖ ] [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ]
-
- if iter == 1
- μbarₖ = λₖ
- elseif iter == 2
- # [μbar₁ γ₁] [cp₂ sp₂] = [μbis₁ 0 ]
- # [ 0 λ₂] [sp₂ -cp₂] [ψbar₁ μbar₂]
- (cpₖ, spₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, γₖ₋₁)
- ψbarₖ₋₁ = spₖ * λₖ
- μbarₖ = -cpₖ * λₖ
- else
- # [μbisₖ₋₂ 0 ϵₖ₋₂] [cpₖ 0 spₖ] [μₖ₋₂ 0 0 ]
- # [ψbarₖ₋₂ μbarₖ₋₁ γₖ₋₁] [ 0 1 0 ] = [ψₖ₋₂ μbarₖ₋₁ θₖ]
- # [ 0 0 λₖ ] [spₖ 0 -cpₖ] [ρₖ₋₂ 0 ηₖ]
- (cpₖ, spₖ, μₖ₋₂) = sym_givens(μbisₖ₋₂, ϵₖ₋₂)
- ψₖ₋₂ = cpₖ * ψbarₖ₋₂ + spₖ * γₖ₋₁
- θₖ = spₖ * ψbarₖ₋₂ - cpₖ * γₖ₋₁
- ρₖ₋₂ = spₖ * λₖ
- ηₖ = -cpₖ * λₖ
-
- # [μₖ₋₂ 0 0 ] [1 0 0 ] [μₖ₋₂ 0 0 ]
- # [ψₖ₋₂ μbarₖ₋₁ θₖ] [0 cdₖ sdₖ] = [ψₖ₋₂ μbisₖ₋₁ 0 ]
- # [ρₖ₋₂ 0 ηₖ] [0 sdₖ -cdₖ] [ρₖ₋₂ ψbarₖ₋₁ μbarₖ]
- (cdₖ, sdₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, θₖ)
- ψbarₖ₋₁ = sdₖ * ηₖ
- μbarₖ = -cdₖ * ηₖ
- end
-
- # Compute Lₖtₖ = zₖ
- # [ μ₁ 0 • • • • 0 ] [τ₁] [ζ₁]
- # [ ψ₁ μ₂ • • ] [τ₂] [ζ₂]
- # [ ρ₁ ψ₂ μ₃ • • ] [τ₃] [ζ₃]
- # [ 0 • • • • • ] [••] = [••]
- # [ • • • • μₖ₋₂ • • ] [••] [••]
- # [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] [••] [••]
- # [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] [τₖ] [ζₖ]
- if iter == 1
- τₖ = ζₖ / μbarₖ
- elseif iter == 2
- τₖ₋₁ = τₖ
- τₖ₋₁ = τₖ₋₁ * μbarₖ₋₁ / μbisₖ₋₁
- ξₖ = ζₖ
- τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ
+ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, $(def_args_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
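+    # Convert the time limit to nanoseconds once; the main loop then only
+    # compares the elapsed time_ns() count against timemax_ns.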
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "MINRES-QLP: system of size %d\n", n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :vₖ, S, n)
+ wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ
+ Δx, x, p, stats = solver.Δx, solver.x, solver.p, solver.stats
+ warm_start = solver.warm_start
+ rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond
+ reset!(stats)
+ vₖ = MisI ? M⁻¹vₖ : solver.vₖ
+ vₖ₊₁ = MisI ? p : M⁻¹vₖ₋₁
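+    # When M = Iₙ, vₖ aliases M⁻¹vₖ and vₖ₊₁ aliases p;
+    # otherwise M⁻¹vₖ₋₁ is reused as storage for vₖ₊₁ to avoid an extra vector.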
+
+ # Initial solution x₀
+ x .= zero(FC)
+
+ if warm_start
+ mul!(M⁻¹vₖ, A, Δx)
+ (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ)
+ @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ)
else
- τₖ₋₂ = τₖ₋₁
- τₖ₋₂ = τₖ₋₂ * μbisₖ₋₂ / μₖ₋₂
- τₖ₋₁ = (ξₖ₋₁ - ψₖ₋₂ * τₖ₋₂) / μbisₖ₋₁
- ξₖ = ζₖ - ρₖ₋₂ * τₖ₋₂
- τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ
+ M⁻¹vₖ .= b
end
- # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᵀ
- if iter == 1
- # w̅₁ = v₁
- @. wₖ = vₖ
- elseif iter == 2
- # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ
- # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ
- @kswap(wₖ₋₁, wₖ)
- @. wₖ = spₖ * wₖ₋₁ - cpₖ * vₖ
- @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁)
- else
- # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ
- # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ)
- # [spₖ 0 -cpₖ] [0 sdₖ -cdₖ] ⟷ w̄ₖ = sdₖ * w̄ₖ₋₁ - cdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ)
- ẘₖ₋₂ = wₖ₋₁
- w̄ₖ₋₁ = wₖ
- # Update the solution x
- @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x)
- @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x)
- # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ
- @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂)
- wₐᵤₓ = ẘₖ₋₂
- # Compute ẘₖ₋₁ and w̄ₖ
- @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ)
- @kswap(wₖ₋₁, wₖ)
+ # β₁v₁ = Mb
+ MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv)
+ βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ))
+ if βₖ ≠ 0
+ @kscal!(n, one(FC) / βₖ, M⁻¹vₖ)
+ MisI || @kscal!(n, one(FC) / βₖ, vₖ)
end
- # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ
- MisI || (vₖ .= vₖ₊₁)
- M⁻¹vₖ₋₁ .= M⁻¹vₖ
- M⁻¹vₖ .= p
-
- # Update ‖rₖ‖ estimate
- # ‖ rₖ ‖ = |ζbarₖ₊₁|
- rNorm = abs(ζbarₖ₊₁)
+ rNorm = βₖ
+ ANorm² = zero(T)
+ ANorm = zero(T)
+ μmin = zero(T)
+ μmax = zero(T)
+ Acond = zero(T)
history && push!(rNorms, rNorm)
-
- # Update ‖Arₖ₋₁‖ estimate
- # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²)
- ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁))
- iter == 1 && (κ = atol + ctol * ArNorm)
- history && push!(ArNorms, ArNorm)
-
- ANorm = sqrt(ANorm²)
- # estimate A condition number
- abs_μbarₖ = abs(μbarₖ)
- if iter == 1
- μmin = abs_μbarₖ
- μmax = abs_μbarₖ
- elseif iter == 2
- μmax = max(μmax, μbisₖ₋₁, abs_μbarₖ)
- μmin = min(μmin, μbisₖ₋₁, abs_μbarₖ)
- else
- μmax = max(μmax, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ)
- μmin = min(μmin, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ)
- end
- Acond = μmax / μmin
history && push!(Aconds, Acond)
- xNorm = @knrm2(n, x)
- backward = rNorm / (ANorm * xNorm)
-
- # Update stopping criterion.
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T))
- resid_decrease_mach = (one(T) + rNorm ≤ one(T))
- zero_resid_mach = (one(T) + backward ≤ one(T))
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
- # Stopping conditions based on user-provided tolerances.
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ ε = atol + rtol * rNorm
+ κ = zero(T)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %7s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗", ktimer(start_time))
+
+ # Set up workspace.
+ M⁻¹vₖ₋₁ .= zero(FC)
+ ζbarₖ = βₖ
+ ξₖ₋₁ = zero(T)
+ τₖ₋₂ = τₖ₋₁ = τₖ = zero(T)
+ ψbarₖ₋₂ = zero(T)
+ μbisₖ₋₂ = μbarₖ₋₁ = zero(T)
+ wₖ₋₁ .= zero(FC)
+ wₖ .= zero(FC)
+ cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
+ sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
+
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
+
+ # Stopping criterion.
+ breakdown = false
+ solved = zero_resid = zero_resid_lim = rNorm ≤ ε
+ zero_resid_mach = false
+ inconsistent = false
+ ill_cond_mach = false
tired = iter ≥ itmax
- resid_decrease_lim = (rNorm ≤ ε)
- zero_resid_lim = (backward ≤ ε)
- breakdown = βₖ₊₁ ≤ btol
-
- user_requested_exit = callback(solver) :: Bool
- zero_resid = zero_resid_mach | zero_resid_lim
- resid_decrease = resid_decrease_mach | resid_decrease_lim
- solved = resid_decrease | zero_resid
- inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ ctol) || (breakdown && !solved)
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || inconsistent || ill_cond_mach || breakdown || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
+
+ # Continue the preconditioned Lanczos process.
+ # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ
+ # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁
+
+ mul!(p, A, vₖ) # p ← Avₖ
+ if λ ≠ 0
+ @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ
+ end
+
+ if iter ≥ 2
+ @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁
+ end
+
+ αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩
+
+ @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ
+
+ MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ
+
+ βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p))
+
+ # βₖ₊₁.ₖ ≠ 0
+ if βₖ₊₁ > btol
+ @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁)
+ MisI || @kscal!(m, one(FC) / βₖ₊₁, p)
+ end
+
+ ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁
+
+ # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
+ # [ Oᵀ ]
+ #
+ # [ α₁ β₂ 0 • • • 0 ] [ λ₁ γ₁ ϵ₁ 0 • • 0 ]
+ # [ β₂ α₂ β₃ • • ] [ 0 λ₂ γ₂ • • • ]
+ # [ 0 • • • • • ] [ • • λ₃ • • • • ]
+ # [ • • • • • • • ] = Qₖ [ • • • • • 0 ]
+ # [ • • • • • 0 ] [ • • • • ϵₖ₋₂]
+ # [ • • • • βₖ ] [ • • • γₖ₋₁]
+ # [ • • βₖ αₖ ] [ 0 • • • • 0 λₖ ]
+ # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ]
+ #
+      # If k = 1, we don't have any previous reflection.
+      # If k = 2, we apply the last reflection.
+      # If k ≥ 3, we only apply the two previous reflections.
+
+ # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁
+ if iter ≥ 3
+ # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ]
+ # [sₖ₋₂ -cₖ₋₂] [βₖ] [γbarₖ₋₁]
+ ϵₖ₋₂ = sₖ₋₂ * βₖ
+ γbarₖ₋₁ = -cₖ₋₂ * βₖ
+ end
+ # Apply previous Givens reflections Qₖ₋₁.ₖ
+ if iter ≥ 2
+ iter == 2 && (γbarₖ₋₁ = βₖ)
+ # [cₖ₋₁ sₖ₋₁] [γbarₖ₋₁] = [γₖ₋₁ ]
+ # [sₖ₋₁ -cₖ₋₁] [ αₖ ] [λbarₖ]
+ γₖ₋₁ = cₖ₋₁ * γbarₖ₋₁ + sₖ₋₁ * αₖ
+ λbarₖ = sₖ₋₁ * γbarₖ₋₁ - cₖ₋₁ * αₖ
+ end
+ iter == 1 && (λbarₖ = αₖ)
+
+ # Compute and apply current Givens reflection Qₖ.ₖ₊₁
+ # [cₖ sₖ] [λbarₖ] = [λₖ]
+ # [sₖ -cₖ] [βₖ₊₁ ] [0 ]
+ (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁)
+
+ # Compute [ zₖ ] = (Qₖ)ᴴβ₁e₁
+ # [ζbarₖ₊₁]
+ #
+ # [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
+ # [sₖ -cₖ] [ 0 ] [ζbarₖ₊₁]
+ ζₖ = cₖ * ζbarₖ
+ ζbarₖ₊₁ = sₖ * ζbarₖ
+
+ # Update the LQ factorization of Rₖ = LₖPₖ.
+ # [ λ₁ γ₁ ϵ₁ 0 • • 0 ] [ μ₁ 0 • • • • 0 ]
+ # [ 0 λ₂ γ₂ • • • ] [ ψ₁ μ₂ • • ]
+ # [ • • λ₃ • • • • ] [ ρ₁ ψ₂ μ₃ • • ]
+ # [ • • • • • 0 ] = [ 0 • • • • • ] Pₖ
+ # [ • • • • ϵₖ₋₂] [ • • • • μₖ₋₂ • • ]
+ # [ • • • γₖ₋₁] [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ]
+ # [ 0 • • • • 0 λₖ ] [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ]
+
+ if iter == 1
+ μbarₖ = λₖ
+ elseif iter == 2
+ # [μbar₁ γ₁] [cp₂ sp₂] = [μbis₁ 0 ]
+ # [ 0 λ₂] [sp₂ -cp₂] [ψbar₁ μbar₂]
+ (cpₖ, spₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, γₖ₋₁)
+ ψbarₖ₋₁ = spₖ * λₖ
+ μbarₖ = -cpₖ * λₖ
+ else
+ # [μbisₖ₋₂ 0 ϵₖ₋₂] [cpₖ 0 spₖ] [μₖ₋₂ 0 0 ]
+ # [ψbarₖ₋₂ μbarₖ₋₁ γₖ₋₁] [ 0 1 0 ] = [ψₖ₋₂ μbarₖ₋₁ θₖ]
+ # [ 0 0 λₖ ] [spₖ 0 -cpₖ] [ρₖ₋₂ 0 ηₖ]
+ (cpₖ, spₖ, μₖ₋₂) = sym_givens(μbisₖ₋₂, ϵₖ₋₂)
+ ψₖ₋₂ = cpₖ * ψbarₖ₋₂ + spₖ * γₖ₋₁
+ θₖ = spₖ * ψbarₖ₋₂ - cpₖ * γₖ₋₁
+ ρₖ₋₂ = spₖ * λₖ
+ ηₖ = -cpₖ * λₖ
+
+ # [μₖ₋₂ 0 0 ] [1 0 0 ] [μₖ₋₂ 0 0 ]
+ # [ψₖ₋₂ μbarₖ₋₁ θₖ] [0 cdₖ sdₖ] = [ψₖ₋₂ μbisₖ₋₁ 0 ]
+ # [ρₖ₋₂ 0 ηₖ] [0 sdₖ -cdₖ] [ρₖ₋₂ ψbarₖ₋₁ μbarₖ]
+ (cdₖ, sdₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, θₖ)
+ ψbarₖ₋₁ = sdₖ * ηₖ
+ μbarₖ = -cdₖ * ηₖ
+ end
+
+ # Compute Lₖtₖ = zₖ
+ # [ μ₁ 0 • • • • 0 ] [τ₁] [ζ₁]
+ # [ ψ₁ μ₂ • • ] [τ₂] [ζ₂]
+ # [ ρ₁ ψ₂ μ₃ • • ] [τ₃] [ζ₃]
+ # [ 0 • • • • • ] [••] = [••]
+ # [ • • • • μₖ₋₂ • • ] [••] [••]
+ # [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] [••] [••]
+ # [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] [τₖ] [ζₖ]
+ if iter == 1
+ τₖ = ζₖ / μbarₖ
+ elseif iter == 2
+ τₖ₋₁ = τₖ
+ τₖ₋₁ = τₖ₋₁ * μbarₖ₋₁ / μbisₖ₋₁
+ ξₖ = ζₖ
+ τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ
+ else
+ τₖ₋₂ = τₖ₋₁
+ τₖ₋₂ = τₖ₋₂ * μbisₖ₋₂ / μₖ₋₂
+ τₖ₋₁ = (ξₖ₋₁ - ψₖ₋₂ * τₖ₋₂) / μbisₖ₋₁
+ ξₖ = ζₖ - ρₖ₋₂ * τₖ₋₂
+ τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ
+ end
+
+ # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ
+ if iter == 1
+ # w̅₁ = v₁
+ @. wₖ = vₖ
+ elseif iter == 2
+ # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ
+ # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ
+ @kswap(wₖ₋₁, wₖ)
+ @. wₖ = spₖ * wₖ₋₁ - cpₖ * vₖ
+ @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁)
+ else
+ # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ
+ # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ)
+ # [spₖ 0 -cpₖ] [0 sdₖ -cdₖ] ⟷ w̄ₖ = sdₖ * w̄ₖ₋₁ - cdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ)
+ ẘₖ₋₂ = wₖ₋₁
+ w̄ₖ₋₁ = wₖ
+ # Update the solution x
+ @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x)
+ @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x)
+ # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ
+ @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂)
+ wₐᵤₓ = ẘₖ₋₂
+ # Compute ẘₖ₋₁ and w̄ₖ
+ @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ)
+ @kswap(wₖ₋₁, wₖ)
+ end
+
+ # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ
+ MisI || (vₖ .= vₖ₊₁)
+ M⁻¹vₖ₋₁ .= M⁻¹vₖ
+ M⁻¹vₖ .= p
+
+ # Update ‖rₖ‖ estimate
+ # ‖ rₖ ‖ = |ζbarₖ₊₁|
+ rNorm = abs(ζbarₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Update ‖Arₖ₋₁‖ estimate
+ # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²)
+ ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁))
+ iter == 1 && (κ = atol + Artol * ArNorm)
+ history && push!(ArNorms, ArNorm)
+
+ ANorm = sqrt(ANorm²)
+ # estimate A condition number
+ abs_μbarₖ = abs(μbarₖ)
+ if iter == 1
+ μmin = abs_μbarₖ
+ μmax = abs_μbarₖ
+ elseif iter == 2
+ μmax = max(μmax, μbisₖ₋₁, abs_μbarₖ)
+ μmin = min(μmin, μbisₖ₋₁, abs_μbarₖ)
+ else
+ μmax = max(μmax, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ)
+ μmin = min(μmin, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ)
+ end
+ Acond = μmax / μmin
+ history && push!(Aconds, Acond)
+ xNorm = @knrm2(n, x)
+ backward = rNorm / (ANorm * xNorm)
+
+ # Update stopping criterion.
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T))
+ resid_decrease_mach = (one(T) + rNorm ≤ one(T))
+ zero_resid_mach = (one(T) + backward ≤ one(T))
+
+ # Stopping conditions based on user-provided tolerances.
+ tired = iter ≥ itmax
+ resid_decrease_lim = (rNorm ≤ ε)
+ zero_resid_lim = MisI && (backward ≤ eps(T))
+ breakdown = βₖ₊₁ ≤ btol
+
+ user_requested_exit = callback(solver) :: Bool
+ zero_resid = zero_resid_mach | zero_resid_lim
+ resid_decrease = resid_decrease_mach | resid_decrease_lim
+ solved = resid_decrease | zero_resid
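+      # The system is declared inconsistent when the Aᴴ-residual stagnates
+      # below κ while the last diagonal entry μbarₖ of Lₖ is negligible, or
+      # when the Lanczos process breaks down before convergence.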
+ inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ Artol) || (breakdown && !solved)
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+
+ # Update variables
+ if iter ≥ 2
+ sₖ₋₂ = sₖ₋₁
+ cₖ₋₂ = cₖ₋₁
+ ξₖ₋₁ = ξₖ
+ μbisₖ₋₂ = μbisₖ₋₁
+ ψbarₖ₋₂ = ψbarₖ₋₁
+ end
+ sₖ₋₁ = sₖ
+ cₖ₋₁ = cₖ
+ μbarₖ₋₁ = μbarₖ
+ ζbarₖ = ζbarₖ₊₁
+ βₖ = βₖ₊₁
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e %.2fs\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward, ktimer(start_time))
+ end
+ (verbose > 0) && @printf(iostream, "\n")
- # Update variables
+ # Finalize the update of x
if iter ≥ 2
- sₖ₋₂ = sₖ₋₁
- cₖ₋₂ = cₖ₋₁
- ξₖ₋₁ = ξₖ
- μbisₖ₋₂ = μbisₖ₋₁
- ψbarₖ₋₂ = ψbarₖ₋₁
+ @kaxpy!(n, τₖ₋₁, wₖ₋₁, x)
+ end
+ if !inconsistent
+ @kaxpy!(n, τₖ, wₖ, x)
end
- sₖ₋₁ = sₖ
- cₖ₋₁ = cₖ
- μbarₖ₋₁ = μbarₖ
- ζbarₖ = ζbarₖ₊₁
- βₖ = βₖ₊₁
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward)
- end
- (verbose > 0) && @printf("\n")
- # Finalize the update of x
- if iter ≥ 2
- @kaxpy!(n, τₖ₋₁, wₖ₋₁, x)
- end
- if !inconsistent
- @kaxpy!(n, τₖ, wₖ, x)
- end
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ inconsistent && (status = "found approximate minimum least-squares solution")
+ zero_resid && (status = "found approximate zero-residual solution")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- tired && (status = "maximum number of iterations exceeded")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- inconsistent && (status = "found approximate minimum least-squares solution")
- zero_resid && (status = "found approximate zero-residual solution")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
+ end
end
diff --git a/src/qmr.jl b/src/qmr.jl
index eb4a4eb46..995392f0c 100644
--- a/src/qmr.jl
+++ b/src/qmr.jl
@@ -21,28 +21,49 @@
export qmr, qmr!
"""
- (x, stats) = qmr(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ (x, stats) = qmr(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0, timemax::Float64=Inf, verbose::Int=0,
+ history::Bool=false, callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the QMR method.
+ (x, stats) = qmr(A, b, x0::AbstractVector; kwargs...)
+
+QMR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using QMR.
QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
-When `A` is symmetric and `b = c`, QMR is equivalent to MINRES.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
+When `A` is Hermitian and `b = c`, QMR is equivalent to MINRES.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-QMR can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = qmr(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
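+
+#### Example
+
+An illustrative sketch (the random square operator and vectors are placeholders; a random `c` satisfies `bᴴc ≠ 0` with probability one):
+
+    using Krylov
+    A = rand(50, 50)                   # square, possibly non-Hermitian
+    b = rand(50)
+    x, stats = qmr(A, b)               # c defaults to b
+    x, stats = qmr(A, b, c=rand(50))   # custom second initial vector
+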
#### References
@@ -52,18 +73,6 @@ and `false` otherwise.
"""
function qmr end
-function qmr(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = QmrSolver(A, b)
- qmr!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function qmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = QmrSolver(A, b)
- qmr!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = qmr!(solver::QmrSolver, A, b; kwargs...)
solver = qmr!(solver::QmrSolver, A, b, x0; kwargs...)
@@ -74,253 +83,301 @@ See [`QmrSolver`](@ref) for more details about the `solver`.
"""
function qmr! end
-function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- qmr!(solver, A, b; kwargs...)
- return solver
-end
-
-function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("QMR: system of size %d\n", n)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p
- Δx, x, wₖ₋₂, wₖ₋₁, stats = solver.Δx, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- r₀ = warm_start ? q : b
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
+def_args_qmr = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_qmr = (:(x0::AbstractVector),)
+
+def_kwargs_qmr = (:(; c::AbstractVector{FC} = b ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_qmr = mapreduce(extract_parameters, vcat, def_kwargs_qmr)
+
+args_qmr = (:A, :b)
+optargs_qmr = (:x0,)
+kwargs_qmr = (:c, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function qmr($(def_args_qmr...), $(def_optargs_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = QmrSolver(A, b)
+ warm_start!(solver, $(optargs_qmr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ qmr!(solver, $(args_qmr...); $(kwargs_qmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Initial solution x₀ and residual norm ‖r₀‖.
- x .= zero(FC)
- rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖
-
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved = true
- stats.inconsistent = false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function qmr($(def_args_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = QmrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ qmr!(solver, $(args_qmr...); $(kwargs_qmr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = 2*n)
-
- ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
-
- # Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩
- if cᵗb == 0
- stats.niter = 0
- stats.solved = false
- stats.inconsistent = false
- stats.status = "Breakdown bᵀc = 0"
- solver.warm_start = false
- return solver
- end
-
- βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀)
- vₖ₋₁ .= zero(FC) # v₀ = 0
- uₖ₋₁ .= zero(FC) # u₀ = 0
- vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
- cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
- sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹
- wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹
- ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
- τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate
-
- # Stopping criterion.
- solved = rNorm ≤ ε
- breakdown = false
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || breakdown || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the Lanczos biorthogonalization process.
- # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
-
- @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
- @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
-
- αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩
-
- @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
- @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
-
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
-
- # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
- # [ Oᵀ ]
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ]
- # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ]
- # [ 0 • • • • • ] [ • • δ₃ • • • • ]
- # [ • • • • • • • ] = Qₖ [ • • • • • 0 ]
- # [ • • • • • 0 ] [ • • • • ϵₖ₋₂]
- # [ • • • • γₖ ] [ • • • λₖ₋₁]
- # [ • • βₖ αₖ ] [ • • δₖ ]
- # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ]
- #
- # If k = 1, we don't have any previous reflexion.
- # If k = 2, we apply the last reflexion.
- # If k ≥ 3, we only apply the two previous reflexions.
-
- # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁
- if iter ≥ 3
- # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ]
- # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁]
- ϵₖ₋₂ = sₖ₋₂ * γₖ
- λbarₖ₋₁ = -cₖ₋₂ * γₖ
+ function qmr!(solver :: QmrSolver{T,FC,S}, $(def_args_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "QMR: system of size %d\n", n)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p
+ Δx, x, wₖ₋₂, wₖ₋₁, stats = solver.Δx, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ r₀ = warm_start ? q : b
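+    # With a warm start, q is reused as storage for the initial residual
+    # b - Ax₀; otherwise r₀ simply aliases b.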
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
end
- # Apply previous Givens reflections Qₖ₋₁.ₖ
- if iter ≥ 2
- iter == 2 && (λbarₖ₋₁ = γₖ)
- # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ]
- # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ]
- λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ
- δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ
-
- # Update sₖ₋₂ and cₖ₋₂.
- sₖ₋₂ = sₖ₋₁
- cₖ₋₂ = cₖ₋₁
- end
+ # Initial solution x₀ and residual norm ‖r₀‖.
+ x .= zero(FC)
+ rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖
- # Compute and apply current Givens reflection Qₖ.ₖ₊₁
- iter == 1 && (δbarₖ = αₖ)
- # [cₖ sₖ] [δbarₖ] = [δₖ]
- # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ]
- (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁)
-
- # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ]
- # [ 0 ]
- #
- # [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
- # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁]
- ζₖ = cₖ * ζbarₖ
- ζbarₖ₊₁ = conj(sₖ) * ζbarₖ
-
- # Update sₖ₋₁ and cₖ₋₁.
- sₖ₋₁ = sₖ
- cₖ₋₁ = cₖ
-
- # Compute the direction wₖ, the last column of Wₖ = Vₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Vₖ)ᵀ.
- # w₁ = v₁ / δ₁
- if iter == 1
- wₖ = wₖ₋₁
- @kaxpy!(n, one(FC), vₖ, wₖ)
- @. wₖ = wₖ / δₖ
- end
- # w₂ = (v₂ - λ₁w₁) / δ₂
- if iter == 2
- wₖ = wₖ₋₂
- @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
- @kaxpy!(n, one(FC), vₖ, wₖ)
- @. wₖ = wₖ / δₖ
- end
- # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ
- if iter ≥ 3
- @kscal!(n, -ϵₖ₋₂, wₖ₋₂)
- wₖ = wₖ₋₂
- @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
- @kaxpy!(n, one(FC), vₖ, wₖ)
- @. wₖ = wₖ / δₖ
+ history && push!(rNorms, rNorm)
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
end
- # Compute solution xₖ.
- # xₖ ← xₖ₋₁ + ζₖ * wₖ
- @kaxpy!(n, ζₖ, wₖ, x)
-
- # Compute vₖ₊₁ and uₖ₊₁.
- @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
- @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
-
- if pᵗq ≠ zero(FC)
- @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
- @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
+ iter = 0
+ itmax == 0 && (itmax = 2*n)
+
+ ε = atol + rtol * rNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+
+ # Initialize the Lanczos biorthogonalization process.
+ cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩
+ if cᴴb == 0
+ stats.niter = 0
+ stats.solved = false
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "Breakdown bᴴc = 0"
+ solver.warm_start = false
+ return solver
end
- # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖²
- τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
+ vₖ₋₁ .= zero(FC) # v₀ = 0
+ uₖ₋₁ .= zero(FC) # u₀ = 0
+ vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
+ uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
+ cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
+ sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹
+ wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹
+ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁
+ τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate
+
+ # Stopping criterion.
+ solved = rNorm ≤ ε
+ breakdown = false
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
+
+ # Continue the Lanczos biorthogonalization process.
+ # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+
+ mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
+
+ @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
+ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
+
+ αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩
+
+ @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
+ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
+
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
+
+ # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
+ # [ Oᵀ ]
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ]
+ # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ]
+ # [ 0 • • • • • ] [ • • δ₃ • • • • ]
+ # [ • • • • • • • ] = Qₖ [ • • • • • 0 ]
+ # [ • • • • • 0 ] [ • • • • ϵₖ₋₂]
+ # [ • • • • γₖ ] [ • • • λₖ₋₁]
+ # [ • • βₖ αₖ ] [ • • δₖ ]
+ # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ]
+ #
+      # If k = 1, we don't have any previous reflection.
+      # If k = 2, we apply the last reflection.
+      # If k ≥ 3, we only apply the two previous reflections.
+
+ # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁
+ if iter ≥ 3
+ # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ]
+ # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁]
+ ϵₖ₋₂ = sₖ₋₂ * γₖ
+ λbarₖ₋₁ = -cₖ₋₂ * γₖ
+ end
+
+ # Apply previous Givens reflections Qₖ₋₁.ₖ
+ if iter ≥ 2
+ iter == 2 && (λbarₖ₋₁ = γₖ)
+ # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ]
+ # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ]
+ λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ
+ δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ
+
+ # Update sₖ₋₂ and cₖ₋₂.
+ sₖ₋₂ = sₖ₋₁
+ cₖ₋₂ = cₖ₋₁
+ end
+
+ # Compute and apply current Givens reflection Qₖ.ₖ₊₁
+ iter == 1 && (δbarₖ = αₖ)
+ # [cₖ sₖ] [δbarₖ] = [δₖ]
+ # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ]
+ (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁)
+
+ # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ]
+ # [ 0 ]
+ #
+ # [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
+ # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁]
+ ζₖ = cₖ * ζbarₖ
+ ζbarₖ₊₁ = conj(sₖ) * ζbarₖ
+
+ # Update sₖ₋₁ and cₖ₋₁.
+ sₖ₋₁ = sₖ
+ cₖ₋₁ = cₖ
+
+ # Compute the direction wₖ, the last column of Wₖ = Vₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Vₖ)ᵀ.
+ # w₁ = v₁ / δ₁
+ if iter == 1
+ wₖ = wₖ₋₁
+ @kaxpy!(n, one(FC), vₖ, wₖ)
+ @. wₖ = wₖ / δₖ
+ end
+ # w₂ = (v₂ - λ₁w₁) / δ₂
+ if iter == 2
+ wₖ = wₖ₋₂
+ @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
+ @kaxpy!(n, one(FC), vₖ, wₖ)
+ @. wₖ = wₖ / δₖ
+ end
+ # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ
+ if iter ≥ 3
+ @kscal!(n, -ϵₖ₋₂, wₖ₋₂)
+ wₖ = wₖ₋₂
+ @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
+ @kaxpy!(n, one(FC), vₖ, wₖ)
+ @. wₖ = wₖ / δₖ
+ end
+
+ # Compute solution xₖ.
+ # xₖ ← xₖ₋₁ + ζₖ * wₖ
+ @kaxpy!(n, ζₖ, wₖ, x)
+
+ # Compute vₖ₊₁ and uₖ₊₁.
+ @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
+ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+
+ if pᴴq ≠ zero(FC)
+ @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
+ @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
+ end
+
+ # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖²
+ τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ)
+
+ # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁
+ rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁
+ history && push!(rNorms, rNorm)
+
+ # Update directions for x.
+ if iter ≥ 2
+ @kswap(wₖ₋₂, wₖ₋₁)
+ end
+
+ # Update ζbarₖ, βₖ, γₖ and τₖ.
+ ζbarₖ = ζbarₖ₊₁
+ βₖ = βₖ₊₁
+ γₖ = γₖ₊₁
+ τₖ = τₖ₊₁
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ breakdown = !solved && (pᴴq == 0)
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))
+ end
+ (verbose > 0) && @printf(iostream, "\n")
- # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁
- rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁
- history && push!(rNorms, rNorm)
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Update directions for x.
- if iter ≥ 2
- @kswap(wₖ₋₂, wₖ₋₁)
- end
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- # Update ζbarₖ, βₖ, γₖ and τₖ.
- ζbarₖ = ζbarₖ₊₁
- βₖ = βₖ₊₁
- γₖ = γₖ₊₁
- τₖ = τₖ₊₁
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- solved = resid_decrease_lim || resid_decrease_mach
- tired = iter ≥ itmax
- breakdown = !solved && (pᵗq == 0)
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = false
- stats.status = status
- return solver
end
diff --git a/src/symmlq.jl b/src/symmlq.jl
index 7b889c715..604698525 100644
--- a/src/symmlq.jl
+++ b/src/symmlq.jl
@@ -1,5 +1,5 @@
# An implementation of SYMMLQ for the solution of the
-# linear system Ax = b, where A is square and symmetric.
+# linear system Ax = b, where A is Hermitian.
#
# This implementation follows the original implementation by
# Michael Saunders described in
@@ -11,38 +11,63 @@
export symmlq, symmlq!
-
"""
- (x, stats) = symmlq(A, b::AbstractVector{FC}; window::Int=0,
- M=I, λ::T=zero(T), transfer_to_cg::Bool=true,
- λest::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- etol::T=√eps(T), itmax::Int=0, conlim::T=1/√eps(T),
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = symmlq(A, b::AbstractVector{FC};
+ M=I, ldiv::Bool=false, window::Int=5,
+ transfer_to_cg::Bool=true, λ::T=zero(T),
+ λest::T=zero(T), etol::T=√eps(T),
+ conlim::T=1/√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, stats) = symmlq(A, b, x0::AbstractVector; kwargs...)
+
+SYMMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
Solve the shifted linear system
(A + λI) x = b
-using the SYMMLQ method, where λ is a shift parameter,
-and A is square and symmetric.
+of size n using the SYMMLQ method, where λ is a shift parameter, and A is Hermitian.
+
+SYMMLQ produces monotonic errors ‖x* - x‖₂.
-SYMMLQ produces monotonic errors ‖x*-x‖₂.
+#### Input arguments
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
-SYMMLQ can be warm-started from an initial guess `x0` with the method
+#### Optional argument
- (x, stats) = symmlq(A, b, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `transfer_to_cg`: transfer from the SYMMLQ point to the CG point, when it exists. The transfer is based on the residual norm;
+* `λ`: regularization parameter;
+* `λest`: positive strict lower bound on the smallest eigenvalue `λₘᵢₙ` when solving a positive-definite system, such as `λest = (1-10⁻⁷)λₘᵢₙ`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms and error estimates;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SymmlqStats`](@ref) structure.
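+
+#### Example
+
+A sketch of the error-based stopping test (the diagonal operator is a placeholder whose smallest eigenvalue is known to be 1):
+
+    using Krylov, LinearAlgebra
+    A = Diagonal(1.0:100.0)   # Hermitian positive definite, λₘᵢₙ = 1
+    b = ones(100)
+    x, stats = symmlq(A, b, λest=0.9, window=10, etol=1.0e-6)
+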
#### Reference
@@ -50,18 +75,6 @@ and `false` otherwise.
"""
function symmlq end
-function symmlq(A, b :: AbstractVector{FC}, x0 :: AbstractVector; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = SymmlqSolver(A, b, window=window)
- symmlq!(solver, A, b, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function symmlq(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex
- solver = SymmlqSolver(A, b, window=window)
- symmlq!(solver, A, b; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = symmlq!(solver::SymmlqSolver, A, b; kwargs...)
solver = symmlq!(solver::SymmlqSolver, A, b, x0; kwargs...)
@@ -72,182 +85,125 @@ See [`SymmlqSolver`](@ref) for more details about the `solver`.
"""
function symmlq! end
-function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- symmlq!(solver, A, b; kwargs...)
- return solver
-end
-
-function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), transfer_to_cg :: Bool=true,
- λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T),
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- m == n || error("System must be square")
- length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n)
-
- # Tests M = Iₙ
- MisI = (M === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
-
- # Set up workspace.
- allocate_if(!MisI, solver, :v, S, n)
- x, Mvold, Mv, Mv_next, w̅ = solver.x, solver.Mvold, solver.Mv, solver.Mv_next, solver.w̅
- Δx, clist, zlist, sprod, stats = solver.Δx, solver.clist, solver.zlist, solver.sprod, solver.stats
- warm_start = solver.warm_start
- rNorms, rcgNorms = stats.residuals, stats.residualscg
- errors, errorscg = stats.errors, stats.errorscg
- reset!(stats)
- v = MisI ? Mv : solver.v
- vold = MisI ? Mvold : solver.v
-
- ϵM = eps(T)
- ctol = conlim > 0 ? 1 / conlim : zero(T)
-
- # Initial solution x₀
- x .= zero(FC)
-
- if warm_start
- mul!(Mvold, A, Δx)
- (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold)
- @kaxpby!(n, one(FC), b, -one(FC), Mvold)
- else
- Mvold .= b
+def_args_symmlq = (:(A ),
+ :(b::AbstractVector{FC}))
+
+def_optargs_symmlq = (:(x0::AbstractVector),)
+
+def_kwargs_symmlq = (:(; M = I ),
+ :(; ldiv::Bool = false ),
+ :(; transfer_to_cg::Bool = true),
+ :(; λ::T = zero(T) ),
+ :(; λest::T = zero(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; etol::T = √eps(T) ),
+ :(; conlim::T = 1/√eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_symmlq = mapreduce(extract_parameters, vcat, def_kwargs_symmlq)
+
+args_symmlq = (:A, :b)
+optargs_symmlq = (:x0,)
+kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, :conlim, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function symmlq($(def_args_symmlq...), $(def_optargs_symmlq...); window :: Int=5, $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = SymmlqSolver(A, b; window)
+ warm_start!(solver, $(optargs_symmlq...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ symmlq!(solver, $(args_symmlq...); $(kwargs_symmlq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Initialize Lanczos process.
- # β₁ M v₁ = b.
- MisI || mulorldiv!(vold, M, Mvold, ldiv)
- β₁ = @kdotr(m, vold, Mvold)
- if β₁ == 0
- stats.niter = 0
- stats.solved = true
- stats.Anorm = T(NaN)
- stats.Acond = T(NaN)
- history && push!(rNorms, zero(T))
- history && push!(rcgNorms, zero(T))
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function symmlq($(def_args_symmlq...); window :: Int=5, $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = SymmlqSolver(A, b; window)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ symmlq!(solver, $(args_symmlq...); $(kwargs_symmlq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- β₁ = sqrt(β₁)
- β = β₁
- @kscal!(m, one(FC) / β, vold)
- MisI || @kscal!(m, one(FC) / β, Mvold)
-
- w̅ .= vold
-
- mul!(Mv, A, vold)
- α = @kdotr(m, vold, Mv) + λ
- @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold
- MisI || mulorldiv!(v, M, Mv, ldiv)
- β = @kdotr(m, v, Mv)
- β < 0 && error("Preconditioner is not positive definite")
- β = sqrt(β)
- @kscal!(m, one(FC) / β, v)
- MisI || @kscal!(m, one(FC) / β, Mv)
-
- # Start QR factorization
- γbar = α
- δbar = β
- ϵold = zero(T)
- cold = one(T)
- sold = zero(T)
-
- ηold = zero(T)
- η = β₁
- ζold = zero(T)
-
- ANorm² = α * α + β * β
-
- γmax = T(-Inf)
- γmin = T(Inf)
- ANorm = zero(T)
- Acond = zero(T)
-
- xNorm = zero(T)
- rNorm = β₁
- history && push!(rNorms, rNorm)
-
- if γbar ≠ 0
- ζbar = η / γbar
- xcgNorm = abs(ζbar)
- rcgNorm = β₁ * abs(ζbar)
- history && push!(rcgNorms, rcgNorm)
- else
- history && push!(rcgNorms, missing)
- end
-
- err = T(Inf)
- errcg = T(Inf)
- window = length(clist)
- clist .= zero(T)
- zlist .= zero(T)
- sprod .= one(T)
-
- if λest ≠ 0
- # Start QR factorization of Tₖ - λest I
- ρbar = α - λest
- σbar = β
- ρ = sqrt(ρbar * ρbar + β * β)
- cwold = -one(T)
- cw = ρbar / ρ
- sw = β / ρ
-
- history && push!(errors, abs(β₁/λest))
- if γbar ≠ 0
- history && push!(errorscg, sqrt(errors[1]^2 - ζbar^2))
+ function symmlq!(solver :: SymmlqSolver{T,FC,S}, $(def_args_symmlq...); $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "SYMMLQ: system of size %d\n", n)
+
+ # Tests M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :v, S, n)
+ x, Mvold, Mv, Mv_next, w̅ = solver.x, solver.Mvold, solver.Mv, solver.Mv_next, solver.w̅
+ Δx, clist, zlist, sprod, stats = solver.Δx, solver.clist, solver.zlist, solver.sprod, solver.stats
+ warm_start = solver.warm_start
+ rNorms, rcgNorms = stats.residuals, stats.residualscg
+ errors, errorscg = stats.errors, stats.errorscg
+ reset!(stats)
+ v = MisI ? Mv : solver.v
+ vold = MisI ? Mvold : solver.v
+
+ ϵM = eps(T)
+ ctol = conlim > 0 ? 1 / conlim : zero(T)
+
+ # Initial solution x₀
+ x .= zero(FC)
+
+ if warm_start
+ mul!(Mvold, A, Δx)
+ (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold)
+ @kaxpby!(n, one(FC), b, -one(FC), Mvold)
else
- history && push!(errorscg, missing)
+ Mvold .= b
end
- end
- iter = 0
- itmax == 0 && (itmax = 2 * n)
-
- (verbose > 0) && @printf("%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond)
-
- tol = atol + rtol * β₁
- status = "unknown"
- solved_lq = solved_mach = solved_lim = (rNorm ≤ tol)
- solved_cg = (γbar ≠ 0) && transfer_to_cg && rcgNorm ≤ tol
- tired = iter ≥ itmax
- ill_cond = ill_cond_mach = ill_cond_lim = false
- solved = zero_resid = solved_lq || solved_cg
- fwd_err = false
- user_requested_exit = false
-
- while ! (solved || tired || ill_cond || user_requested_exit)
- iter = iter + 1
-
- # Continue QR factorization
- (c, s, γ) = sym_givens(γbar, β)
-
- # Update SYMMLQ point
- ηold = η
- ζ = ηold / γ
- @kaxpy!(n, c * ζ, w̅, x)
- @kaxpy!(n, s * ζ, v, x)
- # Update w̅
- @kaxpby!(n, -c, v, s, w̅)
-
- # Generate next Lanczos vector
- oldβ = β
- mul!(Mv_next, A, v)
- α = @kdotr(m, v, Mv_next) + λ
- @kaxpy!(m, -oldβ, Mvold, Mv_next)
- @. Mvold = Mv
- @kaxpy!(m, -α, Mv, Mv_next)
- @. Mv = Mv_next
+ # Initialize Lanczos process.
+ # β₁ M v₁ = b.
+ MisI || mulorldiv!(vold, M, Mvold, ldiv)
+ β₁ = @kdotr(m, vold, Mvold)
+ if β₁ == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.Anorm = T(NaN)
+ stats.Acond = T(NaN)
+ history && push!(rNorms, zero(T))
+ history && push!(rcgNorms, zero(T))
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+ β₁ = sqrt(β₁)
+ β = β₁
+ @kscal!(m, one(FC) / β, vold)
+ MisI || @kscal!(m, one(FC) / β, Mvold)
+
+ w̅ .= vold
+
+ mul!(Mv, A, vold)
+ α = @kdotr(m, vold, Mv) + λ
+ @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold
MisI || mulorldiv!(v, M, Mv, ldiv)
β = @kdotr(m, v, Mv)
β < 0 && error("Preconditioner is not positive definite")
@@ -255,148 +211,259 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC) / β, v)
MisI || @kscal!(m, one(FC) / β, Mv)
- # Continue A norm estimate
- ANorm² = ANorm² + α * α + oldβ * oldβ + β * β
+ # Start QR factorization
+ γbar = α
+ δbar = β
+ ϵold = zero(T)
+ cold = one(T)
+ sold = zero(T)
- if λest ≠ 0
- η = -oldβ * oldβ * cwold / ρbar
- ω = λest + η
- ψ = c * δbar + s * ω
- ωbar = s * δbar - c * ω
- end
+ ηold = zero(T)
+ η = β₁
+ ζold = zero(T)
+
+ ANorm² = α * α + β * β
- # Continue QR factorization
- δ = δbar * c + α * s
- γbar = δbar * s - α * c
- ϵ = β * s
- δbar = -β * c
- η = -ϵold * ζold - δ * ζ
+ γmax = T(-Inf)
+ γmin = T(Inf)
+ ANorm = zero(T)
+ Acond = zero(T)
- rNorm = sqrt(γ * γ * ζ * ζ + ϵold * ϵold * ζold * ζold)
- xNorm = xNorm + ζ * ζ
+ xNorm = zero(T)
+ rNorm = β₁
history && push!(rNorms, rNorm)
if γbar ≠ 0
ζbar = η / γbar
- rcgNorm = β * abs(s * ζ - c * ζbar)
- xcgNorm = xNorm + ζbar * ζbar
+ xcgNorm = abs(ζbar)
+ rcgNorm = β₁ * abs(ζbar)
history && push!(rcgNorms, rcgNorm)
else
history && push!(rcgNorms, missing)
end
- if window > 0 && λest ≠ 0
- if iter < window && window > 1
- for i = iter+1 : window
- sprod[i] = s * sprod[i]
- end
- end
+ err = T(Inf)
+ errcg = T(Inf)
- ix = ((iter-1) % window) + 1
- clist[ix] = c
- zlist[ix] = ζ
+ window = length(clist)
+ clist .= zero(T)
+ zlist .= zero(T)
+ sprod .= one(T)
- if iter ≥ window
- jx = mod(iter, window) + 1
- zetabark = zlist[jx] / clist[jx]
+ if λest ≠ 0
+ # Start QR factorization of Tₖ - λest I
+ ρbar = α - λest
+ σbar = β
+ ρ = sqrt(ρbar * ρbar + β * β)
+ cwold = -one(T)
+ cw = ρbar / ρ
+ sw = β / ρ
- if γbar ≠ 0
- theta = abs(sum(clist[i] * sprod[i] * zlist[i] for i = 1 : window))
- theta = zetabark * theta + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2
- history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta)))
- else
- history && (errorscg[iter-window+1] = missing)
- end
+ history && push!(errors, abs(β₁/λest))
+ if γbar ≠ 0
+ history && push!(errorscg, sqrt(errors[1]^2 - ζbar^2))
+ else
+ history && push!(errorscg, missing)
end
+ end
+
+ iter = 0
+ itmax == 0 && (itmax = 2 * n)
- ix = (iter % window) + 1
- if iter ≥ window && window > 1
- sprod .= sprod ./ sprod[(ix % window) + 1]
- sprod[ix] = sprod[mod(ix-2, window)+1] * s
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %8s %8s %7s %7s %7s %5s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7s %.2fs\n", iter, rNorm, β, cold, sold, ANorm, Acond, "✗ ✗ ✗ ✗", ktimer(start_time))
+
+ tol = atol + rtol * β₁
+ status = "unknown"
+ solved_lq = solved_mach = solved_lim = (rNorm ≤ tol)
+ solved_cg = (γbar ≠ 0) && transfer_to_cg && rcgNorm ≤ tol
+ tired = iter ≥ itmax
+ ill_cond = ill_cond_mach = ill_cond_lim = false
+ solved = zero_resid = solved_lq || solved_cg
+ fwd_err = false
+ user_requested_exit = false
+ overtimed = false
+
+ while ! (solved || tired || ill_cond || user_requested_exit || overtimed)
+ iter = iter + 1
+
+ # Continue QR factorization
+ (c, s, γ) = sym_givens(γbar, β)
+
+ # Update SYMMLQ point
+ ηold = η
+ ζ = ηold / γ
+ @kaxpy!(n, c * ζ, w̅, x)
+ @kaxpy!(n, s * ζ, v, x)
+ # Update w̅
+ @kaxpby!(n, -c, v, s, w̅)
+
+ # Generate next Lanczos vector
+ oldβ = β
+ mul!(Mv_next, A, v)
+ α = @kdotr(m, v, Mv_next) + λ
+ @kaxpy!(m, -oldβ, Mvold, Mv_next)
+ @. Mvold = Mv
+ @kaxpy!(m, -α, Mv, Mv_next)
+ @. Mv = Mv_next
+ MisI || mulorldiv!(v, M, Mv, ldiv)
+ β = @kdotr(m, v, Mv)
+ β < 0 && error("Preconditioner is not positive definite")
+ β = sqrt(β)
+ @kscal!(m, one(FC) / β, v)
+ MisI || @kscal!(m, one(FC) / β, Mv)
+
+ # Continue A norm estimate
+ ANorm² = ANorm² + α * α + oldβ * oldβ + β * β
+
+ if λest ≠ 0
+ η = -oldβ * oldβ * cwold / ρbar
+ ω = λest + η
+ ψ = c * δbar + s * ω
+ ωbar = s * δbar - c * ω
end
- end
- if λest ≠ 0
- err = abs((ϵold * ζold + ψ * ζ) / ωbar)
- history && push!(errors, err)
+ # Continue QR factorization
+ δ = δbar * c + α * s
+ γbar = δbar * s - α * c
+ ϵ = β * s
+ δbar = -β * c
+ η = -ϵold * ζold - δ * ζ
+
+ rNorm = sqrt(γ * γ * ζ * ζ + ϵold * ϵold * ζold * ζold)
+ xNorm = xNorm + ζ * ζ
+ history && push!(rNorms, rNorm)
if γbar ≠ 0
- errcg = sqrt(abs(err * err - ζbar * ζbar))
- history && push!(errorscg, errcg)
+ ζbar = η / γbar
+ rcgNorm = β * abs(s * ζ - c * ζbar)
+ xcgNorm = xNorm + ζbar * ζbar
+ history && push!(rcgNorms, rcgNorm)
else
- history && push!(errorscg, missing)
+ history && push!(rcgNorms, missing)
end
- ρbar = sw * σbar - cw * (α - λest)
- σbar = -cw * β
- ρ = sqrt(ρbar * ρbar + β * β)
+ if window > 0 && λest ≠ 0
+ if iter < window && window > 1
+ for i = iter+1 : window
+ sprod[i] = s * sprod[i]
+ end
+ end
+
+ ix = ((iter-1) % window) + 1
+ clist[ix] = c
+ zlist[ix] = ζ
+
+ if iter ≥ window
+ jx = mod(iter, window) + 1
+ zetabark = zlist[jx] / clist[jx]
+
+ if γbar ≠ 0
+ theta = zero(T)
+ for i = 1 : window
+ theta += clist[i] * sprod[i] * zlist[i]
+ end
+ theta = zetabark * abs(theta) + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2
+ history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta)))
+ else
+ history && (errorscg[iter-window+1] = missing)
+ end
+ end
- cwold = cw
+ ix = (iter % window) + 1
+ if iter ≥ window && window > 1
+ sprod .= sprod ./ sprod[(ix % window) + 1]
+ sprod[ix] = sprod[mod(ix-2, window)+1] * s
+ end
+ end
- cw = ρbar / ρ
- sw = β / ρ
- end
+ if λest ≠ 0
+ err = abs((ϵold * ζold + ψ * ζ) / ωbar)
+ history && push!(errors, err)
+
+ if γbar ≠ 0
+ errcg = sqrt(abs(err * err - ζbar * ζbar))
+ history && push!(errorscg, errcg)
+ else
+ history && push!(errorscg, missing)
+ end
- # TODO: Use γ or γbar?
- γmax = max(γmax, γ)
- γmin = min(γmin, γ)
+ ρbar = sw * σbar - cw * (α - λest)
+ σbar = -cw * β
+ ρ = sqrt(ρbar * ρbar + β * β)
- Acond = γmax / γmin
- ANorm = sqrt(ANorm²)
- test1 = rNorm / (ANorm * xNorm)
+ cwold = cw
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1)
+ cw = ρbar / ρ
+ sw = β / ρ
+ end
- # Reset variables
- ϵold = ϵ
- ζold = ζ
- cold = c
+ # TODO: Use γ or γbar?
+ γmax = max(γmax, γ)
+ γmin = min(γmin, γ)
+
+ Acond = γmax / γmin
+ ANorm = sqrt(ANorm²)
+ test1 = rNorm / (ANorm * xNorm)
+
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, β, c, s, ANorm, Acond, test1, ktimer(start_time))
+
+ # Reset variables
+ ϵold = ϵ
+ ζold = ζ
+ cold = c
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (one(T) + rNorm ≤ one(T))
+ ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T))
+ zero_resid_mach = (one(T) + test1 ≤ one(T))
+ # solved_mach = (ϵx ≥ β₁)
+
+ # Stopping conditions based on user-provided tolerances.
+ tired = iter ≥ itmax
+ ill_cond_lim = (one(T) / Acond ≤ ctol)
+ zero_resid_lim = (test1 ≤ tol)
+ fwd_err = (err ≤ etol) || ((γbar ≠ 0) && (errcg ≤ etol))
+ solved_lq = rNorm ≤ tol
+ solved_cg = transfer_to_cg && (γbar ≠ 0) && rcgNorm ≤ tol
+
+ user_requested_exit = callback(solver) :: Bool
+ zero_resid = solved_lq || solved_cg
+ ill_cond = ill_cond_mach || ill_cond_lim
+ solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ end
+ (verbose > 0) && @printf(iostream, "\n")
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (one(T) + rNorm ≤ one(T))
- ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T))
- zero_resid_mach = (one(T) + test1 ≤ one(T))
- # solved_mach = (ϵx ≥ β₁)
+ # Compute CG point
+ # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ
+ if solved_cg
+ @kaxpy!(m, ζbar, w̅, x)
+ end
- # Stopping conditions based on user-provided tolerances.
- tired = iter ≥ itmax
- ill_cond_lim = (one(T) / Acond ≤ ctol)
- zero_resid_lim = (test1 ≤ tol)
- fwd_err = (err ≤ etol) || ((γbar ≠ 0) && (errcg ≤ etol))
- solved_lq = rNorm ≤ tol
- solved_cg = transfer_to_cg && (γbar ≠ 0) && rcgNorm ≤ tol
-
- user_requested_exit = callback(solver) :: Bool
- zero_resid = solved_lq || solved_cg
- ill_cond = ill_cond_mach || ill_cond_lim
- solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach
- end
- (verbose > 0) && @printf("\n")
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ ill_cond_mach && (status = "condition number seems too large for this machine")
+ ill_cond_lim && (status = "condition number exceeds tolerance")
+ solved && (status = "found approximate solution")
+ solved_lq && (status = "solution xᴸ good enough given atol and rtol")
+ solved_cg && (status = "solution xᶜ good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- # Compute CG point
- # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ
- if solved_cg
- @kaxpy!(m, ζbar, w̅, x)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.Anorm = ANorm
+ stats.Acond = Acond
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
-
- tired && (status = "maximum number of iterations exceeded")
- ill_cond_mach && (status = "condition number seems too large for this machine")
- ill_cond_lim && (status = "condition number exceeds tolerance")
- solved && (status = "found approximate solution")
- solved_lq && (status = "solution xᴸ good enough given atol and rtol")
- solved_cg && (status = "solution xᶜ good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.Anorm = ANorm
- stats.Acond = Acond
- stats.status = status
- return solver
end
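A minimal usage sketch of the updated `symmlq` entry point, exercising the new `timemax` keyword and the `stats.timer` field (the SPD model problem below is illustrative, not part of the patch):

    using Krylov, LinearAlgebra
    n = 100
    A = SymTridiagonal(2 * ones(n), -ones(n - 1))  # symmetric positive-definite model problem
    b = A * ones(n)                                # exact solution is ones(n)
    x, stats = symmlq(A, b; timemax = 10.0)        # stops with status "time limit exceeded" if 10 s elapse
    stats.timer                                    # elapsed time in seconds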
diff --git a/src/tricg.jl b/src/tricg.jl
index 5acff2d52..8250e6dfc 100644
--- a/src/tricg.jl
+++ b/src/tricg.jl
@@ -13,30 +13,32 @@ export tricg, tricg!
"""
(x, y, stats) = tricg(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- spd::Bool=false, snd::Bool=false, flip::Bool=false,
- τ::T=one(T), ν::T=-one(T), itmax::Int=0,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ spd::Bool=false, snd::Bool=false,
+ flip::Bool=false, τ::T=one(T),
+ ν::T=-one(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-TriCG solves the symmetric linear system
+ (x, y, stats) = tricg(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+TriCG can be warm-started from initial guesses `x0` and `y0`, where `kwargs` are the same keyword arguments as above.
+
+Given a matrix `A` of dimension m × n, TriCG solves the Hermitian linear system
[ τE A ] [ x ] = [ b ]
- [ Aᵀ νF ] [ y ] [ c ],
+ [ Aᴴ νF ] [ y ] [ c ],
-where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0.
+of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0.
`b` and `c` must both be nonzero.
TriCG could break down if `τ = 0` or `ν = 0`.
It's recommended to use TriMR in these cases.
-By default, TriCG solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1.
-If `flip = true`, TriCG solves another known variant of SQD systems where τ = -1 and ν = 1.
-If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved.
-If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved.
-`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems.
+By default, TriCG solves Hermitian and quasi-definite linear systems with τ = 1 and ν = -1.
TriCG is based on the preconditioned orthogonal tridiagonalization process
and its relation with the preconditioned block-Lanczos process.
@@ -50,17 +52,40 @@ It's the Euclidean norm when `M` and `N` are identity operators.
TriCG stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`.
`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
-TriCG can be warm-started from initial guesses `x0` and `y0` with the method
+#### Optional arguments
- (x, y, stats) = tricg(A, b, c, x0, y0; kwargs...)
+* `x0`: a vector of length m that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear systems;
+* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems;
+* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems;
+* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length m;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
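+
+#### Example
+
+A minimal usage sketch (the random data below is illustrative only):
+
+    using Krylov
+    m, n = 10, 6
+    A = rand(m, n); b = rand(m); c = rand(n)
+    (x, y, stats) = tricg(A, b, c)             # τ = 1, ν = -1: Hermitian quasi-definite system
+    (x, y, stats) = tricg(A, b, c; flip=true)  # τ = -1, ν = 1: the flipped variant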
#### Reference
@@ -68,18 +93,6 @@ and `false` otherwise.
"""
function tricg end
-function tricg(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = TricgSolver(A, b)
- tricg!(solver, A, b, c, x0, y0; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
-function tricg(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = TricgSolver(A, b)
- tricg!(solver, A, b, c; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = tricg!(solver::TricgSolver, A, b, c; kwargs...)
solver = tricg!(solver::TricgSolver, A, b, c, x0, y0; kwargs...)
@@ -90,322 +103,374 @@ See [`TricgSolver`](@ref) for more details about the `solver`.
"""
function tricg! end
-function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0, y0)
- tricg!(solver, A, b, c; kwargs...)
- return solver
-end
-
-function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- spd :: Bool=false, snd :: Bool=false, flip :: Bool=false,
- τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("TriCG: system of %d equations in %d variables\n", m+n, m+n)
-
- # Check flip, spd and snd parameters
- spd && flip && error("The matrix cannot be SPD and SQD")
- snd && flip && error("The matrix cannot be SND and SQD")
- spd && snd && error("The matrix cannot be SPD and SND")
-
- # Check M = Iₘ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Determine τ and ν associated to SQD, SPD or SND systems.
- flip && (τ = -one(T) ; ν = one(T))
- spd && (τ = one(T) ; ν = one(T))
- snd && (τ = -one(T) ; ν = -one(T))
-
- warm_start = solver.warm_start
- warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.")
- warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :vₖ, S, m)
- allocate_if(!NisI, solver, :uₖ, S, n)
- Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p
- Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q
- gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ
- vₖ = MisI ? M⁻¹vₖ : solver.vₖ
- uₖ = NisI ? N⁻¹uₖ : solver.uₖ
- vₖ₊₁ = MisI ? q : vₖ
- uₖ₊₁ = NisI ? p : uₖ
- b₀ = warm_start ? q : b
- c₀ = warm_start ? p : c
-
- stats = solver.stats
- rNorms = stats.residuals
- reset!(stats)
-
- # Initial solutions x₀ and y₀.
- xₖ .= zero(FC)
- yₖ .= zero(FC)
-
- iter = 0
- itmax == 0 && (itmax = m+n)
-
- # Initialize preconditioned orthogonal tridiagonalization process.
- M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0
- N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
-
- # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
- # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ]
- if warm_start
- mul!(b₀, A, Δy)
- (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
- @kaxpby!(m, one(FC), b, -one(FC), b₀)
- mul!(c₀, Aᵀ, Δx)
- (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
- @kaxpby!(n, one(FC), c, -one(FC), c₀)
- end
-
- # β₁Ev₁ = b ↔ β₁v₁ = Mb
- M⁻¹vₖ .= b₀
- MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv)
- βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E
- if βₖ ≠ 0
- @kscal!(m, one(FC) / βₖ, M⁻¹vₖ)
- MisI || @kscal!(m, one(FC) / βₖ, vₖ)
- else
- error("b must be nonzero")
+def_args_tricg = (:(A ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_tricg = (:(x0::AbstractVector),
+ :(y0::AbstractVector))
+
+def_kwargs_tricg = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; spd::Bool = false ),
+ :(; snd::Bool = false ),
+ :(; flip::Bool = false ),
+ :(; τ::T = one(T) ),
+ :(; ν::T = -one(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_tricg = mapreduce(extract_parameters, vcat, def_kwargs_tricg)
+
+args_tricg = (:A, :b, :c)
+optargs_tricg = (:x0, :y0)
+kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function tricg($(def_args_tricg...), $(def_optargs_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = TricgSolver(A, b)
+ warm_start!(solver, $(optargs_tricg...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ tricg!(solver, $(args_tricg...); $(kwargs_tricg...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # γ₁Fu₁ = c ↔ γ₁u₁ = Nc
- N⁻¹uₖ .= c₀
- NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv)
- γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F
- if γₖ ≠ 0
- @kscal!(n, one(FC) / γₖ, N⁻¹uₖ)
- NisI || @kscal!(n, one(FC) / γₖ, uₖ)
- else
- error("c must be nonzero")
+ function tricg($(def_args_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = TricgSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ tricg!(solver, $(args_tricg...); $(kwargs_tricg...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ
- gx₂ₖ₋₁ .= zero(FC)
- gy₂ₖ₋₁ .= zero(FC)
- gx₂ₖ .= zero(FC)
- gy₂ₖ .= zero(FC)
-
- # Compute ‖r₀‖² = (γ₁)² + (β₁)²
- rNorm = sqrt(γₖ^2 + βₖ^2)
- history && push!(rNorms, rNorm)
- ε = atol + rtol * rNorm
-
- (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ)
-
- # Set up workspace.
- d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T)
- π₂ₖ₋₃ = π₂ₖ₋₂ = zero(FC)
- δₖ₋₁ = zero(FC)
-
- # Tolerance for breakdown detection.
- btol = eps(T)^(3/4)
-
- # Stopping criterion.
- breakdown = false
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || breakdown || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the orthogonal tridiagonalization process.
- # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ
-
- if iter ≥ 2
- @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
- @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁
- end
-
- αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
-
- @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ
- @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ
-
- # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁
- M⁻¹vₖ₋₁ .= M⁻¹vₖ
- N⁻¹uₖ₋₁ .= N⁻¹uₖ
-
- # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ]
- # [0 u₁ ••• 0 uₖ]
- #
- # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
- # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ]
- #
- # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
- # [ Aᵀ νF ] [ 0 F ]
- #
- # TriCG subproblem : (Wₖ)ᵀ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂
- #
- # Update the LDLᵀ factorization of Sₖ.ₖ.
- #
- # [ τ α₁ γ₂ 0 • • • • 0 ]
- # [ ᾱ₁ ν β₂ • • ]
- # [ β₂ τ α₂ γ₃ • • ]
- # [ γ₂ ᾱ₂ ν β₃ • • ]
- # [ 0 β₃ • • • • • ]
- # [ • • γ₃ • • • 0 ]
- # [ • • • • • γₖ ]
- # [ • • • • • βₖ ]
- # [ • • βₖ τ αₖ ]
- # [ 0 • • • • 0 γₖ ᾱₖ ν ]
- if iter == 1
- d₂ₖ₋₁ = τ
- δₖ = conj(αₖ) / d₂ₖ₋₁
- d₂ₖ = ν - abs2(δₖ) * d₂ₖ₋₁
- else
- σₖ = βₖ / d₂ₖ₋₂
- ηₖ = γₖ / d₂ₖ₋₃
- λₖ = -(ηₖ * conj(δₖ₋₁) * d₂ₖ₋₃) / d₂ₖ₋₂
- d₂ₖ₋₁ = τ - abs2(σₖ) * d₂ₖ₋₂
- δₖ = (conj(αₖ) - λₖ * conj(σₖ) * d₂ₖ₋₂) / d₂ₖ₋₁
- d₂ₖ = ν - abs2(ηₖ) * d₂ₖ₋₃ - abs2(λₖ) * d₂ₖ₋₂ - abs2(δₖ) * d₂ₖ₋₁
+ function tricg!(solver :: TricgSolver{T,FC,S}, $(def_args_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "TriCG: system of %d equations in %d variables\n", m+n, m+n)
+
+ # Check flip, spd and snd parameters
+ spd && flip && error("The matrix cannot be SPD and SQD")
+ snd && flip && error("The matrix cannot be SND and SQD")
+ spd && snd && error("The matrix cannot be SPD and SND")
+
+ # Check M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Determine τ and ν associated to SQD, SPD or SND systems.
+ flip && (τ = -one(T) ; ν = one(T))
+ spd && (τ = one(T) ; ν = one(T))
+ snd && (τ = -one(T) ; ν = -one(T))
+
+ warm_start = solver.warm_start
+ warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.")
+ warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :vₖ, S, m)
+ allocate_if(!NisI, solver, :uₖ, S, n)
+ Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p
+ Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q
+ gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ
+ vₖ = MisI ? M⁻¹vₖ : solver.vₖ
+ uₖ = NisI ? N⁻¹uₖ : solver.uₖ
+ vₖ₊₁ = MisI ? q : vₖ
+ uₖ₊₁ = NisI ? p : uₖ
+ b₀ = warm_start ? q : b
+ c₀ = warm_start ? p : c
+
+ stats = solver.stats
+ rNorms = stats.residuals
+ reset!(stats)
+
+ # Initial solutions x₀ and y₀.
+ xₖ .= zero(FC)
+ yₖ .= zero(FC)
+
+ iter = 0
+ itmax == 0 && (itmax = m+n)
+
+ # Initialize preconditioned orthogonal tridiagonalization process.
+ M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0
+ N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
+
+ # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
+ # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ]
+ if warm_start
+ mul!(b₀, A, Δy)
+ (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
+ @kaxpby!(m, one(FC), b, -one(FC), b₀)
+ mul!(c₀, Aᴴ, Δx)
+ (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
+ @kaxpby!(n, one(FC), c, -one(FC), c₀)
end
- # Solve LₖDₖpₖ = (β₁e₁ + γ₁e₂)
- #
- # [ 1 0 • • • • • • • 0 ] [ d₁ ] [ β₁ ]
- # [ δ₁ 1 • • ] [ d₂ ] [ γ₁ ]
- # [ σ₂ 1 • • ] [ • ] [ 0 ]
- # [ η₂ λ₂ δ₂ 1 • • ] [ • ] [ • ]
- # [ 0 σ₃ 1 • • ] [ • ] zₖ = [ • ]
- # [ • • η₃ λ₃ δ₃ 1 • • ] [ • ] [ • ]
- # [ • • • • • • ] [ • ] [ • ]
- # [ • • • • • • • • ] [ • ] [ • ]
- # [ • • σₖ 1 0 ] [ d₂ₖ₋₁ ] [ • ]
- # [ 0 • • • • 0 ηₖ λₖ δₖ 1 ] [ d₂ₖ] [ 0 ]
- if iter == 1
- π₂ₖ₋₁ = βₖ / d₂ₖ₋₁
- π₂ₖ = (γₖ - δₖ * βₖ) / d₂ₖ
+ # β₁Ev₁ = b ↔ β₁v₁ = Mb
+ M⁻¹vₖ .= b₀
+ MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv)
+ βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E
+ if βₖ ≠ 0
+ @kscal!(m, one(FC) / βₖ, M⁻¹vₖ)
+ MisI || @kscal!(m, one(FC) / βₖ, vₖ)
else
- π₂ₖ₋₁ = -(σₖ * d₂ₖ₋₂ * π₂ₖ₋₂) / d₂ₖ₋₁
- π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ
+ error("b must be nonzero")
end
- # Solve Gₖ = Wₖ(Lₖ)⁻ᵀ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ.
- if iter == 1
- # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ]
- # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ]
- @. gx₂ₖ₋₁ = vₖ
- @. gx₂ₖ = - conj(δₖ) * gx₂ₖ₋₁
- @. gy₂ₖ = uₖ
+ # γ₁Fu₁ = c ↔ γ₁u₁ = Nc
+ N⁻¹uₖ .= c₀
+ NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv)
+ γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F
+ if γₖ ≠ 0
+ @kscal!(n, one(FC) / γₖ, N⁻¹uₖ)
+ NisI || @kscal!(n, one(FC) / γₖ, uₖ)
else
- # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ]
- # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ]
- # [ gx₂ₖ₋₁ gy₂ₖ₋₁ ]
- # [ gx₂ₖ gy₂ₖ ]
- @. gx₂ₖ₋₁ = conj(ηₖ) * gx₂ₖ₋₁ + conj(λₖ) * gx₂ₖ
- @. gy₂ₖ₋₁ = conj(ηₖ) * gy₂ₖ₋₁ + conj(λₖ) * gy₂ₖ
-
- @. gx₂ₖ = vₖ - conj(σₖ) * gx₂ₖ
- @. gy₂ₖ = - conj(σₖ) * gy₂ₖ
-
- @. gx₂ₖ₋₁ = - gx₂ₖ₋₁ - conj(δₖ) * gx₂ₖ
- @. gy₂ₖ₋₁ = uₖ - gy₂ₖ₋₁ - conj(δₖ) * gy₂ₖ
-
- # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁
- @kswap(gx₂ₖ₋₁, gx₂ₖ)
- @kswap(gy₂ₖ₋₁, gy₂ₖ)
+ error("c must be nonzero")
end
- # Update xₖ = Gxₖ * pₖ
- @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ)
- @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ)
-
- # Update yₖ = Gyₖ * pₖ
- @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ)
- @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ)
-
- # Compute vₖ₊₁ and uₖ₊₁
- MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
- NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+ # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ
+ gx₂ₖ₋₁ .= zero(FC)
+ gy₂ₖ₋₁ .= zero(FC)
+ gx₂ₖ .= zero(FC)
+ gy₂ₖ .= zero(FC)
- βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
- γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
-
- # βₖ₊₁ ≠ 0
- if βₖ₊₁ > btol
- @kscal!(m, one(FC) / βₖ₊₁, q)
- MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁)
- end
+ # Compute ‖r₀‖² = (γ₁)² + (β₁)²
+ rNorm = sqrt(γₖ^2 + βₖ^2)
+ history && push!(rNorms, rNorm)
+ ε = atol + rtol * rNorm
- # γₖ₊₁ ≠ 0
- if γₖ₊₁ > btol
- @kscal!(n, one(FC) / γₖ₊₁, p)
- NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁)
- end
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ, γₖ, ktimer(start_time))
- # Update M⁻¹vₖ and N⁻¹uₖ
- M⁻¹vₖ .= q
- N⁻¹uₖ .= p
+ # Set up workspace.
+ d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T)
+ π₂ₖ₋₃ = π₂ₖ₋₂ = zero(FC)
+ δₖ₋₁ = zero(FC)
- # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|²
- ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ
- ζ₂ₖ = π₂ₖ
- rNorm = sqrt(abs2(γₖ₊₁ * ζ₂ₖ₋₁) + abs2(βₖ₊₁ * ζ₂ₖ))
- history && push!(rNorms, rNorm)
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
- # Update βₖ, γₖ, π₂ₖ₋₃, π₂ₖ₋₂, d₂ₖ₋₃, d₂ₖ₋₂, δₖ₋₁, vₖ, uₖ.
- βₖ = βₖ₊₁
- γₖ = γₖ₊₁
- π₂ₖ₋₃ = π₂ₖ₋₁
- π₂ₖ₋₂ = π₂ₖ
- d₂ₖ₋₃ = d₂ₖ₋₁
- d₂ₖ₋₂ = d₂ₖ
- δₖ₋₁ = δₖ
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol
- solved = resid_decrease_lim || resid_decrease_mach
+ # Stopping criterion.
+ breakdown = false
+ solved = rNorm ≤ ε
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁)
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
+
+ # Continue the orthogonal tridiagonalization process.
+ # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
+ # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+
+ mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
+ mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ
+
+ if iter ≥ 2
+ @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
+ @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁
+ end
+
+ αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
+
+ @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ
+ @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ
+
+ # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁
+ M⁻¹vₖ₋₁ .= M⁻¹vₖ
+ N⁻¹uₖ₋₁ .= N⁻¹uₖ
+
+ # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ]
+ # [0 u₁ ••• 0 uₖ]
+ #
+ # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
+ # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ]
+ #
+ # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
+ # [ Aᴴ νF ] [ 0 F ]
+ #
+ # TriCG subproblem : (Wₖ)ᴴ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂
+ #
+ # Update the LDLᴴ factorization of Sₖ.ₖ.
+ #
+ # [ τ α₁ γ₂ 0 • • • • 0 ]
+ # [ ᾱ₁ ν β₂ • • ]
+ # [ β₂ τ α₂ γ₃ • • ]
+ # [ γ₂ ᾱ₂ ν β₃ • • ]
+ # [ 0 β₃ • • • • • ]
+ # [ • • γ₃ • • • 0 ]
+ # [ • • • • • γₖ ]
+ # [ • • • • • βₖ ]
+ # [ • • βₖ τ αₖ ]
+ # [ 0 • • • • 0 γₖ ᾱₖ ν ]
+ if iter == 1
+ d₂ₖ₋₁ = τ
+ δₖ = conj(αₖ) / d₂ₖ₋₁
+ d₂ₖ = ν - abs2(δₖ) * d₂ₖ₋₁
+ else
+ σₖ = βₖ / d₂ₖ₋₂
+ ηₖ = γₖ / d₂ₖ₋₃
+ λₖ = -(ηₖ * conj(δₖ₋₁) * d₂ₖ₋₃) / d₂ₖ₋₂
+ d₂ₖ₋₁ = τ - abs2(σₖ) * d₂ₖ₋₂
+ δₖ = (conj(αₖ) - λₖ * conj(σₖ) * d₂ₖ₋₂) / d₂ₖ₋₁
+ d₂ₖ = ν - abs2(ηₖ) * d₂ₖ₋₃ - abs2(λₖ) * d₂ₖ₋₂ - abs2(δₖ) * d₂ₖ₋₁
+ end
+
+ # Solve LₖDₖpₖ = (β₁e₁ + γ₁e₂)
+ #
+ # [ 1 0 • • • • • • • 0 ] [ d₁ ] [ β₁ ]
+ # [ δ₁ 1 • • ] [ d₂ ] [ γ₁ ]
+ # [ σ₂ 1 • • ] [ • ] [ 0 ]
+ # [ η₂ λ₂ δ₂ 1 • • ] [ • ] [ • ]
+ # [ 0 σ₃ 1 • • ] [ • ] zₖ = [ • ]
+ # [ • • η₃ λ₃ δ₃ 1 • • ] [ • ] [ • ]
+ # [ • • • • • • ] [ • ] [ • ]
+ # [ • • • • • • • • ] [ • ] [ • ]
+ # [ • • σₖ 1 0 ] [ d₂ₖ₋₁ ] [ • ]
+ # [ 0 • • • • 0 ηₖ λₖ δₖ 1 ] [ d₂ₖ] [ 0 ]
+ if iter == 1
+ π₂ₖ₋₁ = βₖ / d₂ₖ₋₁
+ π₂ₖ = (γₖ - δₖ * βₖ) / d₂ₖ
+ else
+ π₂ₖ₋₁ = -(σₖ * d₂ₖ₋₂ * π₂ₖ₋₂) / d₂ₖ₋₁
+ π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ
+ end
+
+ # Solve Gₖ = Wₖ(Lₖ)⁻ᴴ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ.
+ if iter == 1
+ # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ]
+ # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ]
+ @. gx₂ₖ₋₁ = vₖ
+ @. gx₂ₖ = - conj(δₖ) * gx₂ₖ₋₁
+ @. gy₂ₖ = uₖ
+ else
+ # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ]
+ # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ]
+ # [ gx₂ₖ₋₁ gy₂ₖ₋₁ ]
+ # [ gx₂ₖ gy₂ₖ ]
+ @. gx₂ₖ₋₁ = conj(ηₖ) * gx₂ₖ₋₁ + conj(λₖ) * gx₂ₖ
+ @. gy₂ₖ₋₁ = conj(ηₖ) * gy₂ₖ₋₁ + conj(λₖ) * gy₂ₖ
+
+ @. gx₂ₖ = vₖ - conj(σₖ) * gx₂ₖ
+ @. gy₂ₖ = - conj(σₖ) * gy₂ₖ
+
+ @. gx₂ₖ₋₁ = - gx₂ₖ₋₁ - conj(δₖ) * gx₂ₖ
+ @. gy₂ₖ₋₁ = uₖ - gy₂ₖ₋₁ - conj(δₖ) * gy₂ₖ
+
+ # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁
+ @kswap(gx₂ₖ₋₁, gx₂ₖ)
+ @kswap(gy₂ₖ₋₁, gy₂ₖ)
+ end
+
+ # Update xₖ = Gxₖ * pₖ
+ @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ)
+ @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ)
+
+ # Update yₖ = Gyₖ * pₖ
+ @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ)
+ @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ)
+
+ # Compute vₖ₊₁ and uₖ₊₁
+ MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
+ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+
+ βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
+ γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
+
+ # βₖ₊₁ ≠ 0
+ if βₖ₊₁ > btol
+ @kscal!(m, one(FC) / βₖ₊₁, q)
+ MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁)
+ end
+
+ # γₖ₊₁ ≠ 0
+ if γₖ₊₁ > btol
+ @kscal!(n, one(FC) / γₖ₊₁, p)
+ NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁)
+ end
+
+ # Update M⁻¹vₖ and N⁻¹uₖ
+ M⁻¹vₖ .= q
+ N⁻¹uₖ .= p
+
+ # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|²
+ ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ
+ ζ₂ₖ = π₂ₖ
+ rNorm = sqrt(abs2(γₖ₊₁ * ζ₂ₖ₋₁) + abs2(βₖ₊₁ * ζ₂ₖ))
+ history && push!(rNorms, rNorm)
+
+ # Update βₖ, γₖ, π₂ₖ₋₃, π₂ₖ₋₂, d₂ₖ₋₃, d₂ₖ₋₂, δₖ₋₁, vₖ, uₖ.
+ βₖ = βₖ₊₁
+ γₖ = γₖ₊₁
+ π₂ₖ₋₃ = π₂ₖ₋₁
+ π₂ₖ₋₂ = π₂ₖ
+ d₂ₖ₋₃ = d₂ₖ₋₁
+ d₂ₖ₋₂ = d₂ₖ
+ δₖ₋₁ = δₖ
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ₊₁, γₖ₊₁, ktimer(start_time))
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "inconsistent linear system")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x and y
+ warm_start && @kaxpy!(m, one(FC), Δx, xₖ)
+ warm_start && @kaxpy!(n, one(FC), Δy, yₖ)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !solved && breakdown
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "inconsistent linear system")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x and y
- warm_start && @kaxpy!(m, one(FC), Δx, xₖ)
- warm_start && @kaxpy!(n, one(FC), Δy, yₖ)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !solved && breakdown
- stats.status = status
- return solver
end
diff --git a/src/trilqr.jl b/src/trilqr.jl
index edcb4c9b9..2b584c216 100644
--- a/src/trilqr.jl
+++ b/src/trilqr.jl
@@ -1,5 +1,5 @@
# An implementation of TRILQR for the solution of square or
-# rectangular consistent linear adjoint systems Ax = b and Aᵀy = c.
+# rectangular consistent linear adjoint systems Ax = b and Aᴴy = c.
#
# This method is described in
#
@@ -14,32 +14,54 @@ export trilqr, trilqr!
"""
(x, y, stats) = trilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ transfer_to_usymcg::Bool=true, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, y, stats) = trilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+TriLQR can be warm-started from initial guesses `x0` and `y0`, where `kwargs` are the same keyword arguments as above.
+
Combine USYMLQ and USYMQR to solve adjoint systems.
[0 A] [y] = [b]
- [Aᵀ 0] [x] [c]
+ [Aᴴ 0] [x] [c]
+
+USYMLQ is used for solving the primal system `Ax = b` of size m × n.
+USYMQR is used for solving the dual system `Aᴴy = c` of size n × m.
+
+#### Input arguments
-USYMLQ is used for solving primal system `Ax = b`.
-USYMQR is used for solving dual system `Aᵀy = c`.
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
-An option gives the possibility of transferring from the USYMLQ point to the
-USYMCG point, when it exists. The transfer is based on the residual norm.
+#### Optional arguments
-TriLQR can be warm-started from initial guesses `x0` and `y0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x;
+* `y0`: a vector of length m that represents an initial guess of the solution y.
- (x, y, stats) = trilqr(A, b, c, x0, y0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure.
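+
+#### Example
+
+A minimal usage sketch with a consistent adjoint pair (the data below is illustrative only):
+
+    using Krylov
+    m, n = 8, 6
+    A = rand(m, n)
+    b = A * ones(n)     # consistent primal system Ax = b
+    c = A' * ones(m)    # consistent dual system Aᴴy = c
+    (x, y, stats) = trilqr(A, b, c)
+    stats.solved_primal && stats.solved_dual   # expected to hold for this data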
#### Reference
@@ -47,18 +69,6 @@ and `false` otherwise.
"""
function trilqr end
-function trilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = TrilqrSolver(A, b)
- trilqr!(solver, A, b, c, x0, y0; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
-function trilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = TrilqrSolver(A, b)
- trilqr!(solver, A, b, c; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = trilqr!(solver::TrilqrSolver, A, b, c; kwargs...)
solver = trilqr!(solver::TrilqrSolver, A, b, c, x0, y0; kwargs...)
@@ -69,349 +79,396 @@ See [`TrilqrSolver`](@ref) for more details about the `solver`.
"""
function trilqr! end
-function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0, y0)
- trilqr!(solver, A, b, c; kwargs...)
- return solver
-end
+def_args_trilqr = (:(A ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_trilqr = (:(x0::AbstractVector),
+ :(y0::AbstractVector))
+
+def_kwargs_trilqr = (:(; transfer_to_usymcg::Bool = true),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_trilqr = mapreduce(extract_parameters, vcat, def_kwargs_trilqr)
+
+args_trilqr = (:A, :b, :c)
+optargs_trilqr = (:x0, :y0)
+kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function trilqr($(def_args_trilqr...), $(def_optargs_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = TrilqrSolver(A, b)
+ warm_start!(solver, $(optargs_trilqr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ trilqr!(solver, $(args_trilqr...); $(kwargs_trilqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
+ end
-function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("TRILQR: primal system of %d equations in %d variables\n", m, n)
- (verbose > 0) && @printf("TRILQR: dual system of %d equations in %d variables\n", n, m)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats
- vₖ₋₁, vₖ, q, t, wₖ₋₃, wₖ₋₂ = solver.vₖ₋₁, solver.vₖ, solver.q, solver.y, solver.wₖ₋₃, solver.wₖ₋₂
- Δx, Δy, warm_start = solver.Δx, solver.Δy, solver.warm_start
- rNorms, sNorms = stats.residuals_primal, stats.residuals_dual
- reset!(stats)
- r₀ = warm_start ? q : b
- s₀ = warm_start ? p : c
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
- mul!(s₀, Aᵀ, Δy)
- @kaxpby!(n, one(FC), c, -one(FC), s₀)
+ function trilqr($(def_args_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = TrilqrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ trilqr!(solver, $(args_trilqr...); $(kwargs_trilqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # Initial solution x₀ and residual r₀ = b - Ax₀.
- x .= zero(FC) # x₀
- bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖
-
- # Initial solution y₀ and residual s₀ = c - Aᵀy₀.
- t .= zero(FC) # t₀
- cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
-
- iter = 0
- itmax == 0 && (itmax = m+n)
-
- history && push!(rNorms, bNorm)
- history && push!(sNorms, cNorm)
- εL = atol + rtol * bNorm
- εQ = atol + rtol * cNorm
- ξ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
-
- # Set up workspace.
- βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖
- γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖
- vₖ₋₁ .= zero(FC) # v₀ = 0
- uₖ₋₁ .= zero(FC) # u₀ = 0
- vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= s₀ ./ γₖ # u₁ = (c - Aᵀy₀) / γ₁
- cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
- sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
- ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
- ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
- δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
- ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁
- ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
- wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ
-
- # Stopping criterion.
- inconsistent = false
- solved_lq = bNorm == 0
- solved_lq_tol = solved_lq_mach = false
- solved_cg = solved_cg_tol = solved_cg_mach = false
- solved_primal = solved_lq || solved_cg
- solved_qr_tol = solved_qr_mach = false
- solved_dual = cNorm == 0
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !((solved_primal && solved_dual) || tired || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the SSY tridiagonalization process.
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
-
- @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
- @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
-
- αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
-
- @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
- @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
-
- βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖
- γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖
-
- # Update the LQ factorization of Tₖ = L̅ₖQₖ.
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
- # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
- # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
- # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
- # [ • • • • • 0 ] [ • • • • • • • ]
- # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ]
- # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
-
- if iter == 1
- δbarₖ = αₖ
- elseif iter == 2
- # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
- # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
- δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
- else
- # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
- # [sₖ₋₁ -cₖ₋₁ 0]
- # [ 0 0 1]
- #
- # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
- # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
- # [0 sₖ -cₖ]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- ϵₖ₋₂ = sₖ₋₁ * βₖ
- λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
- δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
+ function trilqr!(solver :: TrilqrSolver{T,FC,S}, $(def_args_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "TRILQR: primal system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "TRILQR: dual system of %d equations in %d variables\n", n, m)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats
+ vₖ₋₁, vₖ, q, t, wₖ₋₃, wₖ₋₂ = solver.vₖ₋₁, solver.vₖ, solver.q, solver.y, solver.wₖ₋₃, solver.wₖ₋₂
+ Δx, Δy, warm_start = solver.Δx, solver.Δy, solver.warm_start
+ rNorms, sNorms = stats.residuals_primal, stats.residuals_dual
+ reset!(stats)
+ r₀ = warm_start ? q : b
+ s₀ = warm_start ? p : c
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(m, one(FC), b, -one(FC), r₀) # b and r₀ have length m
+ mul!(s₀, Aᴴ, Δy)
+ @kaxpby!(n, one(FC), c, -one(FC), s₀)
end
- if !solved_primal
- # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
- # [δbar₁] [ζbar₁] = [β₁]
- if iter == 1
- ηₖ = βₖ
- end
- # [δ₁ 0 ] [ ζ₁ ] = [β₁]
- # [λ₁ δbar₂] [ζbar₂] [0 ]
- if iter == 2
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -λₖ₋₁ * ζₖ₋₁
- end
- # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
- # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
- # [ζbarₖ]
- if iter ≥ 3
- ζₖ₋₂ = ζₖ₋₁
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
- end
+ # Initial solution x₀ and residual r₀ = b - Ax₀.
+ x .= zero(FC) # x₀
+ bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖
+
+ # Initial solution y₀ and residual s₀ = c - Aᴴy₀.
+ t .= zero(FC) # t₀
+ cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
+
+ iter = 0
+ itmax == 0 && (itmax = m+n)
+
+ history && push!(rNorms, bNorm)
+ history && push!(sNorms, cNorm)
+ εL = atol + rtol * bNorm
+ εQ = atol + rtol * cNorm
+ ξ = zero(T)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %5s\n", "k", "‖rₖ‖", "‖sₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time))
+
+ # Set up workspace.
+ βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖
+ γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖
+ vₖ₋₁ .= zero(FC) # v₀ = 0
+ uₖ₋₁ .= zero(FC) # u₀ = 0
+ vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
+ uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁
+ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
+ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ
+ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
+ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
+ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
+ ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁
+ ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
+ wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ
+
+ # Stopping criterion.
+ inconsistent = false
+ solved_lq = bNorm == 0
+ solved_lq_tol = solved_lq_mach = false
+ solved_cg = solved_cg_tol = solved_cg_mach = false
+ solved_primal = solved_lq || solved_cg
+ solved_qr_tol = solved_qr_mach = false
+ solved_dual = cNorm == 0
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ.
- # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
- # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
- if iter ≥ 2
- # Compute solution xₖ.
- # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁
- @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
- @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x)
- end
+ while !((solved_primal && solved_dual) || tired || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
- # Compute d̅ₖ.
- if iter == 1
- # d̅₁ = u₁
- @. d̅ = uₖ
- else
- # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
- @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅)
- end
+ # Continue the SSY tridiagonalization process.
+ # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
- # Compute USYMLQ residual norm
- # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²)
- if iter == 1
- rNorm_lq = bNorm
- else
- μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
- ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ))
- end
- history && push!(rNorms, rNorm_lq)
-
- # Compute USYMCG residual norm
- # ‖rₖ‖ = |ρₖ|
- if transfer_to_usymcg && (abs(δbarₖ) > eps(T))
- ζbarₖ = ηₖ / δbarₖ
- ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
- rNorm_cg = abs(ρₖ)
- end
+ mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
- # Update primal stopping criterion
- solved_lq_tol = rNorm_lq ≤ εL
- solved_lq_mach = rNorm_lq + 1 ≤ 1
- solved_lq = solved_lq_tol || solved_lq_mach
- solved_cg_tol = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL)
- solved_cg_mach = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1)
- solved_cg = solved_cg_tol || solved_cg_mach
- solved_primal = solved_lq || solved_cg
- end
+ @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
+ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
+
+ αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
+
+ @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
+ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
+
+ βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖
+ γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖
+
+ # Update the LQ factorization of Tₖ = L̅ₖQₖ.
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
+ # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
+ # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
+ # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
+ # [ • • • • • 0 ] [ • • • • • • • ]
+ # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ]
+ # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
- if !solved_dual
- # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ₁e₁.
if iter == 1
- ψbarₖ = γₖ
+ δbarₖ = αₖ
+ elseif iter == 2
+ # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
+ # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
+ δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
else
- # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ]
- # [sₖ -cₖ] [ 0 ] [ ψbarₖ]
- ψₖ₋₁ = cₖ * ψbarₖ₋₁
- ψbarₖ = sₖ * ψbarₖ₋₁
+ # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
+ # [sₖ₋₁ -cₖ₋₁ 0]
+ # [ 0 0 1]
+ #
+ # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
+ # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
+ # [0 sₖ -cₖ]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ ϵₖ₋₂ = sₖ₋₁ * βₖ
+ λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
+ δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
end
- # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ.
- # w₁ = v₁ / δ̄₁
- if iter == 2
- wₖ₋₁ = wₖ₋₂
- @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁)
- @. wₖ₋₁ = vₖ₋₁ / conj(δₖ₋₁)
+ if !solved_primal
+ # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
+ # [δbar₁] [ζbar₁] = [β₁]
+ if iter == 1
+ ηₖ = βₖ
+ end
+ # [δ₁ 0 ] [ ζ₁ ] = [β₁]
+ # [λ₁ δbar₂] [ζbar₂] [0 ]
+ if iter == 2
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -λₖ₋₁ * ζₖ₋₁
+ end
+ # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
+ # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
+ # [ζbarₖ]
+ if iter ≥ 3
+ ζₖ₋₂ = ζₖ₋₁
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
+ end
+
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ.
+ # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
+ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
+ if iter ≥ 2
+ # Compute solution xₖ.
+ # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁
+ @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
+ @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x)
+ end
+
+ # Compute d̅ₖ.
+ if iter == 1
+ # d̅₁ = u₁
+ @. d̅ = uₖ
+ else
+ # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
+ @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅)
+ end
+
+ # Compute USYMLQ residual norm
+ # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²)
+ if iter == 1
+ rNorm_lq = bNorm
+ else
+ μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
+ ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
+ rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ))
+ end
+ history && push!(rNorms, rNorm_lq)
+
+ # Compute USYMCG residual norm
+ # ‖rₖ‖ = |ρₖ|
+ if transfer_to_usymcg && (abs(δbarₖ) > eps(T))
+ ζbarₖ = ηₖ / δbarₖ
+ ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
+ rNorm_cg = abs(ρₖ)
+ end
+
+ # Update primal stopping criterion
+ solved_lq_tol = rNorm_lq ≤ εL
+ solved_lq_mach = rNorm_lq + 1 ≤ 1
+ solved_lq = solved_lq_tol || solved_lq_mach
+ solved_cg_tol = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL)
+ solved_cg_mach = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1)
+ solved_cg = solved_cg_tol || solved_cg_mach
+ solved_primal = solved_lq || solved_cg
end
- # w₂ = (v₂ - λ̄₁w₁) / δ̄₂
- if iter == 3
- wₖ₋₁ = wₖ₋₃
- @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁)
- @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
- @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+
+ if !solved_dual
+        # Compute ψₖ₋₁ and ψbarₖ, the last coefficients of h̅ₖ = Qₖγ₁e₁.
+ if iter == 1
+ ψbarₖ = γₖ
+ else
+ # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ]
+ # [sₖ -cₖ] [ 0 ] [ ψbarₖ]
+ ψₖ₋₁ = cₖ * ψbarₖ₋₁
+ ψbarₖ = sₖ * ψbarₖ₋₁
+ end
+
+ # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ.
+ # w₁ = v₁ / δ̄₁
+ if iter == 2
+ wₖ₋₁ = wₖ₋₂
+ @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁)
+ @. wₖ₋₁ = vₖ₋₁ / conj(δₖ₋₁)
+ end
+ # w₂ = (v₂ - λ̄₁w₁) / δ̄₂
+ if iter == 3
+ wₖ₋₁ = wₖ₋₃
+ @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁)
+ @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
+ @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+ end
+ # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁
+ if iter ≥ 4
+ @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃)
+ wₖ₋₁ = wₖ₋₃
+ @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁)
+ @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
+ @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+ end
+
+ if iter ≥ 3
+ # Swap pointers.
+ @kswap(wₖ₋₃, wₖ₋₂)
+ end
+
+ if iter ≥ 2
+ # Compute solution tₖ₋₁.
+ # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁
+ @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t)
+ end
+
+ # Update ψbarₖ₋₁
+ ψbarₖ₋₁ = ψbarₖ
+
+ # Compute USYMQR residual norm ‖sₖ₋₁‖ = |ψbarₖ|.
+ sNorm = abs(ψbarₖ)
+ history && push!(sNorms, sNorm)
+
+ # Compute ‖Asₖ₋₁‖ = |ψbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
+ AsNorm = abs(ψbarₖ) * √(abs2(δbarₖ) + abs2(cₖ * βₖ₊₁))
+
+ # Update dual stopping criterion
+ iter == 1 && (ξ = atol + rtol * AsNorm)
+ solved_qr_tol = sNorm ≤ εQ
+ solved_qr_mach = sNorm + 1 ≤ 1
+ inconsistent = AsNorm ≤ ξ
+ solved_dual = solved_qr_tol || solved_qr_mach || inconsistent
end
- # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁
- if iter ≥ 4
- @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃)
- wₖ₋₁ = wₖ₋₃
- @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁)
- @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
- @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁)
+
+      # Compute vₖ₊₁ and uₖ₊₁.
+ @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
+ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+
+ if βₖ₊₁ ≠ zero(T)
+ @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
+ end
+ if γₖ₊₁ ≠ zero(T)
+ @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
end
+ # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ.
if iter ≥ 3
- # Swap pointers.
- @kswap(wₖ₋₃, wₖ₋₂)
+ ϵₖ₋₃ = ϵₖ₋₂
end
-
if iter ≥ 2
- # Compute solution tₖ₋₁.
- # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁
- @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t)
+ λₖ₋₂ = λₖ₋₁
end
-
- # Update ψbarₖ₋₁
- ψbarₖ₋₁ = ψbarₖ
-
- # Compute USYMQR residual norm ‖sₖ₋₁‖ = |ψbarₖ|.
- sNorm = abs(ψbarₖ)
- history && push!(sNorms, sNorm)
-
- # Compute ‖Asₖ₋₁‖ = |ψbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
- AsNorm = abs(ψbarₖ) * √(abs2(δbarₖ) + abs2(cₖ * βₖ₊₁))
-
- # Update dual stopping criterion
- iter == 1 && (ξ = atol + rtol * AsNorm)
- solved_qr_tol = sNorm ≤ εQ
- solved_qr_mach = sNorm + 1 ≤ 1
- inconsistent = AsNorm ≤ ξ
- solved_dual = solved_qr_tol || solved_qr_mach || inconsistent
- end
-
- # Compute uₖ₊₁ and uₖ₊₁.
- @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
- @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
-
- if βₖ₊₁ ≠ zero(T)
- @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
- end
- if γₖ₊₁ ≠ zero(T)
- @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
+ δbarₖ₋₁ = δbarₖ
+ cₖ₋₁ = cₖ
+ sₖ₋₁ = sₖ
+ γₖ = γₖ₊₁
+ βₖ = βₖ₊₁
+
+ user_requested_exit = callback(solver) :: Bool
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+
+ kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e %.2fs\n", iter, "✗ ✗ ✗ ✗", sNorm, ktimer(start_time))
+ kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s %.2fs\n", iter, rNorm_lq, "✗ ✗ ✗ ✗", ktimer(start_time))
+ kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, rNorm_lq, sNorm, ktimer(start_time))
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ.
- if iter ≥ 3
- ϵₖ₋₃ = ϵₖ₋₂
+ # Compute USYMCG point
+ # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
+ if solved_cg
+ @kaxpy!(n, ζbarₖ, d̅, x)
end
- if iter ≥ 2
- λₖ₋₂ = λₖ₋₁
- end
- δbarₖ₋₁ = δbarₖ
- cₖ₋₁ = cₖ
- sₖ₋₁ = sₖ
- γₖ = γₖ₊₁
- βₖ = βₖ₊₁
-
- user_requested_exit = callback(solver) :: Bool
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm)
- kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "")
- kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm)
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol")
+ solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol")
+ !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol")
+ solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol")
+ solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol")
+ solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ")
+ solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ")
+ !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t")
+ solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)")
+ solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)")
+    solved_lq_mach && solved_qr_tol && (status = "Found an approximate zero-residual primal solution xᴸ and a dual solution t good enough given atol and rtol")
+    solved_cg_mach && solved_qr_tol && (status = "Found an approximate zero-residual primal solution xᶜ and a dual solution t good enough given atol and rtol")
+    solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solution t")
+    solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solution t")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+    # Update x and t
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ warm_start && @kaxpy!(m, one(FC), Δy, t)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved_primal = solved_primal
+ stats.solved_dual = solved_dual
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- # Compute USYMCG point
- # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
- if solved_cg
- @kaxpy!(n, ζbarₖ, d̅, x)
- end
-
- tired && (status = "maximum number of iterations exceeded")
- solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol")
- solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol")
- !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol")
- solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol")
- solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol")
- solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ")
- solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ")
- !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t")
- solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)")
- solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)")
- solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol")
- solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol")
- solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t")
- solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x and y
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- warm_start && @kaxpy!(m, one(FC), Δy, t)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.status = status
- stats.solved_primal = solved_primal
- stats.solved_dual = solved_dual
- return solver
end
diff --git a/src/trimr.jl b/src/trimr.jl
index bc53633c2..ae61b785a 100644
--- a/src/trimr.jl
+++ b/src/trimr.jl
@@ -13,30 +13,31 @@ export trimr, trimr!
"""
(x, y, stats) = trimr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- spd::Bool=false, snd::Bool=false, flip::Bool=false, sp::Bool=false,
- τ::T=one(T), ν::T=-one(T), itmax::Int=0,
- verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ spd::Bool=false, snd::Bool=false,
+ flip::Bool=false, sp::Bool=false,
+ τ::T=one(T), ν::T=-one(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-TriMR solves the symmetric linear system
+ (x, y, stats) = trimr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+TriMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
+Given a matrix `A` of dimension m × n, TriMR solves the symmetric linear system
[ τE A ] [ x ] = [ b ]
- [ Aᵀ νF ] [ y ] [ c ],
+ [ Aᴴ νF ] [ y ] [ c ],
-where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0.
+of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0.
`b` and `c` must both be nonzero.
TriMR handles saddle-point systems (`τ = 0` or `ν = 0`) and adjoint systems (`τ = 0` and `ν = 0`) without any risk of breakdown.
By default, TriMR solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1.
-If `flip = true`, TriMR solves another known variant of SQD systems where τ = -1 and ν = 1.
-If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved.
-If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved.
-If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is solved.
-`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems.
TriMR is based on the preconditioned orthogonal tridiagonalization process
and its relation with the preconditioned block-Lanczos process.
@@ -50,17 +51,41 @@ It's the Euclidean norm when `M` and `N` are identity operators.
TriMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`.
`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
-TriMR can be warm-started from initial guesses `x0` and `y0` with the method
+#### Optional arguments
- (x, y, stats) = trimr(A, b, c, x0, y0; kwargs...)
+* `x0`: a vector of length m that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear systems;
+* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems;
+* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems;
+* `sp`: if `true`, set `τ = 1` and `ν = 0` for saddle-point systems;
+* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length m;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
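+
+#### Example
+
+A minimal usage sketch, assuming a linear operator `A` and vectors `b` and `c` of appropriate lengths are already defined; `x0` and `y0` denote hypothetical initial guesses:
+
+    using Krylov
+    x, y, stats = trimr(A, b, c)            # Hermitian quasi-definite system (τ = 1, ν = -1)
+    x, y, stats = trimr(A, b, c, sp=true)   # saddle-point variant (τ = 1, ν = 0)
+    x, y, stats = trimr(A, b, c, x0, y0)    # warm start from the initial guesses x0 and y0
+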
#### Reference
@@ -68,18 +93,6 @@ and `false` otherwise.
"""
function trimr end
-function trimr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = TrimrSolver(A, b)
- trimr!(solver, A, b, c, x0, y0; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
-function trimr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = TrimrSolver(A, b)
- trimr!(solver, A, b, c; kwargs...)
- return (solver.x, solver.y, solver.stats)
-end
-
"""
solver = trimr!(solver::TrimrSolver, A, b, c; kwargs...)
solver = trimr!(solver::TrimrSolver, A, b, c, x0, y0; kwargs...)
@@ -90,424 +103,477 @@ See [`TrimrSolver`](@ref) for more details about the `solver`.
"""
function trimr! end
-function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0, y0)
- trimr!(solver, A, b, c; kwargs...)
- return solver
-end
-
-function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false,
- τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0,
- verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("TriMR: system of %d equations in %d variables\n", m+n, m+n)
-
- # Check flip, sp, spd and snd parameters
- spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !")
- spd && snd && error("The matrix cannot be symmetric positive definite and symmetric negative definite !")
- spd && sp && error("The matrix cannot be symmetric positive definite and a saddle-point !")
- snd && flip && error("The matrix cannot be symmetric negative definite and symmetric quasi-definite !")
- snd && sp && error("The matrix cannot be symmetric negative definite and a saddle-point !")
- sp && flip && error("The matrix cannot be symmetric quasi-definite and a saddle-point !")
-
- # Check M = Iₘ and N = Iₙ
- MisI = (M === I)
- NisI = (N === I)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Determine τ and ν associated to SQD, SPD or SND systems.
- flip && (τ = -one(T) ; ν = one(T))
- spd && (τ = one(T) ; ν = one(T))
- snd && (τ = -one(T) ; ν = -one(T))
- sp && (τ = one(T) ; ν = zero(T))
-
- warm_start = solver.warm_start
- warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.")
- warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- allocate_if(!MisI, solver, :vₖ, S, m)
- allocate_if(!NisI, solver, :uₖ, S, n)
- Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p
- Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q
- gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ
- gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ
- vₖ = MisI ? M⁻¹vₖ : solver.vₖ
- uₖ = NisI ? N⁻¹uₖ : solver.uₖ
- vₖ₊₁ = MisI ? q : M⁻¹vₖ₋₁
- uₖ₊₁ = NisI ? p : N⁻¹uₖ₋₁
- b₀ = warm_start ? q : b
- c₀ = warm_start ? p : c
-
- stats = solver.stats
- rNorms = stats.residuals
- reset!(stats)
-
- # Initial solutions x₀ and y₀.
- xₖ .= zero(FC)
- yₖ .= zero(FC)
-
- iter = 0
- itmax == 0 && (itmax = m+n)
-
- # Initialize preconditioned orthogonal tridiagonalization process.
- M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0
- N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
-
- # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
- # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ]
- if warm_start
- mul!(b₀, A, Δy)
- (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
- @kaxpby!(m, one(FC), b, -one(FC), b₀)
- mul!(c₀, Aᵀ, Δx)
- (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
- @kaxpby!(n, one(FC), c, -one(FC), c₀)
+def_args_trimr = (:(A ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_trimr = (:(x0::AbstractVector),
+ :(y0::AbstractVector))
+
+def_kwargs_trimr = (:(; M = I ),
+ :(; N = I ),
+ :(; ldiv::Bool = false ),
+ :(; spd::Bool = false ),
+ :(; snd::Bool = false ),
+ :(; flip::Bool = false ),
+ :(; sp::Bool = false ),
+ :(; τ::T = one(T) ),
+ :(; ν::T = -one(T) ),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_trimr = mapreduce(extract_parameters, vcat, def_kwargs_trimr)
+
+args_trimr = (:A, :b, :c)
+optargs_trimr = (:x0, :y0)
+kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
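+# The tuples of expressions above are spliced into the `@eval` block below,
+# so the positional, optional and keyword arguments of `trimr` and `trimr!`
+# are declared in a single place for every generated method.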
+@eval begin
+ function trimr($(def_args_trimr...), $(def_optargs_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = TrimrSolver(A, b)
+ warm_start!(solver, $(optargs_trimr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ trimr!(solver, $(args_trimr...); $(kwargs_trimr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # β₁Ev₁ = b ↔ β₁v₁ = Mb
- M⁻¹vₖ .= b₀
- MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv)
- βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E
- if βₖ ≠ 0
- @kscal!(m, one(FC) / βₖ, M⁻¹vₖ)
- MisI || @kscal!(m, one(FC) / βₖ, vₖ)
- else
- error("b must be nonzero")
+ function trimr($(def_args_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = TrimrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ trimr!(solver, $(args_trimr...); $(kwargs_trimr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.y, solver.stats)
end
- # γ₁Fu₁ = c ↔ γ₁u₁ = Nc
- N⁻¹uₖ .= c₀
- NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv)
- γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F
- if γₖ ≠ 0
- @kscal!(n, one(FC) / γₖ, N⁻¹uₖ)
- NisI || @kscal!(n, one(FC) / γₖ, uₖ)
- else
- error("c must be nonzero")
- end
+ function trimr!(solver :: TrimrSolver{T,FC,S}, $(def_args_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "TriMR: system of %d equations in %d variables\n", m+n, m+n)
+
+ # Check flip, sp, spd and snd parameters
+    spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite!")
+    spd && snd && error("The matrix cannot be symmetric positive definite and symmetric negative definite!")
+    spd && sp && error("The matrix cannot be symmetric positive definite and a saddle-point!")
+    snd && flip && error("The matrix cannot be symmetric negative definite and symmetric quasi-definite!")
+    snd && sp && error("The matrix cannot be symmetric negative definite and a saddle-point!")
+    sp && flip && error("The matrix cannot be symmetric quasi-definite and a saddle-point!")
+
+ # Check M = Iₘ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Determine τ and ν associated to SQD, SPD or SND systems.
+ flip && (τ = -one(T) ; ν = one(T))
+ spd && (τ = one(T) ; ν = one(T))
+ snd && (τ = -one(T) ; ν = -one(T))
+ sp && (τ = one(T) ; ν = zero(T))
+
+ warm_start = solver.warm_start
+ warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.")
+ warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ allocate_if(!MisI, solver, :vₖ, S, m)
+ allocate_if(!NisI, solver, :uₖ, S, n)
+ Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p
+ Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q
+ gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ
+ gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ
+ vₖ = MisI ? M⁻¹vₖ : solver.vₖ
+ uₖ = NisI ? N⁻¹uₖ : solver.uₖ
+ vₖ₊₁ = MisI ? q : M⁻¹vₖ₋₁
+ uₖ₊₁ = NisI ? p : N⁻¹uₖ₋₁
+ b₀ = warm_start ? q : b
+ c₀ = warm_start ? p : c
+
+ stats = solver.stats
+ rNorms = stats.residuals
+ reset!(stats)
+
+ # Initial solutions x₀ and y₀.
+ xₖ .= zero(FC)
+ yₖ .= zero(FC)
+
+ iter = 0
+ itmax == 0 && (itmax = m+n)
+
+ # Initialize preconditioned orthogonal tridiagonalization process.
+ M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0
+ N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
+
+ # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
+ # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ]
+ if warm_start
+ mul!(b₀, A, Δy)
+ (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
+ @kaxpby!(m, one(FC), b, -one(FC), b₀)
+ mul!(c₀, Aᴴ, Δx)
+ (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
+ @kaxpby!(n, one(FC), c, -one(FC), c₀)
+ end
- # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ.
- gx₂ₖ₋₃ .= zero(FC)
- gy₂ₖ₋₃ .= zero(FC)
- gx₂ₖ₋₂ .= zero(FC)
- gy₂ₖ₋₂ .= zero(FC)
- gx₂ₖ₋₁ .= zero(FC)
- gy₂ₖ₋₁ .= zero(FC)
- gx₂ₖ .= zero(FC)
- gy₂ₖ .= zero(FC)
-
- # Compute ‖r₀‖² = (γ₁)² + (β₁)²
- rNorm = sqrt(γₖ^2 + βₖ^2)
- history && push!(rNorms, rNorm)
- ε = atol + rtol * rNorm
-
- (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ)
-
- # Set up workspace.
- old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T)
- old_s₁ₖ = old_s₂ₖ = old_s₃ₖ = old_s₄ₖ = zero(FC)
- σbar₂ₖ₋₂ = ηbar₂ₖ₋₃ = λbar₂ₖ₋₃ = μ₂ₖ₋₅ = λ₂ₖ₋₄ = μ₂ₖ₋₄ = zero(FC)
- πbar₂ₖ₋₁ = βₖ
- πbar₂ₖ = γₖ
-
- # Tolerance for breakdown detection.
- btol = eps(T)^(3/4)
-
- # Stopping criterion.
- breakdown = false
- solved = rNorm ≤ ε
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- θbarₖ = δbar₂ₖ₋₁ = δbar₂ₖ = σbar₂ₖ₋₁ = σbar₂ₖ = λbar₂ₖ₋₁ = ηbar₂ₖ₋₁ = zero(FC)
-
- while !(solved || tired || breakdown || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the orthogonal tridiagonalization process.
- # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ
-
- if iter ≥ 2
- @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
- @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁
+ # β₁Ev₁ = b ↔ β₁v₁ = Mb
+ M⁻¹vₖ .= b₀
+ MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv)
+ βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E
+ if βₖ ≠ 0
+ @kscal!(m, one(FC) / βₖ, M⁻¹vₖ)
+ MisI || @kscal!(m, one(FC) / βₖ, vₖ)
+ else
+ error("b must be nonzero")
end
- αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
+ # γ₁Fu₁ = c ↔ γ₁u₁ = Nc
+ N⁻¹uₖ .= c₀
+ NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv)
+ γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F
+ if γₖ ≠ 0
+ @kscal!(n, one(FC) / γₖ, N⁻¹uₖ)
+ NisI || @kscal!(n, one(FC) / γₖ, uₖ)
+ else
+ error("c must be nonzero")
+ end
- @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ
- @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ
+ # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ.
+ gx₂ₖ₋₃ .= zero(FC)
+ gy₂ₖ₋₃ .= zero(FC)
+ gx₂ₖ₋₂ .= zero(FC)
+ gy₂ₖ₋₂ .= zero(FC)
+ gx₂ₖ₋₁ .= zero(FC)
+ gy₂ₖ₋₁ .= zero(FC)
+ gx₂ₖ .= zero(FC)
+ gy₂ₖ .= zero(FC)
+
+ # Compute ‖r₀‖² = (γ₁)² + (β₁)²
+ rNorm = sqrt(γₖ^2 + βₖ^2)
+ history && push!(rNorms, rNorm)
+ ε = atol + rtol * rNorm
- # Compute vₖ₊₁ and uₖ₊₁
- MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
- NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ, γₖ, ktimer(start_time))
- βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
- γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
+ # Set up workspace.
+ old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T)
+ old_s₁ₖ = old_s₂ₖ = old_s₃ₖ = old_s₄ₖ = zero(FC)
+ σbar₂ₖ₋₂ = ηbar₂ₖ₋₃ = λbar₂ₖ₋₃ = μ₂ₖ₋₅ = λ₂ₖ₋₄ = μ₂ₖ₋₄ = zero(FC)
+ πbar₂ₖ₋₁ = βₖ
+ πbar₂ₖ = γₖ
- # βₖ₊₁ ≠ 0
- if βₖ₊₁ > btol
- @kscal!(m, one(FC) / βₖ₊₁, q)
- MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁)
- end
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
- # γₖ₊₁ ≠ 0
- if γₖ₊₁ > btol
- @kscal!(n, one(FC) / γₖ₊₁, p)
- NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁)
- end
+ # Stopping criterion.
+ breakdown = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
- # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ]
- # [0 u₁ ••• 0 uₖ]
- #
- # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
- # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ]
- #
- # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
- # [ Aᵀ νF ] [ 0 F ]
- #
- # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖
- #
- # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ].
- # [ Oᵀ ]
- if iter == 1
- θbarₖ = conj(αₖ)
- δbar₂ₖ₋₁ = τ
- δbar₂ₖ = ν
- σbar₂ₖ₋₁ = αₖ
- σbar₂ₖ = βₖ₊₁
- λbar₂ₖ₋₁ = γₖ₊₁
- ηbar₂ₖ₋₁ = zero(FC)
- else
- # Apply previous reflections
- # [ 1 ][ 1 ][ c₂.ₖ₋₁ s₂.ₖ₋₁ ][ 1 ]
- # Ζₖ₋₁ = [ c₄.ₖ₋₁ s₄.ₖ₋₁ ][ c₃.ₖ₋₁ s₃.ₖ₋₁ ][ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ][ c₁.ₖ₋₁ s₁.ₖ₋₁ ]
- # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ][ 1 ][ 1 ][ 1 ]
- # [ 1 ][ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ][ 1 ][ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ]
- #
- # [ δbar₂ₖ₋₃ σbar₂ₖ₋₃ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ δ₂ₖ₋₃ σ₂ₖ₋₃ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
- # Ζₖ₋₁ * [ θbarₖ₋₁ δbar₂ₖ₋₂ σbar₂ₖ₋₂ 0 0 0 ] = [ 0 δ₂ₖ₋₂ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ]
- # [ 0 βₖ τ αₖ 0 γₖ₊₁ ] [ 0 0 δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ]
- # [ γₖ 0 ᾱₖ ν βₖ₊₁ 0 ] [ 0 0 θbarₖ δbar₂ₖ σbar₂ₖ 0 ]
- #
- # [ 1 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ]
- # [ c₁.ₖ₋₁ s₁.ₖ₋₁ ] [ σbar₂ₖ₋₂ 0 0 0 ] = [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ]
- # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ]
- # [ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] [ ᾱₖ ν βₖ₊₁ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ]
- σbis₂ₖ₋₂ = old_c₁ₖ * σbar₂ₖ₋₂ + old_s₁ₖ * conj(αₖ)
- ηbis₂ₖ₋₂ = old_s₁ₖ * ν
- λbis₂ₖ₋₂ = old_s₁ₖ * βₖ₊₁
- θbisₖ = conj(old_s₁ₖ) * σbar₂ₖ₋₂ - old_c₁ₖ * conj(αₖ)
- δbis₂ₖ = - old_c₁ₖ * ν
- σbis₂ₖ = - old_c₁ₖ * βₖ₊₁
- # [ c₂.ₖ₋₁ s₂.ₖ₋₁ ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
- # [ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ] [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] = [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ]
- # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ]
- # [ 1 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ]
- η₂ₖ₋₃ = old_c₂ₖ * ηbar₂ₖ₋₃ + old_s₂ₖ * σbis₂ₖ₋₂
- λ₂ₖ₋₃ = old_c₂ₖ * λbar₂ₖ₋₃ + old_s₂ₖ * ηbis₂ₖ₋₂
- μ₂ₖ₋₃ = old_s₂ₖ * λbis₂ₖ₋₂
- σhat₂ₖ₋₂ = conj(old_s₂ₖ) * ηbar₂ₖ₋₃ - old_c₂ₖ * σbis₂ₖ₋₂
- ηhat₂ₖ₋₂ = conj(old_s₂ₖ) * λbar₂ₖ₋₃ - old_c₂ₖ * ηbis₂ₖ₋₂
- λhat₂ₖ₋₂ = - old_c₂ₖ * λbis₂ₖ₋₂
- # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
- # [ c₃.ₖ₋₁ s₃.ₖ₋₁ ] [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] = [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ]
- # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ]
- # [ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ]
- σtmp₂ₖ₋₂ = old_c₃ₖ * σhat₂ₖ₋₂ + old_s₃ₖ * θbisₖ
- ηtmp₂ₖ₋₂ = old_c₃ₖ * ηhat₂ₖ₋₂ + old_s₃ₖ * δbis₂ₖ
- λtmp₂ₖ₋₂ = old_c₃ₖ * λhat₂ₖ₋₂ + old_s₃ₖ * σbis₂ₖ
- θbarₖ = conj(old_s₃ₖ) * σhat₂ₖ₋₂ - old_c₃ₖ * θbisₖ
- δbar₂ₖ = conj(old_s₃ₖ) * ηhat₂ₖ₋₂ - old_c₃ₖ * δbis₂ₖ
- σbar₂ₖ = conj(old_s₃ₖ) * λhat₂ₖ₋₂ - old_c₃ₖ * σbis₂ₖ
- # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
- # [ c₄.ₖ₋₁ s₄.ₖ₋₁ ] [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] = [ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ]
- # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ] [ τ αₖ 0 γₖ₊₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ]
- # [ 1 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ]
- σ₂ₖ₋₂ = old_c₄ₖ * σtmp₂ₖ₋₂ + old_s₄ₖ * τ
- η₂ₖ₋₂ = old_c₄ₖ * ηtmp₂ₖ₋₂ + old_s₄ₖ * αₖ
- λ₂ₖ₋₂ = old_c₄ₖ * λtmp₂ₖ₋₂
- μ₂ₖ₋₂ = old_s₄ₖ * γₖ₊₁
- δbar₂ₖ₋₁ = conj(old_s₄ₖ) * σtmp₂ₖ₋₂ - old_c₄ₖ * τ
- σbar₂ₖ₋₁ = conj(old_s₄ₖ) * ηtmp₂ₖ₋₂ - old_c₄ₖ * αₖ
- ηbar₂ₖ₋₁ = conj(old_s₄ₖ) * λtmp₂ₖ₋₂
- λbar₂ₖ₋₁ = - old_c₄ₖ * γₖ₊₁
- end
+ θbarₖ = δbar₂ₖ₋₁ = δbar₂ₖ = σbar₂ₖ₋₁ = σbar₂ₖ = λbar₂ₖ₋₁ = ηbar₂ₖ₋₁ = zero(FC)
- # [ 1 ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ]
- # [ c₁.ₖ s₁.ₖ ] [ θbarₖ δbar₂ₖ ] = [ θₖ δbar₂ₖ ]
- # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ]
- # [ s̄₁.ₖ -c₁.ₖ ] [ γₖ₊₁ 0 ] [ 0 gₖ ]
- (c₁ₖ, s₁ₖ, θₖ) = sym_givens(θbarₖ, γₖ₊₁)
- gₖ = conj(s₁ₖ) * δbar₂ₖ
- δbar₂ₖ = c₁ₖ * δbar₂ₖ
-
- # [ c₂.ₖ s₂.ₖ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ]
- # [ s̄₂.ₖ -c₂.ₖ ] [ θₖ δbar₂ₖ ] = [ 0 δbis₂ₖ ]
- # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ]
- # [ 1 ] [ 0 gₖ ] [ 0 gₖ ]
- (c₂ₖ, s₂ₖ, δ₂ₖ₋₁) = sym_givens(δbar₂ₖ₋₁, θₖ)
- σ₂ₖ₋₁ = c₂ₖ * σbar₂ₖ₋₁ + s₂ₖ * δbar₂ₖ
- δbis₂ₖ = conj(s₂ₖ) * σbar₂ₖ₋₁ - c₂ₖ * δbar₂ₖ
-
- # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ]
- # [ c₃.ₖ s₃.ₖ ] [ 0 δbis₂ₖ ] = [ 0 δhat₂ₖ ]
- # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ]
- # [ s̄₃.ₖ -c₃.ₖ ] [ 0 gₖ ] [ 0 0 ]
- (c₃ₖ, s₃ₖ, δhat₂ₖ) = sym_givens(δbis₂ₖ, gₖ)
-
- # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ]
- # [ c₄.ₖ s₄.ₖ ] [ 0 δhat₂ₖ ] = [ 0 δ₂ₖ ]
- # [ s̄₄.ₖ -c₄.ₖ ] [ 0 βₖ₊₁ ] [ 0 0 ]
- # [ 1 ] [ 0 0 ] [ 0 0 ]
- (c₄ₖ, s₄ₖ, δ₂ₖ) = sym_givens(δhat₂ₖ, βₖ₊₁)
-
- # Solve Gₖ = Wₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Gₖ)ᵀ = (Wₖ)ᵀ.
- if iter == 1
- # [ δ₁ 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ]
- # [ σ₁ δ₂ ] [ gx₂ gy₂ ] [ 0 u₁ ]
- @. gx₂ₖ₋₁ = vₖ / δ₂ₖ₋₁
- @. gx₂ₖ = - σ₂ₖ₋₁ / δ₂ₖ * gx₂ₖ₋₁
- @. gy₂ₖ = uₖ / δ₂ₖ
- elseif iter == 2
- # [ η₁ σ₂ δ₃ 0 ] [ gx₁ gy₁ ] = [ v₂ 0 ]
- # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ]
- # [ gx₃ gy₃ ]
- # [ gx₄ gy₄ ]
- @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁)
- @kswap(gx₂ₖ₋₂, gx₂ₖ)
- @kswap(gy₂ₖ₋₂, gy₂ₖ)
- @. gx₂ₖ₋₁ = (vₖ - η₂ₖ₋₃ * gx₂ₖ₋₃ - σ₂ₖ₋₂ * gx₂ₖ₋₂ ) / δ₂ₖ₋₁
- @. gx₂ₖ = ( - λ₂ₖ₋₃ * gx₂ₖ₋₃ - η₂ₖ₋₂ * gx₂ₖ₋₂ - σ₂ₖ₋₁ * gx₂ₖ₋₁) / δ₂ₖ
- @. gy₂ₖ₋₁ = ( - η₂ₖ₋₃ * gy₂ₖ₋₃ - σ₂ₖ₋₂ * gy₂ₖ₋₂ ) / δ₂ₖ₋₁
- @. gy₂ₖ = (uₖ - λ₂ₖ₋₃ * gy₂ₖ₋₃ - η₂ₖ₋₂ * gy₂ₖ₋₂ - σ₂ₖ₋₁ * gy₂ₖ₋₁) / δ₂ₖ
- else
- # μ₂ₖ₋₅ * gx₂ₖ₋₅ + λ₂ₖ₋₄ * gx₂ₖ₋₄ + η₂ₖ₋₃ * gx₂ₖ₋₃ + σ₂ₖ₋₂ * gx₂ₖ₋₂ + δ₂ₖ₋₁ * gx₂ₖ₋₁ = vₖ
- # μ₂ₖ₋₄ * gx₂ₖ₋₄ + λ₂ₖ₋₃ * gx₂ₖ₋₃ + η₂ₖ₋₂ * gx₂ₖ₋₂ + σ₂ₖ₋₁ * gx₂ₖ₋₁ + δ₂ₖ * gx₂ₖ = 0
- g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ
- @. g₂ₖ₋₁ = (vₖ - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁
- @. g₂ₖ = ( - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ
- @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁)
- @kswap(gx₂ₖ₋₂, gx₂ₖ)
- # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0
- # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ
- g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ
- @. g₂ₖ₋₁ = ( - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁
- @. g₂ₖ = (uₖ - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ
- @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁)
- @kswap(gy₂ₖ₋₂, gy₂ₖ)
- end
+ while !(solved || tired || breakdown || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
- # Update p̅ₖ = (Qₖ)ᵀ * (β₁e₁ + γ₁e₂)
- πbis₂ₖ = c₁ₖ * πbar₂ₖ
- πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ
- #
- π₂ₖ₋₁ = c₂ₖ * πbar₂ₖ₋₁ + s₂ₖ * πbis₂ₖ
- πhat₂ₖ = conj(s₂ₖ) * πbar₂ₖ₋₁ - c₂ₖ * πbis₂ₖ
- #
- πtmp₂ₖ = c₃ₖ * πhat₂ₖ + s₃ₖ * πbis₂ₖ₊₂
- πbar₂ₖ₊₂ = conj(s₃ₖ) * πhat₂ₖ - c₃ₖ * πbis₂ₖ₊₂
- #
- π₂ₖ = c₄ₖ * πtmp₂ₖ
- πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ
-
- # Update xₖ = Gxₖ * pₖ
- @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ)
- @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ)
-
- # Update yₖ = Gyₖ * pₖ
- @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ)
- @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ)
-
- # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|²
- rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂))
- history && push!(rNorms, rNorm)
+ # Continue the orthogonal tridiagonalization process.
+ # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
+ # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+
+ mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
+ mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ
+
+ if iter ≥ 2
+ @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
+ @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁
+ end
+
+ αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
+
+ @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ
+ @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ
+
+ # Compute vₖ₊₁ and uₖ₊₁
+ MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
+ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+
+ βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
+ γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
- # Update vₖ and uₖ
- MisI || (vₖ .= vₖ₊₁)
- NisI || (uₖ .= uₖ₊₁)
-
- # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁
- M⁻¹vₖ₋₁ .= M⁻¹vₖ
- N⁻¹uₖ₋₁ .= N⁻¹uₖ
-
- # Update M⁻¹vₖ and N⁻¹uₖ
- M⁻¹vₖ .= q
- N⁻¹uₖ .= p
-
- # Update cosines and sines
- old_s₁ₖ = s₁ₖ
- old_s₂ₖ = s₂ₖ
- old_s₃ₖ = s₃ₖ
- old_s₄ₖ = s₄ₖ
- old_c₁ₖ = c₁ₖ
- old_c₂ₖ = c₂ₖ
- old_c₃ₖ = c₃ₖ
- old_c₄ₖ = c₄ₖ
-
- # Update workspace
- βₖ = βₖ₊₁
- γₖ = γₖ₊₁
- σbar₂ₖ₋₂ = σbar₂ₖ
- ηbar₂ₖ₋₃ = ηbar₂ₖ₋₁
- λbar₂ₖ₋₃ = λbar₂ₖ₋₁
- if iter ≥ 2
- μ₂ₖ₋₅ = μ₂ₖ₋₃
- μ₂ₖ₋₄ = μ₂ₖ₋₂
- λ₂ₖ₋₄ = λ₂ₖ₋₂
+ # βₖ₊₁ ≠ 0
+ if βₖ₊₁ > btol
+ @kscal!(m, one(FC) / βₖ₊₁, q)
+ MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁)
+ end
+
+ # γₖ₊₁ ≠ 0
+ if γₖ₊₁ > btol
+ @kscal!(n, one(FC) / γₖ₊₁, p)
+ NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁)
+ end
+
+ # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ]
+ # [0 u₁ ••• 0 uₖ]
+ #
+ # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
+ # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ]
+ #
+ # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
+ # [ Aᴴ νF ] [ 0 F ]
+ #
+ # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖
+ #
+ # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ].
+ # [ Oᵀ ]
+ if iter == 1
+ θbarₖ = conj(αₖ)
+ δbar₂ₖ₋₁ = τ
+ δbar₂ₖ = ν
+ σbar₂ₖ₋₁ = αₖ
+ σbar₂ₖ = βₖ₊₁
+ λbar₂ₖ₋₁ = γₖ₊₁
+ ηbar₂ₖ₋₁ = zero(FC)
+ else
+ # Apply previous reflections
+ # [ 1 ][ 1 ][ c₂.ₖ₋₁ s₂.ₖ₋₁ ][ 1 ]
+ # Ζₖ₋₁ = [ c₄.ₖ₋₁ s₄.ₖ₋₁ ][ c₃.ₖ₋₁ s₃.ₖ₋₁ ][ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ][ c₁.ₖ₋₁ s₁.ₖ₋₁ ]
+ # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ][ 1 ][ 1 ][ 1 ]
+ # [ 1 ][ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ][ 1 ][ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ]
+ #
+ # [ δbar₂ₖ₋₃ σbar₂ₖ₋₃ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ δ₂ₖ₋₃ σ₂ₖ₋₃ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
+ # Ζₖ₋₁ * [ θbarₖ₋₁ δbar₂ₖ₋₂ σbar₂ₖ₋₂ 0 0 0 ] = [ 0 δ₂ₖ₋₂ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ]
+ # [ 0 βₖ τ αₖ 0 γₖ₊₁ ] [ 0 0 δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ]
+ # [ γₖ 0 ᾱₖ ν βₖ₊₁ 0 ] [ 0 0 θbarₖ δbar₂ₖ σbar₂ₖ 0 ]
+ #
+ # [ 1 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ]
+ # [ c₁.ₖ₋₁ s₁.ₖ₋₁ ] [ σbar₂ₖ₋₂ 0 0 0 ] = [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ]
+ # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ]
+ # [ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] [ ᾱₖ ν βₖ₊₁ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ]
+ σbis₂ₖ₋₂ = old_c₁ₖ * σbar₂ₖ₋₂ + old_s₁ₖ * conj(αₖ)
+ ηbis₂ₖ₋₂ = old_s₁ₖ * ν
+ λbis₂ₖ₋₂ = old_s₁ₖ * βₖ₊₁
+ θbisₖ = conj(old_s₁ₖ) * σbar₂ₖ₋₂ - old_c₁ₖ * conj(αₖ)
+ δbis₂ₖ = - old_c₁ₖ * ν
+ σbis₂ₖ = - old_c₁ₖ * βₖ₊₁
+ # [ c₂.ₖ₋₁ s₂.ₖ₋₁ ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
+ # [ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ] [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] = [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ]
+ # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ]
+ # [ 1 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ]
+ η₂ₖ₋₃ = old_c₂ₖ * ηbar₂ₖ₋₃ + old_s₂ₖ * σbis₂ₖ₋₂
+ λ₂ₖ₋₃ = old_c₂ₖ * λbar₂ₖ₋₃ + old_s₂ₖ * ηbis₂ₖ₋₂
+ μ₂ₖ₋₃ = old_s₂ₖ * λbis₂ₖ₋₂
+ σhat₂ₖ₋₂ = conj(old_s₂ₖ) * ηbar₂ₖ₋₃ - old_c₂ₖ * σbis₂ₖ₋₂
+ ηhat₂ₖ₋₂ = conj(old_s₂ₖ) * λbar₂ₖ₋₃ - old_c₂ₖ * ηbis₂ₖ₋₂
+ λhat₂ₖ₋₂ = - old_c₂ₖ * λbis₂ₖ₋₂
+ # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
+ # [ c₃.ₖ₋₁ s₃.ₖ₋₁ ] [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] = [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ]
+ # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ]
+ # [ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ]
+ σtmp₂ₖ₋₂ = old_c₃ₖ * σhat₂ₖ₋₂ + old_s₃ₖ * θbisₖ
+ ηtmp₂ₖ₋₂ = old_c₃ₖ * ηhat₂ₖ₋₂ + old_s₃ₖ * δbis₂ₖ
+ λtmp₂ₖ₋₂ = old_c₃ₖ * λhat₂ₖ₋₂ + old_s₃ₖ * σbis₂ₖ
+ θbarₖ = conj(old_s₃ₖ) * σhat₂ₖ₋₂ - old_c₃ₖ * θbisₖ
+ δbar₂ₖ = conj(old_s₃ₖ) * ηhat₂ₖ₋₂ - old_c₃ₖ * δbis₂ₖ
+ σbar₂ₖ = conj(old_s₃ₖ) * λhat₂ₖ₋₂ - old_c₃ₖ * σbis₂ₖ
+ # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ]
+ # [ c₄.ₖ₋₁ s₄.ₖ₋₁ ] [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] = [ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ]
+ # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ] [ τ αₖ 0 γₖ₊₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ]
+ # [ 1 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ]
+ σ₂ₖ₋₂ = old_c₄ₖ * σtmp₂ₖ₋₂ + old_s₄ₖ * τ
+ η₂ₖ₋₂ = old_c₄ₖ * ηtmp₂ₖ₋₂ + old_s₄ₖ * αₖ
+ λ₂ₖ₋₂ = old_c₄ₖ * λtmp₂ₖ₋₂
+ μ₂ₖ₋₂ = old_s₄ₖ * γₖ₊₁
+ δbar₂ₖ₋₁ = conj(old_s₄ₖ) * σtmp₂ₖ₋₂ - old_c₄ₖ * τ
+ σbar₂ₖ₋₁ = conj(old_s₄ₖ) * ηtmp₂ₖ₋₂ - old_c₄ₖ * αₖ
+ ηbar₂ₖ₋₁ = conj(old_s₄ₖ) * λtmp₂ₖ₋₂
+ λbar₂ₖ₋₁ = - old_c₄ₖ * γₖ₊₁
+ end
+
+ # [ 1 ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ]
+ # [ c₁.ₖ s₁.ₖ ] [ θbarₖ δbar₂ₖ ] = [ θₖ δbar₂ₖ ]
+ # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ]
+ # [ s̄₁.ₖ -c₁.ₖ ] [ γₖ₊₁ 0 ] [ 0 gₖ ]
+ (c₁ₖ, s₁ₖ, θₖ) = sym_givens(θbarₖ, γₖ₊₁)
+ gₖ = conj(s₁ₖ) * δbar₂ₖ
+ δbar₂ₖ = c₁ₖ * δbar₂ₖ
+
+ # [ c₂.ₖ s₂.ₖ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ]
+ # [ s̄₂.ₖ -c₂.ₖ ] [ θₖ δbar₂ₖ ] = [ 0 δbis₂ₖ ]
+ # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ]
+ # [ 1 ] [ 0 gₖ ] [ 0 gₖ ]
+ (c₂ₖ, s₂ₖ, δ₂ₖ₋₁) = sym_givens(δbar₂ₖ₋₁, θₖ)
+ σ₂ₖ₋₁ = c₂ₖ * σbar₂ₖ₋₁ + s₂ₖ * δbar₂ₖ
+ δbis₂ₖ = conj(s₂ₖ) * σbar₂ₖ₋₁ - c₂ₖ * δbar₂ₖ
+
+ # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ]
+ # [ c₃.ₖ s₃.ₖ ] [ 0 δbis₂ₖ ] = [ 0 δhat₂ₖ ]
+ # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ]
+ # [ s̄₃.ₖ -c₃.ₖ ] [ 0 gₖ ] [ 0 0 ]
+ (c₃ₖ, s₃ₖ, δhat₂ₖ) = sym_givens(δbis₂ₖ, gₖ)
+
+ # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ]
+ # [ c₄.ₖ s₄.ₖ ] [ 0 δhat₂ₖ ] = [ 0 δ₂ₖ ]
+ # [ s̄₄.ₖ -c₄.ₖ ] [ 0 βₖ₊₁ ] [ 0 0 ]
+ # [ 1 ] [ 0 0 ] [ 0 0 ]
+ (c₄ₖ, s₄ₖ, δ₂ₖ) = sym_givens(δhat₂ₖ, βₖ₊₁)
+
+ # Solve Gₖ = Wₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Gₖ)ᵀ = (Wₖ)ᵀ.
+ if iter == 1
+ # [ δ₁ 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ]
+ # [ σ₁ δ₂ ] [ gx₂ gy₂ ] [ 0 u₁ ]
+ @. gx₂ₖ₋₁ = vₖ / δ₂ₖ₋₁
+ @. gx₂ₖ = - σ₂ₖ₋₁ / δ₂ₖ * gx₂ₖ₋₁
+ @. gy₂ₖ = uₖ / δ₂ₖ
+ elseif iter == 2
+ # [ η₁ σ₂ δ₃ 0 ] [ gx₁ gy₁ ] = [ v₂ 0 ]
+ # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ]
+ # [ gx₃ gy₃ ]
+ # [ gx₄ gy₄ ]
+ @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁)
+ @kswap(gx₂ₖ₋₂, gx₂ₖ)
+ @kswap(gy₂ₖ₋₂, gy₂ₖ)
+ @. gx₂ₖ₋₁ = (vₖ - η₂ₖ₋₃ * gx₂ₖ₋₃ - σ₂ₖ₋₂ * gx₂ₖ₋₂ ) / δ₂ₖ₋₁
+ @. gx₂ₖ = ( - λ₂ₖ₋₃ * gx₂ₖ₋₃ - η₂ₖ₋₂ * gx₂ₖ₋₂ - σ₂ₖ₋₁ * gx₂ₖ₋₁) / δ₂ₖ
+ @. gy₂ₖ₋₁ = ( - η₂ₖ₋₃ * gy₂ₖ₋₃ - σ₂ₖ₋₂ * gy₂ₖ₋₂ ) / δ₂ₖ₋₁
+ @. gy₂ₖ = (uₖ - λ₂ₖ₋₃ * gy₂ₖ₋₃ - η₂ₖ₋₂ * gy₂ₖ₋₂ - σ₂ₖ₋₁ * gy₂ₖ₋₁) / δ₂ₖ
+ else
+ # μ₂ₖ₋₅ * gx₂ₖ₋₅ + λ₂ₖ₋₄ * gx₂ₖ₋₄ + η₂ₖ₋₃ * gx₂ₖ₋₃ + σ₂ₖ₋₂ * gx₂ₖ₋₂ + δ₂ₖ₋₁ * gx₂ₖ₋₁ = vₖ
+ # μ₂ₖ₋₄ * gx₂ₖ₋₄ + λ₂ₖ₋₃ * gx₂ₖ₋₃ + η₂ₖ₋₂ * gx₂ₖ₋₂ + σ₂ₖ₋₁ * gx₂ₖ₋₁ + δ₂ₖ * gx₂ₖ = 0
+ g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ
+ @. g₂ₖ₋₁ = (vₖ - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁
+ @. g₂ₖ = ( - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ
+ @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁)
+ @kswap(gx₂ₖ₋₂, gx₂ₖ)
+ # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0
+ # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ
+ g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ
+ @. g₂ₖ₋₁ = ( - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁
+ @. g₂ₖ = (uₖ - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ
+ @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁)
+ @kswap(gy₂ₖ₋₂, gy₂ₖ)
+ end
+
+ # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂)
+ πbis₂ₖ = c₁ₖ * πbar₂ₖ
+ πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ
+ #
+ π₂ₖ₋₁ = c₂ₖ * πbar₂ₖ₋₁ + s₂ₖ * πbis₂ₖ
+ πhat₂ₖ = conj(s₂ₖ) * πbar₂ₖ₋₁ - c₂ₖ * πbis₂ₖ
+ #
+ πtmp₂ₖ = c₃ₖ * πhat₂ₖ + s₃ₖ * πbis₂ₖ₊₂
+ πbar₂ₖ₊₂ = conj(s₃ₖ) * πhat₂ₖ - c₃ₖ * πbis₂ₖ₊₂
+ #
+ π₂ₖ = c₄ₖ * πtmp₂ₖ
+ πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ
+
+ # Update xₖ = Gxₖ * pₖ
+ @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ)
+ @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ)
+
+ # Update yₖ = Gyₖ * pₖ
+ @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ)
+ @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ)
+
+ # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|²
+ rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂))
+ history && push!(rNorms, rNorm)
+
+ # Update vₖ and uₖ
+ MisI || (vₖ .= vₖ₊₁)
+ NisI || (uₖ .= uₖ₊₁)
+
+ # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁
+ M⁻¹vₖ₋₁ .= M⁻¹vₖ
+ N⁻¹uₖ₋₁ .= N⁻¹uₖ
+
+ # Update M⁻¹vₖ and N⁻¹uₖ
+ M⁻¹vₖ .= q
+ N⁻¹uₖ .= p
+
+ # Update cosines and sines
+ old_s₁ₖ = s₁ₖ
+ old_s₂ₖ = s₂ₖ
+ old_s₃ₖ = s₃ₖ
+ old_s₄ₖ = s₄ₖ
+ old_c₁ₖ = c₁ₖ
+ old_c₂ₖ = c₂ₖ
+ old_c₃ₖ = c₃ₖ
+ old_c₄ₖ = c₄ₖ
+
+ # Update workspace
+ βₖ = βₖ₊₁
+ γₖ = γₖ₊₁
+ σbar₂ₖ₋₂ = σbar₂ₖ
+ ηbar₂ₖ₋₃ = ηbar₂ₖ₋₁
+ λbar₂ₖ₋₃ = λbar₂ₖ₋₁
+ if iter ≥ 2
+ μ₂ₖ₋₅ = μ₂ₖ₋₃
+ μ₂ₖ₋₄ = μ₂ₖ₋₂
+ λ₂ₖ₋₄ = λ₂ₖ₋₂
+ end
+ πbar₂ₖ₋₁ = πbar₂ₖ₊₁
+ πbar₂ₖ = πbar₂ₖ₊₂
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ₊₁, γₖ₊₁, ktimer(start_time))
end
- πbar₂ₖ₋₁ = πbar₂ₖ₊₁
- πbar₂ₖ = πbar₂ₖ₊₂
-
- # Stopping conditions that do not depend on user input.
- # This is to guard against tolerances that are unreasonably small.
- resid_decrease_mach = (rNorm + one(T) ≤ one(T))
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- resid_decrease_lim = rNorm ≤ ε
- breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol
- solved = resid_decrease_lim || resid_decrease_mach
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁)
+ (verbose > 0) && @printf(iostream, "\n")
+
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ breakdown && (status = "inconsistent linear system")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
+
+ # Update x and y
+ warm_start && @kaxpy!(m, one(FC), Δx, xₖ)
+ warm_start && @kaxpy!(n, one(FC), Δy, yₖ)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = !solved && breakdown
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
-
- tired && (status = "maximum number of iterations exceeded")
- breakdown && (status = "inconsistent linear system")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x and y
- warm_start && @kaxpy!(m, one(FC), Δx, xₖ)
- warm_start && @kaxpy!(n, one(FC), Δy, yₖ)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = !solved && breakdown
- stats.status = status
- return solver
end
diff --git a/src/usymlq.jl b/src/usymlq.jl
index 71670c80f..b80f0a622 100644
--- a/src/usymlq.jl
+++ b/src/usymlq.jl
@@ -21,34 +21,54 @@ export usymlq, usymlq!
"""
(x, stats) = usymlq(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ transfer_to_usymcg::Bool=true, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using the USYMLQ method.
+ (x, stats) = usymlq(A, b, c, x0::AbstractVector; kwargs...)
+
+USYMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+USYMLQ determines the least-norm solution of the consistent linear system Ax = b of size m × n.
USYMLQ is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
-The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`.
+The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`.
The error norm ‖x - x*‖ decreases monotonically in USYMLQ.
It can be regarded as a generalization of SYMMLQ.
It can also be applied to under-determined and over-determined problems.
In all cases, problems must be consistent.
-An option gives the possibility of transferring to the USYMCG point,
-when it exists. The transfer is based on the residual norm.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional argument
-USYMLQ can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = usymlq(A, b, c, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
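+
+#### Example
+
+A minimal usage sketch, assuming `A` and `b` are already defined; `c = A'b` is one of the default choices mentioned above:
+
+    using Krylov
+    c = A' * b                   # auxiliary vector that initializes the tridiagonalization
+    x, stats = usymlq(A, b, c)
+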
#### References
@@ -58,18 +78,6 @@ and `false` otherwise.
"""
function usymlq end
-function usymlq(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = UsymlqSolver(A, b)
- usymlq!(solver, A, b, c, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function usymlq(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = UsymlqSolver(A, b)
- usymlq!(solver, A, b, c; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = usymlq!(solver::UsymlqSolver, A, b, c; kwargs...)
solver = usymlq!(solver::UsymlqSolver, A, b, c, x0; kwargs...)
@@ -80,243 +88,290 @@ See [`UsymlqSolver`](@ref) for more details about the `solver`.
"""
function usymlq! end
-function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- usymlq!(solver, A, b, c; kwargs...)
- return solver
-end
-
-function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("USYMLQ: system of %d equations in %d variables\n", m, n)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x
- vₖ₋₁, vₖ, q, d̅, stats = solver.vₖ₋₁, solver.vₖ, solver.q, solver.d̅, solver.stats
- warm_start = solver.warm_start
- rNorms = stats.residuals
- reset!(stats)
- r₀ = warm_start ? q : b
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
+def_args_usymlq = (:(A ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_usymlq = (:(x0::AbstractVector),)
+
+def_kwargs_usymlq = (:(; transfer_to_usymcg::Bool = true),
+ :(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false ),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_usymlq = mapreduce(extract_parameters, vcat, def_kwargs_usymlq)
+
+args_usymlq = (:A, :b, :c)
+optargs_usymlq = (:x0,)
+kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
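+# As for TriMR above, the expression tuples are spliced into the `@eval` block
+# so that all generated methods of `usymlq` and `usymlq!` share one argument list.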
+@eval begin
+ function usymlq($(def_args_usymlq...), $(def_optargs_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = UsymlqSolver(A, b)
+ warm_start!(solver, $(optargs_usymlq...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ usymlq!(solver, $(args_usymlq...); $(kwargs_usymlq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Initial solution x₀ and residual norm ‖r₀‖.
- x .= zero(FC)
- bNorm = @knrm2(m, r₀)
- history && push!(rNorms, bNorm)
- if bNorm == 0
- stats.niter = 0
- stats.solved = true
- stats.inconsistent = false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function usymlq($(def_args_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = UsymlqSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ usymlq!(solver, $(args_usymlq...); $(kwargs_usymlq...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = m+n)
-
- ε = atol + rtol * bNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
-
- βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
- γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖
- vₖ₋₁ .= zero(FC) # v₀ = 0
- uₖ₋₁ .= zero(FC) # u₀ = 0
- vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= c ./ γₖ # u₁ = c / γ₁
- cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
- sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
- ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
- ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
- δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations
-
- # Stopping criterion.
- solved_lq = bNorm ≤ ε
- solved_cg = false
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved_lq || solved_cg || tired || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the SSY tridiagonalization process.
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
-
- @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
- @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
-
- αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
-
- @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
- @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
-
- βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖
- γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖
-
- # Update the LQ factorization of Tₖ = L̅ₖQₖ.
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
- # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
- # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
- # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
- # [ • • • • • 0 ] [ • • • • • • • ]
- # [ • • • • γₖ] [ • • • • • 0 ]
- # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
-
- if iter == 1
- δbarₖ = αₖ
- elseif iter == 2
- # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
- # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
- δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
- else
- # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
- # [sₖ₋₁ -cₖ₋₁ 0]
- # [ 0 0 1]
- #
- # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
- # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
- # [0 sₖ -cₖ]
- (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
- ϵₖ₋₂ = sₖ₋₁ * βₖ
- λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
- δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
+ function usymlq!(solver :: UsymlqSolver{T,FC,S}, $(def_args_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "USYMLQ: system of %d equations in %d variables\n", m, n)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x
+ vₖ₋₁, vₖ, q, d̅, stats = solver.vₖ₋₁, solver.vₖ, solver.q, solver.d̅, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ r₀ = warm_start ? q : b
+
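+    # With a warm start, the initial residual r₀ = b - AΔx is accumulated in the
+    # workspace q; otherwise b itself serves as r₀.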
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
end
- # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
- # [δbar₁] [ζbar₁] = [β₁]
- if iter == 1
- ηₖ = βₖ
- end
- # [δ₁ 0 ] [ ζ₁ ] = [β₁]
- # [λ₁ δbar₂] [ζbar₂] [0 ]
- if iter == 2
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -λₖ₋₁ * ζₖ₋₁
- end
- # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
- # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
- # [ζbarₖ]
- if iter ≥ 3
- ζₖ₋₂ = ζₖ₋₁
- ηₖ₋₁ = ηₖ
- ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
- ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
+ # Initial solution x₀ and residual norm ‖r₀‖.
+ x .= zero(FC)
+ bNorm = @knrm2(m, r₀)
+ history && push!(rNorms, bNorm)
+ if bNorm == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ.
- # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
- # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
- if iter ≥ 2
- # Compute solution xₖ.
- # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁
- @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
- @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x)
+ iter = 0
+ itmax == 0 && (itmax = m+n)
+
+ ε = atol + rtol * bNorm
+ (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time))
+
+ βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
+ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖
+ vₖ₋₁ .= zero(FC) # v₀ = 0
+ uₖ₋₁ .= zero(FC) # u₀ = 0
+ vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
+ uₖ .= c ./ γₖ # u₁ = c / γ₁
+ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
+ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ
+ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
+ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
+ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations
+
+ # Stopping criterion.
+ solved_lq = bNorm ≤ ε
+ solved_cg = false
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved_lq || solved_cg || tired || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
+
+ # Continue the SSY tridiagonalization process.
+ # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+
+ mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
+
+ @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
+ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
+
+ αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
+
+ @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
+ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
+
+ βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖
+ γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖
+
+ # Update the LQ factorization of Tₖ = L̅ₖQₖ.
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
+ # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ]
+ # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ]
+ # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ
+ # [ • • • • • 0 ] [ • • • • • • • ]
+ # [ • • • • γₖ] [ • • • • • 0 ]
+ # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ]
+
+ if iter == 1
+ δbarₖ = αₖ
+ elseif iter == 2
+ # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ]
+ # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ λₖ₋₁ = cₖ * βₖ + sₖ * αₖ
+ δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ
+ else
+ # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ]
+ # [sₖ₋₁ -cₖ₋₁ 0]
+ # [ 0 0 1]
+ #
+ # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ]
+ # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ]
+ # [0 sₖ -cₖ]
+ (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ)
+ ϵₖ₋₂ = sₖ₋₁ * βₖ
+ λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ
+ δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ
+ end
+
+ # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁
+ # [δbar₁] [ζbar₁] = [β₁]
+ if iter == 1
+ ηₖ = βₖ
+ end
+ # [δ₁ 0 ] [ ζ₁ ] = [β₁]
+ # [λ₁ δbar₂] [ζbar₂] [0 ]
+ if iter == 2
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -λₖ₋₁ * ζₖ₋₁
+ end
+ # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0]
+ # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0]
+ # [ζbarₖ]
+ if iter ≥ 3
+ ζₖ₋₂ = ζₖ₋₁
+ ηₖ₋₁ = ηₖ
+ ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁
+ ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
+ end
+
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ.
+ # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
+ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
+ if iter ≥ 2
+ # Compute solution xₖ.
+ # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁
+ @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x)
+ @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x)
+ end
+
+ # Compute d̅ₖ.
+ if iter == 1
+ # d̅₁ = u₁
+ @. d̅ = uₖ
+ else
+ # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
+ @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅)
+ end
+
+      # Compute vₖ₊₁ and uₖ₊₁.
+ @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
+ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+
+ if βₖ₊₁ ≠ zero(T)
+ @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
+ end
+ if γₖ₊₁ ≠ zero(T)
+ @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
+ end
+
+ # Compute USYMLQ residual norm
+ # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²)
+ if iter == 1
+ rNorm_lq = bNorm
+ else
+ μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
+ ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
+ rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ))
+ end
+ history && push!(rNorms, rNorm_lq)
+
+ # Compute USYMCG residual norm
+ # ‖rₖ‖ = |ρₖ|
+ if transfer_to_usymcg && (abs(δbarₖ) > eps(T))
+ ζbarₖ = ηₖ / δbarₖ
+ ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
+ rNorm_cg = abs(ρₖ)
+ end
+
+ # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ and δbarₖ₋₁.
+ sₖ₋₁ = sₖ
+ cₖ₋₁ = cₖ
+ γₖ = γₖ₊₁
+ βₖ = βₖ₊₁
+ δbarₖ₋₁ = δbarₖ
+
+ # Update stopping criterion.
+ user_requested_exit = callback(solver) :: Bool
+ solved_lq = rNorm_lq ≤ ε
+ solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time))
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Compute d̅ₖ.
- if iter == 1
- # d̅₁ = u₁
- @. d̅ = uₖ
- else
- # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
- @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅)
+ # Compute USYMCG point
+ # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
+ if solved_cg
+ @kaxpy!(n, ζbarₖ, d̅, x)
end
- # Compute uₖ₊₁ and uₖ₊₁.
- @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
- @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved_lq && (status = "solution xᴸ good enough given atol and rtol")
+ solved_cg && (status = "solution xᶜ good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- if βₖ₊₁ ≠ zero(T)
- @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
- end
- if γₖ₊₁ ≠ zero(T)
- @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
- end
-
- # Compute USYMLQ residual norm
- # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²)
- if iter == 1
- rNorm_lq = bNorm
- else
- μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
- ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ))
- end
- history && push!(rNorms, rNorm_lq)
-
- # Compute USYMCG residual norm
- # ‖rₖ‖ = |ρₖ|
- if transfer_to_usymcg && (abs(δbarₖ) > eps(T))
- ζbarₖ = ηₖ / δbarₖ
- ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ)
- rNorm_cg = abs(ρₖ)
- end
-
- # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ and δbarₖ₋₁.
- sₖ₋₁ = sₖ
- cₖ₋₁ = cₖ
- γₖ = γₖ₊₁
- βₖ = βₖ₊₁
- δbarₖ₋₁ = δbarₖ
-
- # Update stopping criterion.
- user_requested_exit = callback(solver) :: Bool
- solved_lq = rNorm_lq ≤ ε
- solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
- end
- (verbose > 0) && @printf("\n")
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- # Compute USYMCG point
- # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
- if solved_cg
- @kaxpy!(n, ζbarₖ, d̅, x)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved_lq || solved_cg
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
-
- tired && (status = "maximum number of iterations exceeded")
- solved_lq && (status = "solution xᴸ good enough given atol and rtol")
- solved_cg && (status = "solution xᶜ good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved_lq || solved_cg
- stats.inconsistent = false
- stats.status = status
- return solver
end
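The `def_args_*` / `def_kwargs_*` tables and the `@eval` block above generate the
out-of-place and in-place methods from a single list of keyword expressions. A
minimal sketch of how such a helper could work is below; the name
`extract_parameters` comes from this diff, but the body here is an illustration
of the idea, not the package's actual implementation.

    # Quoting :(; atol::T = √eps(T)) yields Expr(:tuple, Expr(:parameters, Expr(:kw, ...))),
    # so digging out ex.args[1].args recovers the bare `kw = default` expressions.
    extract_parameters_sketch(ex::Expr) = ex.args[1].args

    def_kwargs = (:(; atol::Float64 = 1e-8), :(; itmax::Int = 0))
    kw_exprs = mapreduce(extract_parameters_sketch, vcat, def_kwargs)

    # Splatting the flattened expressions builds the keyword section of a method.
    @eval f(x; $(kw_exprs...)) = (x, atol, itmax)
    f(1.0)  # (1.0, 1.0e-8, 0)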
diff --git a/src/usymqr.jl b/src/usymqr.jl
index 863390c3f..0aae23335 100644
--- a/src/usymqr.jl
+++ b/src/usymqr.jl
@@ -21,31 +21,53 @@ export usymqr, usymqr!
"""
(x, stats) = usymqr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ timemax::Float64=Inf, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using the USYMQR method.
+ (x, stats) = usymqr(A, b, c, x0::AbstractVector; kwargs...)
+
+USYMQR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+USYMQR solves the linear least-squares problem min ‖b - Ax‖² of size m × n.
+USYMQR solves Ax = b if it is consistent.
USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
-The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`.
+The vector `c` is only used to initialize the process; a reasonable default is `b` or `Aᴴb`, depending on the shape of `A`.
 The residual norm ‖b - Ax‖ decreases monotonically in USYMQR.
 It can be regarded as a generalization of MINRES.
It can also be applied to under-determined and over-determined problems.
USYMQR finds the minimum-norm solution if problems are inconsistent.
-USYMQR can be warm-started from an initial guess `x0` with the method
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional argument
- (x, stats) = usymqr(A, b, c, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `timemax`: the time limit in seconds;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -55,18 +77,6 @@ and `false` otherwise.
"""
function usymqr end
-function usymqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex
- solver = UsymqrSolver(A, b)
- usymqr!(solver, A, b, c, x0; kwargs...)
- return (solver.x, solver.stats)
-end
-
-function usymqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex
- solver = UsymqrSolver(A, b)
- usymqr!(solver, A, b, c; kwargs...)
- return (solver.x, solver.stats)
-end
-
"""
solver = usymqr!(solver::UsymqrSolver, A, b, c; kwargs...)
solver = usymqr!(solver::UsymqrSolver, A, b, c, x0; kwargs...)
@@ -77,235 +87,282 @@ See [`UsymqrSolver`](@ref) for more details about the `solver`.
"""
function usymqr! end
-function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC},
- x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- warm_start!(solver, x0)
- usymqr!(solver, A, b, c; kwargs...)
- return solver
-end
-
-function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- m, n = size(A)
- length(b) == m || error("Inconsistent problem size")
- length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("USYMQR: system of %d equations in %d variables\n", m, n)
-
- # Check type consistency
- eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
-
- # Compute the adjoint of A
- Aᵀ = A'
-
- # Set up workspace.
- vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p
- wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats
- warm_start = solver.warm_start
- rNorms, AᵀrNorms = stats.residuals, stats.Aresiduals
- reset!(stats)
- r₀ = warm_start ? q : b
-
- if warm_start
- mul!(r₀, A, Δx)
- @kaxpby!(n, one(FC), b, -one(FC), r₀)
+def_args_usymqr = (:(A ),
+ :(b::AbstractVector{FC}),
+ :(c::AbstractVector{FC}))
+
+def_optargs_usymqr = (:(x0::AbstractVector),)
+
+def_kwargs_usymqr = (:(; atol::T = √eps(T) ),
+ :(; rtol::T = √eps(T) ),
+ :(; itmax::Int = 0 ),
+ :(; timemax::Float64 = Inf ),
+ :(; verbose::Int = 0 ),
+ :(; history::Bool = false ),
+ :(; callback = solver -> false),
+ :(; iostream::IO = kstdout ))
+
+def_kwargs_usymqr = mapreduce(extract_parameters, vcat, def_kwargs_usymqr)
+
+args_usymqr = (:A, :b, :c)
+optargs_usymqr = (:x0,)
+kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream)
+
+@eval begin
+ function usymqr($(def_args_usymqr...), $(def_optargs_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = UsymqrSolver(A, b)
+ warm_start!(solver, $(optargs_usymqr...))
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ usymqr!(solver, $(args_usymqr...); $(kwargs_usymqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- # Initial solution x₀ and residual norm ‖r₀‖.
- x .= zero(FC)
- rNorm = @knrm2(m, r₀)
- history && push!(rNorms, rNorm)
- if rNorm == 0
- stats.niter = 0
- stats.solved = true
- stats.inconsistent = false
- stats.status = "x = 0 is a zero-residual solution"
- solver.warm_start = false
- return solver
+ function usymqr($(def_args_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ start_time = time_ns()
+ solver = UsymqrSolver(A, b)
+ elapsed_time = ktimer(start_time)
+ timemax -= elapsed_time
+ usymqr!(solver, $(args_usymqr...); $(kwargs_usymqr...))
+ solver.stats.timer += elapsed_time
+ return (solver.x, solver.stats)
end
- iter = 0
- itmax == 0 && (itmax = m+n)
-
- ε = atol + rtol * rNorm
- κ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗")
-
- βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
- γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖
- vₖ₋₁ .= zero(FC) # v₀ = 0
- uₖ₋₁ .= zero(FC) # u₀ = 0
- vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= c ./ γₖ # u₁ = c / γ₁
- cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
- sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹
- wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹
- ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
-
- # Stopping criterion.
- solved = rNorm ≤ ε
- inconsistent = false
- tired = iter ≥ itmax
- status = "unknown"
- user_requested_exit = false
-
- while !(solved || tired || inconsistent || user_requested_exit)
- # Update iteration index.
- iter = iter + 1
-
- # Continue the SSY tridiagonalization process.
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
-
- mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
-
- @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
- @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
-
- αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
-
- @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
- @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
-
- βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖
- γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖
-
- # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
- # [ Oᵀ ]
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ]
- # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ]
- # [ 0 • • • • • ] [ • • δ₃ • • • • ]
- # [ • • • • • • • ] = Qₖ [ • • • • • 0 ]
- # [ • • • • • 0 ] [ • • • • ϵₖ₋₂]
- # [ • • • • γₖ ] [ • • • λₖ₋₁]
- # [ • • βₖ αₖ ] [ 0 • • • • 0 δₖ ]
- # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ]
- #
- # If k = 1, we don't have any previous reflexion.
- # If k = 2, we apply the last reflexion.
- # If k ≥ 3, we only apply the two previous reflexions.
-
- # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁
- if iter ≥ 3
- # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ]
- # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁]
- ϵₖ₋₂ = sₖ₋₂ * γₖ
- λbarₖ₋₁ = -cₖ₋₂ * γₖ
+ function usymqr!(solver :: UsymqrSolver{T,FC,S}, $(def_args_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}}
+
+ # Timer
+ start_time = time_ns()
+ timemax_ns = 1e9 * timemax
+
+ m, n = size(A)
+ (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)")
+ length(b) == m || error("Inconsistent problem size")
+ length(c) == n || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "USYMQR: system of %d equations in %d variables\n", m, n)
+
+ # Check type consistency
+ eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products."
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
+
+ # Compute the adjoint of A
+ Aᴴ = A'
+
+ # Set up workspace.
+ vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p
+ wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats
+ warm_start = solver.warm_start
+ rNorms, AᴴrNorms = stats.residuals, stats.Aresiduals
+ reset!(stats)
+ r₀ = warm_start ? q : b
+
+ if warm_start
+ mul!(r₀, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), r₀)
end
- # Apply previous Givens reflections Qₖ₋₁.ₖ
- if iter ≥ 2
- iter == 2 && (λbarₖ₋₁ = γₖ)
- # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ]
- # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ]
- λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ
- δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ
+ # Initial solution x₀ and residual norm ‖r₀‖.
+ x .= zero(FC)
+ rNorm = @knrm2(m, r₀)
+ history && push!(rNorms, rNorm)
+ if rNorm == 0
+ stats.niter = 0
+ stats.solved = true
+ stats.inconsistent = false
+ stats.timer = ktimer(start_time)
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
end
- # Compute and apply current Givens reflection Qₖ.ₖ₊₁
- iter == 1 && (δbarₖ = αₖ)
- # [cₖ sₖ] [δbarₖ] = [δₖ]
- # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ]
- (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁)
-
- # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ]
- # [ 0 ]
- #
- # [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
- # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁]
- ζₖ = cₖ * ζbarₖ
- ζbarₖ₊₁ = conj(sₖ) * ζbarₖ
-
- # Compute the direction wₖ, the last column of Wₖ = Uₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Uₖ)ᵀ.
- # w₁ = u₁ / δ₁
- if iter == 1
- wₖ = wₖ₋₁
- @kaxpy!(n, one(FC), uₖ, wₖ)
- @. wₖ = wₖ / δₖ
- end
- # w₂ = (u₂ - λ₁w₁) / δ₂
- if iter == 2
- wₖ = wₖ₋₂
- @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
- @kaxpy!(n, one(FC), uₖ, wₖ)
- @. wₖ = wₖ / δₖ
- end
- # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ
- if iter ≥ 3
- @kscal!(n, -ϵₖ₋₂, wₖ₋₂)
- wₖ = wₖ₋₂
- @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
- @kaxpy!(n, one(FC), uₖ, wₖ)
- @. wₖ = wₖ / δₖ
+ iter = 0
+ itmax == 0 && (itmax = m+n)
+
+ ε = atol + rtol * rNorm
+ κ = zero(T)
+ (verbose > 0) && @printf(iostream, "%5s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖", "timer")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8s %.2fs\n", iter, rNorm, " ✗ ✗ ✗ ✗", ktimer(start_time))
+
+ βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
+ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖
+ vₖ₋₁ .= zero(FC) # v₀ = 0
+ uₖ₋₁ .= zero(FC) # u₀ = 0
+ vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
+ uₖ .= c ./ γₖ # u₁ = c / γ₁
+ cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
+ sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹
+ wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹
+ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁
+
+ # Stopping criterion.
+ solved = rNorm ≤ ε
+ inconsistent = false
+ tired = iter ≥ itmax
+ status = "unknown"
+ user_requested_exit = false
+ overtimed = false
+
+ while !(solved || tired || inconsistent || user_requested_exit || overtimed)
+ # Update iteration index.
+ iter = iter + 1
+
+ # Continue the SSY tridiagonalization process.
+ # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
+
+ mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
+
+ @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
+ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
+
+ αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩
+
+ @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
+ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
+
+ βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖
+ γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖
+
+ # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
+ # [ Oᵀ ]
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ]
+ # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ]
+ # [ 0 • • • • • ] [ • • δ₃ • • • • ]
+ # [ • • • • • • • ] = Qₖ [ • • • • • 0 ]
+ # [ • • • • • 0 ] [ • • • • ϵₖ₋₂]
+ # [ • • • • γₖ ] [ • • • λₖ₋₁]
+ # [ • • βₖ αₖ ] [ 0 • • • • 0 δₖ ]
+ # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ]
+ #
+      # If k = 1, we don't have any previous reflection.
+      # If k = 2, we apply the last reflection.
+      # If k ≥ 3, we only apply the two previous reflections.
+
+ # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁
+ if iter ≥ 3
+ # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ]
+ # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁]
+ ϵₖ₋₂ = sₖ₋₂ * γₖ
+ λbarₖ₋₁ = -cₖ₋₂ * γₖ
+ end
+
+ # Apply previous Givens reflections Qₖ₋₁.ₖ
+ if iter ≥ 2
+ iter == 2 && (λbarₖ₋₁ = γₖ)
+ # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ]
+ # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ]
+ λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ
+ δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ
+ end
+
+ # Compute and apply current Givens reflection Qₖ.ₖ₊₁
+ iter == 1 && (δbarₖ = αₖ)
+ # [cₖ sₖ] [δbarₖ] = [δₖ]
+ # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ]
+ (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁)
+
+ # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ]
+ # [ 0 ]
+ #
+ # [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
+ # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁]
+ ζₖ = cₖ * ζbarₖ
+ ζbarₖ₊₁ = conj(sₖ) * ζbarₖ
+
+ # Compute the direction wₖ, the last column of Wₖ = Uₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Uₖ)ᵀ.
+ # w₁ = u₁ / δ₁
+ if iter == 1
+ wₖ = wₖ₋₁
+ @kaxpy!(n, one(FC), uₖ, wₖ)
+ @. wₖ = wₖ / δₖ
+ end
+ # w₂ = (u₂ - λ₁w₁) / δ₂
+ if iter == 2
+ wₖ = wₖ₋₂
+ @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
+ @kaxpy!(n, one(FC), uₖ, wₖ)
+ @. wₖ = wₖ / δₖ
+ end
+ # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ
+ if iter ≥ 3
+ @kscal!(n, -ϵₖ₋₂, wₖ₋₂)
+ wₖ = wₖ₋₂
+ @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ)
+ @kaxpy!(n, one(FC), uₖ, wₖ)
+ @. wₖ = wₖ / δₖ
+ end
+
+ # Compute solution xₖ.
+ # xₖ ← xₖ₋₁ + ζₖ * wₖ
+ @kaxpy!(n, ζₖ, wₖ, x)
+
+ # Compute ‖rₖ‖ = |ζbarₖ₊₁|.
+ rNorm = abs(ζbarₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Compute ‖Aᴴrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
+ AᴴrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁))
+ history && push!(AᴴrNorms, AᴴrNorm)
+
+      # Compute vₖ₊₁ and uₖ₊₁.
+ @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
+ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
+
+ if βₖ₊₁ ≠ zero(T)
+ @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
+ end
+ if γₖ₊₁ ≠ zero(T)
+ @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
+ end
+
+ # Update directions for x.
+ if iter ≥ 2
+ @kswap(wₖ₋₂, wₖ₋₁)
+ end
+
+ # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ.
+ if iter ≥ 2
+ sₖ₋₂ = sₖ₋₁
+ cₖ₋₂ = cₖ₋₁
+ end
+ sₖ₋₁ = sₖ
+ cₖ₋₁ = cₖ
+ ζbarₖ = ζbarₖ₊₁
+ γₖ = γₖ₊₁
+ βₖ = βₖ₊₁
+
+ # Update stopping criterion.
+ iter == 1 && (κ = atol + rtol * AᴴrNorm)
+ user_requested_exit = callback(solver) :: Bool
+ solved = rNorm ≤ ε
+ inconsistent = !solved && AᴴrNorm ≤ κ
+ tired = iter ≥ itmax
+ timer = time_ns() - start_time
+ overtimed = timer > timemax_ns
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %.2fs\n", iter, rNorm, AᴴrNorm, ktimer(start_time))
end
+ (verbose > 0) && @printf(iostream, "\n")
- # Compute solution xₖ.
- # xₖ ← xₖ₋₁ + ζₖ * wₖ
- @kaxpy!(n, ζₖ, wₖ, x)
-
- # Compute ‖rₖ‖ = |ζbarₖ₊₁|.
- rNorm = abs(ζbarₖ₊₁)
- history && push!(rNorms, rNorm)
-
- # Compute ‖Aᵀrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
- AᵀrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁))
- history && push!(AᵀrNorms, AᵀrNorm)
-
- # Compute uₖ₊₁ and uₖ₊₁.
- @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
- @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
-
- if βₖ₊₁ ≠ zero(T)
- @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
- end
- if γₖ₊₁ ≠ zero(T)
- @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
- end
+ # Termination status
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ user_requested_exit && (status = "user-requested exit")
+ overtimed && (status = "time limit exceeded")
- # Update directions for x.
- if iter ≥ 2
- @kswap(wₖ₋₂, wₖ₋₁)
- end
+ # Update x
+ warm_start && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
- # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ.
- if iter ≥ 2
- sₖ₋₂ = sₖ₋₁
- cₖ₋₂ = cₖ₋₁
- end
- sₖ₋₁ = sₖ
- cₖ₋₁ = cₖ
- ζbarₖ = ζbarₖ₊₁
- γₖ = γₖ₊₁
- βₖ = βₖ₊₁
-
- # Update stopping criterion.
- iter == 1 && (κ = atol + rtol * AᵀrNorm)
- user_requested_exit = callback(solver) :: Bool
- solved = rNorm ≤ ε
- inconsistent = !solved && AᵀrNorm ≤ κ
- tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᵀrNorm)
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.timer = ktimer(start_time)
+ stats.status = status
+ return solver
end
- (verbose > 0) && @printf("\n")
- tired && (status = "maximum number of iterations exceeded")
- solved && (status = "solution good enough given atol and rtol")
- user_requested_exit && (status = "user-requested exit")
-
- # Update x
- warm_start && @kaxpy!(n, one(FC), Δx, x)
- solver.warm_start = false
-
- # Update stats
- stats.niter = iter
- stats.solved = solved
- stats.inconsistent = inconsistent
- stats.status = status
- return solver
end
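To make the refreshed docstring concrete, here is a usage sketch of the
warm-started form with the new `timemax` and `history` keywords; the matrix and
vectors are hypothetical test data.

    using Krylov, LinearAlgebra

    m, n = 20, 10
    A = rand(m, n)
    b = rand(m)
    c = A' * b                    # c only initializes the process; Aᴴb suits m ≠ n
    x0 = zeros(n)                 # initial guess for the warm start
    x, stats = usymqr(A, b, c, x0; itmax = 100, timemax = 10.0, history = true)
    stats.solved, stats.timer     # SimpleStats now records the elapsed time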
diff --git a/test/callback_utils.jl b/test/callback_utils.jl
new file mode 100644
index 000000000..f88f01848
--- /dev/null
+++ b/test/callback_utils.jl
@@ -0,0 +1,152 @@
+mutable struct StorageGetxRestartedGmres{S}
+ x::S
+ y::S
+ p::S
+end
+StorageGetxRestartedGmres(solver::GmresSolver; N = I) =
+ StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x))
+
+function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A,
+ stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S}
+ NisI = (N === I)
+ x2, y2, p2 = stor.x, stor.y, stor.p
+ n = size(A, 2)
+ # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
+ nr = sum(1:solver.inner_iter)
+ y = solver.z # yᵢ = zᵢ
+ y2 .= y
+ R = solver.R
+ V = solver.V
+ x2 .= solver.Δx
+ for i = solver.inner_iter : -1 : 1
+ pos = nr + i - solver.inner_iter # position of rᵢ.ₖ
+ for j = solver.inner_iter : -1 : i+1
+ y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ eps(T)^(3/4)
+ y2[i] = zero(FC)
+ inconsistent = true
+ else
+ y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ end
+ end
+
+ # Form xₖ = N⁻¹Vₖyₖ
+ for i = 1 : solver.inner_iter
+ Krylov.@kaxpy!(n, y2[i], V[i], x2)
+ end
+ if !NisI
+ p2 .= solver.p
+ p2 .= x2
+ mul!(x2, N, p2)
+ end
+ x2 .+= solver.x
+end
+
+mutable struct TestCallbackN2{T, S, M}
+ A::M
+ b::S
+ storage_vec::S
+ tol::T
+end
+TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol)
+
+function (cb_n2::TestCallbackN2)(solver)
+ mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
+ cb_n2.storage_vec .-= cb_n2.b
+ return norm(cb_n2.storage_vec) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2Adjoint{T, S, M}
+ A::M
+ b::S
+ c::S
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol)
+
+function (cb_n2::TestCallbackN2Adjoint)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
+ cb_n2.storage_vec1 .-= cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', solver.y)
+ cb_n2.storage_vec2 .-= cb_n2.c
+ return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
+end
+
+mutable struct TestCallbackN2Shifts{T, S, M}
+ A::M
+ b::S
+ shifts::Vector{T}
+ tol::T
+end
+TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol)
+
+function (cb_n2::TestCallbackN2Shifts)(solver)
+ r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x)
+ return all(map(norm, r) .≤ cb_n2.tol)
+end
+
+mutable struct TestCallbackN2LS{T, S, M}
+ A::M
+ b::S
+ λ::T
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol)
+
+function (cb_n2::TestCallbackN2LS)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
+ cb_n2.storage_vec1 .-= cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1)
+ cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x
+ return norm(cb_n2.storage_vec2) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2LN{T, S, M}
+ A::M
+ b::S
+ λ::T
+ storage_vec::S
+ tol::T
+end
+TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol)
+
+function (cb_n2::TestCallbackN2LN)(solver)
+ mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
+ cb_n2.storage_vec .-= cb_n2.b
+ cb_n2.λ != 0 && (cb_n2.storage_vec .+= cb_n2.λ .* solver.x)
+ return norm(cb_n2.storage_vec) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2SaddlePts{T, S, M}
+ A::M
+ b::S
+ c::S
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2SaddlePts(A, b, c; tol = 0.1) =
+ TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol)
+
+function (cb_n2::TestCallbackN2SaddlePts)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.y)
+ cb_n2.storage_vec1 .+= solver.x .- cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', solver.x)
+ cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c
+ return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
+end
+
+function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol)
+ get_x_restarted_gmres!(solver, A, stor, N)
+ x = stor.x
+ mul!(storage_vec, A, x)
+ storage_vec .-= b
+ return (norm(storage_vec) ≤ tol)
+end
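These functors exercise the `callback(solver)` hook documented above: the solve
stops as soon as the functor returns `true`. A sketch of how one plugs into a
solver (the matrix and right-hand side are hypothetical):

    using Krylov, LinearAlgebra, SparseArrays

    A = sprandn(100, 100, 0.05) + 10I
    b = rand(100)
    cb = TestCallbackN2(A, b; tol = 1e-8)  # fires once ‖b - Ax‖ ≤ tol
    x, stats = gmres(A, b, callback = cb)
    stats.status                           # "user-requested exit" when cb fired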
diff --git a/test/get_div_grad.jl b/test/get_div_grad.jl
index 6d6bf012e..ae27e5061 100644
--- a/test/get_div_grad.jl
+++ b/test/get_div_grad.jl
@@ -1,8 +1,8 @@
# Identity matrix.
eye(n::Int; FC=Float64) = sparse(one(FC) * I, n, n)
-# Compute the energy norm ‖r‖ₚ = √(rᵀPr) where P is a symmetric and positive definite matrix.
-metric(r, P) = sqrt(dot(r, P * r))
+# Compute the energy norm ‖r‖ₚ = √(rᴴPr) where P is a symmetric and positive definite matrix.
+metric(r, P) = sqrt(real(dot(r, P * r)))
# Based on Lars Ruthotto's initial implementation.
function get_div_grad(n1 :: Int, n2 :: Int, n3 :: Int)
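The added `real(...)` matters once `r` is complex: `dot(r, P * r)` is
mathematically real for a Hermitian positive-definite `P`, but Julia returns it
as a complex number (and round-off can leave a tiny imaginary part), and `sqrt`
of a `Complex` stays complex. A toy check:

    using LinearAlgebra

    r = ComplexF64[1 + im, 2 - im]
    P = [2.0 0.0; 0.0 3.0]
    dot(r, P * r)               # 19.0 + 0.0im: complex type, real value
    sqrt(real(dot(r, P * r)))   # ≈ 4.3589 as a plain Float64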
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
new file mode 100644
index 000000000..9fb6cdffd
--- /dev/null
+++ b/test/gpu/amd.jl
@@ -0,0 +1,111 @@
+using AMDGPU
+
+include("gpu.jl")
+
+@testset "AMD -- AMDGPU.jl" begin
+
+ @test AMDGPU.functional()
+ AMDGPU.allowscalar(false)
+
+ @testset "documentation" begin
+ A_cpu = rand(ComplexF64, 20, 20)
+ A_cpu = A_cpu + A_cpu'
+ b_cpu = rand(ComplexF64, 20)
+ A_gpu = ROCMatrix(A_cpu)
+ b_gpu = ROCVector(b_cpu)
+ x, stats = minres(A_gpu, b_gpu)
+ end
+
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = ROCVector{FC}
+ M = ROCMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ # @testset "processes -- $FC" begin
+ # test_processes(S, M)
+ # end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+ end
+end
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
new file mode 100644
index 000000000..65e123be1
--- /dev/null
+++ b/test/gpu/gpu.jl
@@ -0,0 +1,52 @@
+using LinearAlgebra, SparseArrays, Test
+using Krylov
+
+include("../test_utils.jl")
+
+function test_processes(S, M)
+ m = 250
+ n = 500
+ k = 20
+ FC = eltype(S)
+
+ cpu_A, cpu_b = symmetric_indefinite(n, FC=FC)
+ gpu_A, gpu_b = M(cpu_A), S(cpu_b)
+ V, T = hermitian_lanczos(gpu_A, gpu_b, k)
+
+ cpu_A, cpu_b = nonsymmetric_definite(n, FC=FC)
+ cpu_c = -cpu_b
+ gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c)
+ V, T, U, Tᴴ = nonhermitian_lanczos(gpu_A, gpu_b, gpu_c, k)
+
+ cpu_A, cpu_b = nonsymmetric_indefinite(n, FC=FC)
+ gpu_A, gpu_b = M(cpu_A), S(cpu_b)
+ V, H = arnoldi(gpu_A, gpu_b, k)
+
+ cpu_A, cpu_b = under_consistent(m, n, FC=FC)
+ gpu_A, gpu_b = M(cpu_A), S(cpu_b)
+ V, U, L = golub_kahan(gpu_A, gpu_b, k)
+
+ cpu_A, cpu_b = under_consistent(m, n, FC=FC)
+ _, cpu_c = over_consistent(n, m, FC=FC)
+ gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c)
+ V, T, U, Tᴴ = saunders_simon_yip(gpu_A, gpu_b, gpu_c, k)
+
+ cpu_A, cpu_b = under_consistent(m, n, FC=FC)
+ cpu_B, cpu_c = over_consistent(n, m, FC=FC)
+ gpu_A, gpu_B, gpu_b, gpu_c = M(cpu_A), M(cpu_B), S(cpu_b), S(cpu_c)
+ V, H, U, F = montoison_orban(gpu_A, gpu_B, gpu_b, gpu_c, k)
+end
+
+function test_solver(S, M)
+ n = 10
+ memory = 5
+ A = M(undef, n, n)
+ b = S(undef, n)
+ solver = GmresSolver(n, n, memory, S)
+ solve!(solver, A, b) # Test that we don't have errors
+end
+
+function test_conversion(S, M)
+ @test Krylov.vector_to_matrix(S) <: M
+ @test Krylov.matrix_to_vector(M) <: S
+end
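The helpers above are backend-agnostic, which is what lets amd.jl, intel.jl,
metal.jl and nvidia.jl reuse them below. A CPU sanity check might look like
this, with Vector/Matrix standing in for a GPU array type:

    using Krylov, LinearAlgebra

    n, memory = 10, 5
    A = Matrix(1.0 * I, n, n)   # trivial test matrix
    b = ones(n)
    solver = GmresSolver(n, n, memory, Vector{Float64})
    solve!(solver, A, b)        # generic entry point used by test_solver
    solver.stats.solved         # true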
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
new file mode 100644
index 000000000..f03176199
--- /dev/null
+++ b/test/gpu/intel.jl
@@ -0,0 +1,113 @@
+using oneAPI
+
+include("gpu.jl")
+
+@testset "Intel -- oneAPI.jl" begin
+
+ @test oneAPI.functional()
+ oneAPI.allowscalar(false)
+
+ @testset "documentation" begin
+ T = Float32
+ m = 20
+ n = 10
+ A_cpu = rand(T, m, n)
+ b_cpu = rand(T, m)
+ A_gpu = oneMatrix(A_cpu)
+ b_gpu = oneVector(b_cpu)
+ x, stats = lsqr(A_gpu, b_gpu)
+ end
+
+ for FC ∈ (Float32, ComplexF32)
+ S = oneVector{FC}
+ M = oneMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ # @testset "processes -- $FC" begin
+ # test_processes(S, M)
+ # end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+ end
+end
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
new file mode 100644
index 000000000..2e684e21f
--- /dev/null
+++ b/test/gpu/metal.jl
@@ -0,0 +1,113 @@
+using Metal
+
+include("gpu.jl")
+
+@testset "Apple M1 GPUs -- Metal.jl" begin
+
+ # @test Metal.functional()
+ Metal.allowscalar(false)
+
+ @testset "documentation" begin
+ T = Float32
+ n = 10
+ m = 20
+ A_cpu = rand(T, n, m)
+ b_cpu = rand(T, n)
+ A_gpu = MtlMatrix(A_cpu)
+ b_gpu = MtlVector(b_cpu)
+ x, stats = craig(A_gpu, b_gpu)
+ end
+
+ for FC in (Float32, ComplexF32)
+ S = MtlVector{FC}
+ M = MtlMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ # @testset "processes -- $FC" begin
+ # test_processes(S, M)
+ # end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+ end
+end
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
new file mode 100644
index 000000000..8cb44136d
--- /dev/null
+++ b/test/gpu/nvidia.jl
@@ -0,0 +1,215 @@
+using LinearOperators, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
+
+include("gpu.jl")
+
+@testset "Nvidia -- CUDA.jl" begin
+
+ @test CUDA.functional()
+ CUDA.allowscalar(false)
+
+ @testset "documentation" begin
+ A_cpu = rand(20, 20)
+ b_cpu = rand(20)
+ A_gpu = CuMatrix(A_cpu)
+ b_gpu = CuVector(b_cpu)
+ x, stats = bilq(A_gpu, b_gpu)
+
+ A_cpu = sprand(200, 100, 0.3)
+ b_cpu = rand(200)
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ b_gpu = CuVector(b_cpu)
+ x, stats = lsmr(A_gpu, b_gpu)
+
+ @testset "ic0" begin
+ A_cpu, b_cpu = sparse_laplacian()
+ @test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == true
+
+ b_gpu = CuVector(b_cpu)
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ z = CUDA.zeros(T, n)
+ symmetric = hermitian = true
+
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ P = ic02(A_gpu)
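+      # ic02 computes an incomplete Cholesky factor in-place; with CSC storage it
+      # is read as an upper triangle U with A ≈ UᴴU, so M⁻¹x = U⁻¹(U⁻ᴴx) takes two
+      # triangular solves (the CSR branch below uses a lower factor L instead).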
+ function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z)
+ ldiv!(z, UpperTriangular(P)', x)
+ ldiv!(y, UpperTriangular(P), z)
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z))
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+ @test stats.niter ≤ 19
+
+ A_gpu = CuSparseMatrixCSR(A_gpu)
+ P = ic02(A_gpu)
+ function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z)
+ ldiv!(z, LowerTriangular(P), x)
+ ldiv!(y, LowerTriangular(P)', z)
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z))
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+ @test stats.niter ≤ 19
+ end
+
+ @testset "ilu0" begin
+ A_cpu = Float64[1 0 0 4;
+ 0 0 7 8;
+ 9 0 0 12;
+ 0 14 0 16]
+ A_cpu = sparse(A_cpu)
+ b_cpu = ones(4)
+ @test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == false
+
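+      # zfd appears to return a 0-based permutation (hence p .+= 1) that makes the
+      # diagonal of A_cpu[:,p] zero-free, as ilu02 requires. Since A[:,p] * x̄ equals
+      # A * x̄[invp], the unpermuted solution is recovered below as x = Vector(x̄)[invp].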
+ p = zfd(A_cpu)
+ p .+= 1
+ invp = invperm(p)
+ @test reduce(&, invp .== p) == false
+
+ b_gpu = CuVector(b_cpu)
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ z = CUDA.zeros(T, n)
+ symmetric = hermitian = false
+
+ A_gpu = CuSparseMatrixCSC(A_cpu[:,p])
+ P = ilu02(A_gpu)
+ function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z)
+ ldiv!(z, LowerTriangular(P), x)
+ ldiv!(y, UnitUpperTriangular(P), z)
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z))
+ x̄, stats = gmres(A_gpu, b_gpu, M=opM)
+ x = Vector(x̄)[invp]
+ @test norm(b_gpu - A_gpu * x̄) ≤ 1e-6
+ @test norm(b_cpu - A_cpu * x) ≤ 1e-6
+
+ A_gpu = CuSparseMatrixCSR(A_cpu[:,p])
+ P = ilu02(A_gpu)
+ function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z)
+ ldiv!(z, UnitLowerTriangular(P), x)
+ ldiv!(y, UpperTriangular(P), z)
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z))
+ x̄, stats = gmres(A_gpu, b_gpu, M=opM)
+ x = Vector(x̄)[invp]
+ @test norm(b_gpu - A_gpu * x̄) ≤ 1e-6
+ @test norm(b_cpu - A_cpu * x) ≤ 1e-6
+ end
+ end
+
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = CuVector{FC}
+ V = CuSparseVector{FC}
+ M = CuMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "processes -- $FC" begin
+ test_processes(S, M)
+ end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+
+ @testset "ktypeof -- $FC" begin
+ dv = S(rand(FC, 10))
+ b = view(dv, 4:8)
+ @test Krylov.ktypeof(dv) <: S
+ @test Krylov.ktypeof(b) <: S
+
+ dm = M(rand(FC, 10, 10))
+ b = view(dm, :, 3)
+ @test Krylov.ktypeof(b) <: S
+
+ sv = V(sprand(FC, 10, 0.5))
+ b = view(sv, 4:8)
+ @test Krylov.ktypeof(sv) <: S
+ @test Krylov.ktypeof(b) <: S
+ end
+ end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 99ab25fda..5381fd10e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,7 +4,9 @@ import Krylov.KRYLOV_SOLVERS
include("test_utils.jl")
include("test_aux.jl")
include("test_stats.jl")
+include("test_processes.jl")
+include("test_fgmres.jl")
include("test_gpmr.jl")
include("test_fom.jl")
include("test_gmres.jl")
@@ -42,3 +44,5 @@ include("test_allocations.jl")
include("test_mp.jl")
include("test_solvers.jl")
include("test_warm_start.jl")
+include("test_verbose.jl")
+include("test_extensions.jl")
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
index 4c6817499..174d0ae55 100644
--- a/test/test_allocations.jl
+++ b/test/test_allocations.jl
@@ -1,26 +1,27 @@
@testset "allocations" begin
- for FC in (Float64, ComplexF64)
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
@testset "Data Type: $FC" begin
- A = FC.(get_div_grad(16, 16, 16)) # Dimension n x n
- n = size(A, 1)
- m = div(n, 2)
- Au = A[1:m,:] # Dimension m x n
- Ao = A[:,1:m] # Dimension n x m
- b = Ao * ones(FC, m) # Dimension n
- c = Au * ones(FC, n) # Dimension m
+ A = FC.(get_div_grad(18, 18, 18)) # Dimension m x n
+ m,n = size(A)
+ k = div(n, 2)
+ Au = A[1:k,:] # Dimension k x n
+ Ao = A[:,1:k] # Dimension m x k
+ b = Ao * ones(FC, k) # Dimension m
+ c = Au * ones(FC, n) # Dimension k
mem = 200
- shifts = [1.0; 2.0; 3.0; 4.0; 5.0]
+ T = real(FC)
+ shifts = T[1; 2; 3; 4; 5]
nshifts = 5
- nbits = sizeof(FC) # 8 bits for Float64 and 16 bits for ComplexF64
+      nbits_FC = sizeof(FC)  # 8 bytes for ComplexF32 and 16 bytes for ComplexF64
+      nbits_T = sizeof(T)    # 4 bytes for Float32 and 8 bytes for Float64
@testset "SYMMLQ" begin
# SYMMLQ needs:
# 5 n-vectors: x, Mvold, Mv, Mv_next, w̅
- storage_symmlq(n) = 5 * n
- storage_symmlq_bytes(n) = nbits * storage_symmlq(n)
+ storage_symmlq_bytes(n) = nbits_FC * 5 * n
expected_symmlq_bytes = storage_symmlq_bytes(n)
symmlq(A, b) # warmup
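As a back-of-the-envelope check of the estimate above, assuming
get_div_grad(18, 18, 18) yields an operator of dimension n = 18³, as its 3-D
grid arguments suggest:

    n = 18^3                            # 5832
    bytes = sizeof(ComplexF64) * 5 * n  # 16 * 5 * 5832 = 466_560 ≈ 0.44 MiB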
@@ -36,8 +37,7 @@
@testset "CG" begin
# CG needs:
# 4 n-vectors: x, r, p, Ap
- storage_cg(n) = 4 * n
- storage_cg_bytes(n) = nbits * storage_cg(n)
+ storage_cg_bytes(n) = nbits_FC * 4 * n
expected_cg_bytes = storage_cg_bytes(n)
cg(A, b) # warmup
@@ -53,8 +53,7 @@
@testset "CG-LANCZOS" begin
# CG-LANCZOS needs:
# 5 n-vectors: x, Mv, Mv_prev, p, Mv_next
- storage_cg_lanczos(n) = 5 * n
- storage_cg_lanczos_bytes(n) = nbits * storage_cg_lanczos(n)
+ storage_cg_lanczos_bytes(n) = nbits_FC * 5 * n
expected_cg_lanczos_bytes = storage_cg_lanczos_bytes(n)
cg_lanczos(A, b) # warmup
@@ -73,9 +72,7 @@
# - 2 (n*nshifts)-matrices: x, p
# - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms
# - 3 nshifts-bitVector: indefinite, converged, not_cv
- storage_cg_lanczos_shift(n, nshifts) = (3 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64)
- storage_cg_lanczos_shift_bytes(n, nshifts) = nbits * storage_cg_lanczos_shift(n, nshifts)
-
+ storage_cg_lanczos_shift_bytes(n, nshifts) = nbits_FC * ((3 * n) + (2 * n * nshifts)) + nbits_T * (5 * nshifts) + (3 * nshifts)
expected_cg_lanczos_shift_bytes = storage_cg_lanczos_shift_bytes(n, nshifts)
cg_lanczos_shift(A, b, shifts) # warmup
actual_cg_lanczos_shift_bytes = @allocated cg_lanczos_shift(A, b, shifts)
@@ -90,8 +87,7 @@
@testset "CR" begin
# CR needs:
# 5 n-vectors: x, r, p, q, Ar
- storage_cr(n) = 5 * n
- storage_cr_bytes(n) = nbits * storage_cr(n)
+ storage_cr_bytes(n) = nbits_FC * 5 * n
expected_cr_bytes = storage_cr_bytes(n)
cr(A, b) # warmup
@@ -107,8 +103,7 @@
@testset "MINRES" begin
# MINRES needs:
# 6 n-vectors: x, r1, r2, w1, w2, y
- storage_minres(n) = 6 * n
- storage_minres_bytes(n) = nbits * storage_minres(n)
+ storage_minres_bytes(n) = nbits_FC * 6 * n
expected_minres_bytes = storage_minres_bytes(n)
minres(A, b) # warmup
@@ -124,8 +119,7 @@
@testset "MINRES-QLP" begin
# MINRES-QLP needs:
# - 6 n-vectors: wₖ₋₁, wₖ, vₖ₋₁, vₖ, x, p
- storage_minres_qlp(n) = 6 * n
- storage_minres_qlp_bytes(n) = nbits * storage_minres_qlp(n)
+ storage_minres_qlp_bytes(n) = nbits_FC * 6 * n
expected_minres_qlp_bytes = storage_minres_qlp_bytes(n)
minres_qlp(A, b) # warmup
@@ -141,11 +135,11 @@
@testset "DIOM" begin
# DIOM needs:
# - 2 n-vectors: x, t
- # - 2 (n*mem)-matrices: P, V
- # - 1 mem-vector: L
- # - 1 (mem+2)-vector: H
- storage_diom(mem, n) = (2 * n) + (2 * n * mem) + (mem) + (mem + 2)
- storage_diom_bytes(mem, n) = nbits * storage_diom(mem, n)
+ # - 1 (n*mem)-matrix: V
+ # - 1 n*(mem-1)-matrix: P
+ # - 1 (mem-1)-vector: L
+ # - 1 mem-vector: H
+ storage_diom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (n * (mem-1)) + (mem-1) + (mem))
expected_diom_bytes = storage_diom_bytes(mem, n)
diom(A, b, memory=mem) # warmup
@@ -164,8 +158,7 @@
# - 1 (n*mem)-matrix: V
# - 2 mem-vectors: l, z
# - 1 (mem*(mem+1)/2)-vector: U
- storage_fom(mem, n) = (2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)
- storage_fom_bytes(mem, n) = nbits * storage_fom(mem, n)
+ storage_fom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2))
expected_fom_bytes = storage_fom_bytes(mem, n)
fom(A, b, memory=mem) # warmup
@@ -183,9 +176,8 @@
# - 2 n-vectors: x, t
# - 2 (n*mem)-matrices: P, V
# - 2 mem-vectors: c, s
- # - 1 (mem+2)-vector: H
- storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 2)
- storage_dqgmres_bytes(mem, n) = nbits * storage_dqgmres(mem, n)
+ # - 1 (mem+1)-vector: H
+ storage_dqgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + mem + (mem + 1)) + nbits_T * mem
expected_dqgmres_bytes = storage_dqgmres_bytes(mem, n)
dqgmres(A, b, memory=mem) # warmup
@@ -204,8 +196,7 @@
# - 1 n*(mem)-matrix: V
# - 3 mem-vectors: c, s, z
# - 1 (mem*(mem+1)/2)-vector: R
- storage_gmres(mem, n) = (2 * n) + (n * mem) + (3 * mem) + (mem * (mem+1) / 2)
- storage_gmres_bytes(mem, n) = nbits * storage_gmres(mem, n)
+ storage_gmres_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem
expected_gmres_bytes = storage_gmres_bytes(mem, n)
gmres(A, b, memory=mem) # warmup
@@ -218,11 +209,29 @@
@test inplace_gmres_bytes == 0
end
+ @testset "FGMRES" begin
+ # FGMRES needs:
+ # - 2 n-vectors: x, w
+ # - 2 n*(mem)-matrix: V, Z
+ # - 3 mem-vectors: c, s, z
+ # - 1 (mem*(mem+1)/2)-vector: R
+ storage_fgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem
+
+ expected_fgmres_bytes = storage_fgmres_bytes(mem, n)
+ fgmres(A, b, memory=mem) # warmup
+ actual_fgmres_bytes = @allocated fgmres(A, b, memory=mem)
+ @test expected_fgmres_bytes ≤ actual_fgmres_bytes ≤ 1.02 * expected_fgmres_bytes
+
+ solver = FgmresSolver(A, b, mem)
+ fgmres!(solver, A, b) # warmup
+ inplace_fgmres_bytes = @allocated fgmres!(solver, A, b)
+ @test inplace_fgmres_bytes == 0
+ end
+
@testset "CGS" begin
# CGS needs:
# 6 n-vectors: x, r, u, p, q, ts
- storage_cgs(n) = 6 * n
- storage_cgs_bytes(n) = nbits * storage_cgs(n)
+ storage_cgs_bytes(n) = nbits_FC * 6 * n
expected_cgs_bytes = storage_cgs_bytes(n)
cgs(A, b) # warmup
@@ -238,8 +247,7 @@
@testset "BICGSTAB" begin
# BICGSTAB needs:
# 6 n-vectors: x, r, p, v, s, qd
- storage_bicgstab(n) = 6 * n
- storage_bicgstab_bytes(n) = nbits * storage_bicgstab(n)
+ storage_bicgstab_bytes(n) = nbits_FC * 6 * n
expected_bicgstab_bytes = storage_bicgstab_bytes(n)
bicgstab(A, b) # warmup
@@ -254,12 +262,11 @@
@testset "CGNE" begin
# CGNE needs:
- # - 3 n-vectors: x, p, Aᵀz
+ # - 3 n-vectors: x, p, Aᴴz
# - 2 m-vectors: r, q
- storage_cgne(n, m) = 3 * n + 2 * m
- storage_cgne_bytes(n, m) = nbits * storage_cgne(n, m)
+ storage_cgne_bytes(m, n) = nbits_FC * (3 * n + 2 * m)
- expected_cgne_bytes = storage_cgne_bytes(n, m)
+ expected_cgne_bytes = storage_cgne_bytes(k, n)
(x, stats) = cgne(Au, c) # warmup
actual_cgne_bytes = @allocated cgne(Au, c)
@test expected_cgne_bytes ≤ actual_cgne_bytes ≤ 1.02 * expected_cgne_bytes
@@ -272,12 +279,11 @@
@testset "CRMR" begin
# CRMR needs:
- # - 3 n-vectors: x, p, Aᵀr
+ # - 3 n-vectors: x, p, Aᴴr
# - 2 m-vectors: r, q
- storage_crmr(n, m) = 3 * n + 2 * m
- storage_crmr_bytes(n, m) = nbits * storage_crmr(n, m)
+ storage_crmr_bytes(m, n) = nbits_FC * (3 * n + 2 * m)
- expected_crmr_bytes = storage_crmr_bytes(n, m)
+ expected_crmr_bytes = storage_crmr_bytes(k, n)
(x, stats) = crmr(Au, c) # warmup
actual_crmr_bytes = @allocated crmr(Au, c)
@test expected_crmr_bytes ≤ actual_crmr_bytes ≤ 1.02 * expected_crmr_bytes
@@ -290,12 +296,11 @@
@testset "LNLQ" begin
# LNLQ needs:
- # - 3 n-vectors: x, v, Aᵀu
+ # - 3 n-vectors: x, v, Aᴴu
# - 4 m-vectors: y, w̄, u, Av
- storage_lnlq(n, m) = 3 * n + 4 * m
- storage_lnlq_bytes(n, m) = nbits * storage_lnlq(n, m)
+ storage_lnlq_bytes(m, n) = nbits_FC * (3 * n + 4 * m)
- expected_lnlq_bytes = storage_lnlq_bytes(n, m)
+ expected_lnlq_bytes = storage_lnlq_bytes(k, n)
lnlq(Au, c) # warmup
actual_lnlq_bytes = @allocated lnlq(Au, c)
@test expected_lnlq_bytes ≤ actual_lnlq_bytes ≤ 1.02 * expected_lnlq_bytes
@@ -308,12 +313,11 @@
@testset "CRAIG" begin
# CRAIG needs:
- # - 3 n-vectors: x, v, Aᵀu
+ # - 3 n-vectors: x, v, Aᴴu
# - 4 m-vectors: y, w, u, Av
- storage_craig(n, m) = 3 * n + 4 * m
- storage_craig_bytes(n, m) = nbits * storage_craig(n, m)
+ storage_craig_bytes(m, n) = nbits_FC * (3 * n + 4 * m)
- expected_craig_bytes = storage_craig_bytes(n, m)
+ expected_craig_bytes = storage_craig_bytes(k, n)
craig(Au, c) # warmup
actual_craig_bytes = @allocated craig(Au, c)
@test expected_craig_bytes ≤ actual_craig_bytes ≤ 1.02 * expected_craig_bytes
@@ -326,12 +330,11 @@
@testset "CRAIGMR" begin
# CRAIGMR needs:
- # - 4 n-vectors: x, v, Aᵀu, d
+ # - 4 n-vectors: x, v, Aᴴu, d
# - 5 m-vectors: y, u, w, wbar, Av
- storage_craigmr(n, m) = 4 * n + 5 * m
- storage_craigmr_bytes(n, m) = nbits * storage_craigmr(n, m)
+ storage_craigmr_bytes(m, n) = nbits_FC * (4 * n + 5 * m)
- expected_craigmr_bytes = storage_craigmr_bytes(n, m)
+ expected_craigmr_bytes = storage_craigmr_bytes(k, n)
craigmr(Au, c) # warmup
actual_craigmr_bytes = @allocated craigmr(Au, c)
@test expected_craigmr_bytes ≤ actual_craigmr_bytes ≤ 1.02 * expected_craigmr_bytes
@@ -344,12 +347,11 @@
@testset "CGLS" begin
# CGLS needs:
- # - 3 m-vectors: x, p, s
- # - 2 n-vectors: r, q
- storage_cgls(n, m) = 3 * m + 2 * n
- storage_cgls_bytes(n, m) = nbits * storage_cgls(n, m)
+ # - 3 n-vectors: x, p, s
+ # - 2 m-vectors: r, q
+ storage_cgls_bytes(m, n) = nbits_FC * (3 * n + 2 * m)
- expected_cgls_bytes = storage_cgls_bytes(n, m)
+ expected_cgls_bytes = storage_cgls_bytes(m, k)
(x, stats) = cgls(Ao, b) # warmup
actual_cgls_bytes = @allocated cgls(Ao, b)
@test expected_cgls_bytes ≤ actual_cgls_bytes ≤ 1.02 * expected_cgls_bytes
@@ -362,12 +364,11 @@
@testset "LSLQ" begin
# LSLQ needs:
- # - 4 m-vectors: x_lq, v, Aᵀu, w̄ (= x_cg)
- # - 2 n-vectors: u, Av
- storage_lslq(n, m) = 4 * m + 2 * n
- storage_lslq_bytes(n, m) = nbits * storage_lslq(n, m)
+ # - 4 n-vectors: x_lq, v, Aᴴu, w̄ (= x_cg)
+ # - 2 m-vectors: u, Av
+ storage_lslq_bytes(m, n) = nbits_FC * (4 * n + 2 * m)
- expected_lslq_bytes = storage_lslq_bytes(n, m)
+ expected_lslq_bytes = storage_lslq_bytes(m, k)
(x, stats) = lslq(Ao, b) # warmup
actual_lslq_bytes = @allocated lslq(Ao, b)
@test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.02 * expected_lslq_bytes
@@ -380,12 +381,11 @@
@testset "CRLS" begin
# CRLS needs:
- # - 4 m-vectors: x, p, Ar, q
- # - 3 n-vectors: r, Ap, s
- storage_crls(n, m) = 4 * m + 3 * n
- storage_crls_bytes(n, m) = nbits * storage_crls(n, m)
+ # - 4 n-vectors: x, p, Ar, q
+ # - 3 m-vectors: r, Ap, s
+ storage_crls_bytes(m, n) = nbits_FC * (4 * n + 3 * m)
- expected_crls_bytes = storage_crls_bytes(n, m)
+ expected_crls_bytes = storage_crls_bytes(m, k)
(x, stats) = crls(Ao, b) # warmup
actual_crls_bytes = @allocated crls(Ao, b)
@test expected_crls_bytes ≤ actual_crls_bytes ≤ 1.02 * expected_crls_bytes
@@ -398,12 +398,11 @@
@testset "LSQR" begin
# LSQR needs:
- # - 4 m-vectors: x, v, w, Aᵀu
- # - 2 n-vectors: u, Av
- storage_lsqr(n, m) = 4 * m + 2 * n
- storage_lsqr_bytes(n, m) = nbits * storage_lsqr(n, m)
+ # - 4 n-vectors: x, v, w, Aᴴu
+ # - 2 m-vectors: u, Av
+ storage_lsqr_bytes(m, n) = nbits_FC * (4 * n + 2 * m)
- expected_lsqr_bytes = storage_lsqr_bytes(n, m)
+ expected_lsqr_bytes = storage_lsqr_bytes(m, k)
(x, stats) = lsqr(Ao, b) # warmup
actual_lsqr_bytes = @allocated lsqr(Ao, b)
@test expected_lsqr_bytes ≤ actual_lsqr_bytes ≤ 1.02 * expected_lsqr_bytes
@@ -416,12 +415,11 @@
@testset "LSMR" begin
# LSMR needs:
- # - 5 m-vectors: x, v, h, hbar, Aᵀu
- # - 2 n-vectors: u, Av
- storage_lsmr(n, m) = 5 * m + 2 * n
- storage_lsmr_bytes(n, m) = nbits * storage_lsmr(n, m)
+ # - 5 n-vectors: x, v, h, hbar, Aᴴu
+ # - 2 m-vectors: u, Av
+ storage_lsmr_bytes(m, n) = nbits_FC * (5 * n + 2 * m)
- expected_lsmr_bytes = storage_lsmr_bytes(n, m)
+ expected_lsmr_bytes = storage_lsmr_bytes(m, k)
(x, stats) = lsmr(Ao, b) # warmup
actual_lsmr_bytes = @allocated lsmr(Ao, b)
@test expected_lsmr_bytes ≤ actual_lsmr_bytes ≤ 1.02 * expected_lsmr_bytes
@@ -435,8 +433,7 @@
@testset "BiLQ" begin
# BILQ needs:
# - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q
- storage_bilq(n) = 8 * n
- storage_bilq_bytes(n) = nbits * storage_bilq(n)
+ storage_bilq_bytes(n) = nbits_FC * 8 * n
expected_bilq_bytes = storage_bilq_bytes(n)
bilq(A, b) # warmup
@@ -452,8 +449,7 @@
@testset "QMR" begin
# QMR needs:
# - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q
- storage_qmr(n) = 9 * n
- storage_qmr_bytes(n) = nbits * storage_qmr(n)
+ storage_qmr_bytes(n) = nbits_FC * 9 * n
expected_qmr_bytes = storage_qmr_bytes(n)
qmr(A, b) # warmup
@@ -469,8 +465,7 @@
@testset "BiLQR" begin
# BILQR needs:
# - 11 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ, p, q
- storage_bilqr(n) = 11 * n
- storage_bilqr_bytes(n) = nbits * storage_bilqr(n)
+ storage_bilqr_bytes(n) = nbits_FC * 11 * n
expected_bilqr_bytes = storage_bilqr_bytes(n)
bilqr(A, b, b) # warmup
@@ -487,10 +482,9 @@
# USYMLQ needs:
# - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p
# - 3 m-vectors: vₖ₋₁, vₖ, q
- storage_usymlq(n, m) = 5 * n + 3 * m
- storage_usymlq_bytes(n, m) = nbits * storage_usymlq(n, m)
+ storage_usymlq_bytes(m, n) = nbits_FC * (5 * n + 3 * m)
- expected_usymlq_bytes = storage_usymlq_bytes(n, m)
+ expected_usymlq_bytes = storage_usymlq_bytes(k, n)
usymlq(Au, c, b) # warmup
actual_usymlq_bytes = @allocated usymlq(Au, c, b)
@test expected_usymlq_bytes ≤ actual_usymlq_bytes ≤ 1.02 * expected_usymlq_bytes
@@ -503,12 +497,11 @@
@testset "USYMQR" begin
# USYMQR needs:
- # - 6 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p
- # - 3 n-vectors: uₖ₋₁, uₖ, q
- storage_usymqr(n, m) = 6 * m + 3 * n
- storage_usymqr_bytes(n, m) = nbits * storage_usymqr(n, m)
+ # - 6 n-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p
+ # - 3 m-vectors: uₖ₋₁, uₖ, q
+ storage_usymqr_bytes(m, n) = nbits_FC * (6 * n + 3 * m)
- expected_usymqr_bytes = storage_usymqr_bytes(n, m)
+ expected_usymqr_bytes = storage_usymqr_bytes(m, k)
(x, stats) = usymqr(Ao, b, c) # warmup
actual_usymqr_bytes = @allocated usymqr(Ao, b, c)
@test expected_usymqr_bytes ≤ actual_usymqr_bytes ≤ 1.02 * expected_usymqr_bytes
@@ -523,8 +516,7 @@
# TRILQR needs:
# - 6 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ, q
# - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p
- storage_trilqr(n, m) = 6 * m + 5 * n
- storage_trilqr_bytes(n, m) = nbits * storage_trilqr(n, m)
+ storage_trilqr_bytes(m, n) = nbits_FC * (6 * m + 5 * n)
expected_trilqr_bytes = storage_trilqr_bytes(n, n)
trilqr(A, b, b) # warmup
@@ -541,10 +533,9 @@
# TriCG needs:
# - 6 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ, p
# - 6 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ, q
- storage_tricg(n, m) = 6 * n + 6 * m
- storage_tricg_bytes(n, m) = nbits * storage_tricg(n, m)
+ storage_tricg_bytes(m, n) = nbits_FC * (6 * n + 6 * m)
- expected_tricg_bytes = storage_tricg_bytes(n, m)
+ expected_tricg_bytes = storage_tricg_bytes(k, n)
tricg(Au, c, b) # warmup
actual_tricg_bytes = @allocated tricg(Au, c, b)
@test expected_tricg_bytes ≤ actual_tricg_bytes ≤ 1.02 * expected_tricg_bytes
@@ -559,10 +550,9 @@
# TriMR needs:
# - 8 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, p
# - 8 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, q
- storage_trimr(n, m) = 8 * n + 8 * m
- storage_trimr_bytes(n, m) = nbits * storage_trimr(n, m)
+ storage_trimr_bytes(m, n) = nbits_FC * (8 * n + 8 * m)
- expected_trimr_bytes = storage_trimr_bytes(n, m)
+ expected_trimr_bytes = storage_trimr_bytes(k, n)
trimr(Au, c, b) # warmup
actual_trimr_bytes = @allocated trimr(Au, c, b)
@test expected_trimr_bytes ≤ actual_trimr_bytes ≤ 1.02 * expected_trimr_bytes
@@ -575,17 +565,16 @@
@testset "GPMR" begin
# GPMR needs:
- # - 2 n-vectors: x, q
- # - 2 m-vectors: y, p
- # - 1 (n*mem)-matrix: V
- # - 1 (m*mem)-matrix: U
+ # - 2 m-vectors: x, q
+ # - 2 n-vectors: y, p
+ # - 1 (m*mem)-matrix: V
+ # - 1 (n*mem)-matrix: U
# - 1 (2*mem)-vector: zt
    # - 1 (4*mem)-vector: gs
    # - 1 (4*mem)-vector: gc, whose entries are real (hence the nbits_T term below)
# - 1 (mem*(2mem+1))-vector: R
- storage_gpmr(mem, n, m) = (mem + 2) * (n + m) + mem * (2 * mem + 11)
- storage_gpmr_bytes(mem, n, m) = nbits * storage_gpmr(mem, n, m)
+ storage_gpmr_bytes(mem, m, n) = nbits_FC * ((mem + 2) * (n + m) + mem * (2 * mem + 7)) + nbits_T * 4 * mem
- expected_gpmr_bytes = storage_gpmr_bytes(mem, n, m)
+ expected_gpmr_bytes = storage_gpmr_bytes(mem, m, k)
gpmr(Ao, Au, b, c, memory=mem, itmax=mem) # warmup
actual_gpmr_bytes = @allocated gpmr(Ao, Au, b, c, memory=mem, itmax=mem)
@test expected_gpmr_bytes ≤ actual_gpmr_bytes ≤ 1.02 * expected_gpmr_bytes
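
All the allocation checks above follow one pattern: derive a lower bound on workspace bytes from the documented vector counts, run the solver once so compilation is excluded, then measure with @allocated and allow 2% slack. A minimal sketch of that pattern with CG, whose four allocated n-vectors (x, r, p, Ap) appear in the solver table further down; the SPD test matrix here is hypothetical:

    using Krylov, LinearAlgebra

    FC = Float64
    n = 2_000
    A = Matrix(2.0I, n, n); A[1, 2] = A[2, 1] = -1.0  # simple SPD test matrix
    b = ones(FC, n)

    # CG keeps 4 n-vectors (x, r, p, Ap), hence this lower bound:
    expected_bytes = 4 * n * sizeof(FC)

    cg(A, b)                            # warmup: exclude compilation
    actual_bytes = @allocated cg(A, b)

    # The suite additionally enforces actual_bytes ≤ 1.02 * expected_bytes.
    @assert expected_bytes ≤ actual_bytes
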
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 11bdb7c2d..6c43142c0 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -1,119 +1,203 @@
@testset "aux" begin
- # test Givens reflector corner cases
- (c, s, ρ) = Krylov.sym_givens(0.0, 0.0)
- @test (c == 1.0) && (s == 0.0) && (ρ == 0.0)
-
- a = 3.14
- (c, s, ρ) = Krylov.sym_givens(a, 0.0)
- @test (c == 1.0) && (s == 0.0) && (ρ == a)
- (c, s, ρ) = Krylov.sym_givens(-a, 0.0)
- @test (c == -1.0) && (s == 0.0) && (ρ == a)
-
- b = 3.14
- (c, s, ρ) = Krylov.sym_givens(0.0, b)
- @test (c == 0.0) && (s == 1.0) && (ρ == b)
- (c, s, ρ) = Krylov.sym_givens(0.0, -b)
- @test (c == 0.0) && (s == -1.0) && (ρ == b)
-
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0))
-
- a = Complex(1.0, 1.0)
- (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a)
- (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a)
-
- b = Complex(1.0, 1.0)
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b)
- @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b)
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b)
- @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b)
-
- # test roots of a quadratic
- roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
- @test length(roots) == 1
- @test roots[1] == 0.0
-
- roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
- @test length(roots) == 0
-
- roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
- @test length(roots) == 1
- @test roots[1] == 1.0 / 3.14
-
- roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
- @test length(roots) == 0
-
- roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
- @test length(roots) == 2
- @test roots[1] == 0.0
- @test roots[2] == 0.0
-
- roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
- @test length(roots) == 2
- @test roots[1] ≈ -2.0
- @test roots[2] ≈ -1.0
-
- roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
- @test length(roots) == 0
-
- # ill-conditioned quadratic
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
- @test length(roots) == 2
- @test roots[1] == 1.0e+13
- @test roots[2] == 0.0
-
- # iterative refinement is crucial!
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
- @test length(roots) == 2
- @test roots[1] == 1.0e+13
- @test roots[2] == -1.0e-05
-
- # not ill-conditioned quadratic
- roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
- @test length(roots) == 2
- @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
- @test isapprox(roots[2], -1.0, rtol=1.0e-6)
-
- roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
- @test length(roots) == 2
- @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
- @test isapprox(roots[2], -1.0, rtol=1.0e-6)
-
- # test trust-region boundary
- x = ones(5)
- d = ones(5); d[1:2:5] .= -1
- @test_throws ErrorException Krylov.to_boundary(x, d, -1.0)
- @test_throws ErrorException Krylov.to_boundary(x, d, 0.5)
- @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0)
- @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178
- @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782
- @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782
- @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178
-
- # test kzeros and kones
- @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10)
- @test Krylov.kones(Vector{Float64}, 10) == ones(10)
-
- # test ktypeof
- a = rand(Float32, 10)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float32}
- @test Krylov.ktypeof(b) == Vector{Float32}
-
- a = rand(Float64, 10)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float64}
- @test Krylov.ktypeof(b) == Vector{Float64}
-
- a = sprand(Float32, 10, 0.5)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float32}
- @test Krylov.ktypeof(b) == Vector{Float32}
-
- a = sprand(Float64, 10, 0.5)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float64}
- @test Krylov.ktypeof(b) == Vector{Float64}
+
+ @testset "sym_givens" begin
+ # test Givens reflector corner cases
+ (c, s, ρ) = Krylov.sym_givens(0.0, 0.0)
+ @test (c == 1.0) && (s == 0.0) && (ρ == 0.0)
+
+ a = 3.14
+ (c, s, ρ) = Krylov.sym_givens(a, 0.0)
+ @test (c == 1.0) && (s == 0.0) && (ρ == a)
+ (c, s, ρ) = Krylov.sym_givens(-a, 0.0)
+ @test (c == -1.0) && (s == 0.0) && (ρ == a)
+
+ b = 3.14
+ (c, s, ρ) = Krylov.sym_givens(0.0, b)
+ @test (c == 0.0) && (s == 1.0) && (ρ == b)
+ (c, s, ρ) = Krylov.sym_givens(0.0, -b)
+ @test (c == 0.0) && (s == -1.0) && (ρ == b)
+
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0))
+
+ a = Complex(1.0, 1.0)
+ (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a)
+ (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a)
+
+ b = Complex(1.0, 1.0)
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b)
+ @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b)
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b)
+ @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b)
+ end
+
+ @testset "roots_quadratic" begin
+ # test roots of a quadratic
+ roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test roots[1] == 0.0
+ @test roots[2] == 0.0
+
+ @test_throws ErrorException Krylov.roots_quadratic(0.0, 0.0, 1.0)
+
+ roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test roots[1] == 1.0 / 3.14
+ @test roots[2] == 1.0 / 3.14
+
+ @test_throws ErrorException Krylov.roots_quadratic(1.0, 0.0, 1.0)
+
+ roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test roots[1] == 0.0
+ @test roots[2] == 0.0
+
+ roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test roots[1] ≈ -2.0
+ @test roots[2] ≈ -1.0
+
+ @test_throws ErrorException Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
+
+ # ill-conditioned quadratic
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test roots[1] == 1.0e+13
+ @test roots[2] == 0.0
+
+ # iterative refinement is crucial!
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test roots[1] == 1.0e+13
+ @test roots[2] == -1.0e-05
+
+ # not ill-conditioned quadratic
+ roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
+ @test isapprox(roots[2], -1.0, rtol=1.0e-6)
+
+ roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
+ @test isapprox(roots[2], -1.0, rtol=1.0e-6)
+
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test allocations == 0
+ end
+
+ @testset "to_boundary" begin
+ # test trust-region boundary
+ n = 5
+ x = ones(n)
+ d = ones(n); d[1:2:n] .= -1
+ @test_throws ErrorException Krylov.to_boundary(n, x, d, -1.0)
+ @test_throws ErrorException Krylov.to_boundary(n, x, d, 0.5)
+ @test_throws ErrorException Krylov.to_boundary(n, x, zeros(n), 1.0)
+ @test maximum(Krylov.to_boundary(n, x, d, 5.0)) ≈ 2.209975124224178
+ @test minimum(Krylov.to_boundary(n, x, d, 5.0)) ≈ -1.8099751242241782
+ @test maximum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ 1.8099751242241782
+ @test minimum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ -2.209975124224178
+ end
+
+ @testset "kzeros" begin
+ # test kzeros
+ @test Krylov.kzeros(Vector{Float64}, 10) == zeros(Float64, 10)
+ @test Krylov.kzeros(Vector{ComplexF32}, 10) == zeros(ComplexF32, 10)
+ end
+
+ @testset "kones" begin
+ # test kones
+ @test Krylov.kones(Vector{Float64}, 10) == ones(Float64, 10)
+ @test Krylov.kones(Vector{ComplexF32}, 10) == ones(ComplexF32, 10)
+ end
+
+ @testset "ktypeof" begin
+ # test ktypeof
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ dv = rand(FC, 10)
+ b = view(dv, 4:8)
+ @test Krylov.ktypeof(dv) == Vector{FC}
+ @test Krylov.ktypeof(b) == Vector{FC}
+
+ dm = rand(FC, 10, 10)
+ b = view(dm, :, 3)
+ @test Krylov.ktypeof(b) == Vector{FC}
+
+ sv = sprand(FC, 10, 0.5)
+ b = view(sv, 4:8)
+ @test Krylov.ktypeof(sv) == Vector{FC}
+ @test Krylov.ktypeof(b) == Vector{FC}
+ end
+ end
+
+ @testset "vector_to_matrix" begin
+ # test vector_to_matrix
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = Vector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == Matrix{FC}
+ end
+ end
+
+ @testset "matrix_to_vector" begin
+ # test matrix_to_vector
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ M = Matrix{FC}
+ S = Krylov.matrix_to_vector(M)
+ @test S == Vector{FC}
+ end
+ end
+
+ @testset "macros" begin
+ # test macros
+ for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64)
+ n = 10
+ x = rand(FC, n)
+ y = rand(FC, n)
+ a = rand(FC)
+ b = rand(FC)
+ c = rand(FC)
+ s = rand(FC)
+
+ T = real(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+
+ Krylov.@kdot(n, x, y)
+
+ Krylov.@kdotr(n, x, y)
+
+ Krylov.@knrm2(n, x)
+
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+
+ Krylov.@kcopy!(n, x, y)
+
+ Krylov.@kswap(x, y)
+
+ Krylov.@kref!(n, x, y, c, s)
+ end
+ end
end
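
The sym_givens corner cases above pin down a reflector identity: sym_givens(a, b) returns (c, s, ρ) with c real such that [c s; conj(s) -c] maps (a, b) to (ρ, 0). A sanity check consistent with those cases (the identity is inferred from the tests, not quoted from a docstring):

    using Krylov

    a, b = 3.0, 4.0
    (c, s, ρ) = Krylov.sym_givens(a, b)
    @assert c * a + s * b ≈ ρ                               # first row yields ρ
    @assert isapprox(conj(s) * a - c * b, 0, atol = 1e-14)  # second row annihilates b
    @assert ρ ≈ 5.0                                         # hypot(3, 4)
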
diff --git a/test/test_bicgstab.jl b/test/test_bicgstab.jl
index ce4e6dcd4..6817acf3d 100644
--- a/test/test_bicgstab.jl
+++ b/test/test_bicgstab.jl
@@ -82,10 +82,10 @@
@test(resid ≤ bicgstab_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = bicgstab(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
solver = BicgstabSolver(A, b)
diff --git a/test/test_bilq.jl b/test/test_bilq.jl
index 900d1f6e5..40b9872db 100644
--- a/test/test_bilq.jl
+++ b/test/test_bilq.jl
@@ -66,10 +66,10 @@
@test(resid ≤ bilq_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = bilq(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
diff --git a/test/test_bilqr.jl b/test/test_bilqr.jl
index 6dab06ec7..fd46aade4 100644
--- a/test/test_bilqr.jl
+++ b/test/test_bilqr.jl
@@ -46,10 +46,10 @@
@test(resid_dual ≤ bilqr_tol)
@test(stats.solved_dual)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, t, stats) = bilqr(A, b, c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
A, b, c = adjoint_pde(FC=FC)
diff --git a/test/test_cgne.jl b/test/test_cgne.jl
index 64cbc0ea7..c1a3e798b 100644
--- a/test/test_cgne.jl
+++ b/test/test_cgne.jl
@@ -1,6 +1,6 @@
-function test_cgne(A, b; λ=0.0, M=I)
+function test_cgne(A, b; λ=0.0, N=I, history=false)
(nrow, ncol) = size(A)
- (x, stats) = cgne(A, b, λ=λ, M=M)
+ (x, stats) = cgne(A, b, λ=λ, N=N, history=history)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
@@ -69,8 +69,8 @@ end
@test stats.status == "x = 0 is a zero-residual solution"
# Test with Jacobi (or diagonal) preconditioner
- A, b, M = square_preconditioned(FC=FC)
- (x, stats, resid) = test_cgne(A, b, M=M)
+ A, b, N = square_preconditioned(FC=FC)
+ (x, stats, resid) = test_cgne(A, b, N=N)
@test(resid ≤ cgne_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -81,8 +81,8 @@ end
A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0;
2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0]
b = [1.0; 0.0]
- M = Diagonal(1 ./ (A * A'))
- (x, stats, resid) = test_cgne(A, b, M=M)
+ N = Diagonal(1 ./ (A * A'))
+ (x, stats, resid) = test_cgne(A, b, N=N)
@test(resid ≤ cgne_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -92,7 +92,7 @@ end
for transpose ∈ (false, true)
A, b, c, D = small_sp(transpose, FC=FC)
D⁻¹ = inv(D)
- (x, stats) = cgne(A, b, M=D⁻¹, λ=1.0)
+ (x, stats) = cgne(A, b, N=D⁻¹, λ=1.0)
end
# test callback function
diff --git a/test/test_cgs.jl b/test/test_cgs.jl
index 5c505bb70..832cd76c3 100644
--- a/test/test_cgs.jl
+++ b/test/test_cgs.jl
@@ -74,10 +74,10 @@
@test(resid ≤ cgs_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = cgs(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
A, b = sparse_laplacian(FC=FC)
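
The bᴴc = 0 breakdown exercised here (and in the BiCGSTAB, BiLQ, BiLQR and QMR tests) reflects the two-sided process underneath: the initial shadow inner product seeds the recurrences, so it must be nonzero. A sketch with a hypothetical 2-by-2 system whose b and c are deliberately ᴴ-orthogonal:

    using Krylov

    A = [2.0+0.0im 1.0+0.0im;
         0.0+0.0im 3.0+0.0im]
    b = [1.0 + im, 0.0 + 0.0im]
    c = [0.0 + 0.0im, 1.0 - im]
    @assert b' * c == 0   # bᴴc = 0: the biorthogonalization cannot start

    (x, stats) = cgs(A, b, c = c)
    @assert stats.status == "Breakdown bᴴc = 0"
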
diff --git a/test/test_crmr.jl b/test/test_crmr.jl
index 6354f329f..d0f902df6 100644
--- a/test/test_crmr.jl
+++ b/test/test_crmr.jl
@@ -1,6 +1,6 @@
-function test_crmr(A, b; λ=0.0, M=I, history=false)
+function test_crmr(A, b; λ=0.0, N=I, history=false)
(nrow, ncol) = size(A)
- (x, stats) = crmr(A, b, λ=λ, M=M, history=history)
+ (x, stats) = crmr(A, b, λ=λ, N=N, history=history)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
@@ -76,8 +76,8 @@ end
A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0;
2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0]
b = [1.0; 0.0]
- M = Diagonal(1 ./ (A * A'))
- (x, stats, resid) = test_crmr(A, b, M=M)
+ N = Diagonal(1 ./ (A * A'))
+ (x, stats, resid) = test_crmr(A, b, N=N)
@test(resid ≤ crmr_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -87,7 +87,7 @@ end
for transpose ∈ (false, true)
A, b, c, D = small_sp(transpose, FC=FC)
D⁻¹ = inv(D)
- (x, stats) = crmr(A, b, M=D⁻¹, λ=1.0)
+ (x, stats) = crmr(A, b, N=D⁻¹, λ=1.0)
end
# test callback function
diff --git a/test/test_diom.jl b/test/test_diom.jl
index 4f1a8ecea..62a38b198 100644
--- a/test/test_diom.jl
+++ b/test/test_diom.jl
@@ -60,7 +60,7 @@
# Poisson equation in polar coordinates.
A, b = polar_poisson(FC=FC)
- (x, stats) = diom(A, b, memory=200)
+ (x, stats) = diom(A, b, memory=150)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ diom_tol)
diff --git a/test/test_extensions.jl b/test/test_extensions.jl
new file mode 100644
index 000000000..81bbe12ca
--- /dev/null
+++ b/test/test_extensions.jl
@@ -0,0 +1,56 @@
+using ComponentArrays
+using FillArrays
+using StaticArrays
+
+@testset "extensions" begin
+ @testset "ComponentArrays" begin
+ n = 5
+ for T in (Float32, Float64)
+ A = rand(T, n, n)
+
+ b = ComponentVector(; b1=rand(T, n - 1), b2=rand(T))
+ @test Krylov.ktypeof(b) == Vector{T}
+ x, stats = gmres(A, b)
+ @test stats.solved
+ end
+ end
+
+ @testset "FillArrays" begin
+ n = 5
+ for T in (Float32, Float64)
+ A = rand(T, n, n)
+
+ b = Ones(T, n)
+ @test Krylov.ktypeof(b) == Vector{T}
+ x, stats = gmres(A, b)
+ @test stats.solved
+
+ b = Zeros(T, n)
+ @test Krylov.ktypeof(b) == Vector{T}
+ x, stats = gmres(A, b)
+ @test stats.solved
+ end
+ end
+
+ @testset "StaticArrays" begin
+ n = 5
+ for T in (Float32, Float64)
+ A = rand(T, n, n)
+
+ b = SVector{n}(rand(T, n))
+ @test Krylov.ktypeof(b) == Vector{T}
+ x, stats = gmres(A, b)
+ @test stats.solved
+
+ b = MVector{n}(rand(T, n))
+ @test Krylov.ktypeof(b) == Vector{T}
+ x, stats = gmres(A, b)
+ @test stats.solved
+
+ b = SizedVector{n}(rand(T, n))
+ @test Krylov.ktypeof(b) == Vector{T}
+ x, stats = gmres(A, b)
+ @test stats.solved
+ end
+ end
+end
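
All three extension testsets reduce to the same contract: Krylov.ktypeof must map the right-hand-side container to the dense vector type used for workspaces. A minimal illustration with StaticArrays, mirroring the assertions above:

    using Krylov, StaticArrays

    T = Float64
    n = 5
    b = SVector{n}(rand(T, n))

    S = Krylov.ktypeof(b)     # the storage type the solver will allocate
    @assert S == Vector{T}

    x = Krylov.kzeros(S, n)   # workspaces are then built from that type
    @assert x == zeros(T, n)
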
diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl
new file mode 100644
index 000000000..9bb73d3e4
--- /dev/null
+++ b/test/test_fgmres.jl
@@ -0,0 +1,154 @@
+import LinearAlgebra.mul!
+
+mutable struct FlexiblePreconditioner{T,S}
+ D::Diagonal{T, S}
+ ω::T
+end
+
+function mul!(y::Vector, P::FlexiblePreconditioner, x::Vector)
+  P.ω = -P.ω       # flip the sign at every application
+  mul!(y, P.D, x)
+  y .*= P.ω        # the operator thus changes between iterations, which FGMRES allows
+  return y
+end
+
+@testset "fgmres" begin
+ fgmres_tol = 1.0e-6
+
+ for FC in (Float64, ComplexF64)
+ @testset "Data Type: $FC" begin
+
+ # Symmetric and positive definite system.
+ A, b = symmetric_definite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Symmetric indefinite variant.
+ A, b = symmetric_indefinite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Nonsymmetric and positive definite systems.
+ A, b = nonsymmetric_definite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Nonsymmetric indefinite variant.
+ A, b = nonsymmetric_indefinite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Symmetric indefinite variant, almost singular.
+ A, b = almost_singular(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ 100 * fgmres_tol)
+ @test(stats.solved)
+
+ # Singular system.
+ A, b = square_inconsistent(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ Aresid = norm(A' * r) / norm(A' * b)
+ @test(Aresid ≤ fgmres_tol)
+ @test(stats.inconsistent)
+
+ # Test b == 0
+ A, b = zero_rhs(FC=FC)
+ (x, stats) = fgmres(A, b)
+ @test norm(x) == 0
+ @test stats.status == "x = 0 is a zero-residual solution"
+
+ # Poisson equation in polar coordinates.
+ A, b = polar_poisson(FC=FC)
+ (x, stats) = fgmres(A, b, reorthogonalization=true)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Left preconditioning
+ A, b, M = square_preconditioned(FC=FC)
+ (x, stats) = fgmres(A, b, M=M)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Right preconditioning
+ A, b, N = square_preconditioned(FC=FC)
+ (x, stats) = fgmres(A, b, N=N)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Split preconditioning
+ A, b, M, N = two_preconditioners(FC=FC)
+ (x, stats) = fgmres(A, b, M=M, N=N)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Restart
+ for restart ∈ (false, true)
+ memory = 10
+
+ A, b = sparse_laplacian(FC=FC)
+ (x, stats) = fgmres(A, b, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ M = Diagonal(1 ./ diag(A))
+ (x, stats) = fgmres(A, b, M=M, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ N = Diagonal(1 ./ diag(A))
+ (x, stats) = fgmres(A, b, N=N, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+      N = Diagonal(1 ./ sqrt.(diag(A)))
+ (x, stats) = fgmres(A, b, M=M, N=N, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+ end
+
+ A, b = polar_poisson(FC=FC)
+ J = inv(Diagonal(A)) # Jacobi preconditioner
+ N = FlexiblePreconditioner(J, 1.0)
+ (x, stats) = fgmres(A, b, N=N)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+ end
+ end
+end
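
The FlexiblePreconditioner defined at the top of this file is the point of the test: its action changes at every application (the sign of ω flips), which breaks the fixed-operator assumption of right-preconditioned GMRES but is exactly what FGMRES tolerates. A condensed sketch of the final test, with a hypothetical tridiagonal SPD matrix in place of polar_poisson:

    using Krylov, LinearAlgebra

    n = 100
    A = diagm(0 => 2.0 * ones(n), 1 => -ones(n - 1), -1 => -ones(n - 1))
    b = ones(n)

    J = inv(Diagonal(A))                # Jacobi preconditioner
    N = FlexiblePreconditioner(J, 1.0)  # sign flips at each mul!
    (x, stats) = fgmres(A, b, N = N)
    @assert norm(b - A * x) / norm(b) ≤ 1.0e-6
    @assert stats.solved
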
diff --git a/test/test_fom.jl b/test/test_fom.jl
index 9469b6b9c..0500d139f 100644
--- a/test/test_fom.jl
+++ b/test/test_fom.jl
@@ -126,13 +126,6 @@
end
# test callback function
- solver = FomSolver(A, b)
- tol = 1.0e-1
- cb_n2 = TestCallbackN2(A, b, tol = tol)
- fom!(solver, A, b, restart = true, callback = cb_n2)
- @test solver.stats.status == "user-requested exit"
- @test cb_n2(solver)
-
@test_throws TypeError fom(A, b, restart = true, callback = solver -> "string", history = true)
end
end
diff --git a/test/test_lnlq.jl b/test/test_lnlq.jl
index 888119db8..b308609fa 100644
--- a/test/test_lnlq.jl
+++ b/test/test_lnlq.jl
@@ -1,5 +1,5 @@
 function test_lnlq(A, b, transfer_to_craig)
- (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0)
+ (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0)
r = b - A * x
resid = norm(r) / norm(b)
return (x, y, stats, resid)
@@ -61,8 +61,8 @@ end
# Test regularization
A, b, λ = regularization(FC=FC)
- (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0)
- (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, etolx=1e-10, etoly=1e-10, λ=λ)
+ (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0)
+ (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, utolx=1e-10, utoly=1e-10, λ=λ)
for (x, y) in ((x, y), (xₛ, yₛ))
s = λ * y
r = b - (A * x + λ * s)
diff --git a/test/test_minres_qlp.jl b/test/test_minres_qlp.jl
index 6e983e49a..0b4d2046d 100644
--- a/test/test_minres_qlp.jl
+++ b/test/test_minres_qlp.jl
@@ -80,7 +80,7 @@
solver = MinresQlpSolver(A, b)
tol = 1.0
cb_n2 = TestCallbackN2(A, b, tol = tol)
- minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, ctol = 0.0, callback = cb_n2)
+ minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, Artol = 0.0, callback = cb_n2)
@test solver.stats.status == "user-requested exit"
@test cb_n2(solver)
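
The callback contract exercised here is simple: the callback receives the workspace at each iteration, and returning true forces the status "user-requested exit". A self-contained sketch with a plain counter instead of the suite's TestCallbackN2 helper:

    using Krylov, LinearAlgebra

    n = 50
    B = rand(n, n)
    A = B' * B + I        # Hermitian positive definite
    b = rand(n)

    ncalls = Ref(0)
    stop_after_three = solver -> (ncalls[] += 1; ncalls[] ≥ 3)

    (x, stats) = minres_qlp(A, b, callback = stop_after_three)
    @assert stats.status == "user-requested exit"
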
diff --git a/test/test_mp.jl b/test/test_mp.jl
index b7aa43d38..96300bea6 100644
--- a/test/test_mp.jl
+++ b/test/test_mp.jl
@@ -3,55 +3,57 @@
for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr,
:lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres,
:bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom,
- :cg_lanczos_shift)
- for T in (Float16, Float32, Float64, BigFloat)
- for FC in (T, Complex{T})
- A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1))
- B = spdiagm(-1 => -ones(FC,n-1), 0 => 5*ones(FC,n), 1 => -ones(FC,n-1))
- b = ones(FC, n)
- c = -ones(FC, n)
- shifts = [-one(T), one(T)]
- if fn in (:usymlq, :usymqr)
- x, _ = @eval $fn($A, $b, $c)
- elseif fn in (:trilqr, :bilqr)
- x, t, _ = @eval $fn($A, $b, $c)
- elseif fn in (:tricg, :trimr)
- x, y, _ = @eval $fn($A, $b, $c)
- elseif fn == :gpmr
- x, y, _ = @eval $fn($A, $B, $b, $c)
- elseif fn in (:lnlq, :craig, :craigmr)
- x, y, _ = @eval $fn($A, $b)
- elseif fn == :cg_lanczos_shift
- x, _ = @eval $fn($A, $b, $shifts)
- else
- x, _ = @eval $fn($A, $b)
- end
- atol = √eps(T)
- rtol = √eps(T)
- Κ = (T == Float16 ? 10 : 1)
- if fn in (:tricg, :trimr)
- @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol)
- @test norm(A' * x - y - c) ≤ Κ * (atol + norm([b; c]) * rtol)
- @test eltype(y) == FC
- elseif fn == :gpmr
- @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol)
- @test norm(B * x + y - c) ≤ Κ * (atol + norm([b; c]) * rtol)
- @test eltype(y) == FC
- elseif fn == :cg_lanczos_shift
- @test norm((A - I) * x[1] - b) ≤ Κ * (atol + norm(b) * rtol)
- @test norm((A + I) * x[2] - b) ≤ Κ * (atol + norm(b) * rtol)
- @test eltype(x) == Vector{FC}
- else
- @test norm(A * x - b) ≤ Κ * (atol + norm(b) * rtol)
- @test eltype(x) == FC
- end
- if fn in (:trilqr, :bilqr)
- @test norm(A' * t - c) ≤ Κ * (atol + norm(c) * rtol)
- @test eltype(t) == FC
- end
- if fn in (:lnlq, :craig, :craigmr)
- @test norm(A * A' * y - b) ≤ Κ * (atol + norm(b) * rtol)
- @test eltype(y) == FC
+ :fgmres, :cg_lanczos_shift)
+ @testset "$fn" begin
+ for T in (Float16, Float32, Float64, BigFloat)
+ for FC in (T, Complex{T})
+ A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1))
+ B = spdiagm(-1 => -ones(FC,n-1), 0 => 5*ones(FC,n), 1 => -ones(FC,n-1))
+ b = ones(FC, n)
+ c = -ones(FC, n)
+ shifts = [-one(T), one(T)]
+ if fn in (:usymlq, :usymqr)
+ x, _ = @eval $fn($A, $b, $c)
+ elseif fn in (:trilqr, :bilqr)
+ x, t, _ = @eval $fn($A, $b, $c)
+ elseif fn in (:tricg, :trimr)
+ x, y, _ = @eval $fn($A, $b, $c)
+ elseif fn == :gpmr
+ x, y, _ = @eval $fn($A, $B, $b, $c)
+ elseif fn in (:lnlq, :craig, :craigmr)
+ x, y, _ = @eval $fn($A, $b)
+ elseif fn == :cg_lanczos_shift
+ x, _ = @eval $fn($A, $b, $shifts)
+ else
+ x, _ = @eval $fn($A, $b)
+ end
+ atol = √eps(T)
+ rtol = √eps(T)
+ Κ = (T == Float16 ? 10 : 1)
+ if fn in (:tricg, :trimr)
+ @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol)
+ @test norm(A' * x - y - c) ≤ Κ * (atol + norm([b; c]) * rtol)
+ @test eltype(y) == FC
+ elseif fn == :gpmr
+ @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol)
+ @test norm(B * x + y - c) ≤ Κ * (atol + norm([b; c]) * rtol)
+ @test eltype(y) == FC
+ elseif fn == :cg_lanczos_shift
+ @test norm((A - I) * x[1] - b) ≤ Κ * (atol + norm(b) * rtol)
+ @test norm((A + I) * x[2] - b) ≤ Κ * (atol + norm(b) * rtol)
+ @test eltype(x) == Vector{FC}
+ else
+ @test norm(A * x - b) ≤ Κ * (atol + norm(b) * rtol)
+ @test eltype(x) == FC
+ end
+ if fn in (:trilqr, :bilqr)
+ @test norm(A' * t - c) ≤ Κ * (atol + norm(c) * rtol)
+ @test eltype(t) == FC
+ end
+ if fn in (:lnlq, :craig, :craigmr)
+ @test norm(A * A' * y - b) ≤ Κ * (atol + norm(b) * rtol)
+ @test eltype(y) == FC
+ end
end
end
end
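
The tolerance in this loop scales with the working precision, √eps(T), relaxed by Κ = 10 in half precision where rounding noise dominates. For reference, the resulting thresholds (computed, not quoted from the suite):

    for T in (Float16, Float32, Float64, BigFloat)
        Κ = (T == Float16 ? 10 : 1)
        println(T, ": ", Κ * sqrt(eps(T)))
    end
    # Float16:  ≈ 3.1e-1  (10 * √eps)
    # Float32:  ≈ 3.5e-4
    # Float64:  ≈ 1.5e-8
    # BigFloat: ≈ 4.2e-39 (default 256-bit precision)
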
diff --git a/test/test_processes.jl b/test/test_processes.jl
new file mode 100644
index 000000000..eb3ad19af
--- /dev/null
+++ b/test/test_processes.jl
@@ -0,0 +1,146 @@
+"""
+ P = permutation_paige(k)
+
+Return the sparse (2k) × (2k) matrix
+
+ [e₁ • eₖ ]
+ [ e₁ • eₖ]
+"""
+function permutation_paige(k)
+ P = spzeros(Float64, 2k, 2k)
+ for i = 1:k
+ P[i,2i-1] = 1.0
+ P[i+k,2i] = 1.0
+ end
+ return P
+end
+
+@testset "processes" begin
+ m = 250
+ n = 500
+ k = 20
+
+ for FC in (Float64, ComplexF64)
+ R = real(FC)
+ nbits_FC = sizeof(FC)
+ nbits_R = sizeof(R)
+ nbits_I = sizeof(Int)
+
+ @testset "Data Type: $FC" begin
+
+ @testset "Hermitian Lanczos" begin
+ A, b = symmetric_indefinite(n, FC=FC)
+ V, T = hermitian_lanczos(A, b, k)
+
+ @test A * V[:,1:k] ≈ V * T
+
+ storage_hermitian_lanczos_bytes(n, k) = 4k * nbits_I + (3k-1) * nbits_R + n*(k+1) * nbits_FC
+
+ expected_hermitian_lanczos_bytes = storage_hermitian_lanczos_bytes(n, k)
+ actual_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, b, k)
+ @test expected_hermitian_lanczos_bytes ≤ actual_hermitian_lanczos_bytes ≤ 1.02 * expected_hermitian_lanczos_bytes
+ end
+
+ @testset "Non-Hermitian Lanczos" begin
+ A, b = nonsymmetric_definite(n, FC=FC)
+ c = -b
+ V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
+
+ @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]'
+ @test A * V[:,1:k] ≈ V * T
+ @test A' * U[:,1:k] ≈ U * Tᴴ
+
+ storage_nonhermitian_lanczos_bytes(n, k) = 4k * nbits_I + (6k-2) * nbits_FC + 2*n*(k+1) * nbits_FC
+
+ expected_nonhermitian_lanczos_bytes = storage_nonhermitian_lanczos_bytes(n, k)
+ actual_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, b, c, k)
+ @test expected_nonhermitian_lanczos_bytes ≤ actual_nonhermitian_lanczos_bytes ≤ 1.02 * expected_nonhermitian_lanczos_bytes
+ end
+
+ @testset "Arnoldi" begin
+ A, b = nonsymmetric_indefinite(n, FC=FC)
+ V, H = arnoldi(A, b, k)
+
+ @test A * V[:,1:k] ≈ V * H
+
+ function storage_arnoldi_bytes(n, k)
+ return k*(k+1) * nbits_FC + n*(k+1) * nbits_FC
+ end
+
+ expected_arnoldi_bytes = storage_arnoldi_bytes(n, k)
+ actual_arnoldi_bytes = @allocated arnoldi(A, b, k)
+ @test expected_arnoldi_bytes ≤ actual_arnoldi_bytes ≤ 1.02 * expected_arnoldi_bytes
+ end
+
+ @testset "Golub-Kahan" begin
+ A, b = under_consistent(m, n, FC=FC)
+ V, U, L = golub_kahan(A, b, k)
+ B = L[1:k+1,1:k]
+
+ @test A * V[:,1:k] ≈ U * B
+ @test A' * U ≈ V * L'
+ @test A' * A * V[:,1:k] ≈ V * L' * B
+ @test A * A' * U[:,1:k] ≈ U * B * L[1:k,1:k]'
+
+ storage_golub_kahan_bytes(m, n, k) = 3*(k+1) * nbits_I + (2k+1) * nbits_R + (n+m)*(k+1) * nbits_FC
+
+ expected_golub_kahan_bytes = storage_golub_kahan_bytes(m, n, k)
+ actual_golub_kahan_bytes = @allocated golub_kahan(A, b, k)
+ @test expected_golub_kahan_bytes ≤ actual_golub_kahan_bytes ≤ 1.02 * expected_golub_kahan_bytes
+ end
+
+ @testset "Saunders-Simon-Yip" begin
+ A, b = under_consistent(m, n, FC=FC)
+ _, c = over_consistent(n, m, FC=FC)
+ V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
+
+ @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]'
+ @test A * U[:,1:k] ≈ V * T
+ @test A' * V[:,1:k] ≈ U * Tᴴ
+ @test A' * A * U[:,1:k-1] ≈ U * Tᴴ * T[1:k,1:k-1]
+ @test A * A' * V[:,1:k-1] ≈ V * T * Tᴴ[1:k,1:k-1]
+
+ K = [zeros(FC,m,m) A; A' zeros(FC,n,n)]
+ Pₖ = permutation_paige(k)
+ Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ
+ Pₖ₊₁ = permutation_paige(k+1)
+ Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁
+ G = Pₖ₊₁' * [zeros(FC,k+1,k) T; Tᴴ zeros(FC,k+1,k)] * Pₖ
+ @test K * Wₖ ≈ Wₖ₊₁ * G
+
+ storage_saunders_simon_yip_bytes(m, n, k) = 4k * nbits_I + (6k-2) * nbits_FC + (n+m)*(k+1) * nbits_FC
+
+ expected_saunders_simon_yip_bytes = storage_saunders_simon_yip_bytes(m, n, k)
+ actual_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, b, c, k)
+ @test expected_saunders_simon_yip_bytes ≤ actual_saunders_simon_yip_bytes ≤ 1.02 * expected_saunders_simon_yip_bytes
+ end
+
+ @testset "Montoison-Orban" begin
+ A, b = under_consistent(m, n, FC=FC)
+ B, c = over_consistent(n, m, FC=FC)
+ V, H, U, F = montoison_orban(A, B, b, c, k)
+
+ @test A * U[:,1:k] ≈ V * H
+ @test B * V[:,1:k] ≈ U * F
+ @test B * A * U[:,1:k-1] ≈ U * F * H[1:k,1:k-1]
+ @test A * B * V[:,1:k-1] ≈ V * H * F[1:k,1:k-1]
+
+ K = [zeros(FC,m,m) A; B zeros(FC,n,n)]
+ Pₖ = permutation_paige(k)
+ Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ
+ Pₖ₊₁ = permutation_paige(k+1)
+ Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁
+ G = Pₖ₊₁' * [zeros(FC,k+1,k) H; F zeros(FC,k+1,k)] * Pₖ
+ @test K * Wₖ ≈ Wₖ₊₁ * G
+
+ function storage_montoison_orban_bytes(m, n, k)
+ return 2*k*(k+1) * nbits_FC + (n+m)*(k+1) * nbits_FC
+ end
+
+ expected_montoison_orban_bytes = storage_montoison_orban_bytes(m, n, k)
+ actual_montoison_orban_bytes = @allocated montoison_orban(A, B, b, c, k)
+ @test expected_montoison_orban_bytes ≤ actual_montoison_orban_bytes ≤ 1.02 * expected_montoison_orban_bytes
+ end
+ end
+ end
+end
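
Each process in this new file is verified against a Krylov relation of the same shape; for Arnoldi it reads A Vₖ = Vₖ₊₁ Hₖ₊₁.ₖ with an orthonormal basis. A standalone check on random data (sizes hypothetical):

    using Krylov, LinearAlgebra

    n, k = 50, 10
    A = rand(n, n)
    b = rand(n)

    V, H = arnoldi(A, b, k)          # V is n × (k+1), H is (k+1) × k
    @assert A * V[:, 1:k] ≈ V * H    # the relation tested above
    @assert V' * V ≈ I               # the Arnoldi basis is orthonormal
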
diff --git a/test/test_qmr.jl b/test/test_qmr.jl
index 184b9877d..4a6b8c1c9 100644
--- a/test/test_qmr.jl
+++ b/test/test_qmr.jl
@@ -58,10 +58,10 @@
@test(resid ≤ qmr_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = qmr(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
solver = QmrSolver(A, b)
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index 468fa5a05..71885029f 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -11,1139 +11,183 @@ function test_solvers(FC)
nshifts = 5
T = real(FC)
S = Vector{FC}
+ solvers = Dict{Symbol, KrylovSolver}()
@eval begin
- cg_solver = $(KRYLOV_SOLVERS[:cg])($n, $n, $S)
- symmlq_solver = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S)
- minres_solver = $(KRYLOV_SOLVERS[:minres])($n, $n, $S)
- cg_lanczos_solver = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S)
- diom_solver = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S)
- fom_solver = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S)
- dqgmres_solver = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S)
- gmres_solver = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S)
- cr_solver = $(KRYLOV_SOLVERS[:cr])($n, $n, $S)
- crmr_solver = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S)
- cgs_solver = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S)
- bicgstab_solver = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S)
- craigmr_solver = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S)
- cgne_solver = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S)
- lnlq_solver = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S)
- craig_solver = $(KRYLOV_SOLVERS[:craig])($m, $n, $S)
- lslq_solver = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S)
- cgls_solver = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S)
- lsqr_solver = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S)
- crls_solver = $(KRYLOV_SOLVERS[:crls])($n, $m, $S)
- lsmr_solver = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S)
- usymqr_solver = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S)
- trilqr_solver = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S)
- bilq_solver = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S)
- bilqr_solver = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S)
- minres_qlp_solver = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S)
- qmr_solver = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S)
- usymlq_solver = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S)
- tricg_solver = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S)
- trimr_solver = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S)
- gpmr_solver = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S)
- cg_lanczos_shift_solver = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $m, $nshifts, $S)
+ $solvers[:cg] = $(KRYLOV_SOLVERS[:cg])($n, $n, $S)
+ $solvers[:symmlq] = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S)
+ $solvers[:minres] = $(KRYLOV_SOLVERS[:minres])($n, $n, $S)
+ $solvers[:cg_lanczos] = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S)
+ $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S)
+ $solvers[:diom] = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S)
+ $solvers[:fom] = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S)
+ $solvers[:dqgmres] = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S)
+ $solvers[:gmres] = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S)
+ $solvers[:fgmres] = $(KRYLOV_SOLVERS[:fgmres])($n, $n, $mem, $S)
+ $solvers[:cr] = $(KRYLOV_SOLVERS[:cr])($n, $n, $S)
+ $solvers[:crmr] = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S)
+ $solvers[:cgs] = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S)
+ $solvers[:bicgstab] = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S)
+ $solvers[:craigmr] = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S)
+ $solvers[:cgne] = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S)
+ $solvers[:lnlq] = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S)
+ $solvers[:craig] = $(KRYLOV_SOLVERS[:craig])($m, $n, $S)
+ $solvers[:lslq] = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S)
+ $solvers[:cgls] = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S)
+ $solvers[:lsqr] = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S)
+ $solvers[:crls] = $(KRYLOV_SOLVERS[:crls])($n, $m, $S)
+ $solvers[:lsmr] = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S)
+ $solvers[:usymqr] = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S)
+ $solvers[:trilqr] = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S)
+ $solvers[:bilq] = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S)
+ $solvers[:bilqr] = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S)
+ $solvers[:minres_qlp] = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S)
+ $solvers[:qmr] = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S)
+ $solvers[:usymlq] = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S)
+ $solvers[:tricg] = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S)
+ $solvers[:trimr] = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S)
+ $solvers[:gpmr] = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S)
end
- for i = 1 : 3
- A = i * A
- Au = i * Au
- Ao = i * Ao
- b = 5 * b
- c = 3 * c
-
- solver = solve!(cg_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(symmlq_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(minres_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cg_lanczos_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cg_lanczos_shift_solver, A, b, shifts)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(diom_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(fom_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(dqgmres_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(gmres_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cr_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(crmr_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cgs_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == 2 * niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(bicgstab_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == 2 * niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(craigmr_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(cgne_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(lnlq_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(craig_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(lslq_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cgls_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(lsqr_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(crls_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(lsmr_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(usymqr_solver, Ao, b, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(trilqr_solver, A, b, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved_primal(solver)
- @test issolved_dual(solver)
- @test issolved(solver)
-
- solver = solve!(bilq_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(bilqr_solver, A, b, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved_primal(solver)
- @test issolved_dual(solver)
- @test issolved(solver)
-
- solver = solve!(minres_qlp_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(qmr_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(usymlq_solver, Au, c, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(tricg_solver, Au, c, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(trimr_solver, Au, c, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(gpmr_solver, Ao, Au, b, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test Bprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
+ @testset "Check compatibility between KrylovSolvers and the dimension of the linear problems" begin
+ A2 = FC.(get_div_grad(2, 2, 2))
+ n2 = size(A2, 1)
+ m2 = div(n2, 2)
+ Au2 = A2[1:m2,:]
+ Ao2 = A2[:,1:m2]
+ b2 = Ao2 * ones(FC, m2)
+ c2 = Au2 * ones(FC, n2)
+ shifts2 = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0]
+ for (method, solver) in solvers
+ if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr)
+ @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2)
+ end
+ method == :cg_lanczos_shift && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, shifts2)
+ method == :cg_lanczos_shift && @test_throws ErrorException("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $(length(shifts2))") solve!(solver, A, b, shifts2)
+ method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2)
+ method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2)
+ method ∈ (:bilqr, :trilqr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, b2)
+ method == :gpmr && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, Au2, b2, c2)
+ method ∈ (:tricg, :trimr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2)
+ method == :usymlq && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2, b2)
+ method == :usymqr && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2)
+ end
end
- io = IOBuffer()
- show(io, cg_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ CgSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Ap│ Vector{$FC}│ 64│
- │ z│ Vector{$FC}│ 0│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, symmlq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │SymmlqSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ Mvold│ Vector{$FC}│ 64│
- │ Mv│ Vector{$FC}│ 64│
- │ Mv_next│ Vector{$FC}│ 64│
- │ w̅│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ clist│ Vector{$T}│ 5│
- │ zlist│ Vector{$T}│ 5│
- │ sprod│ Vector{$T}│ 5│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, minres_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │MinresSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r1│ Vector{$FC}│ 64│
- │ r2│ Vector{$FC}│ 64│
- │ w1│ Vector{$FC}│ 64│
- │ w2│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cg_lanczos_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────────┬───────────────┬─────────────────┐
- │CgLanczosSolver│Precision: $FC │Architecture: CPU│
- ├───────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ Mv│ Vector{$FC}│ 64│
- │ Mv_prev│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Mv_next│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cg_lanczos_shift_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────────────┬───────────────────┬─────────────────┐
- │CgLanczosShiftSolver│ Precision: $FC │Architecture: CPU│
- ├────────────────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────────────┼───────────────────┼─────────────────┤
- │ Mv│ Vector{$FC}│ 64│
- │ Mv_prev│ Vector{$FC}│ 64│
- │ Mv_next│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ x│Vector{Vector{$FC}}│ 5 x 64│
- │ p│Vector{Vector{$FC}}│ 5 x 64│
- │ σ│ Vector{$T}│ 5│
- │ δhat│ Vector{$T}│ 5│
- │ ω│ Vector{$T}│ 5│
- │ γ│ Vector{$T}│ 5│
- │ rNorms│ Vector{$T}│ 5│
- │ converged│ BitVector│ 5│
- │ not_cv│ BitVector│ 5│
- └────────────────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, diom_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────────┬─────────────────┐
- │DiomSolver│ Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ t│ Vector{$FC}│ 64│
- │ z│ Vector{$FC}│ 0│
- │ w│ Vector{$FC}│ 0│
- │ P│Vector{Vector{$FC}}│ 10 x 64│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ L│ Vector{$FC}│ 10│
- │ H│ Vector{$FC}│ 12│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, fom_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────────┬─────────────────┐
- │ FomSolver│ Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ w│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ l│ Vector{$FC}│ 10│
- │ z│ Vector{$FC}│ 10│
- │ U│ Vector{$FC}│ 55│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, dqgmres_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌─────────────┬───────────────────┬─────────────────┐
- │DqgmresSolver│ Precision: $FC │Architecture: CPU│
- ├─────────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├─────────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ t│ Vector{$FC}│ 64│
- │ z│ Vector{$FC}│ 0│
- │ w│ Vector{$FC}│ 0│
- │ P│Vector{Vector{$FC}}│ 10 x 64│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ c│ Vector{$T}│ 10│
- │ s│ Vector{$FC}│ 10│
- │ H│ Vector{$FC}│ 12│
- │ warm_start│ Bool│ 0│
- └─────────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, gmres_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────────┬─────────────────┐
- │GmresSolver│ Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ w│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ c│ Vector{$T}│ 10│
- │ s│ Vector{$FC}│ 10│
- │ z│ Vector{$FC}│ 10│
- │ R│ Vector{$FC}│ 55│
- │ warm_start│ Bool│ 0│
- │ inner_iter│ Int64│ 0│
- └───────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ CrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Ar│ Vector{$FC}│ 64│
- │ Mq│ Vector{$FC}│ 0│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, crmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CrmrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Aᵀr│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ Mq│ Vector{$FC}│ 0│
- │ s│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cgs_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ CgsSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │Attribute │ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ ts│ Vector{$FC}│ 64│
- │ yz│ Vector{$FC}│ 0│
- │ vw│ Vector{$FC}│ 0│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, bicgstab_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────────┬───────────────┬─────────────────┐
- │BicgstabSolver│Precision: $FC │Architecture: CPU│
- ├──────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 64│
- │ s│ Vector{$FC}│ 64│
- │ qd│ Vector{$FC}│ 64│
- │ yz│ Vector{$FC}│ 0│
- │ t│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └──────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, craigmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌─────────────┬───────────────┬─────────────────┐
- │CraigmrSolver│Precision: $FC │Architecture: CPU│
- ├─────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├─────────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
- │ d│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 32│
- │ w│ Vector{$FC}│ 32│
- │ wbar│ Vector{$FC}│ 32│
- │ Av│ Vector{$FC}│ 32│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- └─────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cgne_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CgneSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Aᵀz│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ s│ Vector{$FC}│ 0│
- │ z│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lnlq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LnlqSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ w̄│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 32│
- │ Av│ Vector{$FC}│ 32│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, craig_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │CraigSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ w│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 32│
- │ Av│ Vector{$FC}│ 32│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ w2│ Vector{$FC}│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lslq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LslqSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
- │ w̄│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 64│
- │ Av│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cgls_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CglsSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ p│ Vector{$FC}│ 32│
- │ s│ Vector{$FC}│ 32│
- │ r│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Mr│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lsqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LsqrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
- │ w│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 64│
- │ Av│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, crls_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CrlsSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ p│ Vector{$FC}│ 32│
- │ Ar│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ r│ Vector{$FC}│ 64│
- │ Ap│ Vector{$FC}│ 64│
- │ s│ Vector{$FC}│ 64│
- │ Ms│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lsmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LsmrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
- │ h│ Vector{$FC}│ 32│
- │ hbar│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 64│
- │ Av│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, usymqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │UsymqrSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 32│
- │ wₖ₋₂│ Vector{$FC}│ 32│
- │ wₖ₋₁│ Vector{$FC}│ 32│
- │ uₖ₋₁│ Vector{$FC}│ 32│
- │ uₖ│ Vector{$FC}│ 32│
- │ p│ Vector{$FC}│ 32│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, trilqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │TrilqrSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Δy│ Vector{$FC}│ 0│
- │ y│ Vector{$FC}│ 64│
- │ wₖ₋₃│ Vector{$FC}│ 64│
- │ wₖ₋₂│ Vector{$FC}│ 64│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, bilq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │BilqSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, bilqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │BilqrSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ Δy│ Vector{$FC}│ 0│
- │ y│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │ wₖ₋₃│ Vector{$FC}│ 64│
- │ wₖ₋₂│ Vector{$FC}│ 64│
- │ warm_start│ Bool│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, minres_qlp_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────────┬───────────────┬─────────────────┐
- │MinresQlpSolver│Precision: $FC │Architecture: CPU│
- ├───────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ wₖ₋₁│ Vector{$FC}│ 64│
- │ wₖ│ Vector{$FC}│ 64│
- │ M⁻¹vₖ₋₁│ Vector{$FC}│ 64│
- │ M⁻¹vₖ│ Vector{$FC}│ 64│
- │ x│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, qmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ QmrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ wₖ₋₂│ Vector{$FC}│ 64│
- │ wₖ₋₁│ Vector{$FC}│ 64│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, usymlq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │UsymlqSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 32│
- │ vₖ│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, tricg_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │TricgSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ y│ Vector{$FC}│ 64│
- │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│
- │ N⁻¹uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₁│ Vector{$FC}│ 64│
- │ gy₂ₖ│ Vector{$FC}│ 64│
- │ x│ Vector{$FC}│ 32│
- │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│
- │ M⁻¹vₖ│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₁│ Vector{$FC}│ 32│
- │ gx₂ₖ│ Vector{$FC}│ 32│
- │ Δx│ Vector{$FC}│ 0│
- │ Δy│ Vector{$FC}│ 0│
- │ uₖ│ Vector{$FC}│ 0│
- │ vₖ│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, trimr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │TrimrSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ y│ Vector{$FC}│ 64│
- │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│
- │ N⁻¹uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₃│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₂│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₁│ Vector{$FC}│ 64│
- │ gy₂ₖ│ Vector{$FC}│ 64│
- │ x│ Vector{$FC}│ 32│
- │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│
- │ M⁻¹vₖ│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₃│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₂│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₁│ Vector{$FC}│ 32│
- │ gx₂ₖ│ Vector{$FC}│ 32│
- │ Δx│ Vector{$FC}│ 0│
- │ Δy│ Vector{$FC}│ 0│
- │ uₖ│ Vector{$FC}│ 0│
- │ vₖ│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
+ @testset "Test the keyword argument timemax" begin
+ timemax = 0.0
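+ # A zero time budget should trip the elapsed-time check before any iteration completes.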
+ for (method, solver) in solvers
+ method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr) && solve!(solver, A, b, timemax=timemax)
+ method == :cg_lanczos_shift && solve!(solver, A, b, shifts, timemax=timemax)
+ method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && solve!(solver, Au, c, timemax=timemax)
+ method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && solve!(solver, Ao, b, timemax=timemax)
+ method ∈ (:bilqr, :trilqr) && solve!(solver, A, b, b, timemax=timemax)
+ method == :gpmr && solve!(solver, Ao, Au, b, c, timemax=timemax)
+ method ∈ (:tricg, :trimr) && solve!(solver, Au, c, b, timemax=timemax)
+ method == :usymlq && solve!(solver, Au, c, b, timemax=timemax)
+ method == :usymqr && solve!(solver, Ao, b, c, timemax=timemax)
+ @test solver.stats.status == "time limit exceeded"
+ end
+ end
- io = IOBuffer()
- show(io, gpmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────────┬─────────────────┐
- │GpmrSolver│ Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────────┼─────────────────┤
- │ wA│ Vector{$FC}│ 0│
- │ wB│ Vector{$FC}│ 0│
- │ dA│ Vector{$FC}│ 64│
- │ dB│ Vector{$FC}│ 32│
- │ Δx│ Vector{$FC}│ 0│
- │ Δy│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 0│
- │ p│ Vector{$FC}│ 0│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ U│Vector{Vector{$FC}}│ 10 x 32│
- │ gs│ Vector{$FC}│ 40│
- │ gc│ Vector{$T}│ 40│
- │ zt│ Vector{$FC}│ 20│
- │ R│ Vector{$FC}│ 210│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
+ for (method, solver) in solvers
+ @testset "$(method)" begin
+ for i = 1 : 3
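+ # Rescale the data on every pass (cumulatively) so the same preallocated workspace is reused on different systems.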
+ A = i * A
+ Au = i * Au
+ Ao = i * Ao
+ b = 5 * b
+ c = 3 * c
+
+ if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom,
+ :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr, :cg_lanczos_shift)
+ method == :cg_lanczos_shift ? solve!(solver, A, b, shifts) : solve!(solver, A, b)
+ niter = niterations(solver)
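+ # CGS and BiCGSTAB perform two products with A per iteration; BiLQ and QMR also need products with the adjoint of A.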
+ @test Aprod(solver) == (method ∈ (:cgs, :bicgstab) ? 2 * niter : niter)
+ @test Atprod(solver) == (method ∈ (:bilq, :qmr) ? niter : 0)
+ @test solution(solver) === solver.x
+ @test nsolution(solver) == 1
+ end
+
+ if method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr)
+ solve!(solver, Au, c)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver, 1) === solver.x
+ @test nsolution(solver) == (method ∈ (:cgne, :crmr) ? 1 : 2)
+ (nsolution(solver) == 2) && (@test solution(solver, 2) === solver.y)
+ end
+
+ if method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr)
+ solve!(solver, Ao, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver) === solver.x
+ @test nsolution(solver) == 1
+ end
+
+ if method ∈ (:bilqr, :trilqr)
+ solve!(solver, A, b, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver, 1) === solver.x
+ @test solution(solver, 2) === solver.y
+ @test nsolution(solver) == 2
+ @test issolved_primal(solver)
+ @test issolved_dual(solver)
+ end
+
+ if method ∈ (:tricg, :trimr, :gpmr)
+ method == :gpmr ? solve!(solver, Ao, Au, b, c) : solve!(solver, Au, c, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ method != :gpmr && (@test Atprod(solver) == niter)
+ method == :gpmr && (@test Bprod(solver) == niter)
+ @test solution(solver, 1) === solver.x
+ @test solution(solver, 2) === solver.y
+ @test nsolution(solver) == 2
+ end
+
+ if method ∈ (:usymlq, :usymqr)
+ method == :usymlq ? solve!(solver, Au, c, b) : solve!(solver, Ao, b, c)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver) === solver.x
+ @test nsolution(solver) == 1
+ end
+
+ @test niter > 0
+ @test statistics(solver) === solver.stats
+ @test issolved(solver)
+ end
+
+ io = IOBuffer()
+ show(io, solver, show_stats=false)
+ showed = String(take!(io))
+
+ # Test that the lines have the same length
+ str = split(showed, '\n', keepempty=false)
+ len_row = length(str[1])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_row, &, str)
+
+ # Test that the columns have the same length
+ str2 = split(showed, ['│','┌','┬','┐','├','┼','┤','└','┴','┘','\n'], keepempty=false)
+ len_col1 = length(str2[1])
+ len_col2 = length(str2[2])
+ len_col3 = length(str2[3])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col1, &, str2[1:3:end-2])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col2, &, str2[2:3:end-1])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col3, &, str2[3:3:end])
+
+ # Code coverage
+ show(io, solver, show_stats=true)
+ end
+ end
end
@testset "solvers" begin
diff --git a/test/test_stats.jl b/test/test_stats.jl
index 4289a78a3..f4c212d50 100644
--- a/test/test_stats.jl
+++ b/test/test_stats.jl
@@ -1,28 +1,30 @@
@testset "stats" begin
- stats = Krylov.SimpleStats(0, true, true, Float64[1.0], Float64[2.0], Float64[], "t")
+ stats = Krylov.SimpleStats(0, true, true, Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown")
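+ # The two new trailing fields are the elapsed time in seconds and the status; show formats 1.234 as "1.23s".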
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Simple stats
+ expected = """SimpleStats
niter: 0
solved: true
inconsistent: true
residuals: [ 1.0e+00 ]
Aresiduals: [ 2.0e+00 ]
κ₂(A): []
- status: t"""
+ timer: 1.23s
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), "t")
+ stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), 0.1234, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Lsmr stats
+ expected = """LsmrStats
niter: 0
solved: true
inconsistent: true
@@ -33,53 +35,59 @@
κ₂(A): 5.0
‖A‖F: 6.0
xNorm: 7.0
- status: t"""
+ timer: 123.40ms
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, "t")
+ stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, 1.234, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Lanczos stats
+ expected = """LanczosStats
niter: 0
solved: true
residuals: [ 3.0e+00 ]
indefinite: true
‖A‖F: NaN
κ₂(A): NaN
- status: t"""
+ timer: 1.23s
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, "t")
+ stats = Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, 0.00056789, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """LanczosShift stats
+ expected = """LanczosShiftStats
niter: 0
solved: true
residuals: [[0.9, 0.5], [0.6, 0.4, 0.1]]
indefinite: Bool[0, 1]
‖A‖F: NaN
κ₂(A): NaN
- status: t"""
- @test (VERSION < v"1.5") || strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
+ timer: 567.89μs
+ status: unknown"""
+ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, "t")
+ stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, 1.234, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Symmlq stats
+ expected = """SymmlqStats
niter: 0
solved: true
residuals: [ 4.0e+00 ]
@@ -88,53 +96,59 @@
errors (cg): [ 7.0e+00 ✗✗✗✗ ]
‖A‖F: NaN
κ₂(A): NaN
- status: t"""
+ timer: 1.23s
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], "t")
+ stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], 1.234, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Adjoint stats
+ expected = """AdjointStats
niter: 0
solved primal: true
solved dual: true
residuals primal: [ 8.0e+00 ]
residuals dual: [ 9.0e+00 ]
- status: t"""
+ timer: 1.23s
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], "t")
+ stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], 1.234, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """LNLQ stats
+ expected = """LNLQStats
niter: 0
solved: true
residuals: [ 1.0e+01 ]
error with bnd: false
error bnd x: [ 1.1e+01 ]
error bnd y: [ 1.2e+01 ]
- status: t"""
+ timer: 1.23s
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
- stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], "t")
+ stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], 1.234, "unknown")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """LSLQ stats
+ expected = """LSLQStats
niter: 0
solved: true
inconsistent: false
@@ -144,9 +158,11 @@
error with bnd: false
error bound LQ: [ 1.6e+01 ]
error bound CG: [ 1.7e+01 ]
- status: t"""
+ timer: 1.23s
+ status: unknown"""
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
end
diff --git a/test/test_trilqr.jl b/test/test_trilqr.jl
index 7d7927372..baf8a597e 100644
--- a/test/test_trilqr.jl
+++ b/test/test_trilqr.jl
@@ -74,7 +74,7 @@
@test(resid_dual ≤ trilqr_tol)
@test(stats.solved_dual)
- # Test consistent Ax = b and inconsistent Aᵀt = c.
+ # Test consistent Ax = b and inconsistent Aᴴt = c.
A, b, c = rectangular_adjoint(FC=FC)
(x, t, stats) = trilqr(A, b, c)
diff --git a/test/test_utils.jl b/test/test_utils.jl
index ed72056b6..f1c3ca44e 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -1,50 +1,51 @@
include("get_div_grad.jl")
include("gen_lsq.jl")
include("check_min_norm.jl")
+include("callback_utils.jl")
# Symmetric and positive definite systems.
function symmetric_definite(n :: Int=10; FC=Float64)
- α = FC <: Complex ? im : 1
+ α = FC <: Complex ? FC(im) : one(FC)
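+ # FC(im) and one(FC) give both branches of the ternary the same type, keeping the helper type-stable.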
A = spdiagm(-1 => α * ones(FC, n-1), 0 => 4 * ones(FC, n), 1 => conj(α) * ones(FC, n-1))
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Symmetric and indefinite systems.
function symmetric_indefinite(n :: Int=10; FC=Float64)
- α = FC <: Complex ? im : 1
+ α = FC <: Complex ? FC(im) : one(FC)
A = spdiagm(-1 => α * ones(FC, n-1), 0 => ones(FC, n), 1 => conj(α) * ones(FC, n-1))
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Nonsymmetric and positive definite systems.
function nonsymmetric_definite(n :: Int=10; FC=Float64)
if FC <: Complex
- A = [i == j ? n * one(FC) : im * one(FC) for i=1:n, j=1:n]
+ A = [i == j ? n * one(FC) : FC(im) * one(FC) for i=1:n, j=1:n]
else
A = [i == j ? n * one(FC) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n]
end
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Nonsymmetric and indefinite systems.
function nonsymmetric_indefinite(n :: Int=10; FC=Float64)
if FC <: Complex
- A = [i == j ? n * (-one(FC))^(i*j) : im * one(FC) for i=1:n, j=1:n]
+ A = [i == j ? n * (-one(FC))^(i*j) : FC(im) * one(FC) for i=1:n, j=1:n]
else
A = [i == j ? n * (-one(FC))^(i*j) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n]
end
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Underdetermined and consistent systems.
function under_consistent(n :: Int=10, m :: Int=25; FC=Float64)
n < m || error("Square or overdetermined system!")
- α = FC <: Complex ? im : 1
- A = [i/j - α * j/i for i=1:n, j=1:m]
+ α = FC <: Complex ? FC(im) : one(FC)
+ A = FC[i/j - α * j/i for i=1:n, j=1:m]
b = A * ones(FC, m)
return A, b
end
@@ -52,7 +53,7 @@ end
# Underdetermined and inconsistent systems.
function under_inconsistent(n :: Int=10, m :: Int=25; FC=Float64)
n < m || error("Square or overdetermined system!")
- α = FC <: Complex ? 1 + im : 1
+ α = FC <: Complex ? FC(1 + im) : one(FC)
A = α * ones(FC, n, m)
b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n]
return A, b
@@ -84,8 +85,8 @@ end
# Overdetermined and consistent systems.
function over_consistent(n :: Int=25, m :: Int=10; FC=Float64)
n > m || error("Underdetermined or square system!")
- α = FC <: Complex ? im : 1
- A = [i/j - α * j/i for i=1:n, j=1:m]
+ α = FC <: Complex ? FC(im) : one(FC)
+ A = FC[i/j - α * j/i for i=1:n, j=1:m]
b = A * ones(FC, m)
return A, b
end
@@ -93,7 +94,7 @@ end
# Overdetermined and inconsistent systems.
function over_inconsistent(n :: Int=25, m :: Int=10; FC=Float64)
n > m || error("Underdetermined or square system!")
- α = FC <: Complex ? 1 + im : 1
+ α = FC <: Complex ? FC(1 + im) : one(FC)
A = α * ones(FC, n, m)
b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n]
return A, b
@@ -162,23 +163,23 @@ end
function underdetermined_adjoint(n :: Int=100, m :: Int=200; FC=Float64)
n < m || error("Square or overdetermined system!")
A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m]
- b = A * [1:m;]
- c = A' * [-n:-1;]
+ b = A * FC[1:m;]
+ c = A' * FC[-n:-1;]
return A, b, c
end
# Square consistent adjoint systems.
function square_adjoint(n :: Int=100; FC=Float64)
A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n]
- b = A * [1:n;]
- c = A' * [-n:-1;]
+ b = A * FC[1:n;]
+ c = A' * FC[-n:-1;]
return A, b, c
end
-# Adjoint systems with Ax = b underdetermined consistent and Aᵀt = c overdetermined insconsistent.
+# Adjoint systems with Ax = b underdetermined consistent and Aᴴt = c overdetermined inconsistent.
function rectangular_adjoint(n :: Int=10, m :: Int=25; FC=Float64)
- Aᵀ, c = over_inconsistent(m, n; FC=FC)
- A = adjoint(Aᵀ)
+ Aᴴ, c = over_inconsistent(m, n; FC=FC)
+ A = adjoint(Aᴴ)
b = A * ones(FC, m)
return A, b, c
end
@@ -187,8 +188,8 @@ end
function overdetermined_adjoint(n :: Int=200, m :: Int=100; FC=Float64)
n > m || error("Underdetermined or square system!")
A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m]
- b = A * [1:m;]
- c = A' * [-n:-1;]
+ b = A * FC[1:m;]
+ c = A' * FC[-n:-1;]
return A, b, c
end
@@ -251,7 +252,7 @@ end
# Square and preconditioned problems.
function square_preconditioned(n :: Int=10; FC=Float64)
A = ones(FC, n, n) + (n-1) * eye(n)
- b = FC(10.0) * [1:n;]
+ b = 10 * FC[1:n;]
M⁻¹ = FC(1/n) * eye(n)
return A, b, M⁻¹
end
@@ -363,110 +364,3 @@ function check_reset(stats :: KS) where KS <: Krylov.KrylovStats
end
end
end
-
-# Test callback
-mutable struct TestCallbackN2{T, S, M}
- A::M
- b::S
- storage_vec::S
- tol::T
-end
-TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol)
-
-function (cb_n2::TestCallbackN2)(solver)
- mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
- cb_n2.storage_vec .-= cb_n2.b
- return norm(cb_n2.storage_vec) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2Adjoint{T, S, M}
- A::M
- b::S
- c::S
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol)
-
-function (cb_n2::TestCallbackN2Adjoint)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
- cb_n2.storage_vec1 .-= cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', solver.y)
- cb_n2.storage_vec2 .-= cb_n2.c
- return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
-end
-
-mutable struct TestCallbackN2Shifts{T, S, M}
- A::M
- b::S
- shifts::Vector{T}
- tol::T
-end
-TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol)
-
-function (cb_n2::TestCallbackN2Shifts)(solver)
- r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x)
- return all(map(norm, r) .≤ cb_n2.tol)
-end
-
-mutable struct TestCallbackN2LS{T, S, M}
- A::M
- b::S
- λ::T
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol)
-
-function (cb_n2::TestCallbackN2LS)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
- cb_n2.storage_vec1 .-= cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1)
- cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x
- return norm(cb_n2.storage_vec2) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2LN{T, S, M}
- A::M
- b::S
- λ::T
- storage_vec::S
- tol::T
-end
-TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol)
-
-function (cb_n2::TestCallbackN2LN)(solver)
- mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
- cb_n2.storage_vec .-= cb_n2.b
- cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s)
- return norm(cb_n2.storage_vec) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2SaddlePts{T, S, M}
- A::M
- b::S
- c::S
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2SaddlePts(A, b, c; tol = 0.1) =
- TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol)
-
-function (cb_n2::TestCallbackN2SaddlePts)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.y)
- cb_n2.storage_vec1 .+= solver.x .- cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', solver.x)
- cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c
- return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
-end
-
-function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol)
- get_x_restarted_gmres!(solver, A, stor, N)
- x = stor.x
- mul!(storage_vec, A, x)
- storage_vec .-= b
- return (norm(storage_vec) ≤ tol)
-end
diff --git a/test/test_verbose.jl b/test/test_verbose.jl
new file mode 100644
index 000000000..ebc42c8f7
--- /dev/null
+++ b/test/test_verbose.jl
@@ -0,0 +1,60 @@
+function test_verbose(FC)
+ A = FC.(get_div_grad(4, 4, 4)) # Dimension m x n
+ m,n = size(A)
+ k = div(n, 2)
+ Au = A[1:k,:] # Dimension k x n
+ Ao = A[:,1:k] # Dimension m x k
+ b = Ao * ones(FC, k) # Dimension m
+ c = Au * ones(FC, n) # Dimension k
+ mem = 10
+
+ T = real(FC)
+ shifts = T[1; 2; 3; 4; 5]
+ nshifts = 5
+
+ for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr,
+ :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres,
+ :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom,
+ :fgmres, :cg_lanczos_shift)
+
+ @testset "$fn" begin
+ io = IOBuffer()
+ if fn in (:trilqr, :bilqr)
+ @eval $fn($A, $b, $b, verbose=1, iostream=$io)
+ elseif fn in (:tricg, :trimr)
+ @eval $fn($Au, $c, $b, verbose=1, iostream=$io)
+ elseif fn in (:lnlq, :craig, :craigmr, :cgne, :crmr)
+ @eval $fn($Au, $c, verbose=1, iostream=$io)
+ elseif fn in (:lslq, :lsqr, :lsmr, :cgls, :crls)
+ @eval $fn($Ao, $b, verbose=1, iostream=$io)
+ elseif fn == :usymlq
+ @eval $fn($Au, $c, $b, verbose=1, iostream=$io)
+ elseif fn == :usymqr
+ @eval $fn($Ao, $b, $c, verbose=1, iostream=$io)
+ elseif fn == :gpmr
+ @eval $fn($Ao, $Au, $b, $c, verbose=1, iostream=$io)
+ elseif fn == :cg_lanczos_shift
+ @eval $fn($A, $b, $shifts, verbose=1, iostream=$io)
+ else
+ @eval $fn($A, $b, verbose=1, iostream=$io)
+ end
+
+ showed = String(take!(io))
+ str = split(showed, '\n', keepempty=false)
+ nrows = length(str)
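+ # Skip method-specific banner/footer rows so only the aligned iteration log is measured.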
+ first_row = fn in (:bilqr, :trilqr) ? 3 : 2
+ last_row = fn == :cg ? nrows-1 : nrows
+ str = str[first_row:last_row]
+ len_header = length(str[1])
+ @test mapreduce(x -> length(x) == len_header, &, str)
+ end
+ end
+end
+
+@testset "verbose" begin
+ for FC in (Float64, ComplexF64)
+ @testset "Data Type: $FC" begin
+ test_verbose(FC)
+ end
+ end
+end
diff --git a/test/test_warm_start.jl b/test/test_warm_start.jl
index 66a1cbea7..c788ed7e8 100644
--- a/test/test_warm_start.jl
+++ b/test/test_warm_start.jl
@@ -8,41 +8,126 @@ function test_warm_start(FC)
nshifts = 5
tol = 1.0e-6
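+ # Each method is exercised twice: via the functional API with initial guesses,
+ # and in-place with solve! on a preallocated workspace, covering both warm-start paths.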
+ x, y, stats = bilqr(A, b, c, x0, y0)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+ s = c - A' * y
+ resid = norm(s) / norm(c)
+ @test(resid ≤ tol)
+
+ solver = BilqrSolver(A, b)
+ solve!(solver, A, b, c, x0, y0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+ s = c - A' * solver.y
+ resid = norm(s) / norm(c)
+ @test(resid ≤ tol)
+
+ x, y, stats = trilqr(A, b, c, x0, y0)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+ s = c - A' * y
+ resid = norm(s) / norm(c)
+ @test(resid ≤ tol)
+
+ solver = TrilqrSolver(A, b)
+ solve!(solver, A, b, c, x0, y0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+ s = c - A' * solver.y
+ resid = norm(s) / norm(c)
+ @test(resid ≤ tol)
+
x, y, stats = tricg(A, b, b, x0, y0)
r = [b - x - A * y; b - A' * x + y]
resid = norm(r) / norm([b; b])
@test(resid ≤ tol)
+ solver = TricgSolver(A, b)
+ solve!(solver, A, b, b, x0, y0)
+ r = [b - solver.x - A * solver.y; b - A' * solver.x + solver.y]
+ resid = norm(r) / norm([b; b])
+ @test(resid ≤ tol)
+
x, y, stats = trimr(A, b, b, x0, y0)
r = [b - x - A * y; b - A' * x + y]
resid = norm(r) / norm([b; b])
@test(resid ≤ tol)
+ solver = TrimrSolver(A, b)
+ solve!(solver, A, b, b, x0, y0)
+ r = [b - solver.x - A * solver.y; b - A' * solver.x + solver.y]
+ resid = norm(r) / norm([b; b])
+ @test(resid ≤ tol)
+
x, y, stats = gpmr(A, A', b, b, x0, y0)
r = [b - x - A * y; b - A' * x - y]
resid = norm(r) / norm([b; b])
@test(resid ≤ tol)
+ solver = GpmrSolver(A, b)
+ solve!(solver, A, A', b, b, x0, y0)
+ r = [b - solver.x - A * solver.y; b - A' * solver.x - solver.y]
+ resid = norm(r) / norm([b; b])
+ @test(resid ≤ tol)
+
x, stats = minres_qlp(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = MinresQlpSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = symmlq(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = SymmlqSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = cg(A, b, x0)
r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
+ solver = CgSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
@test(resid ≤ tol)
x, stats = cr(A, b, x0)
r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
+ solver = CrSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
@test(resid ≤ tol)
x, stats = cg_lanczos(A, b, x0)
r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
+ solver = CgLanczosSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
@test(resid ≤ tol)
x, stats = minres(A, b, x0)
@@ -50,70 +135,131 @@ function test_warm_start(FC)
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = MinresSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = diom(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = DiomSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = dqgmres(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = DqgmresSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = fom(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
-
+
+ solver = FomSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = gmres(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = GmresSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
+ x, stats = fgmres(A, b, x0)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
+ solver = FgmresSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = bicgstab(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = BicgstabSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = cgs(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ solver = CgsSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = bilq(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
- x, stats = qmr(A, b, x0)
- r = b - A * x
+ solver = BilqSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
- x, stats = usymlq(A, b, c, x0)
+ x, stats = qmr(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
- x, stats = usymqr(A, b, c, x0)
- r = b - A * x
+ solver = QmrSolver(A, b)
+ solve!(solver, A, b, x0)
+ r = b - A * solver.x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
- x, y, stats = bilqr(A, b, c, x0, y0)
+ x, stats = usymlq(A, b, c, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
- s = c - A' * y
- resid = norm(s) / norm(c)
+
+ solver = UsymlqSolver(A, b)
+ solve!(solver, A, b, c, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
@test(resid ≤ tol)
- x, y, stats = trilqr(A, b, c, x0, y0)
+ x, stats = usymqr(A, b, c, x0)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ tol)
- s = c - A' * y
- resid = norm(s) / norm(c)
+
+ solver = UsymqrSolver(A, b)
+ solve!(solver, A, b, c, x0)
+ r = b - A * solver.x
+ resid = norm(r) / norm(b)
@test(resid ≤ tol)
end