diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 000000000..92ac58c74 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,71 @@ +steps: + - label: "Nvidia GPUs -- CUDA.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9 + agents: + queue: "juliagpu" + cuda: "*" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("CUDA") + Pkg.add("LinearOperators") + Pkg.instantiate() + using CUDA + # CUDA.set_runtime_version!(v"11.8")' + + julia --color=yes --project -e ' + include("test/gpu/nvidia.jl")' + timeout_in_minutes: 30 + + # - label: "AMD GPUs -- AMDGPU.jl" + # plugins: + # - JuliaCI/julia#v1: + # version: 1.9 + # agents: + # queue: "juliagpu" + # rocm: "*" + # rocmgpu: "gfx1031" + # env: + # JULIA_AMDGPU_CORE_MUST_LOAD: "1" + # JULIA_AMDGPU_HIP_MUST_LOAD: "1" + # JULIA_AMDGPU_DISABLE_ARTIFACTS: "1" + # command: | + # julia --color=yes --project -e ' + # using Pkg + # Pkg.add("AMDGPU") + # Pkg.instantiate() + # include("test/gpu/amd.jl")' + # timeout_in_minutes: 30 + + - label: "Intel GPUs -- oneAPI.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9 + agents: + queue: "juliagpu" + intel: "*" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("oneAPI") + Pkg.instantiate() + include("test/gpu/intel.jl")' + timeout_in_minutes: 30 + + - label: "Apple M1 GPUs -- Metal.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9 + agents: + queue: "juliaecosystem" + os: "macos" + arch: "aarch64" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("Metal") + Pkg.instantiate() + include("test/gpu/metal.jl")' + timeout_in_minutes: 30 diff --git a/.cirrus.yml b/.cirrus.yml index d559cf609..792aad121 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,15 +1,36 @@ -freebsd_instance: - image: freebsd-13-0-release-amd64 task: - name: FreeBSD - env: - matrix: - - JULIA_VERSION: 1.6 - - JULIA_VERSION: 1 - - JULIA_VERSION: nightly - allow_failures: $JULIA_VERSION == 'nightly' - install_script: - - sh 
-c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)" + matrix: + - name: FreeBSD + freebsd_instance: + image_family: freebsd-13-1 + env: + matrix: + - JULIA_VERSION: 1.6 + - JULIA_VERSION: 1 + - name: musl Linux + container: + image: alpine:3.14 + env: + - JULIA_VERSION: 1 + - name: MacOS M1 + macos_instance: + image: ghcr.io/cirruslabs/macos-monterey-base:latest + env: + - JULIA_VERSION: 1 + install_script: | + URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh" + set -x + if [ "$(uname -s)" = "Linux" ] && command -v apt; then + apt update + apt install -y curl + fi + if command -v curl; then + sh -c "$(curl ${URL})" + elif command -v wget; then + sh -c "$(wget ${URL} -q -O-)" + elif command -v fetch; then + sh -c "$(fetch ${URL} -o -)" + fi build_script: - cirrusjl build test_script: diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 000000000..e3469746f --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,11 @@ +# Drops on the order 0.01% are typical even when no change occurs +# Having the threshold set a little higher (0.5%) than that makes it +# a little more tolerant to fluctuations +coverage: + status: + project: + default: + threshold: 0.5% + patch: + default: + threshold: 0.5% diff --git a/.github/workflows/Aqua.yml b/.github/workflows/Aqua.yml new file mode 100644 index 000000000..da872e225 --- /dev/null +++ b/.github/workflows/Aqua.yml @@ -0,0 +1,17 @@ +name: Aqua +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: julia-actions/setup-julia@latest + with: + version: '1' + - name: Aqua.jl + run: julia --color=yes -e 'using Pkg; Pkg.add("Aqua"); Pkg.develop(path="."); using Aqua, Krylov; Aqua.test_all(Krylov)' diff --git a/.github/workflows/Breakage.yml b/.github/workflows/Breakage.yml index 266eed3cc..4a907d631 100644 --- 
a/.github/workflows/Breakage.yml +++ b/.github/workflows/Breakage.yml @@ -19,19 +19,20 @@ jobs: "JuliaSmoothOptimizers/JSOSolvers.jl", "JuliaSmoothOptimizers/LLSModels.jl", "JuliaSmoothOptimizers/Percival.jl", - "JuliaSmoothOptimizers/RipQP.jl" + "JuliaSmoothOptimizers/RipQP.jl", + "SciML/LinearSolve.jl" ] pkgversion: [latest, stable] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Install Julia - uses: julia-actions/setup-julia@v1 with: version: '1' arch: x64 - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: @@ -85,7 +86,7 @@ jobs: end; end' - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: pr path: pr/ @@ -94,9 +95,9 @@ jobs: needs: break runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: pr path: pr/ @@ -127,7 +128,7 @@ jobs: fi done >> MSG - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: pr path: pr/ diff --git a/.github/workflows/CI_M1.yml b/.github/workflows/CI_M1.yml deleted file mode 100644 index 6f9aa720b..000000000 --- a/.github/workflows/CI_M1.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: CI_M1 -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - name: Julia ${{ matrix.version }} - macOS - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: self-hosted - strategy: - fail-fast: false - matrix: - version: - - '1' - arch: - - aarch64 - steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - name: Version Info - shell: julia --color=yes {0} - run: | - using InteractiveUtils - versioninfo() - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml 
index 14f6dcd47..043113f74 100644 --- a/.github/workflows/CommentPR.yml +++ b/.github/workflows/CommentPR.yml @@ -39,16 +39,36 @@ jobs: - run: unzip pr.zip - name: 'Comment on PR' - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | - var fs = require('fs'); - var issue_number = Number(fs.readFileSync('./NR')); - var msg = fs.readFileSync('./MSG', 'utf8'); - await github.issues.createComment({ + var fs = require('fs') + var issue_number = Number(fs.readFileSync('./NR')) + var msg = fs.readFileSync('./MSG', 'utf8') + + // Get the existing comments. + const {data: comments} = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: issue_number, - body: msg - }); + issue_number: issue_number + }) + + // Find any comment already made by the bot. + const botComment = comments.find(comment => comment.user.id === 41898282) + + if (botComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: msg + }) + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue_number, + body: msg + }) + } diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index b546a8082..7a9c79fd4 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,19 +1,44 @@ name: CompatHelper - on: schedule: - - cron: '00 00 * * *' - + - cron: 0 0 * * * + workflow_dispatch: +permissions: + contents: write + pull-requests: write jobs: CompatHelper: runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@latest + - name: Check if Julia is already available in the PATH + id: julia_in_path + run: which julia + continue-on-error: true + - name: Install Julia, but only if it is not already available in the PATH + uses: julia-actions/setup-julia@v1 with: version: 
'1' - - name: CompatHelper - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - - name: CompatHelper.main() + arch: ${{ runner.arch }} + if: steps.julia_in_path.outcome != 'success' + - name: "Add the General registry via Git" + run: | + import Pkg + ENV["JULIA_PKG_SERVER"] = "" + Pkg.Registry.add("General") + shell: julia --color=yes {0} + - name: "Install CompatHelper" + run: | + import Pkg + name = "CompatHelper" + uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" + version = "3" + Pkg.add(; name, uuid, version) + shell: julia --color=yes {0} + - name: "Run CompatHelper" + run: | + import CompatHelper + CompatHelper.main() + shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: julia -e 'using CompatHelper; CompatHelper.main()' + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index be0b86584..406f15e0d 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -10,12 +10,12 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@latest with: version: '1' - name: Install dependencies - run: julia --project=docs -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + run: julia --project=docs --color=yes -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml new file mode 100644 index 000000000..b0c37e05f --- /dev/null +++ b/.github/workflows/Invalidations.yml @@ -0,0 +1,43 @@ +name: Invalidations +# Uses SnoopCompile to evaluate number of invalidations caused by `using` the package +# using https://github.com/julia-actions/julia-invalidations +# Based on https://github.com/julia-actions/julia-invalidations + +on: + pull_request: + 
+concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: always. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + evaluate: + # Only run on PRs to the default branch. + # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch + if: github.base_ref == github.event.repository.default_branch + runs-on: ubuntu-latest + steps: + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - uses: actions/checkout@v3 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_pr + + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.repository.default_branch }} + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_default + + - name: Report invalidation counts + run: | + echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + - name: Check if the PR does increase number of invalidations + if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total + run: exit 1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 409e0d146..9e1791f48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,12 +31,12 @@ jobs: arch: x64 allow_failure: true steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: @@ -49,6 +49,6 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: 
codecov/codecov-action@v1 + - uses: codecov/codecov-action@v3 with: - file: lcov.info + files: lcov.info diff --git a/LICENSE.md b/LICENSE.md index 1533671ce..befba1c4d 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ -Copyright (c) 2015-2019: Dominique Orban +Copyright (c) 2015-present: Alexis Montoison, Dominique Orban, and other contributors -Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.0/). +[Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2.0/). ## License @@ -11,83 +11,83 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. -------------- 1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. 1.2. "Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" - means Covered Software of a particular Contributor. + means Covered Software of a particular Contributor. 1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. 1.5. 
"Incompatible With Secondary Licenses" - means + means - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. 1.6. "Executable Form" - means any form of the work other than Source Code Form. + means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. 1.8. "License" - means this document. + means this document. 1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. 1.10. "Modifications" - means any of the following: + means any of the following: - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or - (b) any new file in Source Code Form that contains any Covered - Software. + (b) any new file in Source Code Form that contains any Covered + Software. 1.11. 
"Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. 1.12. "Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. 1.13. "Source Code Form" - means the form of the work preferred for making modifications. + means the form of the work preferred for making modifications. 1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. 2. License Grants and Conditions -------------------------------- @@ -98,14 +98,14 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. non-exclusive license: (a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. 2.2. Effective Date @@ -122,15 +122,15 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. 
Contributor: (a) for any code that a Contributor has removed from Covered Software; - or + or (b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or (c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. + its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with @@ -178,15 +178,15 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. 3.3. 
Distribution of a Larger Work @@ -363,7 +363,7 @@ Krylov.jl is licensed under the [MPL version 2.0](https://www.mozilla.org/MPL/2. This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. + file, You can obtain one at https://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE diff --git a/Project.toml b/Project.toml index a91e07b8a..c711f565c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,18 +1,33 @@ name = "Krylov" uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" -version = "0.8.3" +version = "0.9.2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +[weakdeps] +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[extensions] +KrylovComponentArraysExt = "ComponentArrays" +KrylovFillArraysExt = "FillArrays" +KrylovStaticArraysExt = "StaticArrays" + [compat] +PackageExtensionCompat = "1.0.1" julia = "^1.6.0" [extras] +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Random", "Test"] +test = ["ComponentArrays", "FillArrays", "Random", "StaticArrays", "Test"] diff --git a/README.md b/README.md index a4664e187..57bcd1d81 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ should be solved when **_b_** is not in the range of **_A_** (inconsistent syste * **_A_** is square and singular, * **_A_** is tall 
and thin. -Underdetermined sytems are less common but also occur. +Underdetermined systems are less common but also occur. If there are infinitely many such **_x_** (because **_A_** is column rank-deficient), one with minimum norm is identified @@ -61,32 +61,32 @@ If there are infinitely many such **_x_** (because **_A_** is column rank-defici minimize ‖x‖   subject to   Ax = b

-sould be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. +should be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. This situation mainly occurs when * **_A_** is square and singular, * **_A_** is short and wide. -Overdetermined sytems are less common but also occur. +Overdetermined systems are less common but also occur. 4. Adjoint systems

- Ax = b   and   Aᵀy = c + Ax = b   and   Aᴴy = c

where **_A_** can have any shape. -5. Saddle-point and symmetric quasi-definite (SQD) systems +5. Saddle-point and Hermitian quasi-definite systems

[M     A]  [x] = [b]
- [Aᵀ   -N]  [y]    [c] + [Aᴴ   -N]  [y]    [c]

where **_A_** can have any shape. -6. Generalized saddle-point and unsymmetric partitioned systems +6. Generalized saddle-point and non-Hermitian partitioned systems

[M   A]  [x] = [b] @@ -94,7 +94,7 @@ where **_A_** can have any shape. [B   N]  [y]    [c]

-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**. +where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**. **_A_**, **_B_**, **_b_** and **_c_** must be all nonzero. Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because: @@ -121,3 +121,10 @@ julia> ] pkg> add Krylov pkg> test Krylov ``` + +## Bug reports and discussions + +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues). +Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please. + +If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome. 
diff --git a/docs/make.jl b/docs/make.jl index 57ad87cd2..441ddb3ee 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,23 +6,26 @@ makedocs( linkcheck = true, strict = true, format = Documenter.HTML(assets = ["assets/style.css"], - ansicolor=true, + ansicolor = true, prettyurls = get(ENV, "CI", nothing) == "true", collapselevel = 1), sitename = "Krylov.jl", pages = ["Home" => "index.md", "API" => "api.md", - "Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md", - "Symmetric indefinite linear systems" => "solvers/sid.md", - "Unsymmetric linear systems" => "solvers/unsymmetric.md", + "Krylov processes" => "processes.md", + "Krylov methods" => ["Hermitian positive definite linear systems" => "solvers/spd.md", + "Hermitian indefinite linear systems" => "solvers/sid.md", + "Non-Hermitian square linear systems" => "solvers/unsymmetric.md", "Least-norm problems" => "solvers/ln.md", "Least-squares problems" => "solvers/ls.md", "Adjoint systems" => "solvers/as.md", - "Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md", - "Generalized saddle-point and unsymmetric partitioned systems" => "solvers/gsp.md"], + "Saddle-point and Hermitian quasi-definite systems" => "solvers/sp_sqd.md", + "Generalized saddle-point and non-Hermitian partitioned systems" => "solvers/gsp.md"], "In-place methods" => "inplace.md", + "Preconditioners" => "preconditioners.md", + "Storage requirements" => "storage.md", "GPU support" => "gpu.md", - "Warm start" => "warm_start.md", + "Warm-start" => "warm-start.md", "Factorization-free operators" => "factorization-free.md", "Callbacks" => "callbacks.md", "Performance tips" => "tips.md", diff --git a/docs/src/api.md b/docs/src/api.md index 7f2f4dff7..238c86f1a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -48,6 +48,7 @@ LnlqSolver CraigSolver CraigmrSolver GpmrSolver +FgmresSolver ``` ## Utilities @@ -60,4 +61,6 @@ Krylov.vec2str Krylov.ktypeof Krylov.kzeros Krylov.kones 
+Krylov.vector_to_matrix +Krylov.matrix_to_vector ``` diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md index f44018687..91e0b521c 100644 --- a/docs/src/callbacks.md +++ b/docs/src/callbacks.md @@ -1,43 +1,80 @@ -## Callbacks +# [Callbacks](@id callbacks) -Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. The callback should return `true` if the main loop should terminate, and `false` otherwise. +Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. +The callback should return `true` if the main loop should terminate, and `false` otherwise. If the method terminated because of the callback, the output status will be `"user-requested exit"`. -For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using +For example, if the user defines `minres_callback(solver::MinresSolver)`, it can be passed to the solver using ```julia -(x, stats) = minres(A, b, callback = my_callback) +(x, stats) = minres(A, b, callback = minres_callback) ``` -If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure: +If you need to write a callback that uses variables that are not in a `KrylovSolver`, use a closure: ```julia -function my_callback2(solver::MinresSolver, A, b, storage_vec, tol::Float64) - mul!(storage_vec, A, solver.x) - storage_vec .-= b - return norm(storage_vec) ≤ tol # tolerance based on the 2-norm of the residual +function custom_stopping_condition(solver::KrylovSolver, A, b, r, tol) + mul!(r, A, solver.x) + r .-= b # r := b - Ax + bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual + return bool end -storage_vec = similar(b) -(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, storage_vec, 0.1)) +cg_callback(solver) = custom_stopping_condition(solver, A, b, r, tol) +(x, stats) = cg(A, b, callback = cg_callback) ``` Alternatively, use a structure and 
make it callable: ```julia -mutable struct MyCallback3{S, M} - A::M - b::S - storage_vec::S - tol::Float64 +mutable struct CallbackWorkspace{T} + A::Matrix{T} + b::Vector{T} + r::Vector{T} + tol::T end -MyCallback3(A, b; tol = 0.1) = MyCallback3(A, b, similar(b), tol) -function (my_cb::MyCallback3)(solver) - mul!(my_cb.storage_vec, my_cb.A, solver.x) - my_cb.storage_vec .-= my_cb.b - return norm(my_cb.storage_vec) ≤ my_cb.tol # tolerance based on the 2-norm of the residual +function (workspace::CallbackWorkspace)(solver::KrylovSolver) + mul!(workspace.r, workspace.A, solver.x) + workspace.r .-= workspace.b + bool = norm(workspace.r) ≤ workspace.tol + return bool end -my_cb = MyCallback3(A, b; tol = 0.1) -(x, stats) = minres(A, b, callback = my_cb) +bicgstab_callback = CallbackWorkspace(A, b, r, tol) +(x, stats) = bicgstab(A, b, callback = bicgstab_callback) +``` + +Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method along the iterations. +We now illustrate how to store all iterates $x_k$ of the GMRES method. 
+ +```julia +S = Krylov.ktypeof(b) +global X = S[] # Storage for GMRES iterates + +function gmres_callback(solver) + z = solver.z + k = solver.inner_iter + nr = sum(1:k) + V = solver.V + R = solver.R + y = copy(z) + + # Solve Rk * yk = zk + for i = k : -1 : 1 + pos = nr + i - k + for j = k : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] + pos = pos - j + 1 + end + y[i] = y[i] / R[pos] + end + + # xk = Vk * yk + xk = sum(V[i] * y[i] for i = 1:k) + push!(X, xk) + + return false # We don't want to add new stopping conditions +end + +(x, stats) = gmres(A, b, callback = gmres_callback) ``` diff --git a/docs/src/examples/tricg.md b/docs/src/examples/tricg.md index e981c2f7e..61750de5f 100644 --- a/docs/src/examples/tricg.md +++ b/docs/src/examples/tricg.md @@ -14,7 +14,7 @@ N = diagm(0 => [5.0 * i for i = 1:n]) c = -b # [I A] [x] = [b] -# [Aᵀ -I] [y] [c] +# [Aᴴ -I] [y] [c] (x, y, stats) = tricg(A, b, c) K = [eye(m) A; A' -eye(n)] B = [b; c] @@ -23,7 +23,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [-I A] [x] = [b] -# [ Aᵀ I] [y] [c] +# [ Aᴴ I] [y] [c] (x, y, stats) = tricg(A, b, c, flip=true) K = [-eye(m) A; A' eye(n)] B = [b; c] @@ -32,7 +32,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [I A] [x] = [b] -# [Aᵀ I] [y] [c] +# [Aᴴ I] [y] [c] (x, y, stats) = tricg(A, b, c, spd=true) K = [eye(m) A; A' eye(n)] B = [b; c] @@ -41,7 +41,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [-I A] [x] = [b] -# [ Aᵀ -I] [y] [c] +# [ Aᴴ -I] [y] [c] (x, y, stats) = tricg(A, b, c, snd=true) K = [-eye(m) A; A' -eye(n)] B = [b; c] @@ -50,7 +50,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [τI A] [x] = [b] -# [ Aᵀ νI] [y] [c] +# [ Aᴴ νI] [y] [c] (τ, ν) = (1e-4, 1e2) (x, y, stats) = tricg(A, b, c, τ=τ, ν=ν) K = [τ*eye(m) A; A' ν*eye(n)] @@ -60,7 +60,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [M⁻¹ A ] [x] = [b] -# [Aᵀ -N⁻¹] [y] [c] +# [Aᴴ -N⁻¹] [y] [c] (x, 
y, stats) = tricg(A, b, c, M=M, N=N, verbose=1) K = [inv(M) A; A' -inv(N)] H = BlockDiagonalOperator(M, N) diff --git a/docs/src/examples/trimr.md b/docs/src/examples/trimr.md index 2aa48be1e..adc4e82e5 100644 --- a/docs/src/examples/trimr.md +++ b/docs/src/examples/trimr.md @@ -14,7 +14,7 @@ m, n = size(A) c = -b # [D A] [x] = [b] -# [Aᵀ 0] [y] [c] +# [Aᴴ 0] [y] [c] llt_D = cholesky(D) opD⁻¹ = LinearOperator(Float64, 5, 5, true, true, (y, v) -> ldiv!(y, llt_D, v)) opH⁻¹ = BlockDiagonalOperator(opD⁻¹, eye(n)) @@ -34,7 +34,7 @@ N = diagm(0 => [5.0 * i for i = 1:n]) c = -b # [I A] [x] = [b] -# [Aᵀ -I] [y] [c] +# [Aᴴ -I] [y] [c] (x, y, stats) = trimr(A, b, c) K = [eye(m) A; A' -eye(n)] B = [b; c] @@ -43,7 +43,7 @@ resid = norm(r) @printf("TriMR: Relative residual: %8.1e\n", resid) # [M A] [x] = [b] -# [Aᵀ -N] [y] [c] +# [Aᴴ -N] [y] [c] ldlt_M = ldl(M) ldlt_N = ldl(N) opM⁻¹ = LinearOperator(Float64, size(M,1), size(M,2), true, true, (y, v) -> ldiv!(y, ldlt_M, v)) diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md index aa0f51f07..0bff49d4c 100644 --- a/docs/src/factorization-free.md +++ b/docs/src/factorization-free.md @@ -1,3 +1,32 @@ +```@raw html + +``` + ## [Factorization-free operators](@id factorization-free) All methods are factorization-free, which means that you only need to provide operator-vector products. @@ -10,8 +39,11 @@ Some methods only require `A * v` products, whereas other ones also require `A' |:--------------------------------------:|:----------------------------------------:| | CG, CR | CGLS, CRLS, CGNE, CRMR | | SYMMLQ, CG-LANCZOS, MINRES, MINRES-QLP | LSLQ, LSQR, LSMR, LNLQ, CRAIG, CRAIGMR | -| DIOM, FOM, DQGMRES, GMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR | -| CGS, BICGSTAB | TriCG, TriMR, USYMLQR | +| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR | +| CGS, BICGSTAB | TriCG, TriMR | + +!!! info + GPMR is the only method that requires `A * v` and `B * w` products. 
Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`. @@ -27,9 +59,9 @@ where * `type` is the operator element type; * `nrow` and `ncol` are its dimensions; * `symmetric` and `hermitian` should be set to `true` or `false`; -* `prod(y, v)`, `tprod(y, w)` and `ctprod(u, w)` are called when writing `mul!(y, A, v)`, `mul!(y, tranpose(A), w)`, and `mul!(y, A', u)`, respectively. +* `prod(y, v)`, `tprod(y, w)` and `ctprod(u, w)` are called when writing `mul!(y, A, v)`, `mul!(y, transpose(A), w)`, and `mul!(y, A', u)`, respectively. -See the [tutorial](https://juliasmoothoptimizers.github.io/tutorials/introduction-to-linear-operators/) and the detailed [documentation](https://juliasmoothoptimizers.github.io/LinearOperators.jl/dev/) for more informations on `LinearOperators.jl`. +See the [tutorial](https://juliasmoothoptimizers.github.io/tutorials/introduction-to-linear-operators/) and the detailed [documentation](https://juliasmoothoptimizers.github.io/LinearOperators.jl/dev/) for more information on `LinearOperators.jl`. ## Examples diff --git a/docs/src/gpu.md b/docs/src/gpu.md index 4c9887f24..33b76b421 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -1,38 +1,51 @@ -## GPU support +# [GPU support](@id gpu) -All solvers in Krylov.jl can be used with `CuArrays` and allow computations with Nvidia GPU. Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to GPU format (`CuMatrix` and `CuVector`). +Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are highly parallelizable. + +The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia. +Those allow the implementations to be specialized automatically by the compiler for both CPU and GPU. 
+Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl). + +## Nvidia GPUs + +All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations on Nvidia GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`CuMatrix` and `CuVector`). ```julia using CUDA, Krylov -# CPU Arrays -A_cpu = rand(20, 20) -b_cpu = rand(20) +if CUDA.functional() + # CPU Arrays + A_cpu = rand(20, 20) + b_cpu = rand(20) -# GPU Arrays -A_gpu = CuMatrix(A_cpu) -b_gpu = CuVector(b_cpu) + # GPU Arrays + A_gpu = CuMatrix(A_cpu) + b_gpu = CuVector(b_cpu) -# Solve a square and dense system on GPU -x, stats = bilq(A_gpu, b_gpu) + # Solve a square and dense system on an Nivida GPU + x, stats = bilq(A_gpu, b_gpu) +end ``` -Sparse matrices have a specific storage on GPU (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`): +Sparse matrices have a specific storage on Nvidia GPUs (`CuSparseMatrixCSC`, `CuSparseMatrixCSR` or `CuSparseMatrixCOO`): ```julia using CUDA, Krylov using CUDA.CUSPARSE, SparseArrays -# CPU Arrays -A_cpu = sprand(200, 100, 0.3) -b_cpu = rand(200) +if CUDA.functional() + # CPU Arrays + A_cpu = sprand(200, 100, 0.3) + b_cpu = rand(200) -# GPU Arrays -A_gpu = CuSparseMatrixCSC(A_cpu) -b_gpu = CuVector(b_cpu) + # GPU Arrays + A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) -# Solve a rectangular and sparse system on GPU -x, stats = lsmr(A_gpu, b_gpu) + # Solve a rectangular and sparse system on an Nvidia GPU + x, stats = lsmr(A_gpu, b_gpu) +end ``` Optimized operator-vector products that exploit GPU features can be also used by means of linear operators. 
@@ -46,64 +59,168 @@ can be applied directly on GPU thanks to efficient operators that take advantage using SparseArrays, Krylov, LinearOperators using CUDA, CUDA.CUSPARSE -# Transfer the linear system from the CPU to the GPU -A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu) -b_gpu = CuVector(b_cpu) +if CUDA.functional() + # Transfer the linear system from the CPU to the GPU + A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + + # IC(0) decomposition LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices + P = ic02(A_gpu) + + # Additional vector required for solving triangular systems + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + + # Solve Py = x + function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, LowerTriangular(P), x) # Forward substitution with L + ldiv!(y, LowerTriangular(P)', z) # Backward substitution with Lᴴ + return y + end + + function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, UpperTriangular(P)', x) # Forward substitution with L + ldiv!(y, UpperTriangular(P), z) # Backward substitution with Lᴴ + return y + end + + # Operator that model P⁻¹ + symmetric = hermitian = true + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z)) + + # Solve an Hermitian positive definite system with an IC(0) preconditioner on GPU + x, stats = cg(A_gpu, b_gpu, M=opM) +end +``` -# LLᵀ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices -P = ic02(A_gpu, 'O') +### Example with a general square system + +```julia +using SparseArrays, Krylov, LinearOperators +using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER -# Solve Py = x -function ldiv!(y, P, x) - copyto!(y, x) # Variant for CuSparseMatrixCSR - sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O') - sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O') - return y +if CUDA.functional() + # Optional -- Compute a permutation vector p such that A[:,p] has no zero 
diagonal + p = zfd(A_cpu) + p .+= 1 + A_cpu = A_cpu[:,p] + + # Transfer the linear system from the CPU to the GPU + A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + + # ILU(0) decomposition LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices + P = ilu02(A_gpu) + + # Additional vector required for solving triangular systems + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + + # Solve Py = x + function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, UnitLowerTriangular(P), x) # Forward substitution with L + ldiv!(y, UpperTriangular(P), z) # Backward substitution with U + return y + end + + function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, LowerTriangular(P), x) # Forward substitution with L + ldiv!(y, UnitUpperTriangular(P), z) # Backward substitution with U + return y + end + + # Operator that model P⁻¹ + symmetric = hermitian = false + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z)) + + # Solve a non-Hermitian system with an ILU(0) preconditioner on GPU + x̄, stats = bicgstab(A_gpu, b_gpu, M=opM) + + # Recover the solution of Ax = b with the solution of A[:,p]x̄ = b + invp = invperm(p) + x = x̄[invp] end +``` + +## AMD GPUs + +All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`ROCMatrix` and `ROCVector`). 
+ +```julia +using Krylov, AMDGPU -# Operator that model P⁻¹ -n = length(b_gpu) -T = eltype(b_gpu) -symmetric = hermitian = true -opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x)) +if AMDGPU.functional() + # CPU Arrays + A_cpu = rand(ComplexF64, 20, 20) + A_cpu = A_cpu + A_cpu' + b_cpu = rand(ComplexF64, 20) -# Solve a symmetric positive definite system with an incomplete Cholesky preconditioner on GPU -(x, stats) = cg(A_gpu, b_gpu, M=opM) + A_gpu = ROCMatrix(A_cpu) + b_gpu = ROCVector(b_cpu) + + # Solve a dense Hermitian system on an AMD GPU + x, stats = minres(A_gpu, b_gpu) +end ``` -### Example with a general square system +!!! info + The library `rocSPARSE` is not interfaced yet in AMDGPU.jl and only dense linear systems are supported. + +## Intel GPUs + +All solvers in Krylov.jl can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations on Intel GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`oneMatrix` and `oneVector`). 
```julia -using SparseArrays, Krylov, LinearOperators -using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER +using Krylov, oneAPI + +if oneAPI.functional() + T = Float32 # oneAPI.jl also works with ComplexF32 + m = 20 + n = 10 + + # CPU Arrays + A_cpu = rand(T, m, n) + b_cpu = rand(T, m) -# Optional -- Compute a permutation vector p such that A[p,:] has no zero diagonal -p = zfd(A_cpu, 'O') -p .+= 1 -A_cpu = A_cpu[p,:] -b_cpu = b_cpu[p] - -# Transfer the linear system from the CPU to the GPU -A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu) -b_gpu = CuVector(b_cpu) - -# LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices -P = ilu02(A_gpu, 'O') - -# Solve Py = x -function ldiv!(y, P, x) - copyto!(y, x) # Variant for CuSparseMatrixCSR - sv2!('N', 'L', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'U', 1.0, P, y, 'O') - sv2!('N', 'U', 'U', 1.0, P, y, 'O') # sv2!('N', 'U', 'N', 1.0, P, y, 'O') - return y + # GPU Arrays + A_gpu = oneMatrix(A_cpu) + b_gpu = oneVector(b_cpu) + + # Solve a dense least-squares problem on an Intel GPU + x, stats = lsqr(A_gpu, b_gpu) end +``` -# Operator that model P⁻¹ -n = length(b_gpu) -T = eltype(b_gpu) -symmetric = hermitian = false -opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x)) +!!! note + The library `oneMKL` is interfaced in oneAPI.jl and accelerates linear algebra operations on Intel GPUs. Only dense linear systems are supported for the time being because sparse linear algebra routines are not interfaced yet. -# Solve an unsymmetric system with an incomplete LU preconditioner on GPU -(x, stats) = bicgstab(A_gpu, b_gpu, M=opM) +## Apple M1 GPUs + +All solvers in Krylov.jl can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations on Apple M1 GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`MtlMatrix` and `MtlVector`). 
+ +```julia +using Krylov, Metal + +T = Float32 # Metal.jl also works with ComplexF32 +n = 10 +m = 20 + +# CPU Arrays +A_cpu = rand(T, n, m) +b_cpu = rand(T, n) + +# GPU Arrays +A_gpu = MtlMatrix(A_cpu) +b_gpu = MtlVector(b_cpu) + +# Solve a dense least-norm problem on an Apple M1 GPU +x, stats = craig(A_gpu, b_gpu) ``` + +!!! warning + Metal.jl is under heavy development and is considered experimental for now. diff --git a/docs/src/graphics/arnoldi.png b/docs/src/graphics/arnoldi.png new file mode 100644 index 000000000..9ef8bd3a3 Binary files /dev/null and b/docs/src/graphics/arnoldi.png differ diff --git a/docs/src/graphics/golub_kahan.png b/docs/src/graphics/golub_kahan.png new file mode 100644 index 000000000..32fc3d7b8 Binary files /dev/null and b/docs/src/graphics/golub_kahan.png differ diff --git a/docs/src/graphics/hermitian_lanczos.png b/docs/src/graphics/hermitian_lanczos.png new file mode 100644 index 000000000..c70082e72 Binary files /dev/null and b/docs/src/graphics/hermitian_lanczos.png differ diff --git a/docs/src/graphics/montoison_orban.png b/docs/src/graphics/montoison_orban.png new file mode 100644 index 000000000..5a14eda04 Binary files /dev/null and b/docs/src/graphics/montoison_orban.png differ diff --git a/docs/src/graphics/nonhermitian_lanczos.png b/docs/src/graphics/nonhermitian_lanczos.png new file mode 100644 index 000000000..b8d83961c Binary files /dev/null and b/docs/src/graphics/nonhermitian_lanczos.png differ diff --git a/docs/src/graphics/saunders_simon_yip.png b/docs/src/graphics/saunders_simon_yip.png new file mode 100644 index 000000000..c3acfd181 Binary files /dev/null and b/docs/src/graphics/saunders_simon_yip.png differ diff --git a/docs/src/index.md b/docs/src/index.md index ce657436d..1cc2c3302 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -22,7 +22,7 @@ should be solved when **_b_** is not in the range of **_A_** (inconsistent syste * **_A_** is square and singular, * **_A_** is tall and thin. 
-Underdetermined sytems are less common but also occur. +Underdetermined systems are less common but also occur. If there are infinitely many such **_x_** (because **_A_** is column rank-deficient), one with minimum norm is identified @@ -36,36 +36,36 @@ If there are infinitely many such **_x_** (because **_A_** is column rank-defici \min \|x\| \quad \text{subject to} \quad Ax = b ``` -sould be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. +should be solved when **_A_** is column rank-deficient but **_b_** is in the range of **_A_** (consistent systems), regardless of the shape of **_A_**. This situation mainly occurs when * **_A_** is square and singular, * **_A_** is short and wide. -Overdetermined sytems are less common but also occur. +Overdetermined systems are less common but also occur. 4 - Adjoint systems ```math - Ax = b \quad \text{and} \quad A^T y = c + Ax = b \quad \text{and} \quad A^H y = c ``` where **_A_** can have any shape. -5 - Saddle-point and symmetric quasi-definite (SQD) systems +5 - Saddle-point and Hermitian quasi-definite systems ```math - \begin{bmatrix} M & \phantom{-}A \\ A^T & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right) + \begin{bmatrix} M & \phantom{-}A \\ A^H & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right) ``` where **_A_** can have any shape. 
-6 - Generalized saddle-point and unsymmetric partitioned systems +6 - Generalized saddle-point and non-Hermitian partitioned systems ```math \begin{bmatrix} M & A \\ B & N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix} ``` -where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**. +where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**. **_A_**, **_B_**, **_b_** and **_c_** must be all nonzero. Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because: @@ -92,3 +92,10 @@ julia> ] pkg> add Krylov pkg> test Krylov ``` + +# Bug reports and discussions + +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues). +Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please. + +If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome. diff --git a/docs/src/inplace.md b/docs/src/inplace.md index 71a4e25de..9950575fe 100644 --- a/docs/src/inplace.md +++ b/docs/src/inplace.md @@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver` For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU. !!! note - `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). 
+ `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). The workspace is always the first argument of the in-place methods: diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md new file mode 100644 index 000000000..fd203dddb --- /dev/null +++ b/docs/src/preconditioners.md @@ -0,0 +1,237 @@ +# [Preconditioners](@id preconditioners) + +The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear system $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic. +Preconditioning can be used to reduce the condition number of the problem or cluster its eigenvalues or singular values for instance. + +The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account. +Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations. + +The construction of a preconditioner necessitates trade-offs because we need to apply it at least once per iteration within a Krylov method. +Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense. + +There exist three variants of preconditioning: + +| Left preconditioning | Two-sided preconditioning | Right preconditioning | +|:----------------------------------:|:----------------------------------------------------------------------:|:--------------------------------------------:| +| $P_{\ell}^{-1}Ax = P_{\ell}^{-1}b$ | $P_{\ell}^{-1}AP_r^{-1}y = P_{\ell}^{-1}b~~\text{with}~~x = P_r^{-1}y$ | $AP_r^{-1}y = b~~\text{with}~~x = P_r^{-1}y$ | + +where $P_{\ell}$ and $P_r$ are square and nonsingular. 
+
+In Krylov.jl, we call $P_{\ell}^{-1}$ and $P_r^{-1}$ the preconditioners and we assume that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available.
+Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov solvers.
+
+## How to use preconditioners in Krylov.jl?
+
+!!! info
+    - A preconditioner need only support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+    - The default value of a preconditioner in Krylov.jl is the identity operator `I`.
+
+### Square non-Hermitian linear systems
+
+Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`FGMRES`](@ref fgmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+
+A Krylov method dedicated to non-Hermitian linear systems allows the three variants of preconditioning.
+
+| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+### Hermitian linear systems
+
+Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+
+When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-H}y = L^{-1}b$ with $x = L^{-H}y$.
+Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^H$ that maintains hermiticity.
+However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its inverse directly.
+ +| Preconditioners | $P_c^{-1}$ | $P_c$ | +|:---------------:|:-------------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | + +!!! warning + The preconditioner `M` must be hermitian and positive definite. + +### Linear least-squares problems + +Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). + +| Formulation | Without preconditioning | With preconditioning | +|:---------------------:|:------------------------------------:|:-------------------------------------------:| +| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ | +| Normal equation | $A^HAx = A^Hb$ | $A^HE^{-1}Ax = A^HE^{-1}b$ | +| Augmented system | $\begin{bmatrix} I & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | + +[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems. 
+ +| Formulation | Without preconditioning | With preconditioning | +|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:| +| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ | +| Normal equation | $(A^HA + \lambda^2 I)x = A^Hb$ | $(A^HE^{-1}A + \lambda^2 F)x = A^HE^{-1}b$ | +| Augmented system | $\begin{bmatrix} I & A \\ A^H & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | + +| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ | +|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` | + +!!! warning + The preconditioners `M` and `N` must be hermitian and positive definite. + +### Linear least-norm problems + +Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr). 
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Hy = b~~\text{with}~~x = A^Hy$ | $AF^{-1}A^Hy = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+[`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr) also handle penalized minimum-norm problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| Normal equation | $(AA^H + \lambda^2 I)y = b~~\text{with}~~x = A^Hy$ | $(AF^{-1}A^H + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+    The preconditioners `M` and `N` must be hermitian and positive definite.
+
+### Saddle-point and Hermitian quasi-definite systems
+
+[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure
+```math
+  \begin{bmatrix} \tau E & \phantom{-}A \\ A^H & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+```
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+    The preconditioners `M` and `N` must be hermitian and positive definite.
+
+### Generalized saddle-point and non-Hermitian partitioned systems
+
+[`GPMR`](@ref gpmr) can take advantage of the structure of general square systems $Kz = d$ with the 2x2 block structure
+```math
+  \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+```
+| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
+|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
+| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` |
+
+!!! note
+    Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
+ +## Packages that provide preconditioners + +- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions. +- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in. +- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices. +- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners. +- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices. +- [BasicLU.jl](https://github.com/JuliaSmoothOptimizers/BasicLU.jl) uses a sparse LU factorization to compute a maximum volume basis that can be used as a preconditioner for least-norm and least-squares problems. + +## Examples + +```julia +using Krylov +n, m = size(A) +d = [A[i,i] ≠ 0 ? 1 / abs(A[i,i]) : 1 for i=1:n] # Jacobi preconditioner +P⁻¹ = diagm(d) +x, stats = symmlq(A, b, M=P⁻¹) +``` + +```julia +using Krylov +n, m = size(A) +d = [1 / norm(A[:,i]) for i=1:m] # diagonal preconditioner +P⁻¹ = diagm(d) +x, stats = minres(A, b, M=P⁻¹) +``` + +```julia +using IncompleteLU, Krylov +Pℓ = ilu(A) +x, stats = gmres(A, b, M=Pℓ, ldiv=true) # left preconditioning +``` + +```julia +using LimitedLDLFactorizations, Krylov +P = lldl(A) +P.D .= abs.(P.D) +x, stats = cg(A, b, M=P, ldiv=true) # centered preconditioning +``` + +```julia +using ILUZero, Krylov +Pᵣ = ilu0(A) +x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning +``` + +```julia +using LDLFactorizations, Krylov + +M = ldl(E) +N = ldl(F) + +# [E A] [x] = [b] +# [Aᴴ -F] [y] [c] +x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true) +``` + +```julia +using SuiteSparse, Krylov +import LinearAlgebra.ldiv! 
+ +M = cholesky(E) + +# ldiv! is not implemented for the sparse Cholesky factorization (SuiteSparse.CHOLMOD) +ldiv!(y::Vector{T}, F::SuiteSparse.CHOLMOD.Factor{T}, x::Vector{T}) where T = (y .= F \ x) + +# [E A] [x] = [b] +# [Aᴴ 0] [y] [c] +x, y, stats = trimr(A, b, c, M=M, sp=true, ldiv=true) +``` + +```julia +using Krylov + +C = lu(M) + +# [M A] [x] = [b] +# [B 0] [y] [c] +x, y, stats = gpmr(A, B, b, c, C=C, gsp=true, ldiv=true) +``` + +```julia +import BasicLU +using LinearOperators, Krylov + +# Least-squares problem +m, n = size(A) +Aᴴ = sparse(A') +basis, B = BasicLU.maxvolbasis(Aᴴ) +opA = LinearOperator(A) +B⁻ᴴ = LinearOperator(Float64, n, n, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N'))) + +d, stats = lsmr(opA * B⁻ᴴ, b) # min ‖AB⁻ᴴd - b‖₂ +x = B⁻ᴴ * d # recover the solution of min ‖Ax - b‖₂ + +# Least-norm problem +m, n = size(A) +basis, B = maxvolbasis(A) +opA = LinearOperator(A) +B⁻¹ = LinearOperator(Float64, m, m, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T'))) + +x, y, stats = craigmr(B⁻¹ * opA, B⁻¹ * b) # min ‖x‖₂ s.t. B⁻¹Ax = B⁻¹b +``` diff --git a/docs/src/processes.md b/docs/src/processes.md new file mode 100644 index 000000000..e9d4066d2 --- /dev/null +++ b/docs/src/processes.md @@ -0,0 +1,334 @@ +```@raw html + +``` + +# [Krylov processes](@id krylov-processes) + +Krylov processes are the foundation of Krylov methods, they generate bases of Krylov subspaces. +Depending on the Krylov subspaces generated, Krylov processes are more or less specialized for a subset of linear problems. +The following table summarizes the most relevant processes for each linear problem. 
+ +| Linear problems | Processes | +|:--------------------------------------------------------------:|:---------------------------------:| +| Hermitian linear systems | Hermitian Lanczos | +| Square Non-Hermitian linear systems | Non-Hermitian Lanczos -- Arnoldi | +| Least-squares problems | Golub-Kahan -- Saunders-Simon-Yip | +| Least-norm problems | Golub-Kahan -- Saunders-Simon-Yip | +| Saddle-point and Hermitian quasi-definite systems | Golub-Kahan -- Saunders-Simon-Yip | +| Generalized saddle-point and non-Hermitian partitioned systems | Montoison-Orban | + +### Notation + +For a matrix $A$, $A^H$ denotes the conjugate transpose of $A$. +It coincides with $A^T$, the transpose of $A$, for real matrices. +Define $V_k := \begin{bmatrix} v_1 & \ldots & v_k \end{bmatrix} \enspace$ and $\enspace U_k := \begin{bmatrix} u_1 & \ldots & u_k \end{bmatrix}$. + +For a matrix $C \in \mathbb{C}^{n \times n}$ and a vector $t \in \mathbb{C}^{n}$, the $k$-th Krylov subspace generated by $C$ and $t$ is +```math +\mathcal{K}_k(C, t) := +\left\{\sum_{i=0}^{k-1} \omega_i C^i t \, \middle \vert \, \omega_i \in \mathbb{C},~0 \le i \le k-1 \right\}. +``` + +For matrices $C \in \mathbb{C}^{n \times n} \enspace$ and $\enspace T \in \mathbb{C}^{n \times p}$, the $k$-th block Krylov subspace generated by $C$ and $T$ is +```math +\mathcal{K}_k^{\square}(C, T) := +\left\{\sum_{i=0}^{k-1} C^i T \, \Omega_i \, \middle \vert \, \Omega_i \in \mathbb{C}^{p \times p},~0 \le i \le k-1 \right\}. 
+```
+
+## Hermitian Lanczos
+
+![hermitian_lanczos](./graphics/hermitian_lanczos.png)
+
+After $k$ iterations of the Hermitian Lanczos process, the situation may be summarized as
+```math
+\begin{align*}
+  A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+  V_k^H V_k &= I_k,
+\end{align*}
+```
+where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$,
+```math
+T_k =
+\begin{bmatrix}
+  \alpha_1 & \beta_2 & & \\
+  \beta_2 & \alpha_2 & \ddots & \\
+  & \ddots & \ddots & \beta_k \\
+  & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+  T_{k} \\
+  \beta_{k+1} e_{k}^T
+\end{bmatrix}.
+```
+Note that $T_{k+1,k}$ is a real tridiagonal matrix even if $A$ is a complex matrix.
+
+The function [`hermitian_lanczos`](@ref hermitian_lanczos) returns $V_{k+1}$ and $T_{k+1,k}$.
+
+Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift).
+ +```@docs +hermitian_lanczos +``` + +## Non-Hermitian Lanczos + +![nonhermitian_lanczos](./graphics/nonhermitian_lanczos.png) + +After $k$ iterations of the non-Hermitian Lanczos process (also named the Lanczos biorthogonalization process), the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A^H U_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ + V_k^H U_k &= U_k^H V_k = I_k, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A,b)$ and $\mathcal{K}_k (A^H,c)$, respectively, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \gamma_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \gamma_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix} +, \qquad +T_{k,k+1} = +\begin{bmatrix} + T_{k} & \gamma_{k+1} e_k +\end{bmatrix}. +``` + +The function [`nonhermitian_lanczos`](@ref nonhermitian_lanczos) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$. + +Related methods: [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`CGS`](@ref cgs) and [`BICGSTAB`](@ref bicgstab). + +!!! note + The scaling factors used in our implementation are $\beta_k = |u_k^H v_k|^{\tfrac{1}{2}}$ and $\gamma_k = (u_k^H v_k) / \beta_k$. + With these scaling factors, the non-Hermitian Lanczos process coincides with the Hermitian Lanczos process when $A = A^H$ and $b = c$. 
+ +```@docs +nonhermitian_lanczos +``` + +## Arnoldi + +![arnoldi](./graphics/arnoldi.png) + +After $k$ iterations of the Arnoldi process, the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\ + V_k^H V_k &= I_k, +\end{align*} +``` +where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$, +```math +H_k = +\begin{bmatrix} + h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\ + h_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & h_{k-1,k} \\ + & & h_{k,k-1} & h_{k,k} +\end{bmatrix} +, \qquad +H_{k+1,k} = +\begin{bmatrix} + H_{k} \\ + h_{k+1,k} e_{k}^T +\end{bmatrix}. +``` + +The function [`arnoldi`](@ref arnoldi) returns $V_{k+1}$ and $H_{k+1,k}$. + +Related methods: [`DIOM`](@ref diom), [`FOM`](@ref fom), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres) and [`FGMRES`](@ref fgmres). + +!!! note + The Arnoldi process coincides with the Hermitian Lanczos process when $A$ is Hermitian. + +```@docs +arnoldi +``` + +## Golub-Kahan + +![golub_kahan](./graphics/golub_kahan.png) + +After $k$ iterations of the Golub-Kahan bidiagonalization process, the situation may be summarized as +```math +\begin{align*} + A V_k &= U_{k+1} B_k, \\ + A^H U_{k+1} &= V_k B_k^H + \alpha_{k+1} v_{k+1} e_{k+1}^T = V_{k+1} L_{k+1}^H, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A^HA,A^Hb)$ and $\mathcal{K}_k (AA^H,b)$, respectively, +```math +L_k = +\begin{bmatrix} + \alpha_1 & & & \\ + \beta_2 & \alpha_2 & & \\ + & \ddots & \ddots & \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +B_k = +\begin{bmatrix} + \alpha_1 & & & \\ + \beta_2 & \alpha_2 & & \\ + & \ddots & \ddots & \\ + & & \beta_k & \alpha_k \\ + & & & \beta_{k+1} \\ +\end{bmatrix} += +\begin{bmatrix} + L_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix}. +``` +Note that $L_k$ is a real bidiagonal matrix even if $A$ is a complex matrix. 
+ +The function [`golub_kahan`](@ref golub_kahan) returns $V_{k+1}$, $U_{k+1}$ and $L_{k+1}$. + +Related methods: [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig), [`CRAIGMR`](@ref craigmr), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). + +!!! note + The Golub-Kahan process coincides with the Hermitian Lanczos process applied to the normal equations $A^HA x = A^Hb$ and $AA^H x = b$. + It is also related to the Hermitian Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial vector $\begin{bmatrix} b \\ 0 \end{bmatrix}$. + +```@docs +golub_kahan +``` + +## Saunders-Simon-Yip + +![saunders_simon_yip](./graphics/saunders_simon_yip.png) + +After $k$ iterations of the Saunders-Simon-Yip process (also named the orthogonal tridiagonalization process), the situation may be summarized as +```math +\begin{align*} + A U_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A^H V_k &= U_k T_k^H + \gamma_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \gamma_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \gamma_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix} +, \qquad +T_{k,k+1} = +\begin{bmatrix} + T_{k} & \gamma_{k+1} e_{k} +\end{bmatrix}. +``` + +The function [`saunders_simon_yip`](@ref saunders_simon_yip) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$. + +Related methods: [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr), [`TriLQR`](@ref trilqr), [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr). + +```@docs +saunders_simon_yip +``` + +!!! 
note
+    The Saunders-Simon-Yip process is equivalent to the block-Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+
+## Montoison-Orban
+
+![montoison_orban](./graphics/montoison_orban.png)
+
+After $k$ iterations of the Montoison-Orban process (also named the orthogonal Hessenberg reduction process), the situation may be summarized as
+```math
+\begin{align*}
+  A U_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\
+  B V_k &= U_k F_k + f_{k+1,k} u_{k+1} e_k^T = U_{k+1} F_{k+1,k}, \\
+  V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$,
+```math
+H_k =
+\begin{bmatrix}
+  h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\
+  h_{2,1}~ & \ddots~ & \ddots & \vdots \\
+  & \ddots~ & \ddots & h_{k-1,k} \\
+  & & h_{k,k-1} & h_{k,k}
+\end{bmatrix}
+, \qquad
+F_k =
+\begin{bmatrix}
+  f_{1,1}~ & f_{1,2}~ & \ldots & f_{1,k} \\
+  f_{2,1}~ & \ddots~ & \ddots & \vdots \\
+  & \ddots~ & \ddots & f_{k-1,k} \\
+  & & f_{k,k-1} & f_{k,k}
+\end{bmatrix},
+```
+```math
+H_{k+1,k} =
+\begin{bmatrix}
+  H_{k} \\
+  h_{k+1,k} e_{k}^T
+\end{bmatrix}
+, \qquad
+F_{k+1,k} =
+\begin{bmatrix}
+  F_{k} \\
+  f_{k+1,k} e_{k}^T
+\end{bmatrix}.
+```
+
+The function [`montoison_orban`](@ref montoison_orban) returns $V_{k+1}$, $H_{k+1,k}$, $U_{k+1}$ and $F_{k+1,k}$.
+
+Related methods: [`GPMR`](@ref gpmr).
+
+!!! note
+    The Montoison-Orban process is equivalent to the block-Arnoldi process applied to $\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+    It also coincides with the Saunders-Simon-Yip process when $B = A^H$.
+ +```@docs +montoison_orban +``` diff --git a/docs/src/reference.md b/docs/src/reference.md index 0896e1639..be0ac5288 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -10,5 +10,7 @@ Krylov.FloatOrComplex Krylov.niterations Krylov.Aprod Krylov.Atprod +Krylov.kstdout +Krylov.extract_parameters Base.show ``` diff --git a/docs/src/solvers/gsp.md b/docs/src/solvers/gsp.md index 10aaccbe0..33c580b8a 100644 --- a/docs/src/solvers/gsp.md +++ b/docs/src/solvers/gsp.md @@ -1,5 +1,5 @@ ```@meta -# Generalized saddle-point and unsymmetric partitioned systems +# Generalized saddle-point and non-Hermitian partitioned systems ``` ## GPMR diff --git a/docs/src/solvers/ln.md b/docs/src/solvers/ln.md index c5396ffdd..b638b8247 100644 --- a/docs/src/solvers/ln.md +++ b/docs/src/solvers/ln.md @@ -36,3 +36,10 @@ craig! craigmr craigmr! ``` + +## USYMLQ + +```@docs +usymlq +usymlq! +``` diff --git a/docs/src/solvers/ls.md b/docs/src/solvers/ls.md index f77057d94..fecfbc417 100644 --- a/docs/src/solvers/ls.md +++ b/docs/src/solvers/ls.md @@ -36,3 +36,10 @@ lsqr! lsmr lsmr! ``` + +## USYMQR + +```@docs +usymqr +usymqr! 
+``` diff --git a/docs/src/solvers/sid.md b/docs/src/solvers/sid.md index 1bd459cd2..e911681be 100644 --- a/docs/src/solvers/sid.md +++ b/docs/src/solvers/sid.md @@ -1,5 +1,5 @@ ```@meta -# Symmetric indefinite linear systems +# Hermitian indefinite linear systems ``` ## SYMMLQ diff --git a/docs/src/solvers/sp_sqd.md b/docs/src/solvers/sp_sqd.md index 518684b5b..4ee4ab09b 100644 --- a/docs/src/solvers/sp_sqd.md +++ b/docs/src/solvers/sp_sqd.md @@ -1,5 +1,5 @@ ```@meta -# Saddle-point and symmetric quasi-definite systems +# Saddle-point and Hermitian quasi-definite systems ``` ## TriCG diff --git a/docs/src/solvers/spd.md b/docs/src/solvers/spd.md index 79bb6e9e8..aebda285b 100644 --- a/docs/src/solvers/spd.md +++ b/docs/src/solvers/spd.md @@ -1,5 +1,5 @@ ```@meta -# Symmetric positive definite linear systems +# Hermitian positive definite linear systems ``` ## CG diff --git a/docs/src/solvers/unsymmetric.md b/docs/src/solvers/unsymmetric.md index 280908ea5..c9e77f787 100644 --- a/docs/src/solvers/unsymmetric.md +++ b/docs/src/solvers/unsymmetric.md @@ -1,5 +1,5 @@ ```@meta -# Unsymmetric linear systems +# Non-Hermitian square linear systems ``` ## BiLQ @@ -16,20 +16,6 @@ qmr qmr! ``` -## USYMLQ - -```@docs -usymlq -usymlq! -``` - -## USYMQR - -```@docs -usymqr -usymqr! -``` - ## CGS ```@docs @@ -71,3 +57,10 @@ dqgmres! gmres gmres! ``` + +## FGMRES + +```@docs +fgmres +fgmres! +``` diff --git a/docs/src/storage.md b/docs/src/storage.md new file mode 100644 index 000000000..903cc0558 --- /dev/null +++ b/docs/src/storage.md @@ -0,0 +1,152 @@ +```@meta +# Thanks Morten Piibeleht for the hack with the tables! +``` + +```@raw html + +``` + +# [Storage requirements](@id storage-requirements) + +This section provides the storage requirements of all Krylov methods available in Krylov.jl. + +### Notation + +We denote by $m$ and $n$ the number of rows and columns of the linear problem. +The memory parameter of DIOM, FOM, DQGMRES, GMRES, FGMRES and GPMR is $k$. 
+The number of shifts of CG-LANCZOS-SHIFT is $p$.
+
+## Theoretical storage requirements
+
+The following tables provide the number of coefficients that must be allocated for each Krylov method.
+The coefficients have the same type as those that compose the linear problem we seek to solve.
+Each table summarizes the storage requirements of Krylov methods recommended for a specific linear problem.
+
+#### Hermitian positive definite linear systems
+
+| Methods | [`CG`](@ref cg) | [`CR`](@ref cr) | [`CG-LANCZOS`](@ref cg_lanczos) | [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift) |
+|:-------:|:---------------:|:---------------:|:-------------------------------:|:-------------------------------------------:|
+| Storage | $4n$ | $5n$ | $5n$ | $3n + 2np + 5p$ |
+
+#### Hermitian indefinite linear systems
+
+| Methods | [`SYMMLQ`](@ref symmlq) | [`MINRES`](@ref minres) | [`MINRES-QLP`](@ref minres_qlp) |
+|:-------:|:-----------------------:|:-----------------------:|:-------------------------------:|
+| Storage | $5n$ | $6n$ | $6n$ |
+
+#### Non-Hermitian square linear systems
+
+| Methods | [`CGS`](@ref cgs) | [`BICGSTAB`](@ref bicgstab) | [`BiLQ`](@ref bilq) | [`QMR`](@ref qmr) |
+|:-------:|:-----------------:|:---------------------------:|:-------------------:|:-----------------:|
+| Storage | $6n$ | $6n$ | $8n$ | $9n$ |
+
+| Methods | [`DIOM`](@ref diom) | [`DQGMRES`](@ref dqgmres) |
+|:-------:|:-------------------:|:-------------------------:|
+| Storage | $n(2k+1) + 2k - 1$ | $n(2k+2) + 3k + 1$ |
+
+| Methods | [`FOM`](@ref fom) | [`GMRES`](@ref gmres) | [`FGMRES`](@ref fgmres) |
+|:-------:|:--------------------------------------------------:|:---------------------------------------:|:----------------------------------------:|
+| Storage$\dfrac{}{}$ | $\!n(2+k) +2k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+k) + 3k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+2k) + 3k + \dfrac{k(k + 1)}{2}\!$ |
+
+#### Least-norm problems
+
+| Methods | [`USYMLQ`](@ref usymlq) | [`CGNE`](@ref cgne) |
[`CRMR`](@ref crmr) | [`LNLQ`](@ref lnlq) | [`CRAIG`](@ref craig) | [`CRAIGMR`](@ref craigmr) | +|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:---------------------:|:-------------------------:| +| Storage | $5n + 3m$ | $3n + 2m$ | $3n + 2m$ | $3n + 4m$ | $3n + 4m$ | $4n + 5m$ | + +#### Least-squares problems + +| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) | +|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:| +| Storage | $6n + 3m$ | $3n + 2m$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ | + +#### Adjoint systems + +| Methods | [`BiLQR`](@ref bilqr) | [`TriLQR`](@ref trilqr) | +|:-------:|:---------------------:|:-----------------------:| +| Storage | $11n$ | $6m + 5n$ | + +#### Saddle-point and Hermitian quasi-definite systems + +| Methods | [`TriCG`](@ref tricg) | [`TriMR`](@ref trimr) | +|:--------:|:---------------------:|:---------------------:| +| Storage | $6n + 6m$ | $8n + 8m$ | + +#### Generalized saddle-point and non-Hermitian partitioned systems + +| Method | [`GPMR`](@ref gpmr) | +|:-------:|:-------------------------:| +| Storage | $(2+k)(n+m) + 2k^2 + 11k$ | + +## Practical storage requirements + +Each method has its own `KrylovSolver` that contains all the storage needed by the method. +In the REPL, the size in bytes of each attribute and the total amount of memory allocated by the solver are displayed when we show a `KrylovSolver`. + +```@example storage +using Krylov + +m = 5000 +n = 12000 +A = rand(Float64, m, n) +b = rand(Float64, m) +solver = LsmrSolver(A, b) +show(stdout, solver, show_stats=false) +``` + +If we want the total number of bytes used by the solver, we can call `nbytes = sizeof(solver)`. 
+
+```@example storage
+nbytes = sizeof(solver)
+```
+
+Thereafter, we can use `Base.format_bytes(nbytes)` to recover what is displayed in the REPL.
+
+```@example storage
+Base.format_bytes(nbytes)
+```
+
+To verify that we match the theoretical results, we just need to multiply the storage requirement of a method by the number of bytes associated with the precision of the linear problem.
+For instance, we need 4 bytes for the precision `Float32`, 8 bytes for precisions `Float64` and `ComplexF32`, and 16 bytes for the precision `ComplexF64`.
+
+```@example storage
+FC = Float64 # precision of the least-squares problem
+ncoefs_lsmr = 5*n + 2*m # number of coefficients
+nbytes_lsmr = sizeof(FC) * ncoefs_lsmr # number of bytes
+```
+
+Therefore, you can check that you have enough memory in RAM to allocate a `KrylovSolver`.
+
+```@example storage
+free_nbytes = Sys.free_memory()
+Base.format_bytes(free_nbytes) # Total free memory in RAM in bytes.
+```
+
+!!! note
+    - Beyond having faster operations, using low precisions, such as single precision, allows one to store more coefficients in RAM and solve larger linear problems.
+    - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to test that we match the expected storage requirement of each method with a tolerance of 2%.
diff --git a/docs/src/tips.md b/docs/src/tips.md
index 604c0633d..e08567ae1 100644
--- a/docs/src/tips.md
+++ b/docs/src/tips.md
@@ -16,14 +16,14 @@ If you don't know the maximum number of threads available on your computer, you
 NMAX = Sys.CPU_THREADS
 ```
 
-and define the number of OpenBLAS/MKL threads at runtine with
+and define the number of OpenBLAS/MKL threads at runtime with
 
 ```julia
 BLAS.set_num_threads(N) # 1 ≤ N ≤ NMAX
 BLAS.get_num_threads()
 ```
 
-The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2`.
+The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2` if your CPU supports simultaneous multithreading (SMT). By default Julia ships with OpenBLAS but it's also possible to use Intel MKL BLAS and LAPACK with [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl). diff --git a/docs/src/warm_start.md b/docs/src/warm-start.md similarity index 59% rename from docs/src/warm_start.md rename to docs/src/warm-start.md index 030cad6c0..6b830bff3 100644 --- a/docs/src/warm_start.md +++ b/docs/src/warm-start.md @@ -1,9 +1,10 @@ -## Warm Start +# [Warm-start](@id warm-start) -Most Krylov methods in this module accept a starting point as argument. The starting point is used as initial approximation to a solution. +Most Krylov methods in this module accept a starting point as argument. +The starting point is used as initial approximation to a solution. ```julia -solver = CgSolver(n, n, S) +solver = CgSolver(A, b) cg!(solver, A, b, itmax=100) if !issolved(solver) cg!(solver, A, b, solver.x, itmax=100) # cg! uses the approximate solution `solver.x` as starting point @@ -28,7 +29,7 @@ If a Krylov method doesn't have the option to warm start, it can still be done e We provide an example with `cg_lanczos!`. ```julia -solver = CgLanczosSolver(n, n, S) +solver = CgLanczosSolver(A, b) cg_lanczos!(solver, A, b) x₀ = solver.x # Ax₀ ≈ b r = b - A * x₀ # r = b - Ax₀ @@ -41,33 +42,34 @@ Explicit restarts cannot be avoided in certain block methods, such as TriMR, due ```julia # [E A] [x] = [b] -# [Aᵀ F] [y] [c] +# [Aᴴ F] [y] [c] M = inv(E) N = inv(F) x₀, y₀, stats = trimr(A, b, c, M=M, N=N) # E and F are not available inside TriMR b₀ = b - Ex₀ - Ay -c₀ = c - Aᵀx₀ - Fy +c₀ = c - Aᴴx₀ - Fy Δx, Δy, stats = trimr(A, b₀, c₀, M=M, N=N) x = x₀ + Δx y = y₀ + Δy ``` - -## Restarted methods - -The storage requierements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses. 
-For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are prefered. -In this section, we show how to use warm starts to implement GMRES(k) and FOM(k). - -```julia -k = 50 -solver = GmresSolver(A, b, k) # FomSolver(A, b, k) -solver.x .= 0 # solver.x .= x₀ -nrestart = 0 -while !issolved(solver) || nrestart ≤ 10 - solve!(solver, A, b, solver.x, itmax=k) - nrestart += 1 -end +```@meta +# ## Restarted methods +# +# The storage requirements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses. +# For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are preferred. +# In this section, we show how to use warm starts to implement GMRES(k) and FOM(k). +# +# ```julia +# k = 50 +# solver = GmresSolver(A, b, k) # FomSolver(A, b, k) +# solver.x .= 0 # solver.x .= x₀ +# nrestart = 0 +# while !issolved(solver) || nrestart ≤ 10 +# solve!(solver, A, b, solver.x, itmax=k) +# nrestart += 1 +# end +# ``` ``` diff --git a/ext/KrylovComponentArraysExt.jl b/ext/KrylovComponentArraysExt.jl new file mode 100644 index 000000000..68cc3e7cf --- /dev/null +++ b/ext/KrylovComponentArraysExt.jl @@ -0,0 +1,13 @@ +module KrylovComponentArraysExt + +using Krylov: Krylov +using ComponentArrays: ComponentVector + +""" + Krylov.ktypeof(::ComponentVector{T,V}) where {T,V} + +Return the underlying `V` type. +""" +Krylov.ktypeof(::ComponentVector{T,V}) where {T,V} = V + +end diff --git a/ext/KrylovFillArraysExt.jl b/ext/KrylovFillArraysExt.jl new file mode 100644 index 000000000..636533942 --- /dev/null +++ b/ext/KrylovFillArraysExt.jl @@ -0,0 +1,13 @@ +module KrylovFillArraysExt + +using Krylov: Krylov +using FillArrays: AbstractFill + +""" + Krylov.ktypeof(::AbstractFill{T,1}) where {T} + +Return the corresponding `Vector{T}` type. 
+""" +Krylov.ktypeof(::AbstractFill{T,1}) where {T} = Vector{T} + +end diff --git a/ext/KrylovStaticArraysExt.jl b/ext/KrylovStaticArraysExt.jl new file mode 100644 index 000000000..f24bd34cc --- /dev/null +++ b/ext/KrylovStaticArraysExt.jl @@ -0,0 +1,13 @@ +module KrylovStaticArraysExt + +using Krylov: Krylov +using StaticArrays: StaticVector + +""" + Krylov.ktypeof(::StaticVector{S,T}) where {S,T} + +Return the corresponding `Vector{T}` type. +""" +Krylov.ktypeof(::StaticVector{S,T}) where {S,T} = Vector{T} + +end diff --git a/src/Krylov.jl b/src/Krylov.jl index b714ccd79..013ea3e65 100644 --- a/src/Krylov.jl +++ b/src/Krylov.jl @@ -1,10 +1,16 @@ module Krylov using LinearAlgebra, SparseArrays, Printf +using PackageExtensionCompat + +function __init__() + @require_extensions +end include("krylov_utils.jl") include("krylov_stats.jl") include("krylov_solvers.jl") +include("krylov_processes.jl") include("cg.jl") include("cr.jl") @@ -19,6 +25,7 @@ include("diom.jl") include("fom.jl") include("dqgmres.jl") include("gmres.jl") +include("fgmres.jl") include("gpmr.jl") @@ -49,6 +56,6 @@ include("lnlq.jl") include("craig.jl") include("craigmr.jl") -include("callback_utils.jl") +include("krylov_solve.jl") end diff --git a/src/bicgstab.jl b/src/bicgstab.jl index c3b914599..16a3ceae9 100644 --- a/src/bicgstab.jl +++ b/src/bicgstab.jl @@ -16,40 +16,60 @@ export bicgstab, bicgstab! """ - (x, stats) = bicgstab(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = bicgstab(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, M=I, N=I, + ldiv::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. 
-Solve the square linear system Ax = b using the BICGSTAB method. + (x, stats) = bicgstab(A, b, x0::AbstractVector; kwargs...) + +BICGSTAB can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using BICGSTAB. BICGSTAB requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. The Biconjugate Gradient Stabilized method is a variant of BiCG, like CGS, -but using different updates for the Aᵀ-sequence in order to obtain smoother +but using different updates for the Aᴴ-sequence in order to obtain smoother convergence than CGS. If BICGSTAB stagnates, we recommend DQGMRES and BiLQ as alternative methods for unsymmetric square systems. BICGSTAB stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖b‖ * rtol`. -`atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -This implementation allows a left preconditioner `M` and a right preconditioner `N`. +#### Optional argument -BICGSTAB can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = bicgstab(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. 
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -58,18 +78,6 @@ and `false` otherwise. """ function bicgstab end -function bicgstab(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = BicgstabSolver(A, b) - bicgstab!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function bicgstab(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = BicgstabSolver(A, b) - bicgstab!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = bicgstab!(solver::BicgstabSolver, A, b; kwargs...) 
solver = bicgstab!(solver::BicgstabSolver, A, b, x0; kwargs...) @@ -80,150 +88,201 @@ See [`BicgstabSolver`](@ref) for more details about the `solver`. """ function bicgstab! end -function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - bicgstab!(solver, A, b; kwargs...) - return solver -end - -function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("BICGSTAB: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :t , S, n) - allocate_if(!NisI, solver, :yz, S, n) - Δx, x, r, p, v, s, qd, stats = solver.Δx, solver.x, solver.r, solver.p, solver.v, solver.s, solver.qd, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - q = d = solver.qd - t = MisI ? d : solver.t - y = NisI ? p : solver.yz - z = NisI ? s : solver.yz - r₀ = MisI ? 
r : solver.qd - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - else - r₀ .= b +def_args_bicgstab = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_bicgstab = (:(x0::AbstractVector),) + +def_kwargs_bicgstab = (:(; c::AbstractVector{FC} = b ), + :(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_bicgstab = mapreduce(extract_parameters, vcat, def_kwargs_bicgstab) + +args_bicgstab = (:A, :b) +optargs_bicgstab = (:x0,) +kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function bicgstab($(def_args_bicgstab...), $(def_optargs_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BicgstabSolver(A, b) + warm_start!(solver, $(optargs_bicgstab...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bicgstab!(solver, $(args_bicgstab...); $(kwargs_bicgstab...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - x .= zero(FC) # x₀ - s .= zero(FC) # s₀ - v .= zero(FC) # v₀ - MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ - p .= r # p₁ - - α = one(FC) # α₀ - ω = one(FC) # ω₀ - ρ = one(FC) # ρ₀ - - # Compute residual norm ‖r₀‖₂. 
- rNorm = @knrm2(n, r) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function bicgstab($(def_args_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BicgstabSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bicgstab!(solver, $(args_bicgstab...); $(kwargs_bicgstab...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|") - kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) - - next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ - if next_ρ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = false, false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver - end - - # Stopping criterion. - solved = rNorm ≤ ε - tired = iter ≥ itmax - breakdown = false - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index and ρ. 
- iter = iter + 1 - ρ = next_ρ - - NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ - mul!(q, A, y) # qₖ = Ayₖ - mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ - α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩ - @kcopy!(n, r, s) # sₖ = rₖ₋₁ - @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ - @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ - NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ - mul!(d, A, z) # dₖ = Azₖ - MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ - ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩ - @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ - @kcopy!(n, s, r) # rₖ = sₖ - @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ - next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩ - β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ) - @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ - @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ - - # Compute residual norm ‖rₖ‖₂. + function bicgstab!(solver :: BicgstabSolver{T,FC,S}, $(def_args_bicgstab...); $(def_kwargs_bicgstab...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "BICGSTAB: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Set up workspace. 
+ allocate_if(!MisI, solver, :t , S, n) + allocate_if(!NisI, solver, :yz, S, n) + Δx, x, r, p, v, s, qd, stats = solver.Δx, solver.x, solver.r, solver.p, solver.v, solver.s, solver.qd, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = d = solver.qd + t = MisI ? d : solver.t + y = NisI ? p : solver.yz + z = NisI ? s : solver.yz + r₀ = MisI ? r : solver.qd + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + else + r₀ .= b + end + + x .= zero(FC) # x₀ + s .= zero(FC) # s₀ + v .= zero(FC) # v₀ + MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ + p .= r # p₁ + + α = one(FC) # α₀ + ω = one(FC) # ω₀ + ρ = one(FC) # ρ₀ + + # Compute residual norm ‖r₀‖₂. rNorm = @knrm2(n, r) history && push!(rNorms, rNorm) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %5s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time)) + + next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ + if next_ρ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = false, false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver + end + + # Stopping criterion. 
+ solved = rNorm ≤ ε tired = iter ≥ itmax - breakdown = (α == 0 || isnan(α)) - kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) + breakdown = false + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index and ρ. + iter = iter + 1 + ρ = next_ρ + + NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ + mul!(q, A, y) # qₖ = Ayₖ + mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ + α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩ + @kcopy!(n, r, s) # sₖ = rₖ₋₁ + @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ + @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ + NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ + mul!(d, A, z) # dₖ = Azₖ + MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ + ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩ + @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ + @kcopy!(n, s, r) # rₖ = sₖ + @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ + next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩ + β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ) + @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ + @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ + + # Compute residual norm ‖rₖ‖₂. + rNorm = @knrm2(n, r) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + breakdown = (α == 0 || isnan(α)) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "breakdown αₖ == 0") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "breakdown αₖ == 0") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/bilq.jl b/src/bilq.jl index 39725fbfe..2e8823e93 100644 --- a/src/bilq.jl +++ b/src/bilq.jl @@ -13,50 +13,58 @@ export bilq, bilq! 
""" - (x, stats) = bilq(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + (x, stats) = bilq(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, transfer_to_bicg::Bool=true, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the square linear system Ax = b using the BiLQ method. + (x, stats) = bilq(A, b, x0::AbstractVector; kwargs...) +BiLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using BiLQ. BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. -When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. +When `A` is Hermitian and `b = c`, BiLQ is equivalent to SYMMLQ. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -An option gives the possibility of transferring to the BiCG point, -when it exists. The transfer is based on the residual norm. +* `x0`: a vector of length n that represents an initial guess of the solution x. -BiLQ can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = bilq(A, b, x0; kwargs...) +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. 
The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. -#### Reference +#### References * A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. +* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, Springer, pp. 73--89, 1976. """ function bilq end -function bilq(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = BilqSolver(A, b) - bilq!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function bilq(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = BilqSolver(A, b) - bilq!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = bilq!(solver::BilqSolver, A, b; kwargs...) solver = bilq!(solver::BilqSolver, A, b, x0; kwargs...) @@ -67,263 +75,312 @@ See [`BilqSolver`](@ref) for more details about the `solver`. """ function bilq! end -function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - bilq!(solver, A, b; kwargs...) - return solver -end - -function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("BILQ: system of size %d\n", n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ - p, Δx, x, d̅, stats = solver.p, solver.Δx, solver.x, solver.d̅, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_bilq = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_bilq = (:(x0::AbstractVector),) + +def_kwargs_bilq = (:(; c::AbstractVector{FC} = b ), + :(; transfer_to_bicg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_bilq = mapreduce(extract_parameters, vcat, def_kwargs_bilq) + +args_bilq = (:A, :b) +optargs_bilq = (:x0,) +kwargs_bilq = (:c, :transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function bilq($(def_args_bilq...), $(def_optargs_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqSolver(A, b) + warm_start!(solver, $(optargs_bilq...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilq!(solver, $(args_bilq...); $(kwargs_bilq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ - - history && push!(rNorms, bNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function bilq($(def_args_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilq!(solver, $(args_bilq...); $(kwargs_bilq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * bNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - - # Initialize the Lanczos biorthogonalization process. - cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩ - if cᵗb == 0 - stats.niter = 0 - stats.solved = false - stats.inconsistent = false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver - end - - βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀) - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations - norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates - - # Stopping criterion. 
- solved_lq = bNorm ≤ ε - solved_cg = false - breakdown = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved_lq || solved_cg || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the Lanczos biorthogonalization process. - # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ - - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • • • 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function bilq!(solver :: BilqSolver{T,FC,S}, $(def_args_bilq...); $(def_kwargs_bilq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = 
size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "BILQ: system of size %d\n", n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ + p, Δx, x, d̅, stats = solver.p, solver.Δx, solver.x, solver.d̅, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ + + history && push!(rNorms, bNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ. 
- # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - if iter ≥ 2 - # Compute solution xₖ. - # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * bNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) + + # Initialize the Lanczos biorthogonalization process. + cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + if cᴴb == 0 + stats.niter = 0 + stats.solved = false + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver end - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = v₁ - @. d̅ = vₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations + norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates + + # Stopping criterion. + solved_lq = bNorm ≤ ε + solved_cg = false + breakdown = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved_lq || solved_cg || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. 
+ iter = iter + 1 + + # Continue the Lanczos biorthogonalization process. + # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ + + @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + + αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + + @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • • • 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] + + if iter == 1 + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ + else + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + end + + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and 
d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = v₁ + @. d̅ = vₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + end + + # Compute vₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if pᴴq ≠ 0 + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + end + + # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ + vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) + norm_vₖ₊₁ = @knrm2(n, vₖ) + + # Compute BiLQ residual norm + # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁ + rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Compute BiCG residual norm + # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ + if transfer_to_bicg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ + end + + # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ, δbarₖ₋₁ and norm_vₖ. + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + δbarₖ₋₁ = δbarₖ + norm_vₖ = norm_vₖ₊₁ + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + solved_lq = rNorm_lq ≤ ε + solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) + tired = iter ≥ itmax + breakdown = !solved_lq && !solved_cg && (pᴴq == 0) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. 
uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if pᵗq ≠ 0 - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + # Compute BICG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) - norm_vₖ₊₁ = @knrm2(n, vₖ) - - # Compute BiLQ residual norm - # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁ - rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) - end - history && push!(rNorms, rNorm_lq) - - # Compute BiCG residual norm - # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ - if transfer_to_bicg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ - end + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") + solved_lq && (status = "solution xᴸ good enough given atol and rtol") + solved_cg && (status = "solution xᶜ good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ, δbarₖ₋₁ and norm_vₖ. - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - δbarₖ₋₁ = δbarₖ - norm_vₖ = norm_vₖ₊₁ - - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - solved_lq = rNorm_lq ≤ ε - solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) - tired = iter ≥ itmax - breakdown = !solved_lq && !solved_cg && (pᵗq == 0) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) - end - (verbose > 0) && @printf("\n") + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Compute BICG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + # Update stats + stats.niter = iter + stats.solved = solved_lq || solved_cg + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") - solved_lq && (status = "solution xᴸ good enough given atol and rtol") - solved_cg && (status = "solution xᶜ good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved_lq || solved_cg - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/bilqr.jl b/src/bilqr.jl index 09fef1f6c..486ccceec 100644 --- a/src/bilqr.jl +++ b/src/bilqr.jl @@ -1,5 +1,5 @@ # An implementation of BILQR for the solution of square -# consistent linear adjoint systems Ax = b and Aᵀy = c. +# consistent linear adjoint systems Ax = b and Aᴴy = c. # # This method is described in # @@ -14,33 +14,55 @@ export bilqr, bilqr! 
""" (x, y, stats) = bilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_bicg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, y, stats) = bilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +BiLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + Combine BiLQ and QMR to solve adjoint systems. [0 A] [y] = [b] - [Aᵀ 0] [x] [c] + [Aᴴ 0] [x] [c] + +The relation `bᴴc ≠ 0` must be satisfied. +BiLQ is used for solving primal system `Ax = b` of size n. +QMR is used for solving dual system `Aᴴy = c` of size n. + +#### Input arguments -The relation `bᵀc ≠ 0` must be satisfied. -BiLQ is used for solving primal system `Ax = b`. -QMR is used for solving dual system `Aᵀy = c`. +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n; +* `c`: a vector of length n. -An option gives the possibility of transferring from the BiLQ point to the -BiCG point, when it exists. The transfer is based on the residual norm. +#### Optional arguments -BiLQR can be warm-started from initial guesses `x0` and `y0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. - (x, y, stats) = bilqr(A, b, c, x0, y0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. 
The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure. #### Reference @@ -48,18 +70,6 @@ and `false` otherwise. """ function bilqr end -function bilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = BilqrSolver(A, b) - bilqr!(solver, A, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function bilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = BilqrSolver(A, b) - bilqr!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = bilqr!(solver::BilqrSolver, A, b, c; kwargs...) solver = bilqr!(solver::BilqrSolver, A, b, c, x0, y0; kwargs...) @@ -70,369 +80,417 @@ See [`BilqrSolver`](@ref) for more details about the `solver`. """ function bilqr! 
end -function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - bilqr!(solver, A, b, c; kwargs...) - return solver -end - -function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("Systems must be square") - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("BILQR: systems of size %d\n", n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ - p, Δx, Δy, x, t = solver.p, solver.Δx, solver.Δy, solver.x, solver.y - d̅, wₖ₋₃, wₖ₋₂, stats = solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂, solver.stats - warm_start = solver.warm_start - rNorms, sNorms = stats.residuals_primal, stats.residuals_dual - reset!(stats) - r₀ = warm_start ? q : b - s₀ = warm_start ? 
p : c - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - mul!(s₀, Aᵀ, Δy) - @kaxpby!(n, one(FC), c, -one(FC), s₀) +def_args_bilqr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_bilqr = (:(x0 :: AbstractVector), + :(y0 :: AbstractVector)) + +def_kwargs_bilqr = (:(; transfer_to_bicg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_bilqr = mapreduce(extract_parameters, vcat, def_kwargs_bilqr) + +args_bilqr = (:A, :b, :c) +optargs_bilqr = (:x0, :y0) +kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function bilqr($(def_args_bilqr...), $(def_optargs_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqrSolver(A, b) + warm_start!(solver, $(optargs_bilqr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilqr!(solver, $(args_bilqr...); $(kwargs_bilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖. - x .= zero(FC) # x₀ - bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖ - - # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀy₀‖. - t .= zero(FC) # t₀ - cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ - - iter = 0 - itmax == 0 && (itmax = 2*n) - - history && push!(rNorms, bNorm) - history && push!(sNorms, cNorm) - εL = atol + rtol * bNorm - εQ = atol + rtol * cNorm - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm) - - # Initialize the Lanczos biorthogonalization process. 
- cᵗb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᵀy₀,b - Ax₀⟩ - if cᵗb == 0 - stats.niter = 0 - stats.solved_primal = false - stats.solved_dual = false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver + function bilqr($(def_args_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = BilqrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + bilqr!(solver, $(args_bilqr...); $(kwargs_bilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Set up workspace. - βₖ = √(abs(cᵗb)) # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀) - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᵀy₀) / γ̄₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations - ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁ - norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates - ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ - τₖ = zero(T) # τₖ is used for the dual residual norm estimate - - # Stopping criterion. 
- solved_lq = bNorm == 0 - solved_lq_tol = solved_lq_mach = false - solved_cg = solved_cg_tol = solved_cg_mach = false - solved_primal = solved_lq || solved_cg - solved_qr_tol = solved_qr_mach = false - solved_dual = cNorm == 0 - tired = iter ≥ itmax - breakdown = false - status = "unknown" - user_requested_exit = false - - while !((solved_primal && solved_dual) || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the Lanczos biorthogonalization process. - # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ - - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. 
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function bilqr!(solver :: BilqrSolver{T,FC,S}, $(def_args_bilqr...); $(def_kwargs_bilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("Systems must be square") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "BILQR: systems of size %d\n", n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ + p, Δx, Δy, x, t = solver.p, solver.Δx, solver.Δy, solver.x, solver.y + d̅, wₖ₋₃, wₖ₋₂, stats = solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂, solver.stats + warm_start = solver.warm_start + rNorms, sNorms = stats.residuals_primal, stats.residuals_dual + reset!(stats) + r₀ = warm_start ? q : b + s₀ = warm_start ? p : c + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + mul!(s₀, Aᴴ, Δy) + @kaxpby!(n, one(FC), c, -one(FC), s₀) end - if !solved_primal - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ - end + # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖. + x .= zero(FC) # x₀ + bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖ + + # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖. + t .= zero(FC) # t₀ + cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ + + iter = 0 + itmax == 0 && (itmax = 2*n) + + history && push!(rNorms, bNorm) + history && push!(sNorms, cNorm) + εL = atol + rtol * bNorm + εQ = atol + rtol * cNorm + (verbose > 0) && @printf(iostream, "%5s %7s %7s %5s\n", "k", "‖rₖ‖", "‖sₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time)) + + # Initialize the Lanczos biorthogonalization process. 
+ cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩ + if cᴴb == 0 + stats.niter = 0 + stats.solved_primal = false + stats.solved_dual = false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver + end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ. - # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - if iter ≥ 2 - # Compute solution xₖ. - # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) - end + # Set up workspace. + βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations + ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁ + norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates + ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ + wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ + τₖ = zero(T) # τₖ is used for the dual residual norm estimate + + # Stopping criterion. 
+ solved_lq = bNorm == 0 + solved_lq_tol = solved_lq_mach = false + solved_cg = solved_cg_tol = solved_cg_mach = false + solved_primal = solved_lq || solved_cg + solved_qr_tol = solved_qr_mach = false + solved_dual = cNorm == 0 + tired = iter ≥ itmax + breakdown = false + status = "unknown" + user_requested_exit = false + overtimed = false - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = v₁ - @. d̅ = vₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) - end + while !((solved_primal && solved_dual) || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 - # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᵀvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ - norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁ + # Continue the Lanczos biorthogonalization process. + # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ - # Compute BiLQ residual norm - # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁ - rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) - end - history && push!(rNorms, rNorm_lq) + mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ - # Update ‖vₖ‖ - norm_vₖ = norm_vₖ₊₁ + @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - # Compute BiCG residual norm - # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ - if transfer_to_bicg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ - end + αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - # Update primal stopping criterion - solved_lq_tol = rNorm_lq ≤ εL - solved_lq_mach = rNorm_lq + 1 ≤ 1 - solved_lq = solved_lq_tol || solved_lq_mach - solved_cg_tol = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) - solved_cg_mach = transfer_to_bicg && 
(abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1) - solved_cg = solved_cg_tol || solved_cg_mach - solved_primal = solved_lq || solved_cg - end + @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - if !solved_dual - # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ̄₁e₁. if iter == 1 - ψbarₖ = conj(γₖ) + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ else - # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] - # [sₖ -cₖ] [ 0 ] [ ψbarₖ] - ψₖ₋₁ = cₖ * ψbarₖ₋₁ - ψbarₖ = sₖ * ψbarₖ₋₁ + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ end - # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ. - # w₁ = u₁ / δ̄₁ - if iter == 2 - wₖ₋₁ = wₖ₋₂ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @. 
wₖ₋₁ = uₖ₋₁ / conj(δₖ₋₁) + if !solved_primal + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = v₁ + @. d̅ = vₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ + @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + end + + # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ + vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ + norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁ + + # Compute BiLQ residual norm + # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁ + rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Update ‖vₖ‖ + norm_vₖ = norm_vₖ₊₁ + + # Compute BiCG residual norm + # ‖rₖ‖ = |ρₖ| * ‖vₖ₊₁‖ + if transfer_to_bicg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) * norm_vₖ₊₁ + end + + # Update primal stopping criterion + solved_lq_tol = rNorm_lq ≤ εL + solved_lq_mach = rNorm_lq + 1 ≤ 1 + solved_lq = solved_lq_tol || solved_lq_mach + solved_cg_tol = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) + solved_cg_mach = transfer_to_bicg && (abs(δbarₖ) > eps(T)) 
&& (rNorm_cg + 1 ≤ 1) + solved_cg = solved_cg_tol || solved_cg_mach + solved_primal = solved_lq || solved_cg end - # w₂ = (u₂ - λ̄₁w₁) / δ̄₂ - if iter == 3 - wₖ₋₁ = wₖ₋₃ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + if !solved_dual + # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ̄₁e₁. + if iter == 1 + ψbarₖ = conj(γₖ) + else + # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] + # [sₖ -cₖ] [ 0 ] [ ψbarₖ] + ψₖ₋₁ = cₖ * ψbarₖ₋₁ + ψbarₖ = sₖ * ψbarₖ₋₁ + end + + # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ. + # w₁ = u₁ / δ̄₁ + if iter == 2 + wₖ₋₁ = wₖ₋₂ + @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + @. wₖ₋₁ = uₖ₋₁ / conj(δₖ₋₁) + end + # w₂ = (u₂ - λ̄₁w₁) / δ̄₂ + if iter == 3 + wₖ₋₁ = wₖ₋₃ + @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ + if iter ≥ 4 + @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃) + wₖ₋₁ = wₖ₋₃ + @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + + if iter ≥ 3 + # Swap pointers. + @kswap(wₖ₋₃, wₖ₋₂) + end + + if iter ≥ 2 + # Compute solution tₖ₋₁. + # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ + @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t) + end + + # Update ψbarₖ₋₁ + ψbarₖ₋₁ = ψbarₖ + + # Compute τₖ = τₖ₋₁ + ‖uₖ‖² + τₖ += @kdotr(n, uₖ, uₖ) + + # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ + sNorm = abs(ψbarₖ) * √τₖ + history && push!(sNorms, sNorm) + + # Update dual stopping criterion + solved_qr_tol = sNorm ≤ εQ + solved_qr_mach = sNorm + 1 ≤ 1 + solved_dual = solved_qr_tol || solved_qr_mach end - # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ - if iter ≥ 4 - @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃) - wₖ₋₁ = wₖ₋₃ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + # Compute vₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if pᴴq ≠ zero(FC) + @. 
vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p end + # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. if iter ≥ 3 - # Swap pointers. - @kswap(wₖ₋₃, wₖ₋₂) + ϵₖ₋₃ = ϵₖ₋₂ end - if iter ≥ 2 - # Compute solution tₖ₋₁. - # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ - @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t) + λₖ₋₂ = λₖ₋₁ end - - # Update ψbarₖ₋₁ - ψbarₖ₋₁ = ψbarₖ - - # Compute τₖ = τₖ₋₁ + ‖uₖ‖² - τₖ += @kdotr(n, uₖ, uₖ) - - # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ - sNorm = abs(ψbarₖ) * √τₖ - history && push!(sNorms, sNorm) - - # Update dual stopping criterion - solved_qr_tol = sNorm ≤ εQ - solved_qr_mach = sNorm + 1 ≤ 1 - solved_dual = solved_qr_tol || solved_qr_mach - end - - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if pᵗq ≠ zero(FC) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + δbarₖ₋₁ = δbarₖ + cₖ₋₁ = cₖ + sₖ₋₁ = sₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + breakdown = !solved_lq && !solved_cg && (pᴴq == 0) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e %.2fs\n", iter, "✗ ✗ ✗ ✗", sNorm, ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s %.2fs\n", iter, rNorm_lq, "✗ ✗ ✗ ✗", ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, rNorm_lq, sNorm, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. 
- if iter ≥ 3 - ϵₖ₋₃ = ϵₖ₋₂ - end - if iter ≥ 2 - λₖ₋₂ = λₖ₋₁ + # Compute BICG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - δbarₖ₋₁ = δbarₖ - cₖ₋₁ = cₖ - sₖ₋₁ = sₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - breakdown = !solved_lq && !solved_cg && (pᵗq == 0) - - kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm) - kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "") - kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) - end - (verbose > 0) && @printf("\n") + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") + solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol") + solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") + !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") + solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") + solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") + solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") + solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ") + !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") + solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") + solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") + 
solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") + solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") + solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") + solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && @kaxpy!(n, one(FC), Δy, t) + solver.warm_start = false - # Compute BICG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + # Update stats + stats.niter = iter + stats.solved_primal = solved_primal + stats.solved_dual = solved_dual + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") - solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol") - solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") - !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") - solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") - solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") - solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") - solved_cg_mach && !solved_dual && 
(status = "Only found approximate zero-residual primal solution xᶜ") - !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") - solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") - solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") - solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") - solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") - solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") - solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(n, one(FC), Δx, x) - warm_start && @kaxpy!(n, one(FC), Δy, t) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.status = status - stats.solved_primal = solved_primal - stats.solved_dual = solved_dual - return solver end diff --git a/src/callback_utils.jl b/src/callback_utils.jl deleted file mode 100644 index eac362e5d..000000000 --- a/src/callback_utils.jl +++ /dev/null @@ -1,50 +0,0 @@ -export StorageGetxRestartedGmres - -export get_x_restarted_gmres! - -mutable struct StorageGetxRestartedGmres{S} - x::S - y::S - p::S -end -StorageGetxRestartedGmres(solver::GmresSolver; N = I) = - StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? 
similar(solver.p) : similar(solver.x)) - -function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, - stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S} - NisI = (N === I) - x2, y2, p2 = stor.x, stor.y, stor.p - n = size(A, 2) - # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. - nr = sum(1:solver.inner_iter) - y = solver.z # yᵢ = zᵢ - y2 .= y - R = solver.R - V = solver.V - x2 .= solver.Δx - for i = solver.inner_iter : -1 : 1 - pos = nr + i - solver.inner_iter # position of rᵢ.ₖ - for j = solver.inner_iter : -1 : i+1 - y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ - end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ eps(T)^(3/4) - y2[i] = zero(FC) - inconsistent = true - else - y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ - end - end - - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : solver.inner_iter - @kaxpy!(n, y2[i], V[i], x2) - end - if !NisI - p2 .= solver.p - p2 .= x2 - mul!(x2, N, p2) - end - x2 .+= solver.x -end diff --git a/src/cg.jl b/src/cg.jl index 8a974accc..1345a6232 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -15,36 +15,54 @@ export cg, cg! - """ (x, stats) = cg(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, radius::T=zero(T), linesearch::Bool=false, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + linesearch::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -The conjugate gradient method to solve the symmetric linear system Ax=b. + (x, stats) = cg(A, b, x0::AbstractVector; kwargs...) -The method does _not_ abort if A is not definite. +CG can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. 
-A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +The conjugate gradient method to solve the Hermitian linear system Ax = b of size n. + +The method does _not_ abort if A is not definite. M also indicates the weighted norm in which residuals are measured. -If `itmax=0`, the default number of iterations is set to `2 * n`, -with `n = length(b)`. +#### Input arguments -CG can be warm-started from an initial guess `x0` with the method +* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n; +* `b`: a vector of length n. - (x, stats) = cg(A, b, x0; kwargs...) +#### Optional argument -where `kwargs` are the same keyword arguments as above. +* `x0`: a vector of length n that represents an initial guess of the solution x. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -52,18 +70,6 @@ and `false` otherwise. """ function cg end -function cg(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CgSolver(A, b) - cg!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cg(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgSolver(A, b) - cg!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cg!(solver::CgSolver, A, b; kwargs...) solver = cg!(solver::CgSolver, A, b, x0; kwargs...) @@ -74,152 +80,200 @@ See [`CgSolver`](@ref) for more details about the `solver`. """ function cg! end -function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cg!(solver, A, b; kwargs...) 
- return solver -end - -function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0") - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CG: system of %d equations in %d variables\n", n, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :z, S, n) - Δx, x, r, p, Ap, stats = solver.Δx, solver.x, solver.r, solver.p, solver.Ap, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - z = MisI ? 
r : solver.z - - x .= zero(FC) - if warm_start - mul!(r, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r) - else - r .= b +def_args_cg = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cg = (:(x0::AbstractVector),) + +def_kwargs_cg = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; linesearch::Bool = false ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cg = mapreduce(extract_parameters, vcat, def_kwargs_cg) + +args_cg = (:A, :b) +optargs_cg = (:x0,) +kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cg($(def_args_cg...), $(def_optargs_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgSolver(A, b) + warm_start!(solver, $(optargs_cg...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg!(solver, $(args_cg...); $(kwargs_cg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(z, M, r, ldiv) - p .= z - γ = @kdotr(n, r, z) - rNorm = sqrt(γ) - history && push!(rNorms, rNorm) - if γ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + + function cg($(def_args_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg!(solver, $(args_cg...); $(kwargs_cg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2 * n) - - pAp = zero(T) - pNorm² = γ - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s 
%8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ") - kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm) - - solved = rNorm ≤ ε - tired = iter ≥ itmax - inconsistent = false - on_boundary = false - zero_curvature = false - user_requested_exit = false - - status = "unknown" - - while !(solved || tired || zero_curvature || user_requested_exit) - mul!(Ap, A, p) - pAp = @kdotr(n, p, Ap) - if (pAp ≤ eps(T) * pNorm²) && (radius == 0) - if abs(pAp) ≤ eps(T) * pNorm² - zero_curvature = true - inconsistent = !linesearch - end - if linesearch - iter == 0 && (x .= b) - solved = true - end + function cg!(solver :: CgSolver{T,FC,S}, $(def_args_cg...); $(def_kwargs_cg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0") + (verbose > 0) && @printf(iostream, "CG: system of %d equations in %d variables\n", n, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :z, S, n) + Δx, x, r, p, Ap, stats = solver.Δx, solver.x, solver.r, solver.p, solver.Ap, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + z = MisI ? 
r : solver.z + + x .= zero(FC) + if warm_start + mul!(r, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r) + else + r .= b + end + MisI || mulorldiv!(z, M, r, ldiv) + p .= z + γ = @kdotr(n, r, z) + rNorm = sqrt(γ) + history && push!(rNorms, rNorm) + if γ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - (zero_curvature || solved) && continue - α = γ / pAp + iter = 0 + itmax == 0 && (itmax = 2 * n) - # Compute step size to boundary if applicable. - σ = radius > 0 ? maximum(to_boundary(x, p, radius, dNorm2=pNorm²)) : α + pAp = zero(T) + pNorm² = γ + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %8s %5s\n", "k", "‖r‖", "pAp", "α", "σ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e", iter, rNorm) - kdisplay(iter, verbose) && @printf("%8.1e %8.1e %8.1e\n", pAp, α, σ) + solved = rNorm ≤ ε + tired = iter ≥ itmax + inconsistent = false + on_boundary = false + zero_curvature = false + user_requested_exit = false + overtimed = false + + status = "unknown" + + while !(solved || tired || zero_curvature || user_requested_exit || overtimed) + mul!(Ap, A, p) + pAp = @kdotr(n, p, Ap) + if (pAp ≤ eps(T) * pNorm²) && (radius == 0) + if abs(pAp) ≤ eps(T) * pNorm² + zero_curvature = true + inconsistent = !linesearch + end + if linesearch + iter == 0 && (x .= b) + solved = true + end + end + (zero_curvature || solved) && continue - # Move along p from x to the boundary if either - # the next step leads outside the trust region or - # we have nonpositive curvature. - if (radius > 0) && ((pAp ≤ 0) || (α > σ)) - α = σ - on_boundary = true - end + α = γ / pAp - @kaxpy!(n, α, p, x) - @kaxpy!(n, -α, Ap, r) - MisI || mulorldiv!(z, M, r, ldiv) - γ_next = @kdotr(n, r, z) - rNorm = sqrt(γ_next) - history && push!(rNorms, rNorm) + # Compute step size to boundary if applicable. 
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius, dNorm2=pNorm²)) : α - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + kdisplay(iter, verbose) && @printf(iostream, " %8.1e %8.1e %8.1e %.2fs\n", pAp, α, σ, ktimer(start_time)) + + # Move along p from x to the boundary if either + # the next step leads outside the trust region or + # we have nonpositive curvature. + if (radius > 0) && ((pAp ≤ 0) || (α > σ)) + α = σ + on_boundary = true + end - resid_decrease_lim = rNorm ≤ ε - resid_decrease = resid_decrease_lim || resid_decrease_mach - solved = resid_decrease || on_boundary + @kaxpy!(n, α, p, x) + @kaxpy!(n, -α, Ap, r) + MisI || mulorldiv!(z, M, r, ldiv) + γ_next = @kdotr(n, r, z) + rNorm = sqrt(γ_next) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + resid_decrease_lim = rNorm ≤ ε + resid_decrease = resid_decrease_lim || resid_decrease_mach + solved = resid_decrease || on_boundary + + if !solved + β = γ_next / γ + pNorm² = γ_next + β^2 * pNorm² + γ = γ_next + @kaxpby!(n, one(FC), z, β, p) + end - if !solved - β = γ_next / γ - pNorm² = γ_next + β^2 * pNorm² - γ = γ_next - @kaxpby!(n, one(FC), z, β, p) + iter = iter + 1 + tired = iter ≥ itmax + user_requested_exit = callback(solver) :: Bool + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e", iter, rNorm) end + (verbose > 0) && @printf(iostream, "\n\n") + + # Termination status + solved && on_boundary && (status = "on trust-region boundary") + solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected") + solved && (status == "unknown") && (status = "solution good enough given atol and rtol") + zero_curvature && (status = "zero curvature detected") + tired && (status = "maximum number of iterations exceeded") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - iter = iter + 1 - tired = iter ≥ itmax - user_requested_exit = callback(solver) :: Bool - kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - solved && on_boundary && (status = "on trust-region boundary") - solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected") - solved && (status == "unknown") && (status = "solution good enough given atol and rtol") - zero_curvature && (status = "zero curvature detected") - tired && (status = "maximum number of iterations exceeded") - 
user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl index a8e24f02f..2c5d72a64 100644 --- a/src/cg_lanczos.jl +++ b/src/cg_lanczos.jl @@ -12,34 +12,53 @@ export cg_lanczos, cg_lanczos! - """ (x, stats) = cg_lanczos(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, - check_curvature::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, + check_curvature::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -The Lanczos version of the conjugate gradient method to solve the -symmetric linear system + (x, stats) = cg_lanczos(A, b, x0::AbstractVector; kwargs...) + +CG-LANCZOS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. - Ax = b +The Lanczos version of the conjugate gradient method to solve the +Hermitian linear system Ax = b of size n. The method does _not_ abort if A is not definite. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be hermitian and positive definite. +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -CG-LANCZOS can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = cg_lanczos(A, b, x0; kwargs...) 
+#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LanczosStats`](@ref) structure. #### References @@ -48,18 +67,6 @@ and `false` otherwise. """ function cg_lanczos end -function cg_lanczos(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CgLanczosSolver(A, b) - cg_lanczos!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cg_lanczos(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgLanczosSolver(A, b) - cg_lanczos!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = cg_lanczos!(solver::CgLanczosSolver, A, b; kwargs...) solver = cg_lanczos!(solver::CgLanczosSolver, A, b, x0; kwargs...) @@ -70,150 +77,199 @@ See [`CgLanczosSolver`](@ref) for more details about the `solver`. """ function cg_lanczos! end -function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cg_lanczos!(solver, A, b; kwargs...) - return solver -end - -function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $T") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :v, S, n) - Δx, x, Mv, Mv_prev = solver.Δx, solver.x, solver.Mv, solver.Mv_prev - p, Mv_next, stats = solver.p, solver.Mv_next, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - v = MisI ? Mv : solver.v - - # Initial state. 
- x .= zero(FC) - if warm_start - mul!(Mv, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), Mv) - else - Mv .= b +def_args_cg_lanczos = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cg_lanczos = (:(x0::AbstractVector),) + +def_kwargs_cg_lanczos = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; check_curvature::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_cg_lanczos = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos) + +args_cg_lanczos = (:A, :b) +optargs_cg_lanczos = (:x0,) +kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cg_lanczos($(def_args_cg_lanczos...), $(def_optargs_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgLanczosSolver(A, b) + warm_start!(solver, $(optargs_cg_lanczos...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg_lanczos!(solver, $(args_cg_lanczos...); $(kwargs_cg_lanczos...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁ - σ = β - rNorm = σ - history && push!(rNorms, rNorm) - if β == 0 - stats.niter = 0 - stats.solved = true - stats.Anorm = zero(T) - stats.indefinite = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + + function cg_lanczos($(def_args_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgLanczosSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg_lanczos!(solver, $(args_cg_lanczos...); $(kwargs_cg_lanczos...)) + 
solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - p .= v - - # Initialize Lanczos process. - # β₁Mv₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ - Mv_prev .= Mv - - iter = 0 - itmax == 0 && (itmax = 2 * n) - - # Initialize some constants used in recursions below. - ω = zero(T) - γ = one(T) - Anorm2 = zero(T) - β_prev = zero(T) - - # Define stopping tolerance. - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - indefinite = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - # Main loop. - while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit) - # Form next Lanczos vector. - # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ - mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ - - # Check curvature. Exit fast if requested. - # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ A pₖ. - γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁) - indefinite |= (γ ≤ 0) - (check_curvature & indefinite) && continue - - @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ - if iter > 0 - @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ - @. 
Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ + + function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, $(def_args_cg_lanczos...); $(def_kwargs_cg_lanczos...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CG-LANCZOS: system of %d equations in %d variables\n", n, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + Δx, x, Mv, Mv_prev = solver.Δx, solver.x, solver.Mv, solver.Mv_prev + p, Mv_next, stats = solver.p, solver.Mv_next, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + v = MisI ? Mv : solver.v + + # Initial state. + x .= zero(FC) + if warm_start + mul!(Mv, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), Mv) + else + Mv .= b end - @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ - MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ - Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. - β_prev = β - - # Compute next CG iterate. 
- @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ - ω = β * γ - σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ - ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)² - @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ - rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1 + MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀ + β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ + σ = β + rNorm = σ history && push!(rNorms, rNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach + if β == 0 + stats.niter = 0 + stats.solved = true + stats.Anorm = zero(T) + stats.indefinite = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + p .= v + + # Initialize Lanczos process. + # β₁Mv₁ = b + @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + Mv_prev .= Mv + + iter = 0 + itmax == 0 && (itmax = 2 * n) + + # Initialize some constants used in recursions below. + ω = zero(T) + γ = one(T) + Anorm2 = zero(T) + β_prev = zero(T) + + # Define stopping tolerance. + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + indefinite = false + solved = rNorm ≤ ε tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + # Main loop. + while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit || overtimed) + # Form next Lanczos vector. 
+ # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ + mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ + δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ + + # Check curvature. Exit fast if requested. + # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ. + γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁) + indefinite |= (γ ≤ 0) + (check_curvature & indefinite) && continue + + @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ + if iter > 0 + @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ + @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ + end + @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ + MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ + β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ + @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ + Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. + β_prev = β + + # Compute next CG iterate. + @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ + ω = β * γ + σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ + ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)² + @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ + rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1 + history && push!(rNorms, rNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + (check_curvature & indefinite) && (status = "negative curvature") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats. TODO: Estimate Acond. + stats.niter = iter + stats.solved = solved + stats.Anorm = sqrt(Anorm2) + stats.indefinite = indefinite + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - (check_curvature & indefinite) && (status = "negative curvature") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats. TODO: Estimate Acond. - stats.niter = iter - stats.solved = solved - stats.Anorm = sqrt(Anorm2) - stats.indefinite = indefinite - stats.status = status - return solver end diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl index 01f11e41f..b523e5cc3 100644 --- a/src/cg_lanczos_shift.jl +++ b/src/cg_lanczos_shift.jl @@ -13,13 +13,13 @@ export cg_lanczos_shift, cg_lanczos_shift! 
- """ (x, stats) = cg_lanczos_shift(A, b::AbstractVector{FC}, shifts::AbstractVector{T}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, check_curvature::Bool=false, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, + check_curvature::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -27,25 +27,42 @@ export cg_lanczos_shift, cg_lanczos_shift! The Lanczos version of the conjugate gradient method to solve a family of shifted systems - (A + αI) x = b (α = α₁, ..., αₙ) + (A + αI) x = b (α = α₁, ..., αₚ) + +of size n. The method does _not_ abort if A + αI is not definite. + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `shifts`: a vector of length p. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments -The method does _not_ abort if A + αI is not definite. +* `x`: a vector of p dense vectors, each one of length n; +* `stats`: statistics collected on the run in a [`LanczosShiftStats`](@ref) structure. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be hermitian and positive definite. +#### References -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* A. Frommer and P. Maass, [*Fast CG-Based Methods for Tikhonov-Phillips Regularization*](https://doi.org/10.1137/S1064827596313310), SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999. +* C. C. Paige and M. A. Saunders, [*Solution of Sparse Indefinite Systems of Linear Equations*](https://doi.org/10.1137/0712047), SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975. """ function cg_lanczos_shift end -function cg_lanczos_shift(A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} - nshifts = length(shifts) - solver = CgLanczosShiftSolver(A, b, nshifts) - cg_lanczos_shift!(solver, A, b, shifts; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cg_lanczos!(solver::CgLanczosShiftSolver, A, b, shifts; kwargs...) @@ -55,174 +72,213 @@ See [`CgLanczosShiftSolver`](@ref) for more details about the `solver`. """ function cg_lanczos_shift! 
end -function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, check_curvature :: Bool=false, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - - nshifts = length(shifts) - (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :v, S, n) - Mv, Mv_prev, Mv_next = solver.Mv, solver.Mv_prev, solver.Mv_next - x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat - ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged - not_cv, stats = solver.not_cv, solver.stats - rNorms_history, indefinite = stats.residuals, stats.indefinite - reset!(stats) - v = MisI ? Mv : solver.v - - # Initial state. - ## Distribute x similarly to shifts. 
- for i = 1 : nshifts - x[i] .= zero(FC) # x₀ - end - Mv .= b # Mv₁ ← b - MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁ - rNorms .= β - if history - for i = 1 : nshifts - push!(rNorms_history[i], rNorms[i]) - end +def_args_cg_lanczos_shift = (:(A ), + :(b::AbstractVector{FC} ), + :(shifts::AbstractVector{T})) + +def_kwargs_cg_lanczos_shift = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; check_curvature::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_cg_lanczos_shift = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos_shift) + +args_cg_lanczos_shift = (:A, :b, :shifts) +kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cg_lanczos_shift($(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + nshifts = length(shifts) + solver = CgLanczosShiftSolver(A, b, nshifts) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cg_lanczos_shift!(solver, $(args_cg_lanczos_shift...); $(kwargs_cg_lanczos_shift...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Keep track of shifted systems with negative curvature if required. - indefinite .= false + function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, $(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - if β == 0 - stats.niter = 0 - stats.solved = true - stats.status = "x = 0 is a zero-residual solution" - return solver - end + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax - # Initialize each p to v. 
- for i = 1 : nshifts - p[i] .= v - end + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") - # Initialize Lanczos process. - # β₁Mv₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ - Mv_prev .= Mv - - # Initialize some constants used in recursions below. - ρ = one(T) - σ .= β - δhat .= zero(T) - ω .= zero(T) - γ .= one(T) - - # Define stopping tolerance. - ε = atol + rtol * β - - # Keep track of shifted systems that have converged. - for i = 1 : nshifts - converged[i] = rNorms[i] ≤ ε - not_cv[i] = !converged[i] - end - iter = 0 - itmax == 0 && (itmax = 2 * n) - - # Build format strings for printing. - if kdisplay(iter, verbose) - fmt = "%5d" * repeat(" %8.1e", nshifts) * "\n" - # precompile printf for our particular format - local_printf(data...) = Core.eval(Main, :(@printf($fmt, $(data)...))) - local_printf(iter, rNorms...) - end + nshifts = length(shifts) + nshifts == solver.nshifts || error("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $nshifts") + (verbose > 0) && @printf(iostream, "CG-LANCZOS-SHIFT: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) - solved = sum(not_cv) == 0 - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - # Main loop. - while ! (solved || tired || user_requested_exit) - # Form next Lanczos vector. - # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ - mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ - @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ - if iter > 0 - @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ - @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ - end - @. 
Mv = Mv_next # Mvₖ ← Mvₖ₊₁ - MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ - - # Check curvature: vₖᵀ(A + sᵢI)vₖ = vₖᵀAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². - # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ (A + sᵢ I) pₖ. - MisI || (ρ = @kdotr(n, v, v)) + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + Mv, Mv_prev, Mv_next = solver.Mv, solver.Mv_prev, solver.Mv_next + x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat + ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged + not_cv, stats = solver.not_cv, solver.stats + rNorms_history, indefinite = stats.residuals, stats.indefinite + reset!(stats) + v = MisI ? Mv : solver.v + + # Initial state. + ## Distribute x similarly to shifts. for i = 1 : nshifts - δhat[i] = δ + ρ * shifts[i] - γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) + x[i] .= zero(FC) # x₀ end - for i = 1 : nshifts - indefinite[i] |= γ[i] ≤ 0 + Mv .= b # Mv₁ ← b + MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁ + β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ + rNorms .= β + if history + for i = 1 : nshifts + push!(rNorms_history[i], rNorms[i]) + end end - # Compute next CG iterate for each shifted system that has not yet converged. - # Stop iterating on indefinite problems if requested. - for i = 1 : nshifts - not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] - if not_cv[i] - @kaxpy!(n, γ[i], p[i], x[i]) - ω[i] = β * γ[i] - σ[i] *= -ω[i] - ω[i] *= ω[i] - @kaxpby!(n, σ[i], v, ω[i], p[i]) - - # Update list of systems that have not converged. 
- rNorms[i] = abs(σ[i]) - converged[i] = rNorms[i] ≤ ε - end + # Keep track of shifted systems with negative curvature if required. + indefinite .= false + + if β == 0 + stats.niter = 0 + stats.solved = true + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver end - if length(not_cv) > 0 && history - for i = 1 : nshifts - not_cv[i] && push!(rNorms_history[i], rNorms[i]) - end + # Initialize each p to v. + for i = 1 : nshifts + p[i] .= v end - # Is there a better way than to update this array twice per iteration? + # Initialize Lanczos process. + # β₁Mv₁ = b + @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + Mv_prev .= Mv + + # Initialize some constants used in recursions below. + ρ = one(T) + σ .= β + δhat .= zero(T) + ω .= zero(T) + γ .= one(T) + + # Define stopping tolerance. + ε = atol + rtol * β + + # Keep track of shifted systems that have converged. for i = 1 : nshifts - not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] + converged[i] = rNorms[i] ≤ ε + not_cv[i] = !converged[i] end - iter = iter + 1 - kdisplay(iter, verbose) && local_printf(iter, rNorms...) + iter = 0 + itmax == 0 && (itmax = 2 * n) + + # Build format strings for printing. + (verbose > 0) && (fmt = Printf.Format("%5d" * repeat(" %8.1e", nshifts) * " %.2fs\n")) + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time)) - user_requested_exit = callback(solver) :: Bool - solved = sum(not_cv) == 0 + solved = !reduce(|, not_cv) tired = iter ≥ itmax - end - (verbose > 0) && @printf("\n") + status = "unknown" + user_requested_exit = false + overtimed = false + + # Main loop. + while ! (solved || tired || user_requested_exit || overtimed) + # Form next Lanczos vector. 
+ # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ + mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ + δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ + @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ + if iter > 0 + @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ + @. Mv_prev = Mv # Mvₖ₋₁ ← Mvₖ + end + @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ + MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ + β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ + @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ + + # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². + # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ. + MisI || (ρ = @kdotr(n, v, v)) + for i = 1 : nshifts + δhat[i] = δ + ρ * shifts[i] + γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) + end + for i = 1 : nshifts + indefinite[i] |= γ[i] ≤ 0 + end + + # Compute next CG iterate for each shifted system that has not yet converged. + # Stop iterating on indefinite problems if requested. + for i = 1 : nshifts + not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] + if not_cv[i] + @kaxpy!(n, γ[i], p[i], x[i]) + ω[i] = β * γ[i] + σ[i] *= -ω[i] + ω[i] *= ω[i] + @kaxpby!(n, σ[i], v, ω[i], p[i]) + + # Update list of systems that have not converged. + rNorms[i] = abs(σ[i]) + converged[i] = rNorms[i] ≤ ε + end + end - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") + if length(not_cv) > 0 && history + for i = 1 : nshifts + not_cv[i] && push!(rNorms_history[i], rNorms[i]) + end + end - # Update stats. TODO: Estimate Anorm and Acond. - stats.niter = iter - stats.solved = solved - stats.status = status - return solver + # Is there a better way than to update this array twice per iteration? + for i = 1 : nshifts + not_cv[i] = check_curvature ? 
!(converged[i] || indefinite[i]) : !converged[i] + end + iter = iter + 1 + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time)) + + user_requested_exit = callback(solver) :: Bool + solved = !reduce(|, not_cv) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats. TODO: Estimate Anorm and Acond. + stats.niter = iter + stats.solved = solved + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/cgls.jl b/src/cgls.jl index f5529fbfb..e36d5acbd 100644 --- a/src/cgls.jl +++ b/src/cgls.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # CGLS is formally equivalent to applying the conjugate gradient method # to the normal equations but should be more stable. It is also formally @@ -28,12 +28,13 @@ export cgls, cgls! - """ (x, stats) = cgls(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + itmax::Int=0, timemax::Float64=Inf, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. 
@@ -42,19 +43,41 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ‖x‖₂² -using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CG to the normal equations - (AᵀA + λI) x = Aᵀb + (AᴴA + λI) x = Aᴴb but is more stable. -CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂. It is formally equivalent to LSQR, though can be slightly less accurate, but simpler to implement. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -63,12 +86,6 @@ and `false` otherwise. """ function cgls end -function cgls(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CglsSolver(A, b) - cgls!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cgls!(solver::CglsSolver, A, b; kwargs...) @@ -78,110 +95,151 @@ See [`CglsSolver`](@ref) for more details about the `solver`. """ function cgls! end -function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGLS: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :Mr, S, m) - x, p, s, r, q, stats = solver.x, solver.p, solver.s, solver.r, solver.q, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Mr = MisI ? r : solver.Mr - Mq = MisI ? 
q : solver.Mr - - x .= zero(FC) - r .= b - bNorm = @knrm2(m, r) # Marginally faster than norm(b) - if bNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - return solver +def_args_cgls = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_cgls = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cgls = mapreduce(extract_parameters, vcat, def_kwargs_cgls) + +args_cgls = (:A, :b) +kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cgls($(def_args_cgls...); $(def_kwargs_cgls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CglsSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgls!(solver, $(args_cgls...); $(kwargs_cgls...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(s, Aᵀ, Mr) - p .= s - γ = @kdotr(n, s, s) # γ = sᵀs - iter = 0 - itmax == 0 && (itmax = m + n) - - rNorm = bNorm - ArNorm = sqrt(γ) - history && push!(rNorms, rNorm) - history && push!(ArNorms, ArNorm) - ε = atol + rtol * ArNorm - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - - status = "unknown" - on_boundary = false - solved = ArNorm ≤ ε - tired = iter ≥ itmax - user_requested_exit = false - - while ! 
(solved || tired || user_requested_exit) - mul!(q, A, p) - MisI || mulorldiv!(Mq, M, q, ldiv) - δ = @kdotr(m, q, Mq) # δ = qᵀMq - λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᵀp - α = γ / δ - - # if a trust-region constraint is give, compute step to the boundary - σ = radius > 0 ? maximum(to_boundary(x, p, radius)) : α - if (radius > 0) & (α > σ) - α = σ - on_boundary = true - end - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + function cgls!(solver :: CglsSolver{T,FC,S}, $(def_args_cgls...); $(def_kwargs_cgls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CGLS: system of %d equations in %d variables\n", m, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :Mr, S, m) + x, p, s, r, q, stats = solver.x, solver.p, solver.s, solver.r, solver.q, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Mr = MisI ? r : solver.Mr + Mq = MisI ? 
q : solver.Mr + + x .= zero(FC) + r .= b + bNorm = @knrm2(m, r) # Marginally faster than norm(b) + if bNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + return solver + end MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(s, Aᵀ, Mr) - λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x - γ_next = @kdotr(n, s, s) # γ_next = sᵀs - β = γ_next / γ - @kaxpby!(n, one(FC), s, β, p) # p = s + βp - γ = γ_next - rNorm = @knrm2(m, r) # Marginally faster than norm(r) + mul!(s, Aᴴ, Mr) + p .= s + γ = @kdotr(n, s, s) # γ = sᴴs + iter = 0 + itmax == 0 && (itmax = m + n) + + rNorm = bNorm ArNorm = sqrt(γ) history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - user_requested_exit = callback(solver) :: Bool - solved = (ArNorm ≤ ε) | on_boundary + ε = atol + rtol * ArNorm + (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + + status = "unknown" + on_boundary = false + solved = ArNorm ≤ ε tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! (solved || tired || user_requested_exit || overtimed) + mul!(q, A, p) + MisI || mulorldiv!(Mq, M, q, ldiv) + δ = @kdotr(m, q, Mq) # δ = qᴴMq + λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᴴp + α = γ / δ + + # if a trust-region constraint is give, compute step to the boundary + σ = radius > 0 ? 
maximum(to_boundary(n, x, p, radius)) : α + if (radius > 0) & (α > σ) + α = σ + on_boundary = true + end + + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + MisI || mulorldiv!(Mr, M, r, ldiv) + mul!(s, Aᴴ, Mr) + λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x + γ_next = @kdotr(n, s, s) # γ_next = sᴴs + β = γ_next / γ + @kaxpby!(n, one(FC), s, β, p) # p = s + βp + γ = γ_next + rNorm = @knrm2(m, r) # Marginally faster than norm(r) + ArNorm = sqrt(γ) + history && push!(rNorms, rNorm) + history && push!(ArNorms, ArNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + user_requested_exit = callback(solver) :: Bool + solved = (ArNorm ≤ ε) || on_boundary + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/cgne.jl b/src/cgne.jl index 2859414e1..8a4e6dddb 100644 --- a/src/cgne.jl +++ b/src/cgne.jl @@ -10,7 +10,7 @@ # and is equivalent to applying 
the conjugate gradient method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method is also known as Craig's method, CGME, and other # names, and is described in @@ -28,12 +28,13 @@ export cgne, cgne! - """ (x, stats) = cgne(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + N=I, ldiv::Bool=false, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,11 +43,11 @@ Solve the consistent linear system Ax + √λs = b -using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CG to the normal equations of the second kind - (AAᵀ + λI) y = b + (AAᴴ + λI) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -60,10 +61,29 @@ CGNE produces monotonic errors ‖x-x*‖₂ but not residuals ‖r‖₂. It is formally equivalent to CRAIG, though can be slightly less accurate, but simpler to implement. Only the x-part of the solution is returned. -A preconditioner M may be provided in the form of a linear operator. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -72,12 +92,6 @@ and `false` otherwise. """ function cgne end -function cgne(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgneSolver(A, b) - cgne!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cgne!(solver::CgneSolver, A, b; kwargs...) @@ -87,113 +101,154 @@ See [`CgneSolver`](@ref) for more details about the `solver`. """ function cgne! 
end -function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGNE: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :z, S, m) - allocate_if(λ > 0, solver, :s, S, m) - x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats - rNorms = stats.residuals - reset!(stats) - z = MisI ? r : solver.z - - x .= zero(FC) - r .= b - MisI || mulorldiv!(z, M, r, ldiv) - rNorm = @knrm2(m, r) # Marginally faster than norm(r) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_cgne = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_cgne = (:(; N = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cgne = mapreduce(extract_parameters, vcat, def_kwargs_cgne) + +args_cgne = (:A, :b) +kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cgne($(def_args_cgne...); $(def_kwargs_cgne...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = 
CgneSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgne!(solver, $(args_cgne...); $(kwargs_cgne...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - λ > 0 && (s .= r) - mul!(p, Aᵀ, z) - - # Use ‖p‖ to detect inconsistent system. - # An inconsistent system will necessarily have AA' singular. - # Because CGNE is equivalent to CG applied to AA'y = b, there will be a - # conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this - # implementation, p is a substitute for A'u. - pNorm = @knrm2(n, p) - - γ = @kdotr(m, r, z) # Faster than γ = dot(r, z) - iter = 0 - itmax == 0 && (itmax = m + n) - - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems. - (verbose > 0) && @printf("%5s %8s\n", "k", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm) - - status = "unknown" - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) - tired = iter ≥ itmax - user_requested_exit = false - - while ! 
(solved || inconsistent || tired || user_requested_exit) - mul!(q, A, p) - λ > 0 && @kaxpy!(m, λ, s, q) - δ = @kdotr(n, p, p) # Faster than dot(p, p) - λ > 0 && (δ += λ * @kdotr(m, s, s)) - α = γ / δ - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, q, r) # Faster than r = r - α * q - MisI || mulorldiv!(z, M, r, ldiv) - γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z) - β = γ_next / γ - mul!(Aᵀz, Aᵀ, z) - @kaxpby!(n, one(FC), Aᵀz, β, p) # Faster than p = Aᵀz + β * p - pNorm = @knrm2(n, p) - if λ > 0 - @kaxpby!(m, one(FC), r, β, s) # s = r + β * s - end - γ = γ_next - rNorm = sqrt(γ_next) + + function cgne!(solver :: CgneSolver{T,FC,S}, $(def_args_cgne...); $(def_kwargs_cgne...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CGNE: system of %d equations in %d variables\n", m, n) + + # Tests N = Iₙ + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!NisI, solver, :z, S, m) + allocate_if(λ > 0, solver, :s, S, m) + x, p, Aᴴz, r, q, s, stats = solver.x, solver.p, solver.Aᴴz, solver.r, solver.q, solver.s, solver.stats + rNorms = stats.residuals + reset!(stats) + z = NisI ? 
r : solver.z + + x .= zero(FC) + r .= b + NisI || mulorldiv!(z, N, r, ldiv) + rNorm = @knrm2(m, r) # Marginally faster than norm(r) history && push!(rNorms, rNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver + end + λ > 0 && (s .= r) + mul!(p, Aᴴ, z) + + # Use ‖p‖ to detect inconsistent system. + # An inconsistent system will necessarily have AA' singular. + # Because CGNE is equivalent to CG applied to AA'y = b, there will be a + # conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this + # implementation, p is a substitute for A'u. + pNorm = @knrm2(n, p) + + γ = @kdotr(m, r, z) # Faster than γ = dot(r, z) + iter = 0 + itmax == 0 && (itmax = m + n) - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems. + (verbose > 0) && @printf(iostream, "%5s %8s %5s\n", "k", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %.2fs\n", iter, rNorm, ktimer(start_time)) - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ɛ_c - solved = resid_decrease_lim || resid_decrease_mach + status = "unknown" + solved = rNorm ≤ ɛ_c inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! 
(solved || inconsistent || tired || user_requested_exit || overtimed) + mul!(q, A, p) + λ > 0 && @kaxpy!(m, λ, s, q) + δ = @kdotr(n, p, p) # Faster than dot(p, p) + λ > 0 && (δ += λ * @kdotr(m, s, s)) + α = γ / δ + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + NisI || mulorldiv!(z, N, r, ldiv) + γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z) + β = γ_next / γ + mul!(Aᴴz, Aᴴ, z) + @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p + pNorm = @knrm2(n, p) + if λ > 0 + @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + end + γ = γ_next + rNorm = sqrt(γ_next) + history && push!(rNorms, rNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ɛ_c + solved = resid_decrease_lim || resid_decrease_mach + inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + inconsistent && (status = "system probably inconsistent") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - inconsistent && (status = "system probably inconsistent") - solved && (status = "solution good enough 
given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/cgs.jl b/src/cgs.jl index c1eb1056e..e95e74d17 100644 --- a/src/cgs.jl +++ b/src/cgs.jl @@ -11,17 +11,23 @@ export cgs, cgs! """ - (x, stats) = cgs(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = cgs(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, M=I, N=I, + ldiv::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using conjugate gradient squared algorithm. + (x, stats) = cgs(A, b, x0::AbstractVector; kwargs...) + +CGS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using CGS. CGS requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. From "Iterative Methods for Sparse Linear Systems (Y. Saad)" : @@ -38,16 +44,34 @@ to become inaccurate. TFQMR and BICGSTAB were developed to remedy this difficulty.» -This implementation allows a left preconditioner M and a right preconditioner N. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -CGS can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = cgs(A, b, x0; kwargs...) 
+* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -55,18 +79,6 @@ and `false` otherwise. """ function cgs end -function cgs(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CgsSolver(A, b) - cgs!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cgs(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CgsSolver(A, b) - cgs!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = cgs!(solver::CgsSolver, A, b; kwargs...) solver = cgs!(solver::CgsSolver, A, b, x0; kwargs...) @@ -77,153 +89,204 @@ See [`CgsSolver`](@ref) for more details about the `solver`. """ function cgs! end -function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cgs!(solver, A, b; kwargs...) - return solver -end - -function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGS: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :vw, S, n) - allocate_if(!NisI, solver, :yz, S, n) - Δx, x, r, u, p, q, ts, stats = solver.Δx, solver.x, solver.r, solver.u, solver.p, solver.q, solver.ts, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - t = s = solver.ts - v = MisI ? t : solver.vw - w = MisI ? s : solver.vw - y = NisI ? p : solver.yz - z = NisI ? u : solver.yz - r₀ = MisI ? 
r : solver.ts - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - else - r₀ .= b +def_args_cgs = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cgs = (:(x0::AbstractVector),) + +def_kwargs_cgs = (:(; c::AbstractVector{FC} = b ), + :(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cgs = mapreduce(extract_parameters, vcat, def_kwargs_cgs) + +args_cgs = (:A, :b) +optargs_cgs = (:x0,) +kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cgs($(def_args_cgs...), $(def_optargs_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgsSolver(A, b) + warm_start!(solver, $(optargs_cgs...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgs!(solver, $(args_cgs...); $(kwargs_cgs...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - x .= zero(FC) # x₀ - MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ - - # Compute residual norm ‖r₀‖₂. 
- rNorm = @knrm2(n, r) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver - end - - # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩ - ρ = @kdot(n, c, r) - if ρ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = false, false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start =false - return solver + function cgs($(def_args_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CgsSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgs!(solver, $(args_cgs...); $(kwargs_cgs...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - u .= r # u₀ - p .= r # p₀ - q .= zero(FC) # q₋₁ - - # Stopping criterion. 
- solved = rNorm ≤ ε - tired = iter ≥ itmax - breakdown = false - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - - NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ - mul!(t, A, y) # tₖ = Ayₖ - MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ - σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩ - α = ρ / σ # αₖ = ρₖ / σₖ - @kcopy!(n, u, q) # qₖ = uₖ - @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ - @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ - NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½ - @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ) - mul!(s, A, z) # sₖ = Azₖ - MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ - @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ) - ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩ - β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ - @kcopy!(n, r, u) # uₖ₊₁ = rₖ₊₁ - @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ - @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ - @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ - - # Update ρ. - ρ = ρ_next # ρₖ ← ρₖ₊₁ - - # Update iteration index. - iter = iter + 1 - - # Compute residual norm ‖rₖ‖₂. + function cgs!(solver :: CgsSolver{T,FC,S}, $(def_args_cgs...); $(def_kwargs_cgs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CGS: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :vw, S, n) + allocate_if(!NisI, solver, :yz, S, n) + Δx, x, r, u, p, q, ts, stats = solver.Δx, solver.x, solver.r, solver.u, solver.p, solver.q, solver.ts, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + t = s = solver.ts + v = MisI ? t : solver.vw + w = MisI ? s : solver.vw + y = NisI ? p : solver.yz + z = NisI ? u : solver.yz + r₀ = MisI ? r : solver.ts + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + else + r₀ .= b + end + + x .= zero(FC) # x₀ + MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ + + # Compute residual norm ‖r₀‖₂. rNorm = @knrm2(n, r) history && push!(rNorms, rNorm) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩ + ρ = @kdot(n, c, r) + if ρ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = false, false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start =false + return solver + end + + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + u .= r # u₀ + p .= r # p₀ + q .= zero(FC) # q₋₁ + + # Stopping criterion. 
+ solved = rNorm ≤ ε tired = iter ≥ itmax - breakdown = (α == 0 || isnan(α)) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + breakdown = false + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ + mul!(t, A, y) # tₖ = Ayₖ + MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ + σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩ + α = ρ / σ # αₖ = ρₖ / σₖ + @kcopy!(n, u, q) # qₖ = uₖ + @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ + @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ + NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½ + @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ) + mul!(s, A, z) # sₖ = Azₖ + MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ + @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ) + ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩ + β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ + @kcopy!(n, r, u) # uₖ₊₁ = rₖ₊₁ + @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ + @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ + @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ + + # Update ρ. + ρ = ρ_next # ρₖ ← ρₖ₊₁ + + # Update iteration index. + iter = iter + 1 + + # Compute residual norm ‖rₖ‖₂. + rNorm = @knrm2(n, r) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + breakdown = (α == 0 || isnan(α)) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "breakdown αₖ == 0") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "breakdown αₖ == 0") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/cr.jl b/src/cr.jl index c678c7d29..96194f459 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -6,6 +6,9 @@ # E. Stiefel, Relaxationsmethoden bester Strategie zur Losung linearer Gleichungssysteme. # Commentarii Mathematici Helvetici, 29(1), pp. 157--179, 1955. # +# D. G. Luenberger, The conjugate residual method for constrained minimization problems. +# SIAM Journal on Numerical Analysis, 7(3), pp. 390--398, 1970. +# # M-A. Dahito and D. Orban, The Conjugate Residual Method in Linesearch and Trust-Region Methods. 
# SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019. # @@ -16,53 +19,63 @@ export cr, cr! """ (x, stats) = cr(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), γ::T=√eps(T), itmax::Int=0, - radius::T=zero(T), verbose::Int=0, linesearch::Bool=false, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + linesearch::Bool=false, γ::T=√eps(T), + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -A truncated version of Stiefel’s Conjugate Residual method to solve the symmetric linear system Ax = b or the least-squares problem min ‖b - Ax‖. -The matrix A must be positive semi-definite. + (x, stats) = cr(A, b, x0::AbstractVector; kwargs...) + +CR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. -A preconditioner M may be provided in the form of a linear operator and is assumed to be symmetric and positive definite. +A truncated version of Stiefel’s Conjugate Residual method to solve the Hermitian linear system Ax = b +of size n or the least-squares problem min ‖b - Ax‖ if A is singular. +The matrix A must be Hermitian semi-definite. M also indicates the weighted norm in which residuals are measured. -In a linesearch context, 'linesearch' must be set to 'true'. +#### Input arguments + +* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n; +* `b`: a vector of length n. -If `itmax=0`, the default number of iterations is set to `2 * n`, -with `n = length(b)`. +#### Optional argument -CR can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = cr(A, b, x0; kwargs...) 
+#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References * M. R. Hestenes and E. 
Stiefel, [*Methods of conjugate gradients for solving linear systems*](https://doi.org/10.6028/jres.049.044), Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952. * E. Stiefel, [*Relaxationsmethoden bester Strategie zur Losung linearer Gleichungssysteme*](https://doi.org/10.1007/BF02564277), Commentarii Mathematici Helvetici, 29(1), pp. 157--179, 1955. +* D. G. Luenberger, [*The conjugate residual method for constrained minimization problems*](https://doi.org/10.1137/0707032), SIAM Journal on Numerical Analysis, 7(3), pp. 390--398, 1970. * M-A. Dahito and D. Orban, [*The Conjugate Residual Method in Linesearch and Trust-Region Methods*](https://doi.org/10.1137/18M1204255), SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019. """ function cr end -function cr(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = CrSolver(A, b) - cr!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function cr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CrSolver(A, b) - cr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = cr!(solver::CrSolver, A, b; kwargs...) solver = cr!(solver::CrSolver, A, b, x0; kwargs...) @@ -73,286 +86,339 @@ See [`CrSolver`](@ref) for more details about the `solver`. """ function cr! end -function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - cr!(solver, A, b; kwargs...) 
- return solver -end - -function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0, - radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CR: system of %d equations in %d variables\n", n, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace - allocate_if(!MisI, solver, :Mq, S, n) - Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats - warm_start = solver.warm_start - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Mq = MisI ? q : solver.Mq - - # Initial state. 
- x .= zero(FC) - if warm_start - mul!(p, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), p) - else - p .= b +def_args_cr = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_cr = (:(x0::AbstractVector),) + +def_kwargs_cr = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; linesearch::Bool = false ), + :(; γ::T = √eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_cr = mapreduce(extract_parameters, vcat, def_kwargs_cr) + +args_cr = (:A, :b) +optargs_cr = (:x0,) +kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function cr($(def_args_cr...), $(def_optargs_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrSolver(A, b) + warm_start!(solver, $(optargs_cr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cr!(solver, $(args_cr...); $(kwargs_cr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - mulorldiv!(r, M, p, ldiv) - mul!(Ar, A, r) - ρ = @kdotr(n, r, Ar) - - rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖ - history && push!(rNorms, rNorm) # Values of ‖r‖ - - if ρ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(ArNorms, zero(T)) - solver.warm_start = false - return solver + + function cr($(def_args_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cr!(solver, $(args_cr...); $(kwargs_cr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - p .= r - q .= Ar - (verbose > 0) && (m = 
zero(T)) # quadratic model - - iter = 0 - itmax == 0 && (itmax = 2 * n) - - rNorm² = rNorm * rNorm - pNorm = rNorm - pNorm² = rNorm² - pr = rNorm² - abspr = pr - pAp = ρ - abspAp = abs(pAp) - xNorm = zero(T) - ArNorm = @knrm2(n, Ar) # ‖Ar‖ - history && push!(ArNorms, ArNorm) - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad") - kdisplay(iter, verbose) && @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) - - descent = pr > 0 # pᵀr > 0 means p is a descent direction - solved = rNorm ≤ ε - tired = iter ≥ itmax - on_boundary = false - npcurv = false - status = "unknown" - user_requested_exit = false - - while ! (solved || tired || user_requested_exit) - if linesearch - if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) - npcurv = true - (verbose > 0) && @printf("nonpositive curvature detected: pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) - stats.solved = solved - stats.inconsistent = false - stats.status = "nonpositive curvature" - return solver - end - elseif pAp ≤ 0 && radius == 0 - error("Indefinite system and no trust region") + + function cr!(solver :: CrSolver{T,FC,S}, $(def_args_cr...); $(def_kwargs_cr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") + (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace + allocate_if(!MisI, solver, :Mq, S, n) + Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats + warm_start = solver.warm_start + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Mq = MisI ? q : solver.Mq + + # Initial state. + x .= zero(FC) + if warm_start + mul!(p, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), p) + else + p .= b end - MisI || mulorldiv!(Mq, M, q, ldiv) - - if radius > 0 - (verbose > 0) && @printf("radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm) - # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2) - xNorm² = xNorm * xNorm - t = to_boundary(x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²) - t1 = maximum(t) # > 0 - t2 = minimum(t) # < 0 - tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) - (verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) - - if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᵀAp ≃ 0 - npcurv = true # nonpositive curvature - (verbose > 0) && @printf("pᵀAp = %8.1e ≃ 0\n", pAp) - if abspr ≤ γ * pNorm * rNorm # pᵀr ≃ 0 - (verbose > 0) && @printf("pᵀr = %8.1e ≃ 0, redefining p := r\n", pr) - p = r # - ∇q(x) - q = Ar - # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᵀAr - # 1) if rᵀAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᵀAr - # 2) if rᵀAr ≤ 0, the quadratic decreases to -∞ in the direction r - if ρ > 0 # case 1 - (verbose > 0) && @printf("quadratic is convex in direction r, curv = %8.1e\n", ρ) - α = min(tr, rNorm² / ρ) - else # case 2 - (verbose > 0) && @printf("r is a direction of nonpositive curvature: %8.1e\n", ρ) + mulorldiv!(r, M, p, ldiv) + mul!(Ar, A, r) + ρ = @kdotr(n, r, Ar) + + rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖ + history && push!(rNorms, rNorm) # Values of ‖r‖ + + if ρ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) 
+ stats.status = "x = 0 is a zero-residual solution" + history && push!(ArNorms, zero(T)) + solver.warm_start = false + return solver + end + p .= r + q .= Ar + (verbose > 0) && (m = zero(T)) # quadratic model + + iter = 0 + itmax == 0 && (itmax = 2 * n) + + rNorm² = rNorm * rNorm + pNorm = rNorm + pNorm² = rNorm² + pr = rNorm² + abspr = pr + pAp = ρ + abspAp = abs(pAp) + xNorm = zero(T) + ArNorm = @knrm2(n, Ar) # ‖Ar‖ + history && push!(ArNorms, ArNorm) + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %5s\n", "k", "‖x‖", "‖r‖", "quad", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.1e %8.1e %8.1e %.2fs\n", iter, xNorm, rNorm, m, ktimer(start_time)) + + descent = pr > 0 # pᴴr > 0 means p is a descent direction + solved = rNorm ≤ ε + tired = iter ≥ itmax + on_boundary = false + npcurv = false + status = "unknown" + user_requested_exit = false + overtimed = false + + while ! (solved || tired || user_requested_exit || overtimed) + if linesearch + if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) + npcurv = true + (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "nonpositive curvature" + return solver + end + elseif pAp ≤ 0 && radius == 0 + error("Indefinite system and no trust region") + end + MisI || mulorldiv!(Mq, M, q, ldiv) + + if radius > 0 + (verbose > 0) && @printf(iostream, "radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm) + # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2) + xNorm² = xNorm * xNorm + t = to_boundary(n, x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²) + t1 = maximum(t) # > 0 + t2 = minimum(t) # < 0 + tr = maximum(to_boundary(n, x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) + (verbose > 0) && @printf(iostream, "t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) + + if abspAp ≤ γ * pNorm 
* @knrm2(n, q) # pᴴAp ≃ 0 + npcurv = true # nonpositive curvature + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp) + if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0 + (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr) + p = r # - ∇q(x) + q = Ar + # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr + # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr + # 2) if rᴴAr ≤ 0, the quadratic decreases to -∞ in the direction r + if ρ > 0 # case 1 + (verbose > 0) && @printf(iostream, "quadratic is convex in direction r, curv = %8.1e\n", ρ) + α = min(tr, rNorm² / ρ) + else # case 2 + (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ) + α = tr + end + else + # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp + # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᴴAr + # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed + α = descent ? t1 : t2 + ρ > 0 && (tr = min(tr, rNorm² / ρ)) + Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0 + if Δ > 0 # direction r engenders a better decrease + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") + p = r + q = Ar + α = tr + else + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + end + end + + elseif pAp > 0 && ρ > 0 # no negative curvature + (verbose > 0) && @printf(iostream, "positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) + α = ρ / @kdotr(n, q, Mq) + if α ≥ t1 + α = t1 + on_boundary = true + end + + elseif pAp > 0 && ρ < 0 + npcurv = true + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ) + # q_p is minimal for α_p = rᴴp / pᴴAp + α = descent ? 
min(t1, pr / pAp) : max(t2, pr / pAp) + Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 + if Δ > 0 + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") + p = r + q = Ar α = tr + else + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end - else - # q_p = q(x + α_p * p) - q(x) = -α_p * rᵀp + ½ (α_p)² * pᵀAp - # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᵀAr - # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed + + elseif pAp < 0 && ρ > 0 + npcurv = true + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ) α = descent ? t1 : t2 - ρ > 0 && (tr = min(tr, rNorm² / ρ)) - Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᵀAp = 0 - if Δ > 0 # direction r engenders a better decrease - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") + tr = min(tr, rNorm² / ρ) + Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 + if Δ > 0 + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r q = Ar α = tr else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end - end - - elseif pAp > 0 && ρ > 0 # no negative curvature - (verbose > 0) && @printf("positive curvatures along p and r. 
pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) - α = ρ / @kdotr(n, q, Mq) - if α ≥ t1 - α = t1 - on_boundary = true - end - - elseif pAp > 0 && ρ < 0 - npcurv = true - (verbose > 0) && @printf("pᵀAp = %8.1e > 0 and rᵀAr = %8.1e < 0\n", pAp, ρ) - # q_p is minimal for α_p = rᵀp / pᵀAp - α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp) - Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 - if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") - p = r - q = Ar - α = tr - else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) - end - elseif pAp < 0 && ρ > 0 - npcurv = true - (verbose > 0) && @printf("pᵀAp = %8.1e < 0 and rᵀAr = %8.1e > 0\n", pAp, ρ) - α = descent ? t1 : t2 - tr = min(tr, rNorm² / ρ) - Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 - if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") - p = r - q = Ar - α = tr - else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + elseif pAp < 0 && ρ < 0 + npcurv = true + (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) + α = descent ? t1 : t2 + Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 + if Δ > 0 + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") + p = r + q = Ar + α = tr + else + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + end end - elseif pAp < 0 && ρ < 0 - npcurv = true - (verbose > 0) && @printf("negative curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) - α = descent ? 
t1 : t2 - Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 - if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") - p = r - q = Ar - α = tr - else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) - end + elseif radius == 0 + α = ρ / @kdotr(n, q, Mq) # step end - elseif radius == 0 - α = ρ / @kdotr(n, q, Mq) # step - end - - @kaxpy!(n, α, p, x) - xNorm = @knrm2(n, x) - xNorm ≈ radius && (on_boundary = true) - @kaxpy!(n, -α, Mq, r) # residual - if MisI - rNorm² = @kdotr(n, r, r) - rNorm = sqrt(rNorm²) - else - ω = sqrt(α) * sqrt(ρ) - rNorm = sqrt(abs(rNorm + ω)) * sqrt(abs(rNorm - ω)) - rNorm² = rNorm * rNorm # rNorm² = rNorm² - α * ρ - end - history && push!(rNorms, rNorm) - mul!(Ar, A, r) - ArNorm = @knrm2(n, Ar) - history && push!(ArNorms, ArNorm) - - iter = iter + 1 - if kdisplay(iter, verbose) - m = m - α * pr + α^2 * pAp / 2 - @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) - end - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + @kaxpy!(n, α, p, x) + xNorm = @knrm2(n, x) + xNorm ≈ radius && (on_boundary = true) + @kaxpy!(n, -α, Mq, r) # residual + if MisI + rNorm² = @kdotr(n, r, r) + rNorm = sqrt(rNorm²) + else + ω = sqrt(α) * sqrt(ρ) + rNorm = sqrt(abs(rNorm + ω)) * sqrt(abs(rNorm - ω)) + rNorm² = rNorm * rNorm # rNorm² = rNorm² - α * ρ + end + history && push!(rNorms, rNorm) + mul!(Ar, A, r) + ArNorm = @knrm2(n, Ar) + history && push!(ArNorms, ArNorm) + + iter = iter + 1 + if kdisplay(iter, verbose) + m = m - α * pr + α^2 * pAp / 2 + @printf(iostream, "%5d %8.1e %8.1e %8.1e %.2fs\n", iter, xNorm, rNorm, m, ktimer(start_time)) + end - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - resid_decrease = resid_decrease_lim || resid_decrease_mach - solved = resid_decrease || npcurv || on_boundary - tired = iter ≥ itmax + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + resid_decrease = resid_decrease_lim || resid_decrease_mach + solved = resid_decrease || npcurv || on_boundary + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + (solved || tired || user_requested_exit || overtimed) && continue + ρbar = ρ + ρ = @kdotr(n, r, Ar) + β = ρ / ρbar # step for the direction computation + @kaxpby!(n, one(FC), r, β, p) + @kaxpby!(n, one(FC), Ar, β, q) + + pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm² + if pNorm² > sqrt(eps(T)) + pNorm = sqrt(pNorm²) + elseif abs(pNorm²) ≤ sqrt(eps(T)) + pNorm = zero(T) + else + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "solver encountered numerical issues" + solver.warm_start = false + return solver + end + pr = rNorm² + β * pr - β * α * pAp # pᴴr + abspr = abs(pr) + pAp = ρ + β^2 * pAp # pᴴq + abspAp = abs(pAp) + descent = pr > 0 - (solved || tired || user_requested_exit) && continue - ρbar = ρ - ρ = @kdotr(n, r, Ar) - β = ρ / ρbar # step for the direction computation - @kaxpby!(n, one(FC), r, β, p) - @kaxpby!(n, one(FC), Ar, β, q) - - pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm² - if pNorm² > sqrt(eps(T)) - pNorm = sqrt(pNorm²) - elseif abs(pNorm²) ≤ sqrt(eps(T)) - pNorm = zero(T) - else - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = "solver encountered numerical issues" - solver.warm_start = false - return solver end - pr = rNorm² + β * pr - β * α * pAp # pᵀr - abspr = abs(pr) - pAp = ρ + β^2 * pAp # pᵀq - abspAp = abs(pAp) - descent = pr > 0 + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + on_boundary && (status = "on trust-region boundary") + npcurv && (status = "nonpositive curvature") + 
solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - on_boundary && (status = "on trust-region boundary") - npcurv && (status = "nonpositive curvature") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/craig.jl b/src/craig.jl index 20597ea02..46e8f93e5 100644 --- a/src/craig.jl +++ b/src/craig.jl @@ -11,7 +11,7 @@ # and is equivalent to applying the conjugate gradient method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method, sometimes known under the name CRAIG, is the # Golub-Kahan implementation of CGNE, and is described in @@ -32,13 +32,15 @@ export craig, craig! 
- """ (x, y, stats) = craig(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T), - btol::T=√eps(T), rtol::T=√eps(T), conlim::T=1/√eps(T), itmax::Int=0, - verbose::Int=0, transfer_to_lsqr::Bool=false, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + transfer_to_lsqr::Bool=false, sqd::Bool=false, + λ::T=zero(T), btol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -47,19 +49,19 @@ Find the least-norm solution of the consistent linear system Ax + λ²y = b -using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a +of size m × n using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a regularization parameter. This method is equivalent to CGNE but is more stable. For a system in the form Ax = b, Craig's method is equivalent to applying -CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange +CG to AAᴴy = b and recovering x = Aᴴy. Note that y are the Lagrange multipliers of the least-norm problem minimize ‖x‖ s.t. Ax = b. If `λ > 0`, CRAIG solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -70,12 +72,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -CRAIG is then equivalent to applying CG to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +CRAIG is then equivalent to applying CG to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. 
If `λ = 0`, CRAIG solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -86,8 +88,35 @@ In this case, `M` can still be specified and indicates the weighted norm in whic In this implementation, both the x and y-parts of the solution are returned. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -96,12 +125,6 @@ and `false` otherwise. """ function craig end -function craig(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CraigSolver(A, b) - craig!(solver, A, b; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = craig!(solver::CraigSolver, A, b; kwargs...) @@ -111,192 +134,130 @@ See [`CraigSolver`](@ref) for more details about the `solver`. """ function craig! end -function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0, - verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRAIG: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. 
- allocate_if(!MisI, solver, :u , S, m) - allocate_if(!NisI, solver, :v , S, n) - allocate_if(λ > 0, solver, :w2, S, n) - x, Nv, Aᵀu, y, w = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w - Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats - rNorms = stats.residuals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - x .= zero(FC) - y .= zero(FC) - - Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - rNorm = β₁ - history && push!(rNorms, rNorm) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_craig = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_craig = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_lsqr::Bool = false), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_craig = mapreduce(extract_parameters, vcat, def_kwargs_craig) + +args_craig = (:A, :b) +kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function craig($(def_args_craig...); $(def_kwargs_craig...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CraigSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + craig!(solver, $(args_craig...); $(kwargs_craig...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - β₁² = β₁^2 - β = β₁ - θ = β₁ # θ will differ from β when there is regularization (λ > 0). - ξ = -one(T) # Most recent component of x in Range(V). 
- δ = λ - ρ_prev = one(T) - - # Initialize Golub-Kahan process. - # β₁Mu₁ = b. - @kscal!(m, one(FC) / β₁, u) - MisI || @kscal!(m, one(FC) / β₁, Mu) - - Nv .= zero(FC) - w .= zero(FC) # Used to update y. - - λ > 0 && (w2 .= zero(FC)) - - Anorm² = zero(T) # Estimate of ‖A‖²_F. - Anorm = zero(T) - Dnorm² = zero(T) # Estimate of ‖(AᵀA)⁻¹‖². - Acond = zero(T) # Estimate of cond(A). - xNorm² = zero(T) # Estimate of ‖x‖². - xNorm = zero(T) - - iter = 0 - itmax == 0 && (itmax = m + n) - - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol # Stopping tolerance for inconsistent systems. - ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators. - (verbose > 0) && @printf("%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond) - - bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²) - - status = "unknown" - - solved_lim = bkwerr ≤ btol - solved_mach = one(T) + bkwerr ≤ one(T) - solved_resid_tol = rNorm ≤ ɛ_c - solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁ - solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim - - ill_cond = ill_cond_mach = ill_cond_lim = false - - inconsistent = false - tired = iter ≥ itmax - user_requested_exit = false - - while ! (solved || inconsistent || ill_cond || tired || user_requested_exit) - # Generate the next Golub-Kahan vectors - # 1. 
αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α == 0 - inconsistent = true - continue - end - @kscal!(n, one(FC) / α, v) - NisI || @kscal!(n, one(FC) / α, Nv) - - Anorm² += α * α + λ * λ - - if λ > 0 - # Givens rotation to zero out the δ in position (k, 2k): - # k-1 k 2k k 2k k-1 k 2k - # k [ θ α δ ] [ c₁ s₁ ] = [ θ ρ ] - # k+1 [ β ] [ s₁ -c₁ ] [ θ+ γ ] - (c₁, s₁, ρ) = sym_givens(α, δ) - else - ρ = α - end - ξ = -θ / ρ * ξ - - if λ > 0 - # w1 = c₁ * v + s₁ * w2 - # w2 = s₁ * v - c₁ * w2 - # x = x + ξ * w1 - @kaxpy!(n, ξ * c₁, v, x) - @kaxpy!(n, ξ * s₁, w2, x) - @kaxpby!(n, s₁, v, -c₁, w2) - else - @kaxpy!(n, ξ, v, x) # x = x + ξ * v - end + function craig!(solver :: CraigSolver{T,FC,S}, $(def_args_craig...); $(def_kwargs_craig...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRAIG: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) - # Recur y. - @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w - @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w + # Tests M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) - Dnorm² += @knrm2(m, w) + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") - # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ allocate_if(!MisI, solver, :u , S, m) + allocate_if(!NisI, solver, :v , S, n) + allocate_if(λ > 0, solver, :w2, S, n) + x, Nv, Aᴴu, y, w = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w + Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats + rNorms = stats.residuals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + x .= zero(FC) + y .= zero(FC) + + Mu .= b MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC) / β, u) - MisI || @kscal!(m, one(FC) / β, Mu) + β₁ = sqrt(@kdotr(m, u, Mu)) + rNorm = β₁ + history && push!(rNorms, rNorm) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver end + β₁² = β₁^2 + β = β₁ + θ = β₁ # θ will differ from β when there is regularization (λ > 0). + ξ = -one(T) # Most recent component of x in Range(V). + δ = λ + ρ_prev = one(T) - # Finish updates from the first Givens rotation. - if λ > 0 - θ = β * c₁ - γ = β * s₁ - else - θ = β - end + # Initialize Golub-Kahan process. + # β₁Mu₁ = b. + @kscal!(m, one(FC) / β₁, u) + MisI || @kscal!(m, one(FC) / β₁, Mu) - if λ > 0 - # Givens rotation to zero out the γ in position (k+1, 2k) - # 2k 2k+1 2k 2k+1 2k 2k+1 - # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ] - # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ] - c₂, s₂, δ = sym_givens(λ, γ) - @kscal!(n, s₂, w2) - end + Nv .= zero(FC) + w .= zero(FC) # Used to update y. - Anorm² += β * β - Anorm = sqrt(Anorm²) - Acond = Anorm * sqrt(Dnorm²) - xNorm² += ξ * ξ - xNorm = sqrt(xNorm²) - rNorm = β * abs(ξ) # r = - β * ξ * u - λ > 0 && (rNorm *= abs(c₁)) # r = -c₁ * β * ξ * u when λ > 0. - history && push!(rNorms, rNorm) - iter = iter + 1 + λ > 0 && (w2 .= zero(FC)) + + Anorm² = zero(T) # Estimate of ‖A‖²_F. + Anorm = zero(T) + Dnorm² = zero(T) # Estimate of ‖(AᴴA)⁻¹‖². + Acond = zero(T) # Estimate of cond(A). + xNorm² = zero(T) # Estimate of ‖x‖². 
+ xNorm = zero(T) - bkwerr = rNorm / sqrt(β₁² + Anorm² * xNorm²) + iter = 0 + itmax == 0 && (itmax = m + n) - ρ_prev = ρ # Only differs from α if λ > 0. + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol # Stopping tolerance for inconsistent systems. + ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators. + (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %8s %8s %7s %5s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8s %7s %.2fs\n", iter, rNorm, xNorm, Anorm, Acond, " ✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time)) - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β) + bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²) + + status = "unknown" solved_lim = bkwerr ≤ btol solved_mach = one(T) + bkwerr ≤ one(T) @@ -304,34 +265,141 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁ solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim - ill_cond_mach = one(T) + one(T) / Acond ≤ one(T) - ill_cond_lim = 1 / Acond ≤ ctol - ill_cond = ill_cond_mach | ill_cond_lim + ill_cond = ill_cond_mach = ill_cond_lim = false - user_requested_exit = callback(solver) :: Bool inconsistent = false tired = iter ≥ itmax - end - (verbose > 0) && @printf("\n") + user_requested_exit = false + overtimed = false + + while ! (solved || inconsistent || ill_cond || tired || user_requested_exit || overtimed) + # Generate the next Golub-Kahan vectors + # 1. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α == 0 + inconsistent = true + continue + end + @kscal!(n, one(FC) / α, v) + NisI || @kscal!(n, one(FC) / α, Nv) + + Anorm² += α * α + λ * λ + + if λ > 0 + # Givens rotation to zero out the δ in position (k, 2k): + # k-1 k 2k k 2k k-1 k 2k + # k [ θ α δ ] [ c₁ s₁ ] = [ θ ρ ] + # k+1 [ β ] [ s₁ -c₁ ] [ θ+ γ ] + (c₁, s₁, ρ) = sym_givens(α, δ) + else + ρ = α + end + + ξ = -θ / ρ * ξ + + if λ > 0 + # w1 = c₁ * v + s₁ * w2 + # w2 = s₁ * v - c₁ * w2 + # x = x + ξ * w1 + @kaxpy!(n, ξ * c₁, v, x) + @kaxpy!(n, ξ * s₁, w2, x) + @kaxpby!(n, s₁, v, -c₁, w2) + else + @kaxpy!(n, ξ, v, x) # x = x + ξ * v + end + + # Recur y. + @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w + @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w + + Dnorm² += @knrm2(m, w) + + # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC) / β, u) + MisI || @kscal!(m, one(FC) / β, Mu) + end + + # Finish updates from the first Givens rotation. + if λ > 0 + θ = β * c₁ + γ = β * s₁ + else + θ = β + end + + if λ > 0 + # Givens rotation to zero out the γ in position (k+1, 2k) + # 2k 2k+1 2k 2k+1 2k 2k+1 + # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ] + # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ] + c₂, s₂, δ = sym_givens(λ, γ) + @kscal!(n, s₂, w2) + end + + Anorm² += β * β + Anorm = sqrt(Anorm²) + Acond = Anorm * sqrt(Dnorm²) + xNorm² += ξ * ξ + xNorm = sqrt(xNorm²) + rNorm = β * abs(ξ) # r = - β * ξ * u + λ > 0 && (rNorm *= abs(c₁)) # r = -c₁ * β * ξ * u when λ > 0. + history && push!(rNorms, rNorm) + iter = iter + 1 + + bkwerr = rNorm / sqrt(β₁² + Anorm² * xNorm²) + + ρ_prev = ρ # Only differs from α if λ > 0. 
+ + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e %.2fs\n", iter, rNorm, xNorm, Anorm, Acond, α, β, ktimer(start_time)) + + solved_lim = bkwerr ≤ btol + solved_mach = one(T) + bkwerr ≤ one(T) + solved_resid_tol = rNorm ≤ ɛ_c + solved_resid_lim = rNorm ≤ btol + atol * Anorm * xNorm / β₁ + solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim + + ill_cond_mach = one(T) + one(T) / Acond ≤ one(T) + ill_cond_lim = 1 / Acond ≤ ctol + ill_cond = ill_cond_mach | ill_cond_lim + + user_requested_exit = callback(solver) :: Bool + inconsistent = false + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") - # transfer to LSQR point if requested - if λ > 0 && transfer_to_lsqr - ξ *= -θ / δ - @kaxpy!(n, ξ, w2, x) - # TODO: update y - end + # transfer to LSQR point if requested + if λ > 0 && transfer_to_lsqr + ξ *= -θ / δ + @kaxpy!(n, ξ, w2, x) + # TODO: update y + end - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough for the tolerances given") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - inconsistent && (status = "system may be inconsistent") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough for the tolerances given") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + inconsistent && (status = "system may be inconsistent") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time 
limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/craigmr.jl b/src/craigmr.jl index e08bb9c36..5f05aa2ae 100644 --- a/src/craigmr.jl +++ b/src/craigmr.jl @@ -10,7 +10,7 @@ # and is equivalent to applying the conjugate residual method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method is equivalent to CRMR, and is described in # @@ -26,12 +26,13 @@ export craigmr, craigmr! - """ (x, y, stats) = craigmr(A, b::AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -40,11 +41,11 @@ Solve the consistent linear system Ax + λ²y = b -using the CRAIGMR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the CRAIGMR method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying the Conjugate Residuals method to the normal equations of the second kind - (AAᵀ + λ²I) y = b + (AAᴴ + λ²I) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -52,7 +53,7 @@ but is more stable. When λ = 0, this method solves the minimum-norm problem If `λ > 0`, CRAIGMR solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -63,12 +64,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. 
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, CRAIGMR solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -82,8 +83,32 @@ It is formally equivalent to CRMR, though can be slightly more accurate, and intricate to implement. Both the x- and y-parts of the solution are returned. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -92,12 +117,6 @@ and `false` otherwise. """ function craigmr end -function craigmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CraigmrSolver(A, b) - craigmr!(solver, A, b; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = craigmr!(solver::CraigmrSolver, A, b; kwargs...) @@ -107,230 +126,274 @@ See [`CraigmrSolver`](@ref) for more details about the `solver`. """ function craigmr! end -function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRAIGMR: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. 
- allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - allocate_if(λ > 0, solver, :q, S, n) - x, Nv, Aᵀu, d, y, Mu = solver.x, solver.Nv, solver.Aᵀu, solver.d, solver.y, solver.Mu - w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - # Compute y such that AAᵀy = b. Then recover x = Aᵀy. - x .= zero(FC) - y .= zero(FC) - Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - history && push!(rNorms, β) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_craigmr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_craigmr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_craigmr = mapreduce(extract_parameters, vcat, def_kwargs_craigmr) + +args_craigmr = (:A, :b) +kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function craigmr($(def_args_craigmr...); $(def_kwargs_craigmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CraigmrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + craigmr!(solver, $(args_craigmr...); $(kwargs_craigmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initialize Golub-Kahan process. - # β₁Mu₁ = b. - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - # α₁Nv₁ = Aᵀu₁. 
- mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - Anorm² = α * α - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²) - - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - history && push!(rNorms, β) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a minimum least-squares solution" - return solver - end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - - # Regularization. - λₖ = λ # λ₁ = λ - cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ - cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁ - λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 - - if λ > 0 - (cpₖ, spₖ, αhat) = sym_givens(α, λₖ) - @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁ - else - αhat = α - end - - # Initialize other constants. - ζbar = β - ρbar = αhat - θ = zero(T) - rNorm = ζbar - history && push!(rNorms, rNorm) - ArNorm = α - history && push!(ArNorms, ArNorm) - - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - - wbar .= u - @kscal!(m, one(FC)/αhat, wbar) - w .= zero(FC) - d .= zero(FC) - - status = "unknown" - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) - tired = iter ≥ itmax - user_requested_exit = false - - while ! (solved || inconsistent || tired || user_requested_exit) - iter = iter + 1 - - # Generate next Golub-Kahan vectors. - # 1. 
βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + function craigmr!(solver :: CraigmrSolver{T,FC,S}, $(def_args_craigmr...); $(def_kwargs_craigmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRAIGMR: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + allocate_if(λ > 0, solver, :q, S, n) + x, Nv, Aᴴu, d, y, Mu = solver.x, solver.Nv, solver.Aᴴu, solver.d, solver.y, solver.Mu + w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + # Compute y such that AAᴴy = b. Then recover x = Aᴴy. 
+ x .= zero(FC) + y .= zero(FC) + Mu .= b MisI || mulorldiv!(u, M, Mu, ldiv) β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + history && push!(rNorms, β) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver + end + + # Initialize Golub-Kahan process. + # β₁Mu₁ = b. + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + # α₁Nv₁ = Aᴴu₁. + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + Anorm² = α * α + + iter = 0 + itmax == 0 && (itmax = m + n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, β, α, β, α, 0, 1, Anorm², ktimer(start_time)) + + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + history && push!(rNorms, β) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) - Anorm² = Anorm² + β * β # = ‖B_{k-1}‖² + # Regularization. 
+ λₖ = λ # λ₁ = λ + cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ + cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁ + λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 if λ > 0 - βhat = cpₖ * β - λₐᵤₓ = spₖ * β + (cpₖ, spₖ, αhat) = sym_givens(α, λₖ) + @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁ else - βhat = β + αhat = α end - # Continue QR factorization - # - # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : - # [ β 0 ] [ 0 ζbar ] - # - # k k+1 k k+1 k k+1 - # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] - # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] - # - # so that we obtain - # - # [ c s ] [ ζbar ] = [ ζ ] - # [ s -c ] [ 0 ] [ ζbar⁺ ] - (c, s, ρ) = sym_givens(ρbar, βhat) - ζ = c * ζbar - ζbar = s * ζbar - rNorm = abs(ζbar) + # Initialize other constants. + ζbar = β + ρbar = αhat + θ = zero(T) + rNorm = ζbar history && push!(rNorms, rNorm) + ArNorm = α + history && push!(ArNorms, ArNorm) - @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ - @kaxpy!(m, ζ, w, y) # y = y + ζ * w + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - if λ > 0 - # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁ - if iter == 1 - @kaxpy!(n, one(FC)/ρ, cpₖ * v, d) + wbar .= u + @kscal!(m, one(FC)/αhat, wbar) + w .= zero(FC) + d .= zero(FC) + + status = "unknown" + solved = rNorm ≤ ɛ_c + inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) + tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! (solved || inconsistent || tired || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Golub-Kahan vectors. + # 1. 
βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + end + + Anorm² = Anorm² + β * β # = ‖B_{k-1}‖² + + if λ > 0 + βhat = cpₖ * β + λₐᵤₓ = spₖ * β else - @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d) - @kaxpy!(n, one(FC)/ρ, spₖ * q, d) - @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + βhat = β end - else - # DₖRₖ = Vₖ - if iter == 1 - @kaxpy!(n, one(FC)/ρ, v, d) + + # Continue QR factorization + # + # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : + # [ β 0 ] [ 0 ζbar ] + # + # k k+1 k k+1 k k+1 + # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] + # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] + # + # so that we obtain + # + # [ c s ] [ ζbar ] = [ ζ ] + # [ s -c ] [ 0 ] [ ζbar⁺ ] + (c, s, ρ) = sym_givens(ρbar, βhat) + ζ = c * ζbar + ζbar = s * ζbar + rNorm = abs(ζbar) + history && push!(rNorms, rNorm) + + @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ + @kaxpy!(m, ζ, w, y) # y = y + ζ * w + + if λ > 0 + # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁ + if iter == 1 + @kaxpy!(n, one(FC)/ρ, cpₖ * v, d) + else + @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d) + @kaxpy!(n, one(FC)/ρ, spₖ * q, d) + @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + end else - @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d) + # DₖRₖ = Vₖ + if iter == 1 + @kaxpy!(n, one(FC)/ρ, v, d) + else + @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d) + end end - end - # xₖ = Dₖzₖ - @kaxpy!(n, ζ, d, x) + # xₖ = Dₖzₖ + @kaxpy!(n, ζ, d, x) - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - Anorm² = Anorm² + α * α # = ‖Lₖ‖ - ArNorm = α * β * abs(ζ/ρ) - history && push!(ArNorms, ArNorm) + # 2. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + Anorm² = Anorm² + α * α # = ‖Lₖ‖ + ArNorm = α * β * abs(ζ/ρ) + history && push!(ArNorms, ArNorm) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², ktimer(start_time)) - if λ > 0 - (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ) - @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ - (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁) - else - αhat = α - end + if λ > 0 + (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ) + @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ + (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁) + else + αhat = α + end - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha + end + θ = s * αhat + ρbar = -c * αhat + + user_requested_exit = callback(solver) :: Bool + solved = rNorm ≤ ɛ_c + inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns end - θ = s * αhat - ρbar = -c * αhat - - user_requested_exit = callback(solver) :: Bool - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) - tired = iter ≥ itmax + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "found approximate minimum-norm solution") + !tired && !solved && (status = "found approximate minimum least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit 
exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "found approximate minimum-norm solution") - !tired && !solved && (status = "found approximate minimum least-squares solution") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/crls.jl b/src/crls.jl index 6410fb836..bf43fa79b 100644 --- a/src/crls.jl +++ b/src/crls.jl @@ -5,7 +5,7 @@ # # equivalently, of the linear system # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # This implementation follows the formulation given in # @@ -20,12 +20,13 @@ export crls, crls! - """ (x, stats) = crls(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + itmax::Int=0, timemax::Float64=Inf, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -34,19 +35,41 @@ Solve the linear least-squares problem minimize ‖b - Ax‖₂² + λ‖x‖₂² -using the Conjugate Residuals (CR) method. This method is equivalent to -applying MINRES to the normal equations +of size m × n using the Conjugate Residuals (CR) method. +This method is equivalent to applying MINRES to the normal equations - (AᵀA + λI) x = Aᵀb. + (AᴴA + λI) x = Aᴴb. This implementation recurs the residual r := b - Ax. -CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. 
+CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. It is formally equivalent to LSMR, though can be substantially less accurate, but simpler to implement. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -54,12 +77,6 @@ and `false` otherwise. """ function crls end -function crls(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CrlsSolver(A, b) - crls!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = crls!(solver::CrlsSolver, A, b; kwargs...) @@ -69,143 +86,185 @@ See [`CrlsSolver`](@ref) for more details about the `solver`. """ function crls! end -function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRLS: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :Ms, S, m) - x, p, Ar, q = solver.x, solver.p, solver.Ar, solver.q - r, Ap, s, stats = solver.r, solver.Ap, solver.s, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Ms = MisI ? s : solver.Ms - Mr = MisI ? r : solver.Ms - MAp = MisI ? Ap : solver.Ms - - x .= zero(FC) - r .= b - bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. - rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. 
- history && push!(rNorms, rNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(ArNorms, zero(T)) - return solver +def_args_crls = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_crls = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; radius::T = zero(T) ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_crls = mapreduce(extract_parameters, vcat, def_kwargs_crls) + +args_crls = (:A, :b) +kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function crls($(def_args_crls...); $(def_kwargs_crls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrlsSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + crls!(solver, $(args_crls...); $(kwargs_crls...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(Ar, Aᵀ, Mr) # - λ * x0 if x0 ≠ 0. - mul!(s, A, Ar) - MisI || mulorldiv!(Ms, M, s, ldiv) - - p .= Ar - Ap .= s - mul!(q, Aᵀ, Ms) # Ap - λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p - γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) - iter = 0 - itmax == 0 && (itmax = m + n) - - ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar) - λ > 0 && (γ += λ * ArNorm * ArNorm) - history && push!(ArNorms, ArNorm) - ε = atol + rtol * ArNorm - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - - status = "unknown" - on_boundary = false - solved = ArNorm ≤ ε - tired = iter ≥ itmax - psd = false - user_requested_exit = false - - while ! 
(solved || tired || user_requested_exit) - qNorm² = @kdotr(n, q, q) # dot(q, q) - α = γ / qNorm² - - # if a trust-region constraint is give, compute step to the boundary - # (note that α > 0 in CRLS) - if radius > 0 - pNorm = @knrm2(n, p) - if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p - psd = true # det(AᵀA) = 0 - p = Ar # p = Aᵀr - pNorm² = ArNorm * ArNorm - mul!(q, Aᵀ, s) - α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᵀr for α = ‖Ar‖²/γ - else - pNorm² = pNorm * pNorm - σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²)) - if α ≥ σ - α = σ - on_boundary = true - end - end + function crls!(solver :: CrlsSolver{T,FC,S}, $(def_args_crls...); $(def_kwargs_crls...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRLS: system of %d equations in %d variables\n", m, n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :Ms, S, m) + x, p, Ar, q = solver.x, solver.p, solver.Ar, solver.q + r, Ap, s, stats = solver.r, solver.Ap, solver.s, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Ms = MisI ? s : solver.Ms + Mr = MisI ? r : solver.Ms + MAp = MisI ? Ap : solver.Ms + + x .= zero(FC) + r .= b + bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. 
+ rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. + history && push!(rNorms, rNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(ArNorms, zero(T)) + return solver end - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q - ArNorm = @knrm2(n, Ar) - solved = psd || on_boundary - solved && continue - @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap + MisI || mulorldiv!(Mr, M, r, ldiv) + mul!(Ar, Aᴴ, Mr) # - λ * x0 if x0 ≠ 0. mul!(s, A, Ar) MisI || mulorldiv!(Ms, M, s, ldiv) - γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, s) - λ > 0 && (γ_next += λ * ArNorm * ArNorm) - β = γ_next / γ - - @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p - @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap - MisI || mulorldiv!(MAp, M, Ap, ldiv) - mul!(q, Aᵀ, MAp) + + p .= Ar + Ap .= s + mul!(q, Aᴴ, Ms) # Ap λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p + γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) + iter = 0 + itmax == 0 && (itmax = m + n) - γ = γ_next - if λ > 0 - rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x)) - else - rNorm = @knrm2(m, r) # norm(r) - end - history && push!(rNorms, rNorm) + ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar) + λ > 0 && (γ += λ * ArNorm * ArNorm) history && push!(ArNorms, ArNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - user_requested_exit = callback(solver) :: Bool - solved = (ArNorm ≤ ε) || on_boundary + ε = atol + rtol * ArNorm + (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + + status = "unknown" + on_boundary = false + solved = ArNorm ≤ ε tired = iter ≥ itmax + psd = false + user_requested_exit = false 
+ overtimed = false + + while ! (solved || tired || user_requested_exit || overtimed) + qNorm² = @kdotr(n, q, q) # dot(q, q) + α = γ / qNorm² + + # if a trust-region constraint is give, compute step to the boundary + # (note that α > 0 in CRLS) + if radius > 0 + pNorm = @knrm2(n, p) + if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p + psd = true # det(AᴴA) = 0 + p = Ar # p = Aᴴr + pNorm² = ArNorm * ArNorm + mul!(q, Aᴴ, s) + α = min(ArNorm^2 / γ, maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ + else + pNorm² = pNorm * pNorm + σ = maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²)) + if α ≥ σ + α = σ + on_boundary = true + end + end + end + + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q + ArNorm = @knrm2(n, Ar) + solved = psd || on_boundary + solved && continue + @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap + mul!(s, A, Ar) + MisI || mulorldiv!(Ms, M, s, ldiv) + γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, s) + λ > 0 && (γ_next += λ * ArNorm * ArNorm) + β = γ_next / γ + + @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p + @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap + MisI || mulorldiv!(MAp, M, Ap, ldiv) + mul!(q, Aᴴ, MAp) + λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p + + γ = γ_next + if λ > 0 + rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x)) + else + rNorm = @knrm2(m, r) # norm(r) + end + history && push!(rNorms, rNorm) + history && push!(ArNorms, ArNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + user_requested_exit = callback(solver) :: Bool + solved = (ArNorm ≤ ε) || on_boundary + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # 
Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + psd && (status = "zero-curvature encountered") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - psd && (status = "zero-curvature encountered") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/crmr.jl b/src/crmr.jl index deb5cf79f..db333856c 100644 --- a/src/crmr.jl +++ b/src/crmr.jl @@ -10,9 +10,9 @@ # and is equivalent to applying the conjugate residual method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # -# This method is equivalent to Craig-MR, described in +# This method is equivalent to CRAIGMR, described in # # D. Orban and M. Arioli. Iterative Solution of Symmetric Quasi-Definite Linear Systems, # Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017. @@ -26,12 +26,13 @@ export crmr, crmr! - """ (x, stats) = crmr(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), - rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + N=I, ldiv::Bool=false, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. 
`FC` is `T` or `Complex{T}`. @@ -40,11 +41,11 @@ Solve the consistent linear system Ax + √λs = b -using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CR to the normal equations of the second kind - (AAᵀ + λI) y = b + (AAᴴ + λI) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -58,10 +59,29 @@ CRMR produces monotonic residuals ‖r‖₂. It is formally equivalent to CRAIG-MR, though can be slightly less accurate, but simpler to implement. Only the x-part of the solution is returned. -A preconditioner M may be provided. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
+ +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -70,12 +90,6 @@ and `false` otherwise. """ function crmr end -function crmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = CrmrSolver(A, b) - crmr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = crmr!(solver::CrmrSolver, A, b; kwargs...) @@ -85,107 +99,148 @@ See [`CrmrSolver`](@ref) for more details about the `solver`. """ function crmr! end -function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRMR: system of %d equations in %d variables\n", m, n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :Mq, S, m) - allocate_if(λ > 0, solver, :s , S, m) - x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r - q, s, stats = solver.q, solver.s, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - Mq = MisI ? q : solver.Mq - - x .= zero(FC) # initial estimation x = 0 - mulorldiv!(r, M, b, ldiv) # initial residual r = M * (b - Ax) = M * b - bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. - rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. 
- history && push!(rNorms, rNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(ArNorms, zero(T)) - return solver +def_args_crmr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_crmr = (:(; N = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_crmr = mapreduce(extract_parameters, vcat, def_kwargs_crmr) + +args_crmr = (:A, :b) +kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function crmr($(def_args_crmr...); $(def_kwargs_crmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = CrmrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + crmr!(solver, $(args_crmr...); $(kwargs_crmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - λ > 0 && (s .= r) - mul!(Aᵀr, Aᵀ, r) # - λ * x0 if x0 ≠ 0. - p .= Aᵀr - γ = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr) - λ > 0 && (γ += λ * rNorm * rNorm) - iter = 0 - itmax == 0 && (itmax = m + n) - - ArNorm = sqrt(γ) - history && push!(ArNorms, ArNorm) - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. - ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - - status = "unknown" - solved = rNorm ≤ ɛ_c - inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) - tired = iter ≥ itmax - user_requested_exit = false - - while ! 
(solved || inconsistent || tired || user_requested_exit) - mul!(q, A, p) - λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s - MisI || mulorldiv!(Mq, M, q, ldiv) - α = γ / @kdotr(m, q, Mq) # Compute qᵗ * M * q - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, Mq, r) # Faster than r = r - α * Mq - rNorm = @knrm2(m, r) # norm(r) - mul!(Aᵀr, Aᵀ, r) - γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr) - λ > 0 && (γ_next += λ * rNorm * rNorm) - β = γ_next / γ - - @kaxpby!(n, one(FC), Aᵀr, β, p) # Faster than p = Aᵀr + β * p - if λ > 0 - @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + + function crmr!(solver :: CrmrSolver{T,FC,S}, $(def_args_crmr...); $(def_kwargs_crmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "CRMR: system of %d equations in %d variables\n", m, n) + + # Tests N = Iₙ + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!NisI, solver, :Nq, S, m) + allocate_if(λ > 0, solver, :s , S, m) + x, p, Aᴴr, r = solver.x, solver.p, solver.Aᴴr, solver.r + q, s, stats = solver.q, solver.s, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + Nq = NisI ? q : solver.Nq + + x .= zero(FC) # initial estimation x = 0 + mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b + bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. + rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. 
+ history && push!(rNorms, rNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(ArNorms, zero(T)) + return solver end + λ > 0 && (s .= r) + mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0. + p .= Aᴴr + γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr) + λ > 0 && (γ += λ * rNorm * rNorm) + iter = 0 + itmax == 0 && (itmax = m + n) - γ = γ_next ArNorm = sqrt(γ) - history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) - user_requested_exit = callback(solver) :: Bool + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. + (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + + status = "unknown" solved = rNorm ≤ ɛ_c inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) tired = iter ≥ itmax + user_requested_exit = false + overtimed = false + + while ! 
(solved || inconsistent || tired || user_requested_exit || overtimed) + mul!(q, A, p) + λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s + NisI || mulorldiv!(Nq, N, q, ldiv) + α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q + @kaxpy!(n, α, p, x) # Faster than x = x + α * p + @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq + rNorm = @knrm2(m, r) # norm(r) + mul!(Aᴴr, Aᴴ, r) + γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr) + λ > 0 && (γ_next += λ * rNorm * rNorm) + β = γ_next / γ + + @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p + if λ > 0 + @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + end + + γ = γ_next + ArNorm = sqrt(γ) + history && push!(rNorms, rNorm) + history && push!(ArNorms, ArNorm) + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) + user_requested_exit = callback(solver) :: Bool + solved = rNorm ≤ ɛ_c + inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "system probably inconsistent but least squares/norm solution found") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - inconsistent && (status = "system probably inconsistent but least squares/norm solution found") - user_requested_exit && (status = "user-requested exit") - - # Update 
stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/diom.jl b/src/diom.jl index 9c6b9767b..72ce462f6 100644 --- a/src/diom.jl +++ b/src/diom.jl @@ -11,40 +11,59 @@ export diom, diom! """ - (x, stats) = diom(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = diom(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using direct incomplete orthogonalization method. + (x, stats) = diom(A, b, x0::AbstractVector; kwargs...) + +DIOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using DIOM. DIOM only orthogonalizes the new vectors of the Krylov basis against the `memory` most recent vectors. If CG is well defined on `Ax = b` and `memory = 2`, DIOM is theoretically equivalent to CG. If `k ≤ memory` where `k` is the number of iterations, DIOM is theoretically equivalent to FOM. Otherwise, DIOM interpolates between CG and FOM and is similar to CG with partial reorthogonalization. -Partial reorthogonalization is available with the `reorthogonalization` option. - -An advantage of DIOM is that nonsymmetric or symmetric indefinite or both nonsymmetric +An advantage of DIOM is that non-Hermitian or Hermitian indefinite or both non-Hermitian and indefinite systems of linear equations can be handled by this single algorithm. 
-This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -DIOM can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = diom(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -52,18 +71,6 @@ and `false` otherwise. """ function diom end -function diom(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DiomSolver(A, b, memory) - diom!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function diom(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DiomSolver(A, b, memory) - diom!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = diom!(solver::DiomSolver, A, b; kwargs...) solver = diom!(solver::DiomSolver, A, b, x0; kwargs...) @@ -77,198 +84,256 @@ See [`DiomSolver`](@ref) for more details about the `solver`. """ function diom! end -function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - diom!(solver, A, b; kwargs...) 
- return solver -end - -function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("DIOM: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :w, S, n) - allocate_if(!NisI, solver, :z, S, n) - Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V - L, H, stats = solver.L, solver.H, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - w = MisI ? t : solver.w - r₀ = MisI ? t : solver.w - - # Initial solution x₀ and residual r₀. 
- x .= zero(FC) # x₀ - if warm_start - mul!(t, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), t) - else - t .= b +def_args_diom = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_diom = (:(x0::AbstractVector),) + +def_kwargs_diom = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_diom = mapreduce(extract_parameters, vcat, def_kwargs_diom) + +args_diom = (:A, :b) +optargs_diom = (:x0,) +kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function diom($(def_args_diom...), $(def_optargs_diom...); memory :: Int=20, $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DiomSolver(A, b, memory) + warm_start!(solver, $(optargs_diom...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + diom!(solver, $(args_diom...); $(kwargs_diom...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀) - rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + + function diom($(def_args_diom...); memory :: Int=20, $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DiomSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + diom!(solver, $(args_diom...); $(kwargs_diom...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) + 
function diom!(solver :: DiomSolver{T,FC,S}, $(def_args_diom...); $(def_kwargs_diom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "DIOM: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :w, S, n) + allocate_if(!NisI, solver, :z, S, n) + Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V + L, H, stats = solver.L, solver.H, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + w = MisI ? t : solver.w + r₀ = MisI ? t : solver.w + + # Initial solution x₀ and residual r₀. + x .= zero(FC) # x₀ + if warm_start + mul!(t, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), t) + else + t .= b + end + MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) + rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + iter = 0 + itmax == 0 && (itmax = 2*n) - mem = length(L) # Memory - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b). 
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Uₘ)⁻¹. - end - H .= zero(FC) # Last column of the band hessenberg matrix Hₘ = LₘUₘ. - # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. - # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located. - # In addition of that, the last column of Uₘ is stored in H. - L .= zero(FC) # Last mem pivots of Lₘ. - - # Initial ξ₁ and V₁. - ξ = rNorm - @. V[1] = r₀ / rNorm - - # Stopping criterion. - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || user_requested_exit) - - # Update iteration index. - iter = iter + 1 - - # Set position in circulars stacks. - pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V. - next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V. - - # Incomplete Arnoldi procedure. - z = NisI ? V[pos] : solver.z - NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ - mul!(t, A, z) # AN⁻¹vₘ - MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁ - for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. - diag = iter - i + 2 - H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩ - @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + mem = length(V) # Memory + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + end + for i = 1 : mem-1 + P[i] .= zero(FC) # Directions Pₖ = NVₖ(Uₖ)⁻¹. end + H .= zero(FC) # Last column of the band hessenberg matrix Hₖ = LₖUₖ. + # Each column has at most mem + 1 nonzero elements. + # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H. 
+ # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located. + # In addition of that, the last column of Uₖ is stored in H. + L .= zero(FC) # Last mem-1 pivots of Lₖ. + + # Initial ξ₁ and V₁. + ξ = rNorm + V[1] .= r₀ ./ rNorm + + # Stopping criterion. + solved = rNorm ≤ ε + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || user_requested_exit || overtimed) + + # Update iteration index. + iter = iter + 1 - # Partial reorthogonalization of the Krylov basis. - if reorthogonalization + # Set position in circulars stacks. + pos = mod(iter-1, mem) + 1 # Position corresponding to vₖ in the circular stack V. + next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V. + + # Incomplete Arnoldi procedure. + z = NisI ? V[pos] : solver.z + NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ + mul!(t, A, z) # ANvₖ + MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁ for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 - diag = iter - i + 2 - Htmp = @kdot(n, w, V[ipos]) - H[diag] += Htmp - @kaxpy!(n, -Htmp, V[ipos], w) + ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. + diag = iter - i + 1 + H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ + @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ end - end - # Compute hₘ₊₁.ₘ and vₘ₊₁. - H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂ - if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown" - @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ - end - # It's possible that uₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1 - if iter ≥ mem + 2 - H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0 - end + # Partial reorthogonalization of the Krylov basis. + if reorthogonalization + for i = max(1, iter-mem+1) : iter + ipos = mod(i-1, mem) + 1 + diag = iter - i + 1 + Htmp = @kdot(n, w, V[ipos]) + H[diag] += Htmp + @kaxpy!(n, -Htmp, V[ipos], w) + end + end - # Update the LU factorization with partial pivoting of H. 
- # Compute the last column of Uₘ. - if iter ≥ 2 - for i = max(2,iter-mem+1) : iter - lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L. - diag = iter - i + 2 - next_diag = diag + 1 - # uᵢ.ₘ ← hᵢ.ₘ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₘ - H[diag] = H[diag] - L[lpos] * H[next_diag] + # Compute hₖ₊₁.ₖ and vₖ₊₁. + Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" + V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ end - # Compute ξₘ the last component of zₘ = β(Lₘ)⁻¹e₁. - # ξₘ = -lₘ.ₘ₋₁ * ξₘ₋₁ - ξ = - L[pos] * ξ - end - # Compute next pivot lₘ₊₁.ₘ = hₘ₊₁.ₘ / uₘ.ₘ - L[next_pos] = H[1] / H[2] - - # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Uₘ)⁻¹. - for i = max(1,iter-mem) : iter-1 - ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. - diag = iter - i + 2 - if ipos == pos - # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ - @kscal!(n, -H[diag], P[pos]) - else - # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[pos]) + + # Update the LU factorization of Hₖ. + # Compute the last column of Uₖ. + if iter ≥ 2 + # u₁.ₖ ← h₁.ₖ if iter ≤ mem + # uₖ₋ₘₑₘ₊₁.ₖ ← hₖ₋ₘₑₘ₊₁.ₖ if iter ≥ mem + 1 + for i = max(2,iter-mem+2) : iter + lpos = mod(i-1, mem-1) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L. + diag = iter - i + 1 + next_diag = diag + 1 + # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ + H[diag] = H[diag] - L[lpos] * H[next_diag] + if i == iter + # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁. + # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁ + ξ = - L[lpos] * ξ + end + end end + # Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ + next_lpos = mod(iter, mem-1) + 1 + L[next_lpos] = Haux / H[1] + + ppos = mod(iter-1, mem-1) + 1 # Position corresponding to pₖ in the circular stack P. + + # Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹. + # u₁.ₖp₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≤ mem + # uₖ₋ₘₑₘ₊₁.ₖpₖ₋ₘₑₘ₊₁ + ... 
+ uₖ.ₖpₖ = Nvₖ if k ≥ mem + 1 + for i = max(1,iter-mem+1) : iter-1 + ipos = mod(i-1, mem-1) + 1 # Position corresponding to pᵢ in the circular stack P. + diag = iter - i + 1 + if ipos == ppos + # pₖ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁ + @kscal!(n, -H[diag], P[ppos]) + else + # pₖ ← pₖ - uᵢ.ₖ * pᵢ + @kaxpy!(n, -H[diag], P[ipos], P[ppos]) + end + end + # pₐᵤₓ ← pₐᵤₓ + Nvₖ + @kaxpy!(n, one(FC), z, P[ppos]) + # pₖ = pₐᵤₓ / uₖ.ₖ + P[ppos] .= P[ppos] ./ H[1] + + # Update solution xₖ. + # xₖ = xₖ₋₁ + ξₖ * pₖ + @kaxpy!(n, ξ, P[ppos], x) + + # Compute residual norm. + # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ| + rNorm = Haux * abs(ξ / H[1]) + history && push!(rNorms, rNorm) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) end - # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ - @kaxpy!(n, one(FC), z, P[pos]) - # pₘ = pₐᵤₓ / uₘ.ₘ - @. P[pos] = P[pos] / H[2] - - # Update solution xₘ. - # xₘ = xₘ₋₁ + ξₘ * pₘ - @kaxpy!(n, ξ, P[pos], x) - - # Compute residual norm. - # ‖ M⁻¹(b - Axₘ) ‖₂ = hₘ₊₁.ₘ * |ξₘ / uₘ.ₘ| - rNorm = real(H[1]) * abs(ξ / H[2]) - history && push!(rNorms, rNorm) + (verbose > 0) && @printf(iostream, "\n") - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/dqgmres.jl b/src/dqgmres.jl index ab7c490a6..4c1e52b37 100644 --- a/src/dqgmres.jl +++ b/src/dqgmres.jl @@ -11,16 +11,21 @@ export dqgmres, dqgmres! 
""" - (x, stats) = dqgmres(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = dqgmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using DQGMRES method. + (x, stats) = dqgmres(A, b, x0::AbstractVector; kwargs...) + +DQGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using DQGMRES. DQGMRES algorithm is based on the incomplete Arnoldi orthogonalization process and computes a sequence of approximate solutions with the quasi-minimal residual property. @@ -30,21 +35,35 @@ If MINRES is well defined on `Ax = b` and `memory = 2`, DQGMRES is theoretically If `k ≤ memory` where `k` is the number of iterations, DQGMRES is theoretically equivalent to GMRES. Otherwise, DQGMRES interpolates between MINRES and GMRES and is similar to MINRES with partial reorthogonalization. -Partial reorthogonalization is available with the `reorthogonalization` option. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -This implementation allows a left preconditioner M and a right preconditioner N. 
-- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Optional argument -DQGMRES can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = dqgmres(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. 
#### Reference @@ -52,18 +71,6 @@ and `false` otherwise. """ function dqgmres end -function dqgmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DqgmresSolver(A, b, memory) - dqgmres!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function dqgmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = DqgmresSolver(A, b, memory) - dqgmres!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = dqgmres!(solver::DqgmresSolver, A, b; kwargs...) solver = dqgmres!(solver::DqgmresSolver, A, b, x0; kwargs...) @@ -77,206 +84,258 @@ See [`DqgmresSolver`](@ref) for more details about the `solver`. """ function dqgmres! end -function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - dqgmres!(solver, A, b; kwargs...) - return solver -end - -function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("DQGMRES: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. 
- allocate_if(!MisI, solver, :w, S, n) - allocate_if(!NisI, solver, :z, S, n) - Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V - c, s, H, stats = solver.c, solver.s, solver.H, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - w = MisI ? t : solver.w - r₀ = MisI ? t : solver.w - - # Initial solution x₀ and residual r₀. - x .= zero(FC) # x₀ - if warm_start - mul!(t, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), t) - else - t .= b +def_args_dqgmres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_dqgmres = (:(x0::AbstractVector),) + +def_kwargs_dqgmres = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_dqgmres = mapreduce(extract_parameters, vcat, def_kwargs_dqgmres) + +args_dqgmres = (:A, :b) +optargs_dqgmres = (:x0,) +kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function dqgmres($(def_args_dqgmres...), $(def_optargs_dqgmres...); memory :: Int=20, $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DqgmresSolver(A, b, memory) + warm_start!(solver, $(optargs_dqgmres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + dqgmres!(solver, $(args_dqgmres...); $(kwargs_dqgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀) - rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return 
solver + + function dqgmres($(def_args_dqgmres...); memory :: Int=20, $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = DqgmresSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + dqgmres!(solver, $(args_dqgmres...); $(kwargs_dqgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) + function dqgmres!(solver :: DqgmresSolver{T,FC,S}, $(def_args_dqgmres...); $(def_kwargs_dqgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "DQGMRES: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :w, S, n) + allocate_if(!NisI, solver, :z, S, n) + Δx, x, t, P, V = solver.Δx, solver.x, solver.t, solver.P, solver.V + c, s, H, stats = solver.c, solver.s, solver.H, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + w = MisI ? t : solver.w + r₀ = MisI ? t : solver.w + + # Initial solution x₀ and residual r₀. 
+ x .= zero(FC) # x₀ + if warm_start + mul!(t, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), t) + else + t .= b + end + MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) + rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + iter = 0 + itmax == 0 && (itmax = 2*n) - # Set up workspace. - mem = length(c) # Memory. - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b). - P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Rₘ)⁻¹. - end - c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ. - s .= zero(FC) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ. - H .= zero(FC) # Last column of the band hessenberg matrix Hₘ. - # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. - # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located. - # In addition of that, the last column of Rₘ is also stored in H. - - # Initial γ₁ and V₁. - γₘ = rNorm # γₘ and γₘ₊₁ are the last components of gₘ, right-hand of the least squares problem min ‖ Hₘyₘ - gₘ ‖₂. - @. V[1] = r₀ / rNorm - - # The following stopping criterion compensates for the lag in the - # residual, but usually increases the number of iterations. - # solved = sqrt(max(1, iter-mem+1)) * |γₘ₊₁| ≤ ε - solved = rNorm ≤ ε # less accurate, but acceptable. - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || user_requested_exit) - - # Update iteration index. - iter = iter + 1 - - # Set position in circulars stacks. - pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V. 
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V. - - # Incomplete Arnoldi procedure. - z = NisI ? V[pos] : solver.z - NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ - mul!(t, A, z) # AN⁻¹vₘ - MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁ - for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. - diag = iter - i + 2 - H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩ - @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Set up workspace. + mem = length(V) # Memory. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹. end + c .= zero(T) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ. + s .= zero(FC) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ. + H .= zero(FC) # Last column of the band hessenberg matrix Hₖ. + # Each column has at most mem + 1 nonzero elements. + # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H. + # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located. + # In addition of that, the last column of Rₖ is also stored in H. + + # Initial γ₁ and V₁. + γₖ = rNorm # γₖ and γₖ₊₁ are the last components of gₖ, right-hand of the least squares problem min ‖ Hₖyₖ - gₖ ‖₂. + V[1] .= r₀ ./ rNorm + + # The following stopping criterion compensates for the lag in the + # residual, but usually increases the number of iterations. + # solved = sqrt(max(1, iter-mem+1)) * |γₖ₊₁| ≤ ε + solved = rNorm ≤ ε # less accurate, but acceptable. 
+ tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || user_requested_exit || overtimed) + + # Update iteration index. + iter = iter + 1 - # Partial reorthogonalization of the Krylov basis. - if reorthogonalization + # Set position in circulars stacks. + pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V. + next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V. + + # Incomplete Arnoldi procedure. + z = NisI ? V[pos] : solver.z + NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ + mul!(t, A, z) # ANvₖ + MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁ for i = max(1, iter-mem+1) : iter - ipos = mod(i-1, mem) + 1 - diag = iter - i + 2 - Htmp = @kdot(n, w, V[ipos]) - H[diag] += Htmp - @kaxpy!(n, -Htmp, V[ipos], w) + ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. + diag = iter - i + 1 + H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ + @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ end - end - # Compute hₘ₊₁.ₘ and vₘ₊₁. - H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂ - if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown" - @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ - end - # rₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1 - if iter ≥ mem + 2 - H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0 - end + # Partial reorthogonalization of the Krylov basis. + if reorthogonalization + for i = max(1, iter-mem+1) : iter + ipos = mod(i-1, mem) + 1 + diag = iter - i + 1 + Htmp = @kdot(n, w, V[ipos]) + H[diag] += Htmp + @kaxpy!(n, -Htmp, V[ipos], w) + end + end - # Update the QR factorization of H. - # Apply mem previous Givens reflections Ωᵢ. - for i = max(1,iter-mem) : iter-1 - irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s. 
- diag = iter - i + 1 - next_diag = diag + 1 - H_aux = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag] - H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag] - H[next_diag] = H_aux - end + # Compute hₖ₊₁.ₖ and vₖ₊₁. + Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" + V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ + end + # rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1 + # We don't want to use rₖ₋₁₋ₘₑₘ.ₖ₋₁ when we compute rₖ₋ₘₑₘ.ₖ + if iter ≥ mem + 2 + H[mem+1] = zero(FC) # rₖ₋ₘₑₘ.ₖ = 0 + end + + # Update the QR factorization of Hₖ. + # Apply mem previous Givens reflections Ωᵢ. + for i = max(1,iter-mem) : iter-1 + irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s. + diag = iter - i + next_diag = diag + 1 + Htmp = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag] + H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag] + H[next_diag] = Htmp + end - # Compute and apply current Givens reflection Ωₘ. - # [cₘ sₘ] [ hₘ.ₘ ] = [ρₘ] - # [sₘ -cₘ] [hₘ₊₁.ₘ] [0 ] - (c[pos], s[pos], H[2]) = sym_givens(H[2], H[1]) - γₘ₊₁ = conj(s[pos]) * γₘ - γₘ = c[pos] * γₘ - - # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Rₘ)⁻¹. - for i = max(1,iter-mem) : iter-1 - ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. - diag = iter - i + 2 - if ipos == pos - # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ - @kscal!(n, -H[diag], P[pos]) - else - # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[pos]) + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ] + # [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ] + (c[pos], s[pos], H[1]) = sym_givens(H[1], Haux) + γₖ₊₁ = conj(s[pos]) * γₖ + γₖ = c[pos] * γₖ + + # Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹. + for i = max(1,iter-mem) : iter-1 + ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. 
+ diag = iter - i + 1 + if ipos == pos + # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ + @kscal!(n, -H[diag], P[pos]) + else + # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ + @kaxpy!(n, -H[diag], P[ipos], P[pos]) + end end + # pₐᵤₓ ← pₐᵤₓ + Nvₖ + @kaxpy!(n, one(FC), z, P[pos]) + # pₖ = pₐᵤₓ / hₖ.ₖ + P[pos] .= P[pos] ./ H[1] + + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + γₖ * pₖ + @kaxpy!(n, γₖ, P[pos], x) + + # Update residual norm estimate. + # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁| + rNorm = abs(γₖ₊₁) + history && push!(rNorms, rNorm) + + # Update γₖ. + γₖ = γₖ₊₁ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) end - # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ - @kaxpy!(n, one(FC), z, P[pos]) - # pₘ = pₐᵤₓ / hₘ.ₘ - @. P[pos] = P[pos] / H[2] - - # Compute solution xₘ. - # xₘ ← xₘ₋₁ + γₘ * pₘ - @kaxpy!(n, γₘ, P[pos], x) - - # Update residual norm estimate. - # ‖ M⁻¹(b - Axₘ) ‖₂ ≈ |γₘ₊₁| - rNorm = abs(γₘ₊₁) - history && push!(rNorms, rNorm) + (verbose > 0) && @printf(iostream, "\n") - # Update γₘ. - γₘ = γₘ₊₁ + # Termination status + solved && (status = "solution good enough given atol and rtol") + tired && (status = "maximum number of iterations exceeded") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - solved && (status = "solution good enough given atol and rtol") - tired && (status = "maximum number of iterations exceeded") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/fgmres.jl b/src/fgmres.jl new file mode 100644 index 000000000..1a68aac6c --- /dev/null +++ b/src/fgmres.jl @@ -0,0 +1,391 @@ +# An implementation of FGMRES for the solution of the square linear system Ax = b. +# +# This method is described in +# +# Y. Saad, A Flexible Inner-Outer Preconditioned GMRES Algorithms. +# SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993. +# +# Alexis Montoison, +# Montreal, September 2022. + +export fgmres, fgmres! + +""" + (x, stats) = fgmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) + +`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. +`FC` is `T` or `Complex{T}`. + + (x, stats) = fgmres(A, b, x0::AbstractVector; kwargs...) + +FGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the linear system Ax = b of size n using FGMRES. 
+ +FGMRES computes a sequence of approximate solutions with minimum residual. +FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration. + +This implementation allows a left preconditioner M and a flexible right preconditioner N. +A situation in which the preconditioner is "not constant" is when a relaxation-type method, +a Chebyshev iteration or another Krylov subspace method is used as a preconditioner. +Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles. +Thus, GMRES is recommended if the right preconditioner N is constant. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. + +#### Keyword arguments + +* `memory`: if `restart = true`, the restarted version FGMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. + +#### Reference + +* Y. Saad, [*A Flexible Inner-Outer Preconditioned GMRES Algorithm*](https://doi.org/10.1137/0914028), SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993. +""" +function fgmres end + +""" + solver = fgmres!(solver::FgmresSolver, A, b; kwargs...) + solver = fgmres!(solver::FgmresSolver, A, b, x0; kwargs...) + +where `kwargs` are keyword arguments of [`fgmres`](@ref). + +Note that the `memory` keyword argument is the only exception. +It's required to create a `FgmresSolver` and can't be changed later. + +See [`FgmresSolver`](@ref) for more details about the `solver`. +""" +function fgmres! 
end + +def_args_fgmres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_fgmres = (:(x0::AbstractVector),) + +def_kwargs_fgmres = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; restart::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_fgmres = mapreduce(extract_parameters, vcat, def_kwargs_fgmres) + +args_fgmres = (:A, :b) +optargs_fgmres = (:x0,) +kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function fgmres($(def_args_fgmres...), $(def_optargs_fgmres...); memory :: Int=20, $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FgmresSolver(A, b, memory) + warm_start!(solver, $(optargs_fgmres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fgmres!(solver, $(args_fgmres...); $(kwargs_fgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) + end + + function fgmres($(def_args_fgmres...); memory :: Int=20, $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FgmresSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fgmres!(solver, $(args_fgmres...); $(kwargs_fgmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) + end + + function fgmres!(solver :: FgmresSolver{T,FC,S}, $(def_args_fgmres...); $(def_kwargs_fgmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), 
$(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "FGMRES: system of size %d\n", n) + + # Check M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, Z = solver.Δx, solver.x, solver.w, solver.V, solver.Z + z, c, s, R, stats = solver.z, solver.c, solver.s, solver.R, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. + if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + mem = length(c) # Memory + npass = 0 # Number of pass + + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass + + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax + + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time)) + + # Tolerance for breakdown detection. 
+ btol = eps(T)^(3/4) + + # Stopping criterion + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}. + Z[i] .= zero(FC) # Zₖ = [N₁v₁, ..., Nₖvₖ] + end + s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end + end + + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. V[1] = r₀ / rNorm + + npass = npass + 1 + solver.inner_iter = 0 + inner_tired = false + + while !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + + # Update iteration index + solver.inner_iter = solver.inner_iter + 1 + inner_iter = solver.inner_iter + + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(R, zero(FC)) + end + push!(s, zero(FC)) + push!(c, zero(T)) + push!(Z, S(undef, n)) + end + + # Continue the process. + # MAZₖ = Vₖ₊₁Hₖ₊₁.ₖ + mulorldiv!(Z[inner_iter], N, V[inner_iter], ldiv) # zₖ ← Nₖvₖ + mul!(w, A, Z[inner_iter]) # w ← Azₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ + for i = 1 : inner_iter + R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ + end + + # Reorthogonalization of the basis. 
+ if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + R[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end + + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the QR factorization of Hₖ₊₁.ₖ. + # Apply previous Givens reflections Ωᵢ. + # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] + # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] + for i = 1 : inner_iter-1 + Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] + R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] + R[nr+i] = Rtmp + end + + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] + # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] + (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) + + # Update zₖ = (Qₖ)ᴴβe₁ + ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] + z[inner_iter] = c[inner_iter] * z[inner_iter] + + # Update residual norm estimate. + # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁| + rNorm = abs(ζₖ₊₁) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time)) + + # Compute vₖ₊₁ + if !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + push!(z, zero(FC)) + end + @. 
V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + z[inner_iter+1] = ζₖ₊₁ + end + end + + # Compute y by solving Ry = z with backward substitution. + y = z # yᵢ = ζᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + y[i] = zero(FC) + inconsistent = true + else + y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + end + end + + # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], Z[i], xr) + end + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter and tired variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver + end +end diff --git a/src/fom.jl b/src/fom.jl index fcae5cf62..351fb246f 100644 --- a/src/fom.jl +++ b/src/fom.jl @@ -11,38 +11,54 @@ export fom, fom! 
""" - (x, stats) = fom(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - restart::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = fom(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using FOM method. + (x, stats) = fom(A, b, x0::AbstractVector; kwargs...) + +FOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the linear system Ax = b of size n using FOM. FOM algorithm is based on the Arnoldi process and a Galerkin condition. -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -Full reorthogonalization is available with the `reorthogonalization` option. +#### Optional argument -If `restart = true`, the restarted version FOM(k) is used with `k = memory`. -If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. -More storage will be allocated only if the number of iterations exceed `memory`. +* `x0`: a vector of length n that represents an initial guess of the solution x. -FOM can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = fom(A, b, x0; kwargs...) 
+* `memory`: if `restart = true`, the restarted version FOM(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -50,18 +66,6 @@ and `false` otherwise. """ function fom end -function fom(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) 
where FC <: FloatOrComplex - solver = FomSolver(A, b, memory) - fom!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function fom(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = FomSolver(A, b, memory) - fom!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = fom!(solver::FomSolver, A, b; kwargs...) solver = fom!(solver::FomSolver, A, b, x0; kwargs...) @@ -75,241 +79,293 @@ See [`FomSolver`](@ref) for more details about the `solver`. """ function fom! end -function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - fom!(solver, A, b; kwargs...) - return solver -end - -function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - restart :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("FOM: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI , solver, :q , S, n) - allocate_if(!NisI , solver, :p , S, n) - allocate_if(restart, solver, :Δx, S, n) - Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z - l, U, stats = solver.l, solver.U, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - q = MisI ? w : solver.q - r₀ = MisI ? w : solver.q - xr = restart ? Δx : x - - # Initial solution x₀. 
- x .= zero(FC) - - # Initial residual r₀. - if warm_start - mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) - else - w .= b +def_args_fom = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_fom = (:(x0::AbstractVector),) + +def_kwargs_fom = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; restart::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_fom = mapreduce(extract_parameters, vcat, def_kwargs_fom) + +args_fom = (:A, :b) +optargs_fom = (:x0,) +kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function fom($(def_args_fom...), $(def_optargs_fom...); memory :: Int=20, $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FomSolver(A, b, memory) + warm_start!(solver, $(optargs_fom...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fom!(solver, $(args_fom...); $(kwargs_fom...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ - - rNorm = β - history && push!(rNorms, β) - ε = atol + rtol * rNorm - if β == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function fom($(def_args_fom...); memory :: Int=20, $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = FomSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + fom!(solver, $(args_fom...); $(kwargs_fom...)) + 
solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - mem = length(l) # Memory - npass = 0 # Number of pass + function fom!(solver :: FomSolver{T,FC,S}, $(def_args_fom...); $(def_kwargs_fom...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "FOM: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(!NisI , solver, :p , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z + l, U, stats = solver.l, solver.U, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. 
+ if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - iter = 0 # Cumulative number of iterations - inner_iter = 0 # Number of iterations in a pass + mem = length(l) # Memory + npass = 0 # Number of pass - itmax == 0 && (itmax = 2*n) - inner_itmax = itmax + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass - (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time)) - # Stopping criterion - breakdown = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - inner_tired = inner_iter ≥ inner_itmax - status = "unknown" - user_requested_exit = false + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - while !(solved || tired || breakdown || user_requested_exit) + # Stopping criterion + breakdown = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + overtimed = false - # Initialize workspace. - nr = 0 # Number of coefficients stored in Uₖ. - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀). 
- end - l .= zero(FC) # Lower unit triangular matrix Lₖ. - U .= zero(FC) # Upper triangular matrix Uₖ. - z .= zero(FC) # Solution of Lₖzₖ = βe₁. - - if restart - xr .= zero(FC) # xr === Δx when restart is set to true - if npass ≥ 1 - mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) - MisI || mulorldiv!(r₀, M, w, ldiv) + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Uₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + end + l .= zero(FC) # Lower unit triangular matrix Lₖ. + U .= zero(FC) # Upper triangular matrix Uₖ. + z .= zero(FC) # Solution of Lₖzₖ = βe₁. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end end - end - # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) - z[1] = β - @. V[1] = r₀ / rNorm + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. V[1] = r₀ / rNorm - npass = npass + 1 - inner_iter = 0 - inner_tired = false + npass = npass + 1 + inner_iter = 0 + inner_tired = false - while !(solved || inner_tired || breakdown) + while !(solved || inner_tired || breakdown) - # Update iteration index - inner_iter = inner_iter + 1 + # Update iteration index + inner_iter = inner_iter + 1 - # Update workspace if more storage is required and restart is set to false - if !restart && (inner_iter > mem) - for i = 1 : inner_iter - push!(U, zero(FC)) + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(U, zero(FC)) + end + push!(l, zero(FC)) + push!(z, zero(FC)) end - push!(l, zero(FC)) - push!(z, zero(FC)) - end - - # Continue the Arnoldi process. - p = NisI ? 
V[inner_iter] : solver.p - NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ - mul!(w, A, p) # w ← AN⁻¹vₖ - MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ - for i = 1 : inner_iter - U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ - @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ - end - # Reorthogonalization of the Krylov basis. - if reorthogonalization + # Continue the Arnoldi process. + p = NisI ? V[inner_iter] : solver.p + NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ + mul!(w, A, p) # w ← ANvₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) - U[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end - end - # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + # Reorthogonalization of the Krylov basis. + if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + U[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end - # Update the LU factorization of Hₖ. - if inner_iter ≥ 2 - for i = 2 : inner_iter - # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ - U[nr+i] = U[nr+i] - l[i-1] * U[nr+i-1] + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the LU factorization of Hₖ. + if inner_iter ≥ 2 + for i = 2 : inner_iter + # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ + U[nr+i] = U[nr+i] - l[i-1] * U[nr+i-1] + end + # ζₖ = -lₖ.ₖ₋₁ * ζₖ₋₁ + z[inner_iter] = - l[inner_iter-1] * z[inner_iter-1] + end + # lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ + l[inner_iter] = Hbis / U[nr+inner_iter] + + # Update residual norm estimate. + # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ| + rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter]) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Uₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time)) + + # Compute vₖ₊₁. + if !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + end + @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q end - # ζₖ = -lₖ.ₖ₋₁ * ζₖ₋₁ - z[inner_iter] = - l[inner_iter-1] * z[inner_iter-1] end - # lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ - l[inner_iter] = Hbis / U[nr+inner_iter] - - # Update residual norm estimate. - # ‖ M⁻¹(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ| - rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter]) - history && push!(rNorms, rNorm) - - # Update the number of coefficients in Uₖ - nr = nr + inner_iter - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = Hbis ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax - kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) - - # Compute vₖ₊₁. - if !(solved || inner_tired || breakdown) - if !restart && (inner_iter ≥ mem) - push!(V, S(undef, n)) + + # Hₖyₖ = βe₁ ⟺ LₖUₖyₖ = βe₁ ⟺ Uₖyₖ = zₖ. + # Compute yₖ by solving Uₖyₖ = zₖ with backward substitution. 
+ y = z # yᵢ = zᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - U[pos] * y[j] # yᵢ ← yᵢ - uᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ end - @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ end - end - # Hₖyₖ = βe₁ ⟺ LₖUₖyₖ = βe₁ ⟺ Uₖyₖ = zₖ. - # Compute yₖ by solving Uₖyₖ = zₖ with backward substitution. - y = z # yᵢ = zᵢ - for i = inner_iter : -1 : 1 - pos = nr + i - inner_iter # position of rᵢ.ₖ - for j = inner_iter : -1 : i+1 - y[i] = y[i] - U[pos] * y[j] # yᵢ ← yᵢ - uᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + # Form xₖ = NVₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], V[i], xr) + end + if !NisI + solver.p .= xr + mulorldiv!(xr, N, solver.p, ldiv) end - y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter, tired and overtimed variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns end + (verbose > 0) && @printf(iostream, "\n") - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : inner_iter - @kaxpy!(n, y[i], V[i], xr) - end - if !NisI - solver.p .= xr - mulorldiv!(xr, N, solver.p, ldiv) - end - restart && @kaxpy!(n, one(FC), xr, x) + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "inconsistent linear system") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update inner_itmax, iter and tired variables. 
- inner_itmax = inner_itmax - inner_iter - iter = iter + inner_iter - tired = iter ≥ itmax + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !solved && breakdown + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "inconsistent linear system") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !solved && breakdown - stats.status = status - return solver end diff --git a/src/gmres.jl b/src/gmres.jl index 388a4ab96..7ee6e2341 100644 --- a/src/gmres.jl +++ b/src/gmres.jl @@ -11,38 +11,54 @@ export gmres, gmres! """ - (x, stats) = gmres(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - restart::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = gmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using GMRES method. + (x, stats) = gmres(A, b, x0::AbstractVector; kwargs...) -GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimal residual property. 
+GMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +Solve the linear system Ax = b of size n using GMRES. -Full reorthogonalization is available with the `reorthogonalization` option. +GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimum residual. -If `restart = true`, the restarted version GMRES(k) is used with `k = memory`. -If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. -More storage will be allocated only if the number of iterations exceed `memory`. +#### Input arguments -GMRES can be warm-started from an initial guess `x0` with the method +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. - (x, stats) = gmres(A, b, x0; kwargs...) +#### Optional argument -where `kwargs` are the same keyword arguments as above. +* `x0`: a vector of length n that represents an initial guess of the solution x. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Keyword arguments + +* `memory`: if `restart = true`, the restarted version GMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. 
Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -50,18 +66,6 @@ and `false` otherwise. """ function gmres end -function gmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GmresSolver(A, b, memory) - gmres!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function gmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GmresSolver(A, b, memory) - gmres!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = gmres!(solver::GmresSolver, A, b; kwargs...) 
solver = gmres!(solver::GmresSolver, A, b, x0; kwargs...) @@ -75,260 +79,310 @@ See [`GmresSolver`](@ref) for more details about the `solver`. """ function gmres! end -function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - gmres!(solver, A, b; kwargs...) - return solver -end - -function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - restart :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("GMRES: system of size %d\n", n) - - # Check M = Iₙ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI , solver, :q , S, n) - allocate_if(!NisI , solver, :p , S, n) - allocate_if(restart, solver, :Δx, S, n) - Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z - c, s, R, stats = solver.c, solver.s, solver.R, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - q = MisI ? w : solver.q - r₀ = MisI ? w : solver.q - xr = restart ? Δx : x - - # Initial solution x₀. - x .= zero(FC) - - # Initial residual r₀. 
- if warm_start - mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) - else - w .= b +def_args_gmres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_gmres = (:(x0::AbstractVector),) + +def_kwargs_gmres = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; restart::Bool = false ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_gmres = mapreduce(extract_parameters, vcat, def_kwargs_gmres) + +args_gmres = (:A, :b) +optargs_gmres = (:x0,) +kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function gmres($(def_args_gmres...), $(def_optargs_gmres...); memory :: Int=20, $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GmresSolver(A, b, memory) + warm_start!(solver, $(optargs_gmres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gmres!(solver, $(args_gmres...); $(kwargs_gmres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ - - rNorm = β - history && push!(rNorms, β) - ε = atol + rtol * rNorm - if β == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function gmres($(def_args_gmres...); memory :: Int=20, $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GmresSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gmres!(solver, $(args_gmres...); $(kwargs_gmres...)) + 
solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - mem = length(c) # Memory - npass = 0 # Number of pass + function gmres!(solver :: GmresSolver{T,FC,S}, $(def_args_gmres...); $(def_kwargs_gmres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "GMRES: system of size %d\n", n) + + # Check M = Iₙ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(!NisI , solver, :p , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, z = solver.Δx, solver.x, solver.w, solver.V, solver.z + c, s, R, stats = solver.c, solver.s, solver.R, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. 
+ if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + mem = length(c) # Memory + npass = 0 # Number of pass - iter = 0 # Cumulative number of iterations - inner_iter = 0 # Number of iterations in a pass + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass - itmax == 0 && (itmax = 2*n) - inner_itmax = itmax + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax - (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s %5s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s %.2fs\n", npass, iter, rNorm, "✗ ✗ ✗ ✗", ktimer(start_time)) - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - # Stopping criterion - breakdown = false - inconsistent = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - inner_tired = inner_iter ≥ inner_itmax - status = "unknown" - user_requested_exit = false + # Stopping criterion + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + overtimed = false - while !(solved || tired || breakdown || user_requested_exit) + while !(solved || tired || breakdown || user_requested_exit || overtimed) - # Initialize workspace. 
- nr = 0 # Number of coefficients stored in Rₖ. - for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀). - end - s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - R .= zero(FC) # Upper triangular matrix Rₖ. - z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. - - if restart - xr .= zero(FC) # xr === Δx when restart is set to true - if npass ≥ 1 - mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) - MisI || mulorldiv!(r₀, M, w, ldiv) + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + end + s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end end - end - # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) - z[1] = β - @. V[1] = r₀ / rNorm + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. 
V[1] = r₀ / rNorm - npass = npass + 1 - solver.inner_iter = 0 - inner_tired = false + npass = npass + 1 + solver.inner_iter = 0 + inner_tired = false - while !(solved || inner_tired || breakdown || user_requested_exit) + while !(solved || inner_tired || breakdown || user_requested_exit || overtimed) - # Update iteration index - solver.inner_iter = solver.inner_iter + 1 - inner_iter = solver.inner_iter + # Update iteration index + solver.inner_iter = solver.inner_iter + 1 + inner_iter = solver.inner_iter - # Update workspace if more storage is required and restart is set to false - if !restart && (inner_iter > mem) + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(R, zero(FC)) + end + push!(s, zero(FC)) + push!(c, zero(T)) + end + + # Continue the Arnoldi process. + p = NisI ? V[inner_iter] : solver.p + NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ + mul!(w, A, p) # w ← ANvₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - push!(R, zero(FC)) + R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end - push!(s, zero(FC)) - push!(c, zero(T)) - end - # Continue the Arnoldi process. - p = NisI ? V[inner_iter] : solver.p - NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ - mul!(w, A, p) # w ← AN⁻¹vₖ - MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ - for i = 1 : inner_iter - R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ - @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ - end + # Reorthogonalization of the Krylov basis. + if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + R[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end - # Reorthogonalization of the Krylov basis. 
- if reorthogonalization - for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) - R[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the QR factorization of Hₖ₊₁.ₖ. + # Apply previous Givens reflections Ωᵢ. + # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] + # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] + for i = 1 : inner_iter-1 + Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] + R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] + R[nr+i] = Rtmp end - end - # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ - - # Update the QR factorization of Hₖ₊₁.ₖ. - # Apply previous Givens reflections Ωᵢ. - # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] - # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] - for i = 1 : inner_iter-1 - Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] - R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] - R[nr+i] = Rtmp + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] + # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] + (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) + + # Update zₖ = (Qₖ)ᴴβe₁ + ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] + z[inner_iter] = c[inner_iter] * z[inner_iter] + + # Update residual norm estimate. + # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁| + rNorm = abs(ζₖ₊₁) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? 
inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e %.2fs\n", npass, iter+inner_iter, rNorm, Hbis, ktimer(start_time)) + + # Compute vₖ₊₁. + if !(solved || inner_tired || breakdown || user_requested_exit || overtimed) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + push!(z, zero(FC)) + end + @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + z[inner_iter+1] = ζₖ₊₁ + end end - # Compute and apply current Givens reflection Ωₖ. - # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] - # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] - (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) - - # Update zₖ = (Qₖ)ᵀβe₁ - ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] - z[inner_iter] = c[inner_iter] * z[inner_iter] - - # Update residual norm estimate. - # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁| - rNorm = abs(ζₖ₊₁) - history && push!(rNorms, rNorm) - - # Update the number of coefficients in Rₖ - nr = nr + inner_iter - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - resid_decrease_lim = rNorm ≤ ε - breakdown = Hbis ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax - solver.inner_iter = inner_iter - kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) - - # Compute vₖ₊₁ - if !(solved || inner_tired || breakdown) - if !restart && (inner_iter ≥ mem) - push!(V, S(undef, n)) - push!(z, zero(FC)) + # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. 
+ y = z # yᵢ = zᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + y[i] = zero(FC) + inconsistent = true + else + y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ end - @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q - z[inner_iter+1] = ζₖ₊₁ end - user_requested_exit = callback(solver) :: Bool - end - - # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. - y = z # yᵢ = zᵢ - for i = inner_iter : -1 : 1 - pos = nr + i - inner_iter # position of rᵢ.ₖ - for j = inner_iter : -1 : i+1 - y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + # Form xₖ = NVₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], V[i], xr) end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ btol - y[i] = zero(FC) - inconsistent = true - else - y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + if !NisI + solver.p .= xr + mulorldiv!(xr, N, solver.p, ldiv) end + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter, tired and overtimed variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns end + (verbose > 0) && @printf(iostream, "\n") - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : inner_iter - @kaxpy!(n, y[i], V[i], xr) - end - if !NisI - solver.p .= xr - mulorldiv!(xr, N, solver.p, ldiv) - end - restart && @kaxpy!(n, one(FC), xr, x) + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update inner_itmax, iter and tired variables. 
- inner_itmax = inner_itmax - inner_iter - iter = iter + inner_iter - tired = iter ≥ itmax + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - inconsistent && (status = "found approximate least-squares solution") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/gpmr.jl b/src/gpmr.jl index b10942995..1049c3b50 100644 --- a/src/gpmr.jl +++ b/src/gpmr.jl @@ -3,8 +3,8 @@ # This method is described in # # A. Montoison and D. Orban -# GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems -# Cahier du GERAD G-2021-62. +# GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems. +# SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023. # # Alexis Montoison, # Montréal, August 2021. @@ -12,23 +12,30 @@ export gpmr, gpmr! 
""" - (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; memory::Int=20, - C=I, D=I, E=I, F=I, atol::T=√eps(T), rtol::T=√eps(T), - gsp::Bool=false, reorthogonalization::Bool=false, - itmax::Int=0, λ::FC=one(FC), μ::FC=one(FC), - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; + memory::Int=20, C=I, D=I, E=I, F=I, + ldiv::Bool=false, gsp::Bool=false, + λ::FC=one(FC), μ::FC=one(FC), + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -GPMR solves the unsymmetric partitioned linear system + (x, y, stats) = gpmr(A, B, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) - [ λI A ] [ x ] = [ b ] - [ B μI ] [ y ] [ c ], +GPMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. -where λ and μ are real or complex numbers. -`A` can have any shape and `B` has the shape of `Aᵀ`. +Given matrices `A` of dimension m × n and `B` of dimension n × m, +GPMR solves the non-Hermitian partitioned linear system + + [ λIₘ A ] [ x ] = [ b ] + [ B μIₙ ] [ y ] [ c ], + +of size (n+m) × (n+m) where λ and μ are real or complex numbers. +`A` can have any shape and `B` has the shape of `Aᴴ`. `A`, `B`, `b` and `c` must be all nonzero. This implementation allows left and right block diagonal preconditioners @@ -44,8 +51,6 @@ and can solve when `CE = M⁻¹` and `DF = N⁻¹`. By default, GPMR solves unsymmetric linear systems with `λ = 1` and `μ = 1`. -If `gsp = true`, `λ = 1`, `μ = 0` and the associated generalized saddle point system is solved. -`λ` and `μ` are also keyword arguments that can be directly modified for more specific problems. 
GPMR is based on the orthogonal Hessenberg reduction process and its relations with the block-Arnoldi process. The residual norm ‖rₖ‖ is monotonically decreasing in GPMR. @@ -53,38 +58,50 @@ The residual norm ‖rₖ‖ is monotonically decreasing in GPMR. GPMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Full reorthogonalization is available with the `reorthogonalization` option. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `b`: a vector of length m; +* `c`: a vector of length n. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Optional arguments -GPMR can be warm-started from initial guesses `x0` and `y0` with the method +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. - (x, y, stats) = gpmr(A, B, b, c, x0, y0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `memory`: if `restart = true`, the restarted version GPMR(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. 
Additional storage will be allocated if the number of iterations exceeds `memory`; +* `C`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal left preconditioner; +* `D`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal left preconditioner; +* `E`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal right preconditioner; +* `F`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal right preconditioner; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `gsp`: if `true`, set `λ = 1` and `μ = 0` for generalized saddle-point systems; +* `λ` and `μ`: diagonal scaling factors of the partitioned linear system; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+#### Output arguments + +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference -* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://dx.doi.org/10.13140/RG.2.2.24069.68326), Cahier du GERAD G-2021-62, GERAD, Montréal, 2021. +* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://doi.org/10.1137/21M1459265), SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023. """ function gpmr end -function gpmr(A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GpmrSolver(A, b, memory) - gpmr!(solver, A, B, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function gpmr(A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex - solver = GpmrSolver(A, b, memory) - gpmr!(solver, A, B, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = gpmr!(solver::GpmrSolver, A, B, b, c; kwargs...) solver = gpmr!(solver::GpmrSolver, A, B, b, c, x0, y0; kwargs...) @@ -98,382 +115,436 @@ See [`GpmrSolver`](@ref) for more details about the `solver`. """ function gpmr! end -function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - gpmr!(solver, A, B, b, c; kwargs...) 
- return solver -end +def_args_gpmr = (:(A ), + :(B ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_gpmr = (:(x0 :: AbstractVector), + :(y0 :: AbstractVector)) + +def_kwargs_gpmr = (:(; C = I ), + :(; D = I ), + :(; E = I ), + :(; F = I ), + :(; ldiv::Bool = false ), + :(; gsp::Bool = false ), + :(; λ::FC = one(FC) ), + :(; μ::FC = one(FC) ), + :(; reorthogonalization::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_gpmr = mapreduce(extract_parameters, vcat, def_kwargs_gpmr) + +args_gpmr = (:A, :B, :b, :c) +optargs_gpmr = (:x0, :y0) +kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function gpmr($(def_args_gpmr...), $(def_optargs_gpmr...); memory :: Int=20, $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GpmrSolver(A, b, memory) + warm_start!(solver, $(optargs_gpmr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gpmr!(solver, $(args_gpmr...); $(kwargs_gpmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) + end -function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - C=I, D=I, E=I, F=I, atol :: T=√eps(T), rtol :: T=√eps(T), - gsp :: Bool=false, reorthogonalization :: Bool=false, - itmax :: Int=0, λ :: FC=one(FC), μ :: FC=one(FC), - verbose :: Int=0, history::Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - s, t = size(B) - m == t || error("Inconsistent problem size") - s == n || error("Inconsistent problem size") - length(b) == m || 
error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("GPMR: system of %d equations in %d variables\n", m+n, m+n) - - # Check C = E = Iₘ and D = F = Iₙ - CisI = (C === I) - DisI = (D === I) - EisI = (E === I) - FisI = (F === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - eltype(B) == FC || error("eltype(B) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Determine λ and μ associated to generalized saddle point systems. - gsp && (λ = one(FC) ; μ = zero(FC)) - - warm_start = solver.warm_start - warm_start && (λ ≠ 0) && !EisI && error("Warm-start with right preconditioners is not supported.") - warm_start && (μ ≠ 0) && !FisI && error("Warm-start with right preconditioners is not supported.") - - # Set up workspace. - allocate_if(!CisI, solver, :q , S, m) - allocate_if(!DisI, solver, :p , S, n) - allocate_if(!EisI, solver, :wB, S, m) - allocate_if(!FisI, solver, :wA, S, n) - wA, wB, dA, dB, Δx, Δy = solver.wA, solver.wB, solver.dA, solver.dB, solver.Δx, solver.Δy - x, y, V, U, gs, gc = solver.x, solver.y, solver.V, solver.U, solver.gs, solver.gc - zt, R, stats = solver.zt, solver.R, solver.stats - rNorms = stats.residuals - reset!(stats) - b₀ = warm_start ? dA : b - c₀ = warm_start ? dB : c - q = CisI ? dA : solver.q - p = DisI ? dB : solver.p - - # Initial solutions x₀ and y₀. - x .= zero(FC) - y .= zero(FC) - - iter = 0 - itmax == 0 && (itmax = m+n) - - # Initialize workspace. 
- nr = 0 # Number of coefficients stored in Rₖ - mem = length(V) # Memory - ωₖ = zero(FC) # Auxiliary variable to store fₖₖ - for i = 1 : mem - V[i] .= zero(FC) - U[i] .= zero(FC) + function gpmr($(def_args_gpmr...); memory :: Int=20, $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = GpmrSolver(A, b, memory) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + gpmr!(solver, $(args_gpmr...); $(kwargs_gpmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. - gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. - R .= zero(FC) # Upper triangular matrix Rₖ. - zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᵀ(βe₁ + γe₂). - - # Warm-start - # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ - # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ - # Compute C(b - AΔy) - λΔx - warm_start && mul!(b₀, A, Δy) - warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀) - !CisI && mulorldiv!(q, C, b₀, ldiv) - !CisI && (b₀ = q) - warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀) - - # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ - # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₘ - # Compute D(c - BΔx) - μΔy - warm_start && mul!(c₀, B, Δx) - warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀) - !DisI && mulorldiv!(p, D, c₀, ldiv) - !DisI && (c₀ = p) - warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀) - - # Initialize the orthogonal Hessenberg reduction process. - # βv₁ = Cb - β = @knrm2(m, b₀) - β ≠ 0 || error("b must be nonzero") - @. V[1] = b₀ / β - - # γu₁ = Dc - γ = @knrm2(n, c₀) - γ ≠ 0 || error("c must be nonzero") - @. 
U[1] = c₀ / γ - - # Compute ‖r₀‖² = γ² + β² - rNorm = sqrt(γ^2 + β^2) - history && push!(rNorms, rNorm) - ε = atol + rtol * rNorm - - # Initialize t̄₀ - zt[1] = β - zt[2] = γ - - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗") - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - inconsistent = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - - # Update iteration index. - iter = iter + 1 - k = iter - nr₂ₖ₋₁ = nr # Position of the column 2k-1 in Rₖ. - nr₂ₖ = nr + 2k-1 # Position of the column 2k in Rₖ. - - # Update workspace if more storage is required - if iter > mem - for i = 1 : 4k-1 - push!(R, zero(FC)) - end - for i = 1 : 4 - push!(gs, zero(FC)) - push!(gc, zero(T)) - end + + function gpmr!(solver :: GpmrSolver{T,FC,S}, $(def_args_gpmr...); $(def_kwargs_gpmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + s, t = size(B) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == t || error("Inconsistent problem size") + s == n || error("Inconsistent problem size") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "GPMR: system of %d equations in %d variables\n", m+n, m+n) + + # Check C = E = Iₘ and D = F = Iₙ + CisI = (C === I) + DisI = (D === I) + EisI = (E === I) + FisI = (F === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ eltype(B) == FC || @warn "eltype(B) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Determine λ and μ associated to generalized saddle point systems. + gsp && (λ = one(FC) ; μ = zero(FC)) + + warm_start = solver.warm_start + warm_start && (λ ≠ 0) && !EisI && error("Warm-start with right preconditioners is not supported.") + warm_start && (μ ≠ 0) && !FisI && error("Warm-start with right preconditioners is not supported.") + + # Set up workspace. + allocate_if(!CisI, solver, :q , S, m) + allocate_if(!DisI, solver, :p , S, n) + allocate_if(!EisI, solver, :wB, S, m) + allocate_if(!FisI, solver, :wA, S, n) + wA, wB, dA, dB, Δx, Δy = solver.wA, solver.wB, solver.dA, solver.dB, solver.Δx, solver.Δy + x, y, V, U, gs, gc = solver.x, solver.y, solver.V, solver.U, solver.gs, solver.gc + zt, R, stats = solver.zt, solver.R, solver.stats + rNorms = stats.residuals + reset!(stats) + b₀ = warm_start ? dA : b + c₀ = warm_start ? dB : c + q = CisI ? dA : solver.q + p = DisI ? dB : solver.p + + # Initial solutions x₀ and y₀. + x .= zero(FC) + y .= zero(FC) + + iter = 0 + itmax == 0 && (itmax = m+n) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ + mem = length(V) # Memory + ωₖ = zero(FC) # Auxiliary variable to store fₖₖ + for i = 1 : mem + V[i] .= zero(FC) + U[i] .= zero(FC) end + gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. + gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂). 
+ + # Warm-start + # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ + # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ + # Compute C(b - AΔy) - λΔx + warm_start && mul!(b₀, A, Δy) + warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀) + !CisI && mulorldiv!(q, C, b₀, ldiv) + !CisI && (b₀ = q) + warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀) + + # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ + # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₘ + # Compute D(c - BΔx) - μΔy + warm_start && mul!(c₀, B, Δx) + warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀) + !DisI && mulorldiv!(p, D, c₀, ldiv) + !DisI && (c₀ = p) + warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀) + + # Initialize the orthogonal Hessenberg reduction process. + # βv₁ = Cb + β = @knrm2(m, b₀) + β ≠ 0 || error("b must be nonzero") + @. V[1] = b₀ / β + + # γu₁ = Dc + γ = @knrm2(n, c₀) + γ ≠ 0 || error("c must be nonzero") + @. U[1] = c₀ / γ + + # Compute ‖r₀‖² = γ² + β² + rNorm = sqrt(γ^2 + β^2) + history && push!(rNorms, rNorm) + ε = atol + rtol * rNorm - # Continue the orthogonal Hessenberg reduction process. - # CAFUₖ = VₖHₖ + hₖ₊₁.ₖ * vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Hₖ₊₁.ₖ - # DBEVₖ = UₖFₖ + fₖ₊₁.ₖ * uₖ₊₁(eₖ)ᵀ = Uₖ₊₁Fₖ₊₁.ₖ - wA = FisI ? U[iter] : solver.wA - wB = EisI ? V[iter] : solver.wB - FisI || mulorldiv!(wA, F, U[iter], ldiv) # wA = Fuₖ - EisI || mulorldiv!(wB, E, V[iter], ldiv) # wB = Evₖ - mul!(dA, A, wA) # dA = AFuₖ - mul!(dB, B, wB) # dB = BEvₖ - CisI || mulorldiv!(q, C, dA, ldiv) # q = CAFuₖ - DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ + # Initialize t̄₀ + zt[1] = β + zt[2] = γ - for i = 1 : iter - hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = vᵢAuₖ - fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = uᵢBvₖ - @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ - @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ - R[nr₂ₖ + 2i-1] = hᵢₖ - (i < iter) ? 
R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ - end + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time)) - # Reorthogonalization of the Krylov basis. - if reorthogonalization - for i = 1 : iter - Htmp = @kdot(m, V[i], q) # hₜₘₚ = qᵀvᵢ - Ftmp = @kdot(n, U[i], p) # fₜₘₚ = pᵀuᵢ - @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ - @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ - R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ - (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ - end - end + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂ - Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂ - - # Add regularization terms. - R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ - R[nr₂ₖ + 2k] = μ # S₂ₖ.₂ₖ = μ - - # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] - # [0 u₁ ••• 0 uₖ] - # - # rₖ = [ b ] - [ λI A ] [ xₖ ] = [ b ] - [ λI A ] Wₖzₖ - # [ c ] [ B μI ] [ yₖ ] [ c ] [ B μI ] - # - # block-Arnoldi formulation : [ λI A ] Wₖ = Wₖ₊₁Sₖ₊₁.ₖ - # [ B μI ] - # - # GPMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - βe₁ - γe₂ ‖ - # - # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # - # Apply previous givens reflections when k ≥ 2 - # [ 1 ][ 1 ][ c₂.ᵢ s₂.ᵢ ][ c₁.ᵢ s₁.ᵢ ] [ r̄₂ᵢ₋₁.₂ₖ₋₁ r̄₂ᵢ₋₁.₂ₖ ] [ r₂ᵢ₋₁.₂ₖ₋₁ r₂ᵢ₋₁.₂ₖ ] - # [ c₄.ᵢ s₄.ᵢ ][ c₃.ᵢ s₃.ᵢ ][ s̄₂.ᵢ -c₂.ᵢ ][ 1 ] [ r̄₂ᵢ.₂ₖ₋₁ r̄₂ᵢ.₂ₖ ] = [ r₂ᵢ.₂ₖ₋₁ r₂ᵢ.₂ₖ ] - # [ s̄₄.ᵢ -c₄.ᵢ ][ 1 ][ 1 ][ 1 ] [ ρ hᵢ₊₁.ₖ ] [ r̄₂ᵢ₊₁.₂ₖ₋₁ r̄₂ᵢ₊₁.₂ₖ ] - # [ 1 ][ s̄₃.ᵢ -c₃.ᵢ ][ 1 ][ s̄₁.ᵢ -c₁.ᵢ ] [ fᵢ₊₁.ₖ δ ] [ r̄₂ᵢ₊₂.₂ₖ₋₁ r̄₂ᵢ₊₂.₂ₖ ] - # - # r̄₁.₂ₖ₋₁ = 0, r̄₁.₂ₖ = h₁.ₖ, r̄₂.₂ₖ₋₁ = f₁.ₖ and r̄₂.₂ₖ = 0. - # (ρ, δ) = (λ, μ) if i == k-1, (ρ, δ) = (0, 0) otherwise. - for i = 1 : iter-1 - for nrcol ∈ (nr₂ₖ₋₁, nr₂ₖ) - flag = (i == iter-1 && nrcol == nr₂ₖ₋₁) - αₖ = flag ? 
ωₖ : R[nrcol + 2i+2] - - c₁ᵢ = gc[4i-3] - s₁ᵢ = gs[4i-3] - rtmp = c₁ᵢ * R[nrcol + 2i-1] + s₁ᵢ * αₖ - αₖ = conj(s₁ᵢ) * R[nrcol + 2i-1] - c₁ᵢ * αₖ - R[nrcol + 2i-1] = rtmp - - c₂ᵢ = gc[4i-2] - s₂ᵢ = gs[4i-2] - rtmp = c₂ᵢ * R[nrcol + 2i-1] + s₂ᵢ * R[nrcol + 2i] - R[nrcol + 2i] = conj(s₂ᵢ) * R[nrcol + 2i-1] - c₂ᵢ * R[nrcol + 2i] - R[nrcol + 2i-1] = rtmp - - c₃ᵢ = gc[4i-1] - s₃ᵢ = gs[4i-1] - rtmp = c₃ᵢ * R[nrcol + 2i] + s₃ᵢ * αₖ - αₖ = conj(s₃ᵢ) * R[nrcol + 2i] - c₃ᵢ * αₖ - R[nrcol + 2i] = rtmp - - c₄ᵢ = gc[4i] - s₄ᵢ = gs[4i] - rtmp = c₄ᵢ * R[nrcol + 2i] + s₄ᵢ * R[nrcol + 2i+1] - R[nrcol + 2i+1] = conj(s₄ᵢ) * R[nrcol + 2i] - c₄ᵢ * R[nrcol + 2i+1] - R[nrcol + 2i] = rtmp - - flag ? ωₖ = αₖ : R[nrcol + 2i+2] = αₖ + # Stopping criterion. + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + + # Update iteration index. + iter = iter + 1 + k = iter + nr₂ₖ₋₁ = nr # Position of the column 2k-1 in Rₖ. + nr₂ₖ = nr + 2k-1 # Position of the column 2k in Rₖ. 
+ + # Update workspace if more storage is required + if iter > mem + for i = 1 : 4k-1 + push!(R, zero(FC)) + end + for i = 1 : 4 + push!(gs, zero(FC)) + push!(gc, zero(T)) + end end - end - # Compute and apply current givens reflections - # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ r̄₂ₖ₋₁.₂ₖ₋₁ r̄₂ₖ₋₁.₂ₖ ] [ r₂ₖ₋₁.₂ₖ₋₁ r₂ₖ₋₁.₂ₖ ] - # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ r̄₂ₖ.₂ₖ₋₁ r̄₂ₖ.₂ₖ ] = [ r₂ₖ.₂ₖ ] - # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ hₖ₊₁.ₖ ] [ ] - # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ fₖ₊₁.ₖ ] [ ] - (c₁ₖ, s₁ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], Faux) # annihilate fₖ₊₁.ₖ - θₖ = conj(s₁ₖ) * R[nr₂ₖ + 2k-1] - R[nr₂ₖ + 2k-1] = c₁ₖ * R[nr₂ₖ + 2k-1] - - (c₂ₖ, s₂ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], ωₖ) # annihilate ωₖ = r̄₂ₖ.₂ₖ₋₁ - rtmp = c₂ₖ * R[nr₂ₖ + 2k-1] + s₂ₖ * R[nr₂ₖ + 2k] - R[nr₂ₖ + 2k] = conj(s₂ₖ) * R[nr₂ₖ + 2k-1] - c₂ₖ * R[nr₂ₖ + 2k] - R[nr₂ₖ + 2k-1] = rtmp - - (c₃ₖ, s₃ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], θₖ) # annihilate Θₖ = r̄₂ₖ₊₂.₂ₖ - - (c₄ₖ, s₄ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], Haux) # annihilate hₖ₊₁.ₖ - - # Update t̄ₖ = (τ₁, ..., τ₂ₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂). 
- # - # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ τbar₂ₖ₋₁ ] [ τ₂ₖ₋₁ ] - # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ τbar₂ₖ ] = [ τ₂ₖ ] - # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ ] [ τbar₂ₖ₊₁ ] - # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ ] [ τbar₂ₖ₊₂ ] - τbar₂ₖ₊₂ = conj(s₁ₖ) * zt[2k-1] - zt[2k-1] = c₁ₖ * zt[2k-1] - - τtmp = c₂ₖ * zt[2k-1] + s₂ₖ * zt[2k] - zt[2k] = conj(s₂ₖ) * zt[2k-1] - c₂ₖ * zt[2k] - zt[2k-1] = τtmp - - τtmp = c₃ₖ * zt[2k] + s₃ₖ * τbar₂ₖ₊₂ - τbar₂ₖ₊₂ = conj(s₃ₖ) * zt[2k] - c₃ₖ * τbar₂ₖ₊₂ - zt[2k] = τtmp - - τbar₂ₖ₊₁ = conj(s₄ₖ) * zt[2k] - zt[2k] = c₄ₖ * zt[2k] - - # Update gc and gs vectors - gc[4k-3], gc[4k-2], gc[4k-1], gc[4k] = c₁ₖ, c₂ₖ, c₃ₖ, c₄ₖ - gs[4k-3], gs[4k-2], gs[4k-1], gs[4k] = s₁ₖ, s₂ₖ, s₃ₖ, s₄ₖ - - # Compute ‖rₖ‖² = |τbar₂ₖ₊₁|² + |τbar₂ₖ₊₂|² - rNorm = sqrt(abs2(τbar₂ₖ₊₁) + abs2(τbar₂ₖ₊₂)) - history && push!(rNorms, rNorm) + # Continue the orthogonal Hessenberg reduction process. + # CAFUₖ = VₖHₖ + hₖ₊₁.ₖ * vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Hₖ₊₁.ₖ + # DBEVₖ = UₖFₖ + fₖ₊₁.ₖ * uₖ₊₁(eₖ)ᵀ = Uₖ₊₁Fₖ₊₁.ₖ + wA = FisI ? U[iter] : solver.wA + wB = EisI ? V[iter] : solver.wB + FisI || mulorldiv!(wA, F, U[iter], ldiv) # wA = Fuₖ + EisI || mulorldiv!(wB, E, V[iter], ldiv) # wB = Evₖ + mul!(dA, A, wA) # dA = AFuₖ + mul!(dB, B, wB) # dB = BEvₖ + CisI || mulorldiv!(q, C, dA, ldiv) # q = CAFuₖ + DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ - # Update the number of coefficients in Rₖ. - nr = nr + 4k-1 - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + for i = 1 : iter + hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq + fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp + @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ + @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ + R[nr₂ₖ + 2i-1] = hᵢₖ + (i < iter) ? R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ + end - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = Faux ≤ btol && Haux ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux) - - # Compute vₖ₊₁ and uₖ₊₁ - if !(solved || tired || breakdown || user_requested_exit) - if iter ≥ mem - push!(V, S(undef, m)) - push!(U, S(undef, n)) - push!(zt, zero(FC), zero(FC)) + # Reorthogonalization of the Krylov basis. + if reorthogonalization + for i = 1 : iter + Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq + Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp + @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ + @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ + R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ + (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ + end end - # hₖ₊₁.ₖ ≠ 0 - if Haux > btol - @. V[k+1] = q / Haux # hₖ₊₁.ₖvₖ₊₁ = q - else - # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ} - V[k+1] .= zero(FC) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ} + Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂ + Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂ + + # Add regularization terms. + R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ + R[nr₂ₖ + 2k] = μ # S₂ₖ.₂ₖ = μ + + # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] + # [0 u₁ ••• 0 uₖ] + # + # rₖ = [ b ] - [ λI A ] [ xₖ ] = [ b ] - [ λI A ] Wₖzₖ + # [ c ] [ B μI ] [ yₖ ] [ c ] [ B μI ] + # + # block-Arnoldi formulation : [ λI A ] Wₖ = Wₖ₊₁Sₖ₊₁.ₖ + # [ B μI ] + # + # GPMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - βe₁ - γe₂ ‖ + # + # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. 
+ # [ Oᵀ ] + # + # Apply previous givens reflections when k ≥ 2 + # [ 1 ][ 1 ][ c₂.ᵢ s₂.ᵢ ][ c₁.ᵢ s₁.ᵢ ] [ r̄₂ᵢ₋₁.₂ₖ₋₁ r̄₂ᵢ₋₁.₂ₖ ] [ r₂ᵢ₋₁.₂ₖ₋₁ r₂ᵢ₋₁.₂ₖ ] + # [ c₄.ᵢ s₄.ᵢ ][ c₃.ᵢ s₃.ᵢ ][ s̄₂.ᵢ -c₂.ᵢ ][ 1 ] [ r̄₂ᵢ.₂ₖ₋₁ r̄₂ᵢ.₂ₖ ] = [ r₂ᵢ.₂ₖ₋₁ r₂ᵢ.₂ₖ ] + # [ s̄₄.ᵢ -c₄.ᵢ ][ 1 ][ 1 ][ 1 ] [ ρ hᵢ₊₁.ₖ ] [ r̄₂ᵢ₊₁.₂ₖ₋₁ r̄₂ᵢ₊₁.₂ₖ ] + # [ 1 ][ s̄₃.ᵢ -c₃.ᵢ ][ 1 ][ s̄₁.ᵢ -c₁.ᵢ ] [ fᵢ₊₁.ₖ δ ] [ r̄₂ᵢ₊₂.₂ₖ₋₁ r̄₂ᵢ₊₂.₂ₖ ] + # + # r̄₁.₂ₖ₋₁ = 0, r̄₁.₂ₖ = h₁.ₖ, r̄₂.₂ₖ₋₁ = f₁.ₖ and r̄₂.₂ₖ = 0. + # (ρ, δ) = (λ, μ) if i == k-1, (ρ, δ) = (0, 0) otherwise. + for i = 1 : iter-1 + for nrcol ∈ (nr₂ₖ₋₁, nr₂ₖ) + flag = (i == iter-1 && nrcol == nr₂ₖ₋₁) + αₖ = flag ? ωₖ : R[nrcol + 2i+2] + + c₁ᵢ = gc[4i-3] + s₁ᵢ = gs[4i-3] + rtmp = c₁ᵢ * R[nrcol + 2i-1] + s₁ᵢ * αₖ + αₖ = conj(s₁ᵢ) * R[nrcol + 2i-1] - c₁ᵢ * αₖ + R[nrcol + 2i-1] = rtmp + + c₂ᵢ = gc[4i-2] + s₂ᵢ = gs[4i-2] + rtmp = c₂ᵢ * R[nrcol + 2i-1] + s₂ᵢ * R[nrcol + 2i] + R[nrcol + 2i] = conj(s₂ᵢ) * R[nrcol + 2i-1] - c₂ᵢ * R[nrcol + 2i] + R[nrcol + 2i-1] = rtmp + + c₃ᵢ = gc[4i-1] + s₃ᵢ = gs[4i-1] + rtmp = c₃ᵢ * R[nrcol + 2i] + s₃ᵢ * αₖ + αₖ = conj(s₃ᵢ) * R[nrcol + 2i] - c₃ᵢ * αₖ + R[nrcol + 2i] = rtmp + + c₄ᵢ = gc[4i] + s₄ᵢ = gs[4i] + rtmp = c₄ᵢ * R[nrcol + 2i] + s₄ᵢ * R[nrcol + 2i+1] + R[nrcol + 2i+1] = conj(s₄ᵢ) * R[nrcol + 2i] - c₄ᵢ * R[nrcol + 2i+1] + R[nrcol + 2i] = rtmp + + flag ? ωₖ = αₖ : R[nrcol + 2i+2] = αₖ + end end - # fₖ₊₁.ₖ ≠ 0 - if Faux > btol - @. 
U[k+1] = p / Faux # fₖ₊₁.ₖuₖ₊₁ = p + # Compute and apply current givens reflections + # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ r̄₂ₖ₋₁.₂ₖ₋₁ r̄₂ₖ₋₁.₂ₖ ] [ r₂ₖ₋₁.₂ₖ₋₁ r₂ₖ₋₁.₂ₖ ] + # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ r̄₂ₖ.₂ₖ₋₁ r̄₂ₖ.₂ₖ ] = [ r₂ₖ.₂ₖ ] + # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ hₖ₊₁.ₖ ] [ ] + # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ fₖ₊₁.ₖ ] [ ] + (c₁ₖ, s₁ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], Faux) # annihilate fₖ₊₁.ₖ + θₖ = conj(s₁ₖ) * R[nr₂ₖ + 2k-1] + R[nr₂ₖ + 2k-1] = c₁ₖ * R[nr₂ₖ + 2k-1] + + (c₂ₖ, s₂ₖ, R[nr₂ₖ₋₁ + 2k-1]) = sym_givens(R[nr₂ₖ₋₁ + 2k-1], ωₖ) # annihilate ωₖ = r̄₂ₖ.₂ₖ₋₁ + rtmp = c₂ₖ * R[nr₂ₖ + 2k-1] + s₂ₖ * R[nr₂ₖ + 2k] + R[nr₂ₖ + 2k] = conj(s₂ₖ) * R[nr₂ₖ + 2k-1] - c₂ₖ * R[nr₂ₖ + 2k] + R[nr₂ₖ + 2k-1] = rtmp + + (c₃ₖ, s₃ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], θₖ) # annihilate Θₖ = r̄₂ₖ₊₂.₂ₖ + + (c₄ₖ, s₄ₖ, R[nr₂ₖ + 2k]) = sym_givens(R[nr₂ₖ + 2k], Haux) # annihilate hₖ₊₁.ₖ + + # Update t̄ₖ = (τ₁, ..., τ₂ₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂). + # + # [ 1 ][ 1 ][ c₂.ₖ s₂.ₖ ][ c₁.ₖ s₁.ₖ ] [ τbar₂ₖ₋₁ ] [ τ₂ₖ₋₁ ] + # [ c₄.ₖ s₄.ₖ ][ c₃.ₖ s₃.ₖ ][ s̄₂.ₖ -c₂.ₖ ][ 1 ] [ τbar₂ₖ ] = [ τ₂ₖ ] + # [ s̄₄.ₖ -c₄.ₖ ][ 1 ][ 1 ][ 1 ] [ ] [ τbar₂ₖ₊₁ ] + # [ 1 ][ s̄₃.ₖ -c₃.ₖ ][ 1 ][ s̄₁.ₖ -c₁.ₖ ] [ ] [ τbar₂ₖ₊₂ ] + τbar₂ₖ₊₂ = conj(s₁ₖ) * zt[2k-1] + zt[2k-1] = c₁ₖ * zt[2k-1] + + τtmp = c₂ₖ * zt[2k-1] + s₂ₖ * zt[2k] + zt[2k] = conj(s₂ₖ) * zt[2k-1] - c₂ₖ * zt[2k] + zt[2k-1] = τtmp + + τtmp = c₃ₖ * zt[2k] + s₃ₖ * τbar₂ₖ₊₂ + τbar₂ₖ₊₂ = conj(s₃ₖ) * zt[2k] - c₃ₖ * τbar₂ₖ₊₂ + zt[2k] = τtmp + + τbar₂ₖ₊₁ = conj(s₄ₖ) * zt[2k] + zt[2k] = c₄ₖ * zt[2k] + + # Update gc and gs vectors + gc[4k-3], gc[4k-2], gc[4k-1], gc[4k] = c₁ₖ, c₂ₖ, c₃ₖ, c₄ₖ + gs[4k-3], gs[4k-2], gs[4k-1], gs[4k] = s₁ₖ, s₂ₖ, s₃ₖ, s₄ₖ + + # Compute ‖rₖ‖² = |τbar₂ₖ₊₁|² + |τbar₂ₖ₊₂|² + rNorm = sqrt(abs2(τbar₂ₖ₊₁) + abs2(τbar₂ₖ₊₂)) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ. + nr = nr + 4k-1 + + # Stopping conditions that do not depend on user input. 
+ # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = Faux ≤ btol && Haux ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, Haux, Faux, ktimer(start_time)) + + # Compute vₖ₊₁ and uₖ₊₁ + if !(solved || tired || breakdown || user_requested_exit || overtimed) + if iter ≥ mem + push!(V, S(undef, m)) + push!(U, S(undef, n)) + push!(zt, zero(FC), zero(FC)) + end + + # hₖ₊₁.ₖ ≠ 0 + if Haux > btol + @. V[k+1] = q / Haux # hₖ₊₁.ₖvₖ₊₁ = q + else + # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ} + V[k+1] .= zero(FC) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ} + end + + # fₖ₊₁.ₖ ≠ 0 + if Faux > btol + @. U[k+1] = p / Faux # fₖ₊₁.ₖuₖ₊₁ = p + else + # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ} + U[k+1] .= zero(FC) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ} + end + + zt[2k+1] = τbar₂ₖ₊₁ + zt[2k+2] = τbar₂ₖ₊₂ + end + end + (verbose > 0) && @printf(iostream, "\n") + + # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution. 
+ for i = 2iter : -1 : 1 + pos = nr + i - 2iter # position of rᵢ.ₖ + for j = 2iter : -1 : i+1 + zt[i] = zt[i] - R[pos] * zt[j] # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + zt[i] = zero(FC) + inconsistent = true else - # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ} - U[k+1] .= zero(FC) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ} + zt[i] = zt[i] / R[pos] # ζᵢ ← ζᵢ / rᵢ.ᵢ end + end - zt[2k+1] = τbar₂ₖ₊₁ - zt[2k+2] = τbar₂ₖ₊₂ + # Compute xₖ and yₖ + for i = 1 : iter + @kaxpy!(m, zt[2i-1], V[i], x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ + @kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ end - end - (verbose > 0) && @printf("\n") - - # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution. - for i = 2iter : -1 : 1 - pos = nr + i - 2iter # position of rᵢ.ₖ - for j = 2iter : -1 : i+1 - zt[i] = zt[i] - R[pos] * zt[j] # ζᵢ ← ζᵢ - rᵢ.ⱼζⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + if !EisI + wB .= x + mulorldiv!(x, E, wB, ldiv) end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ btol - zt[i] = zero(FC) - inconsistent = true - else - zt[i] = zt[i] / R[pos] # ζᵢ ← ζᵢ / rᵢ.ᵢ + if !FisI + wA .= y + mulorldiv!(y, F, wA, ldiv) end + warm_start && @kaxpy!(m, one(FC), Δx, x) + warm_start && @kaxpy!(n, one(FC), Δy, y) + solver.warm_start = false + + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - # Compute xₖ and yₖ - for i = 1 : iter - @kaxpy!(m, zt[2i-1], V[i], 
x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ - @kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ - end - if !EisI - wB .= x - mulorldiv!(x, E, wB, ldiv) - end - if !FisI - wA .= y - mulorldiv!(y, F, wA, ldiv) - end - warm_start && @kaxpy!(m, one(FC), Δx, x) - warm_start && @kaxpy!(n, one(FC), Δy, y) - solver.warm_start = false - - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - inconsistent && (status = "found approximate least-squares solution") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl new file mode 100644 index 000000000..5c9cad24d --- /dev/null +++ b/src/krylov_processes.jl @@ -0,0 +1,439 @@ +export hermitian_lanczos, nonhermitian_lanczos, arnoldi, golub_kahan, saunders_simon_yip, montoison_orban + +""" + V, T = hermitian_lanczos(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `k`: the number of iterations of the Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 225--280, 1950. 
+""" +function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + R = real(FC) + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval = zeros(R, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval) + + pαᵢ = 1 # Position of αᵢ in the vector `nzval` + for i = 1:k + vᵢ = view(V,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + if i == 1 + βᵢ = @knrm2(n, b) + vᵢ .= b ./ βᵢ + end + mul!(q, A, vᵢ) + αᵢ = @kdotr(n, vᵢ, q) + nzval[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + @kaxpy!(n, -αᵢ, vᵢ, q) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + βᵢ = nzval[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + nzval[pαᵢ-1] = βᵢ # Tᵢ₋₁.ᵢ = βᵢ + @kaxpy!(n, -βᵢ, vᵢ₋₁, q) + end + βᵢ₊₁ = @knrm2(n, q) + nzval[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + vᵢ₊₁ .= q ./ βᵢ₊₁ + pαᵢ = pαᵢ + 3 + end + return V, T +end + +""" + V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `b`: a vector of length n; +* `c`: a vector of length n; +* `k`: the number of iterations of the non-Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix; +* `U`: a dense n × (k+1) matrix; +* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 225--280, 1950. 
+""" +function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval_T = zeros(FC, 3k-1) + nzval_Tᴴ = zeros(FC, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, n, k+1) + U = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ) + + pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ` + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + cᴴb = @kdot(n, c, b) + βᵢ = √(abs(cᴴb)) + γᵢ = cᴴb / βᵢ + vᵢ .= b ./ βᵢ + uᵢ .= c ./ conj(γᵢ) + end + mul!(q, A , vᵢ) + mul!(p, Aᴴ, uᵢ) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + uᵢ₋₁ = view(U,:,i-1) + βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ + @kaxpy!(n, - γᵢ , vᵢ₋₁, q) + @kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p) + end + αᵢ = @kdot(n, uᵢ, q) + nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ + @kaxpy!(m, - αᵢ , vᵢ, q) + @kaxpy!(n, -conj(αᵢ), uᵢ, p) + pᴴq = @kdot(n, p, q) + βᵢ₊₁ = √(abs(pᴴq)) + γᵢ₊₁ = pᴴq / βᵢ₊₁ + vᵢ₊₁ .= q ./ βᵢ₊₁ + uᵢ₊₁ .= p ./ conj(γᵢ₊₁) + nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + nzval_Tᴴ[pαᵢ+1] = conj(γᵢ₊₁) # Tᴴᵢ₊₁.ᵢ = γ̄ᵢ₊₁ + if i ≤ k-1 + nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁ + nzval_Tᴴ[pαᵢ+2] = conj(βᵢ₊₁) # Tᴴᵢ.ᵢ₊₁ = β̄ᵢ₊₁ + end + pαᵢ = pαᵢ + 3 + end + return V, T, U, Tᴴ +end + +""" + V, H = arnoldi(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `b`: a vector of length n; +* `k`: the number of iterations of the Arnoldi process. 
+ +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `H`: a dense (k+1) × k upper Hessenberg matrix. + +#### Reference + +* W. E. Arnoldi, [*The principle of minimized iterations in the solution of the matrix eigenvalue problem*](https://doi.org/10.1090/qam/42792), Quarterly of Applied Mathematics, 9, pp. 17--29, 1951. +""" +function arnoldi(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + S = ktypeof(b) + M = vector_to_matrix(S) + + V = M(undef, n, k+1) + H = zeros(FC, k+1, k) + + for i = 1:k + vᵢ = view(V,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + if i == 1 + β = @knrm2(n, b) + vᵢ .= b ./ β + end + mul!(q, A, vᵢ) + for j = 1:i + vⱼ = view(V,:,j) + H[j,i] = @kdot(n, vⱼ, q) + @kaxpy!(n, -H[j,i], vⱼ, q) + end + H[i+1,i] = @knrm2(n, q) + vᵢ₊₁ .= q ./ H[i+1,i] + end + return V, H +end + +""" + V, U, L = golub_kahan(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `k`: the number of iterations of the Golub-Kahan process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `U`: a dense m × (k+1) matrix; +* `L`: a sparse (k+1) × (k+1) lower bidiagonal matrix. + +#### References + +* G. H. Golub and W. Kahan, [*Calculating the Singular Values and Pseudo-Inverse of a Matrix*](https://doi.org/10.1137/0702016), SIAM Journal on Numerical Analysis, 2(2), pp. 225--224, 1965. +* C. C. Paige, [*Bidiagonalization of Matrices and Solution of Linear Equations*](https://doi.org/10.1137/0711019), SIAM Journal on Numerical Analysis, 11(1), pp. 197--209, 1974. 
+""" +function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + R = real(FC) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+2) + rowval = zeros(Int, 2k+1) + nzval = zeros(R, 2k+1) + + colptr[1] = 1 + for i = 1:k + pos = colptr[i] + colptr[i+1] = pos+2 + rowval[pos] = i + rowval[pos+1] = i+1 + end + rowval[2k+1] = k+1 + colptr[k+2] = 2k+2 + + V = M(undef, n, k+1) + U = M(undef, m, k+1) + L = SparseMatrixCSC(k+1, k+1, colptr, rowval, nzval) + + pαᵢ = 1 # Position of αᵢ in the vector `nzval` + for i = 1:k + uᵢ = view(U,:,i) + vᵢ = view(V,:,i) + uᵢ₊₁ = q = view(U,:,i+1) + vᵢ₊₁ = p = view(V,:,i+1) + if i == 1 + wᵢ = vᵢ + βᵢ = @knrm2(m, b) + uᵢ .= b ./ βᵢ + mul!(wᵢ, Aᴴ, uᵢ) + αᵢ = @knrm2(n, wᵢ) + nzval[pαᵢ] = αᵢ # Lᵢ.ᵢ = αᵢ + vᵢ .= wᵢ ./ αᵢ + end + mul!(q, A, vᵢ) + αᵢ = nzval[pαᵢ] # αᵢ = Lᵢ.ᵢ + @kaxpy!(m, -αᵢ, uᵢ, q) + βᵢ₊₁ = @knrm2(m, q) + uᵢ₊₁ .= q ./ βᵢ₊₁ + mul!(p, Aᴴ, uᵢ₊₁) + @kaxpy!(n, -βᵢ₊₁, vᵢ, p) + αᵢ₊₁ = @knrm2(n, p) + vᵢ₊₁ .= p ./ αᵢ₊₁ + nzval[pαᵢ+1] = βᵢ₊₁ # Lᵢ₊₁.ᵢ = βᵢ₊₁ + nzval[pαᵢ+2] = αᵢ₊₁ # Lᵢ₊₁.ᵢ₊₁ = αᵢ₊₁ + pαᵢ = pαᵢ + 2 + end + return V, U, L +end + +""" + V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n; +* `k`: the number of iterations of the Saunders-Simon-Yip process. + +#### Output arguments + +* `V`: a dense m × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix; +* `U`: a dense n × (k+1) matrix; +* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* M. A. Saunders, H. D. Simon, and E. L. Yip, [*Two Conjugate-Gradient-Type Methods for Unsymmetric Linear Equations*](https://doi.org/10.1137/0725052), SIAM Journal on Numerical Analysis, 25(4), pp. 927--940, 1988. 
+""" +function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval_T = zeros(FC, 3k-1) + nzval_Tᴴ = zeros(FC, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, m, k+1) + U = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ) + + pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ` + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + β = @knrm2(m, b) + γ = @knrm2(n, c) + vᵢ .= b ./ β + uᵢ .= c ./ γ + end + mul!(q, A , uᵢ) + mul!(p, Aᴴ, vᵢ) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + uᵢ₋₁ = view(U,:,i-1) + βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ + @kaxpy!(m, -γᵢ, vᵢ₋₁, q) + @kaxpy!(n, -βᵢ, uᵢ₋₁, p) + end + αᵢ = @kdot(m, vᵢ, q) + nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ + @kaxpy!(m, - αᵢ , vᵢ, q) + @kaxpy!(n, -conj(αᵢ), uᵢ, p) + βᵢ₊₁ = @knrm2(m, q) + γᵢ₊₁ = @knrm2(n, p) + vᵢ₊₁ .= q ./ βᵢ₊₁ + uᵢ₊₁ .= p ./ γᵢ₊₁ + nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + nzval_Tᴴ[pαᵢ+1] = γᵢ₊₁ # Tᴴᵢ₊₁.ᵢ = γᵢ₊₁ + if i ≤ k-1 + nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁ + nzval_Tᴴ[pαᵢ+2] = βᵢ₊₁ # Tᴴᵢ.ᵢ₊₁ = βᵢ₊₁ + end + pαᵢ = pαᵢ + 3 + end + return V, T, U, Tᴴ +end + +""" + V, H, U, F = montoison_orban(A, B, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `b`: a vector of length m; +* `c`: a vector of length n; +* `k`: the number of iterations of the Montoison-Orban process. 
+ +#### Output arguments + +* `V`: a dense m × (k+1) matrix; +* `H`: a dense (k+1) × k upper Hessenberg matrix; +* `U`: a dense n × (k+1) matrix; +* `F`: a dense (k+1) × k upper Hessenberg matrix. + +#### Reference + +* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://doi.org/10.1137/21M1459265), SIAM Journal on Matrix Analysis and Applications, 44(1), pp. 293--311, 2023. +""" +function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + S = ktypeof(b) + M = vector_to_matrix(S) + + V = M(undef, m, k+1) + U = M(undef, n, k+1) + H = zeros(FC, k+1, k) + F = zeros(FC, k+1, k) + + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + β = @knrm2(m, b) + γ = @knrm2(n, c) + vᵢ .= b ./ β + uᵢ .= c ./ γ + end + mul!(q, A, uᵢ) + mul!(p, B, vᵢ) + for j = 1:i + vⱼ = view(V,:,j) + uⱼ = view(U,:,j) + H[j,i] = @kdot(m, vⱼ, q) + @kaxpy!(n, -H[j,i], vⱼ, q) + F[j,i] = @kdot(n, uⱼ, p) + @kaxpy!(m, -F[j,i], uⱼ, p) + end + H[i+1,i] = @knrm2(m, q) + vᵢ₊₁ .= q ./ H[i+1,i] + F[i+1,i] = @knrm2(n, p) + uᵢ₊₁ .= p ./ F[i+1,i] + end + return V, H, U, F +end diff --git a/src/krylov_solve.jl b/src/krylov_solve.jl new file mode 100644 index 000000000..30a463dfa --- /dev/null +++ b/src/krylov_solve.jl @@ -0,0 +1,60 @@ +""" + solve!(solver, args...; kwargs...) + +Use the in-place Krylov method associated to `solver`. +""" +function solve! end + +for (KS, fun, args, def_args, optargs, def_optargs, kwargs, def_kwargs) in [ + (:LsmrSolver , :lsmr! , args_lsmr , def_args_lsmr , () , () , kwargs_lsmr , def_kwargs_lsmr ) + (:CgsSolver , :cgs! , args_cgs , def_args_cgs , optargs_cgs , def_optargs_cgs , kwargs_cgs , def_kwargs_cgs ) + (:UsymlqSolver , :usymlq! , args_usymlq , def_args_usymlq , optargs_usymlq , def_optargs_usymlq , kwargs_usymlq , def_kwargs_usymlq ) + (:LnlqSolver , :lnlq! 
, args_lnlq , def_args_lnlq , () , () , kwargs_lnlq , def_kwargs_lnlq ) + (:BicgstabSolver , :bicgstab! , args_bicgstab , def_args_bicgstab , optargs_bicgstab , def_optargs_bicgstab , kwargs_bicgstab , def_kwargs_bicgstab ) + (:CrlsSolver , :crls! , args_crls , def_args_crls , () , () , kwargs_crls , def_kwargs_crls ) + (:LsqrSolver , :lsqr! , args_lsqr , def_args_lsqr , () , () , kwargs_lsqr , def_kwargs_lsqr ) + (:MinresSolver , :minres! , args_minres , def_args_minres , optargs_minres , def_optargs_minres , kwargs_minres , def_kwargs_minres ) + (:CgneSolver , :cgne! , args_cgne , def_args_cgne , () , () , kwargs_cgne , def_kwargs_cgne ) + (:DqgmresSolver , :dqgmres! , args_dqgmres , def_args_dqgmres , optargs_dqgmres , def_optargs_dqgmres , kwargs_dqgmres , def_kwargs_dqgmres ) + (:SymmlqSolver , :symmlq! , args_symmlq , def_args_symmlq , optargs_symmlq , def_optargs_symmlq , kwargs_symmlq , def_kwargs_symmlq ) + (:TrimrSolver , :trimr! , args_trimr , def_args_trimr , optargs_trimr , def_optargs_trimr , kwargs_trimr , def_kwargs_trimr ) + (:UsymqrSolver , :usymqr! , args_usymqr , def_args_usymqr , optargs_usymqr , def_optargs_usymqr , kwargs_usymqr , def_kwargs_usymqr ) + (:BilqrSolver , :bilqr! , args_bilqr , def_args_bilqr , optargs_bilqr , def_optargs_bilqr , kwargs_bilqr , def_kwargs_bilqr ) + (:CrSolver , :cr! , args_cr , def_args_cr , optargs_cr , def_optargs_cr , kwargs_cr , def_kwargs_cr ) + (:CraigmrSolver , :craigmr! , args_craigmr , def_args_craigmr , () , () , kwargs_craigmr , def_kwargs_craigmr ) + (:TricgSolver , :tricg! , args_tricg , def_args_tricg , optargs_tricg , def_optargs_tricg , kwargs_tricg , def_kwargs_tricg ) + (:CraigSolver , :craig! , args_craig , def_args_craig , () , () , kwargs_craig , def_kwargs_craig ) + (:DiomSolver , :diom! , args_diom , def_args_diom , optargs_diom , def_optargs_diom , kwargs_diom , def_kwargs_diom ) + (:LslqSolver , :lslq! 
, args_lslq , def_args_lslq , () , () , kwargs_lslq , def_kwargs_lslq ) + (:TrilqrSolver , :trilqr! , args_trilqr , def_args_trilqr , optargs_trilqr , def_optargs_trilqr , kwargs_trilqr , def_kwargs_trilqr ) + (:CrmrSolver , :crmr! , args_crmr , def_args_crmr , () , () , kwargs_crmr , def_kwargs_crmr ) + (:CgSolver , :cg! , args_cg , def_args_cg , optargs_cg , def_optargs_cg , kwargs_cg , def_kwargs_cg ) + (:CgLanczosShiftSolver, :cg_lanczos_shift!, args_cg_lanczos_shift, def_args_cg_lanczos_shift, () , () , kwargs_cg_lanczos_shift, def_kwargs_cg_lanczos_shift) + (:CglsSolver , :cgls! , args_cgls , def_args_cgls , () , () , kwargs_cgls , def_kwargs_cgls ) + (:CgLanczosSolver , :cg_lanczos! , args_cg_lanczos , def_args_cg_lanczos , optargs_cg_lanczos, def_optargs_cg_lanczos, kwargs_cg_lanczos , def_kwargs_cg_lanczos ) + (:BilqSolver , :bilq! , args_bilq , def_args_bilq , optargs_bilq , def_optargs_bilq , kwargs_bilq , def_kwargs_bilq ) + (:MinresQlpSolver , :minres_qlp! , args_minres_qlp , def_args_minres_qlp , optargs_minres_qlp, def_optargs_minres_qlp, kwargs_minres_qlp , def_kwargs_minres_qlp ) + (:QmrSolver , :qmr! , args_qmr , def_args_qmr , optargs_qmr , def_optargs_qmr , kwargs_qmr , def_kwargs_qmr ) + (:GmresSolver , :gmres! , args_gmres , def_args_gmres , optargs_gmres , def_optargs_gmres , kwargs_gmres , def_kwargs_gmres ) + (:FgmresSolver , :fgmres! , args_fgmres , def_args_fgmres , optargs_fgmres , def_optargs_fgmres , kwargs_fgmres , def_kwargs_fgmres ) + (:FomSolver , :fom! , args_fom , def_args_fom , optargs_fom , def_optargs_fom , kwargs_fom , def_kwargs_fom ) + (:GpmrSolver , :gpmr! 
, args_gpmr , def_args_gpmr , optargs_gpmr , def_optargs_gpmr , kwargs_gpmr , def_kwargs_gpmr ) +] + @eval begin + solve!(solver :: $KS{T,FC,S}, $(def_args...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} = $(fun)(solver, $(args...); $(kwargs...)) + + if !isempty($optargs) + function $(fun)(solver :: $KS{T,FC,S}, $(def_args...), $(def_optargs...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + start_time = time_ns() + warm_start!(solver, $(optargs...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + $(fun)(solver, $(args...); $(kwargs...)) + solver.stats.timer += elapsed_time + return solver + end + + solve!(solver :: $KS{T,FC,S}, $(def_args...), $(def_optargs...); $(def_kwargs...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} = $(fun)(solver, $(args...), $(optargs...); $(kwargs...)) + end + end +end diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl index 8a109a2be..0e905e807 100644 --- a/src/krylov_solvers.jl +++ b/src/krylov_solvers.jl @@ -3,11 +3,13 @@ CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver, UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver, BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver, LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver, -GmresSolver, FomSolver, GpmrSolver +GmresSolver, FomSolver, GpmrSolver, FgmresSolver export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual, niterations, Aprod, Atprod, Bprod, warm_start! 
+import Base.size, Base.sizeof, Base.format_bytes + const KRYLOV_SOLVERS = Dict( :cg => :CgSolver , :cr => :CrSolver , @@ -20,6 +22,7 @@ const KRYLOV_SOLVERS = Dict( :fom => :FomSolver , :dqgmres => :DqgmresSolver , :gmres => :GmresSolver , + :fgmres => :FgmresSolver , :gpmr => :GpmrSolver , :usymlq => :UsymlqSolver , :usymqr => :UsymqrSolver , @@ -51,12 +54,14 @@ Type for storing the vectors required by the in-place version of MINRES. The outer constructors - solver = MinresSolver(n, m, S; window :: Int=5) + solver = MinresSolver(m, n, S; window :: Int=5) solver = MinresSolver(A, b; window :: Int=5) may be used in order to create these vectors. """ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r1 :: S @@ -68,29 +73,29 @@ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S} err_vec :: Vector{T} warm_start :: Bool stats :: SimpleStats{T} +end - function MinresSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r1 = S(undef, n) - r2 = S(undef, n) - w1 = S(undef, n) - w2 = S(undef, n) - y = S(undef, n) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats) - return solver - end +function MinresSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r1 = S(undef, n) + r2 = S(undef, n) + w1 = S(undef, n) + w2 = S(undef, n) + y = S(undef, n) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = MinresSolver{T,FC,S}(m, n, Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats) + return solver +end - function MinresSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - MinresSolver(n, m, S, window=window) - end +function MinresSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + MinresSolver(m, 
n, S; window) end """ @@ -98,12 +103,14 @@ Type for storing the vectors required by the in-place version of CG. The outer constructors - solver = CgSolver(n, m, S) + solver = CgSolver(m, n, S) solver = CgSolver(A, b) may be used in order to create these vectors. """ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -112,26 +119,26 @@ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S} z :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CgSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - Ap = S(undef, n) - z = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, Ap, z, false, stats) - return solver - end +function CgSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + Ap = S(undef, n) + z = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CgSolver{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats) + return solver +end - function CgSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgSolver(n, m, S) - end +function CgSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgSolver(m, n, S) end """ @@ -139,12 +146,14 @@ Type for storing the vectors required by the in-place version of CR. The outer constructors - solver = CrSolver(n, m, S) + solver = CrSolver(m, n, S) solver = CrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -154,27 +163,27 @@ mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S} Mq :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - q = S(undef, n) - Ar = S(undef, n) - Mq = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats) - return solver - end +function CrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + q = S(undef, n) + Ar = S(undef, n) + Mq = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CrSolver{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats) + return solver +end - function CrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrSolver(n, m, S) - end +function CrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrSolver(m, n, S) end """ @@ -182,12 +191,14 @@ Type for storing the vectors required by the in-place version of SYMMLQ. The outer constructors - solver = SymmlqSolver(n, m, S) + solver = SymmlqSolver(m, n, S) solver = SymmlqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S Mvold :: S @@ -200,30 +211,30 @@ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} sprod :: Vector{T} warm_start :: Bool stats :: SymmlqStats{T} +end - function SymmlqSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - Mvold = S(undef, n) - Mv = S(undef, n) - Mv_next = S(undef, n) - w̅ = S(undef, n) - v = S(undef, 0) - clist = zeros(T, window) - zlist = zeros(T, window) - sprod = ones(T, window) - stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats) - return solver - end +function SymmlqSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + Mvold = S(undef, n) + Mv = S(undef, n) + Mv_next = S(undef, n) + w̅ = S(undef, n) + v = S(undef, 0) + clist = zeros(T, window) + zlist = zeros(T, window) + sprod = ones(T, window) + stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), 0.0, "unknown") + solver = SymmlqSolver{T,FC,S}(m, n, Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats) + return solver +end - function SymmlqSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - SymmlqSolver(n, m, S, window=window) - end +function SymmlqSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + SymmlqSolver(m, n, S; window) end """ @@ -231,12 +242,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS. The outer constructors - solver = CgLanczosSolver(n, m, S) + solver = CgLanczosSolver(m, n, S) solver = CgLanczosSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S Mv :: S @@ -246,27 +259,27 @@ mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S warm_start :: Bool stats :: LanczosStats{T} +end - function CgLanczosSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - Mv = S(undef, n) - Mv_prev = S(undef, n) - p = S(undef, n) - Mv_next = S(undef, n) - v = S(undef, 0) - stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats) - return solver - end +function CgLanczosSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + Mv = S(undef, n) + Mv_prev = S(undef, n) + p = S(undef, n) + Mv_next = S(undef, n) + v = S(undef, 0) + stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), 0.0, "unknown") + solver = CgLanczosSolver{T,FC,S}(m, n, Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats) + return solver +end - function CgLanczosSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgLanczosSolver(n, m, S) - end +function CgLanczosSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgLanczosSolver(m, n, S) end """ @@ -274,12 +287,15 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIF The outer constructors - solver = CgLanczosShiftSolver(n, m, nshifts, S) + solver = CgLanczosShiftSolver(m, n, nshifts, S) solver = CgLanczosShiftSolver(A, b, nshifts) may be used in order to create these vectors. 
""" mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int + nshifts :: Int Mv :: S Mv_prev :: S Mv_next :: S @@ -294,34 +310,34 @@ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} converged :: BitVector not_cv :: BitVector stats :: LanczosShiftStats{T} +end - function CgLanczosShiftSolver(n, m, nshifts, S) - FC = eltype(S) - T = real(FC) - Mv = S(undef, n) - Mv_prev = S(undef, n) - Mv_next = S(undef, n) - v = S(undef, 0) - x = [S(undef, n) for i = 1 : nshifts] - p = [S(undef, n) for i = 1 : nshifts] - σ = Vector{T}(undef, nshifts) - δhat = Vector{T}(undef, nshifts) - ω = Vector{T}(undef, nshifts) - γ = Vector{T}(undef, nshifts) - rNorms = Vector{T}(undef, nshifts) - indefinite = BitVector(undef, nshifts) - converged = BitVector(undef, nshifts) - not_cv = BitVector(undef, nshifts) - stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) - return solver - end +function CgLanczosShiftSolver(m, n, nshifts, S) + FC = eltype(S) + T = real(FC) + Mv = S(undef, n) + Mv_prev = S(undef, n) + Mv_next = S(undef, n) + v = S(undef, 0) + x = S[S(undef, n) for i = 1 : nshifts] + p = S[S(undef, n) for i = 1 : nshifts] + σ = Vector{T}(undef, nshifts) + δhat = Vector{T}(undef, nshifts) + ω = Vector{T}(undef, nshifts) + γ = Vector{T}(undef, nshifts) + rNorms = Vector{T}(undef, nshifts) + indefinite = BitVector(undef, nshifts) + converged = BitVector(undef, nshifts) + not_cv = BitVector(undef, nshifts) + stats = LanczosShiftStats(0, false, Vector{T}[T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), 0.0, "unknown") + solver = CgLanczosShiftSolver{T,FC,S}(m, n, nshifts, Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) + return solver +end - function CgLanczosShiftSolver(A, b, nshifts) - n, m = size(A) - S = ktypeof(b) - CgLanczosShiftSolver(n, m, 
nshifts, S) - end +function CgLanczosShiftSolver(A, b, nshifts) + m, n = size(A) + S = ktypeof(b) + CgLanczosShiftSolver(m, n, nshifts, S) end """ @@ -329,12 +345,14 @@ Type for storing the vectors required by the in-place version of MINRES-QLP. The outer constructors - solver = MinresQlpSolver(n, m, S) + solver = MinresQlpSolver(m, n, S) solver = MinresQlpSolver(A, b) may be used in order to create these vectors. """ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S wₖ₋₁ :: S wₖ :: S @@ -345,28 +363,28 @@ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function MinresQlpSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - wₖ₋₁ = S(undef, n) - wₖ = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - x = S(undef, n) - p = S(undef, n) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) - return solver - end +function MinresQlpSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + wₖ₋₁ = S(undef, n) + wₖ = S(undef, n) + M⁻¹vₖ₋₁ = S(undef, n) + M⁻¹vₖ = S(undef, n) + x = S(undef, n) + p = S(undef, n) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = MinresQlpSolver{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) + return solver +end - function MinresQlpSolver(A, b) - n, m = size(A) - S = ktypeof(b) - MinresQlpSolver(n, m, S) - end +function MinresQlpSolver(A, b) + m, n = size(A) + S = ktypeof(b) + MinresQlpSolver(m, n, S) end """ @@ -374,13 +392,15 @@ Type for storing the vectors required by the in-place version of DQGMRES. The outer constructors - solver = DqgmresSolver(n, m, memory, S) + solver = DqgmresSolver(m, n, memory, S) solver = DqgmresSolver(A, b, memory = 20) may be used in order to create these vectors. 
`memory` is set to `n` if the value given is larger than `n`. """ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S t :: S @@ -393,31 +413,31 @@ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} H :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function DqgmresSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - t = S(undef, n) - z = S(undef, 0) - w = S(undef, 0) - P = [S(undef, n) for i = 1 : memory] - V = [S(undef, n) for i = 1 : memory] - c = Vector{T}(undef, memory) - s = Vector{FC}(undef, memory) - H = Vector{FC}(undef, memory+2) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats) - return solver - end +function DqgmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + t = S(undef, n) + z = S(undef, 0) + w = S(undef, 0) + P = S[S(undef, n) for i = 1 : memory] + V = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + H = Vector{FC}(undef, memory+1) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = DqgmresSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats) + return solver +end - function DqgmresSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - DqgmresSolver(n, m, memory, S) - end +function DqgmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + DqgmresSolver(m, n, memory, S) end """ @@ -425,13 +445,15 @@ Type for storing the vectors required by the in-place version of DIOM. The outer constructors - solver = DiomSolver(n, m, memory, S) + solver = DiomSolver(m, n, memory, S) solver = DiomSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S t :: S @@ -443,30 +465,30 @@ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S} H :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function DiomSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - t = S(undef, n) - z = S(undef, 0) - w = S(undef, 0) - P = [S(undef, n) for i = 1 : memory] - V = [S(undef, n) for i = 1 : memory] - L = Vector{FC}(undef, memory) - H = Vector{FC}(undef, memory+2) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats) - return solver - end +function DiomSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + t = S(undef, n) + z = S(undef, 0) + w = S(undef, 0) + P = S[S(undef, n) for i = 1 : memory-1] + V = S[S(undef, n) for i = 1 : memory] + L = Vector{FC}(undef, memory-1) + H = Vector{FC}(undef, memory) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = DiomSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats) + return solver +end - function DiomSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - DiomSolver(n, m, memory, S) - end +function DiomSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + DiomSolver(m, n, memory, S) end """ @@ -474,12 +496,14 @@ Type for storing the vectors required by the in-place version of USYMLQ. The outer constructors - solver = UsymlqSolver(n, m, S) + solver = UsymlqSolver(m, n, S) solver = UsymlqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S p :: S @@ -491,29 +515,29 @@ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} q :: S warm_start :: Bool stats :: SimpleStats{T} +end - function UsymlqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - Δx = S(undef, 0) - x = S(undef, m) - d̅ = S(undef, m) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) - return solver - end +function UsymlqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + d̅ = S(undef, n) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = UsymlqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) + return solver +end - function UsymlqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - UsymlqSolver(n, m, S) - end +function UsymlqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + UsymlqSolver(m, n, S) end """ @@ -521,12 +545,14 @@ Type for storing the vectors required by the in-place version of USYMQR. The outer constructors - solver = UsymqrSolver(n, m, S) + solver = UsymqrSolver(m, n, S) solver = UsymqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int vₖ₋₁ :: S vₖ :: S q :: S @@ -539,30 +565,30 @@ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} p :: S warm_start :: Bool stats :: SimpleStats{T} +end - function UsymqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - Δx = S(undef, 0) - x = S(undef, m) - wₖ₋₂ = S(undef, m) - wₖ₋₁ = S(undef, m) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) - return solver - end +function UsymqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + Δx = S(undef, 0) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = UsymqrSolver{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) + return solver +end - function UsymqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - UsymqrSolver(n, m, S) - end +function UsymqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + UsymqrSolver(m, n, S) end """ @@ -570,12 +596,14 @@ Type for storing the vectors required by the in-place version of TRICG. The outer constructors - solver = TricgSolver(n, m, S) + solver = TricgSolver(m, n, S) solver = TricgSolver(A, b) may be used in order to create these vectors. 
""" mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int y :: S N⁻¹uₖ₋₁ :: S N⁻¹uₖ :: S @@ -594,36 +622,36 @@ mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function TricgSolver(n, m, S) - FC = eltype(S) - T = real(FC) - y = S(undef, m) - N⁻¹uₖ₋₁ = S(undef, m) - N⁻¹uₖ = S(undef, m) - p = S(undef, m) - gy₂ₖ₋₁ = S(undef, m) - gy₂ₖ = S(undef, m) - x = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - q = S(undef, n) - gx₂ₖ₋₁ = S(undef, n) - gx₂ₖ = S(undef, n) - Δx = S(undef, 0) - Δy = S(undef, 0) - uₖ = S(undef, 0) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) - return solver - end +function TricgSolver(m, n, S) + FC = eltype(S) + T = real(FC) + y = S(undef, n) + N⁻¹uₖ₋₁ = S(undef, n) + N⁻¹uₖ = S(undef, n) + p = S(undef, n) + gy₂ₖ₋₁ = S(undef, n) + gy₂ₖ = S(undef, n) + x = S(undef, m) + M⁻¹vₖ₋₁ = S(undef, m) + M⁻¹vₖ = S(undef, m) + q = S(undef, m) + gx₂ₖ₋₁ = S(undef, m) + gx₂ₖ = S(undef, m) + Δx = S(undef, 0) + Δy = S(undef, 0) + uₖ = S(undef, 0) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = TricgSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) + return solver +end - function TricgSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TricgSolver(n, m, S) - end +function TricgSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TricgSolver(m, n, S) end """ @@ -631,12 +659,14 @@ Type for storing the vectors required by the in-place version of TRIMR. The outer constructors - solver = TrimrSolver(n, m, S) + solver = TrimrSolver(m, n, S) solver = TrimrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int y :: S N⁻¹uₖ₋₁ :: S N⁻¹uₖ :: S @@ -659,40 +689,40 @@ mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function TrimrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - y = S(undef, m) - N⁻¹uₖ₋₁ = S(undef, m) - N⁻¹uₖ = S(undef, m) - p = S(undef, m) - gy₂ₖ₋₃ = S(undef, m) - gy₂ₖ₋₂ = S(undef, m) - gy₂ₖ₋₁ = S(undef, m) - gy₂ₖ = S(undef, m) - x = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - q = S(undef, n) - gx₂ₖ₋₃ = S(undef, n) - gx₂ₖ₋₂ = S(undef, n) - gx₂ₖ₋₁ = S(undef, n) - gx₂ₖ = S(undef, n) - Δx = S(undef, 0) - Δy = S(undef, 0) - uₖ = S(undef, 0) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) - return solver - end +function TrimrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + y = S(undef, n) + N⁻¹uₖ₋₁ = S(undef, n) + N⁻¹uₖ = S(undef, n) + p = S(undef, n) + gy₂ₖ₋₃ = S(undef, n) + gy₂ₖ₋₂ = S(undef, n) + gy₂ₖ₋₁ = S(undef, n) + gy₂ₖ = S(undef, n) + x = S(undef, m) + M⁻¹vₖ₋₁ = S(undef, m) + M⁻¹vₖ = S(undef, m) + q = S(undef, m) + gx₂ₖ₋₃ = S(undef, m) + gx₂ₖ₋₂ = S(undef, m) + gx₂ₖ₋₁ = S(undef, m) + gx₂ₖ = S(undef, m) + Δx = S(undef, 0) + Δy = S(undef, 0) + uₖ = S(undef, 0) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = TrimrSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) + return solver +end - function TrimrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TrimrSolver(n, m, S) - end +function TrimrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TrimrSolver(m, n, S) end """ @@ -700,12 +730,14 @@ Type for storing the vectors required by the in-place version of 
TRILQR. The outer constructors - solver = TrilqrSolver(n, m, S) + solver = TrilqrSolver(m, n, S) solver = TrilqrSolver(A, b) may be used in order to create these vectors. """ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S p :: S @@ -721,33 +753,33 @@ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₂ :: S warm_start :: Bool stats :: AdjointStats{T} +end - function TrilqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - d̅ = S(undef, m) - Δx = S(undef, 0) - x = S(undef, m) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - Δy = S(undef, 0) - y = S(undef, n) - wₖ₋₃ = S(undef, n) - wₖ₋₂ = S(undef, n) - stats = AdjointStats(0, false, false, T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats) - return solver - end +function TrilqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + d̅ = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + Δy = S(undef, 0) + y = S(undef, m) + wₖ₋₃ = S(undef, m) + wₖ₋₂ = S(undef, m) + stats = AdjointStats(0, false, false, T[], T[], 0.0, "unknown") + solver = TrilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats) + return solver +end - function TrilqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TrilqrSolver(n, m, S) - end +function TrilqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TrilqrSolver(m, n, S) end """ @@ -755,12 +787,14 @@ Type for storing the vectors required by the in-place version of CGS. The outer constructorss - solver = CgsSolver(n, m, S) + solver = CgsSolver(m, n, S) solver = CgsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -772,29 +806,29 @@ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S} vw :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CgsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - u = S(undef, n) - p = S(undef, n) - q = S(undef, n) - ts = S(undef, n) - yz = S(undef, 0) - vw = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats) - return solver - end +function CgsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + u = S(undef, n) + p = S(undef, n) + q = S(undef, n) + ts = S(undef, n) + yz = S(undef, 0) + vw = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CgsSolver{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats) + return solver +end - function CgsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgsSolver(n, m, S) - end +function CgsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgsSolver(m, n, S) end """ @@ -802,12 +836,14 @@ Type for storing the vectors required by the in-place version of BICGSTAB. The outer constructors - solver = BicgstabSolver(n, m, S) + solver = BicgstabSolver(m, n, S) solver = BicgstabSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -819,29 +855,29 @@ mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S} t :: S warm_start :: Bool stats :: SimpleStats{T} +end - function BicgstabSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - v = S(undef, n) - s = S(undef, n) - qd = S(undef, n) - yz = S(undef, 0) - t = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats) - return solver - end +function BicgstabSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + v = S(undef, n) + s = S(undef, n) + qd = S(undef, n) + yz = S(undef, 0) + t = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = BicgstabSolver{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats) + return solver +end - function BicgstabSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BicgstabSolver(n, m, S) - end +function BicgstabSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BicgstabSolver(m, n, S) end """ @@ -849,12 +885,14 @@ Type for storing the vectors required by the in-place version of BILQ. The outer constructors - solver = BilqSolver(n, m, S) + solver = BilqSolver(m, n, S) solver = BilqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -866,29 +904,29 @@ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S} d̅ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function BilqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - d̅ = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats) - return solver - end +function BilqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + d̅ = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = BilqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats) + return solver +end - function BilqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BilqSolver(n, m, S) - end +function BilqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BilqSolver(m, n, S) end """ @@ -896,12 +934,14 @@ Type for storing the vectors required by the in-place version of QMR. The outer constructors - solver = QmrSolver(n, m, S) + solver = QmrSolver(m, n, S) solver = QmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -914,30 +954,30 @@ mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₁ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function QmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - wₖ₋₂ = S(undef, n) - wₖ₋₁ = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats) - return solver - end +function QmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = QmrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats) + return solver +end - function QmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - QmrSolver(n, m, S) - end +function QmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + QmrSolver(m, n, S) end """ @@ -945,12 +985,14 @@ Type for storing the vectors required by the in-place version of BILQR. The outer constructors - solver = BilqrSolver(n, m, S) + solver = BilqrSolver(m, n, S) solver = BilqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -966,33 +1008,33 @@ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₂ :: S warm_start :: Bool stats :: AdjointStats{T} +end - function BilqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - Δy = S(undef, 0) - y = S(undef, n) - d̅ = S(undef, n) - wₖ₋₃ = S(undef, n) - wₖ₋₂ = S(undef, n) - stats = AdjointStats(0, false, false, T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats) - return solver - end +function BilqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + Δy = S(undef, 0) + y = S(undef, n) + d̅ = S(undef, n) + wₖ₋₃ = S(undef, n) + wₖ₋₂ = S(undef, n) + stats = AdjointStats(0, false, false, T[], T[], 0.0, "unknown") + solver = BilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats) + return solver +end - function BilqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BilqrSolver(n, m, S) - end +function BilqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BilqrSolver(m, n, S) end """ @@ -1000,12 +1042,14 @@ Type for storing the vectors required by the in-place version of CGLS. The outer constructors - solver = CglsSolver(n, m, S) + solver = CglsSolver(m, n, S) solver = CglsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S s :: S @@ -1013,26 +1057,26 @@ mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S} q :: S Mr :: S stats :: SimpleStats{T} +end - function CglsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - s = S(undef, m) - r = S(undef, n) - q = S(undef, n) - Mr = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, s, r, q, Mr, stats) - return solver - end +function CglsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + s = S(undef, n) + r = S(undef, m) + q = S(undef, m) + Mr = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CglsSolver{T,FC,S}(m, n, x, p, s, r, q, Mr, stats) + return solver +end - function CglsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CglsSolver(n, m, S) - end +function CglsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CglsSolver(m, n, S) end """ @@ -1040,12 +1084,14 @@ Type for storing the vectors required by the in-place version of CRLS. The outer constructors - solver = CrlsSolver(n, m, S) + solver = CrlsSolver(m, n, S) solver = CrlsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S Ar :: S @@ -1055,28 +1101,28 @@ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S} s :: S Ms :: S stats :: SimpleStats{T} +end - function CrlsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Ar = S(undef, m) - q = S(undef, m) - r = S(undef, n) - Ap = S(undef, n) - s = S(undef, n) - Ms = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats) - return solver - end +function CrlsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Ar = S(undef, n) + q = S(undef, n) + r = S(undef, m) + Ap = S(undef, m) + s = S(undef, m) + Ms = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CrlsSolver{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats) + return solver +end - function CrlsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrlsSolver(n, m, S) - end +function CrlsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrlsSolver(m, n, S) end """ @@ -1084,41 +1130,43 @@ Type for storing the vectors required by the in-place version of CGNE. The outer constructors - solver = CgneSolver(n, m, S) + solver = CgneSolver(m, n, S) solver = CgneSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S - Aᵀz :: S + Aᴴz :: S r :: S q :: S s :: S z :: S stats :: SimpleStats{T} +end - function CgneSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Aᵀz = S(undef, m) - r = S(undef, n) - q = S(undef, n) - s = S(undef, 0) - z = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats) - return solver - end +function CgneSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Aᴴz = S(undef, n) + r = S(undef, m) + q = S(undef, m) + s = S(undef, 0) + z = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CgneSolver{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats) + return solver +end - function CgneSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgneSolver(n, m, S) - end +function CgneSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgneSolver(m, n, S) end """ @@ -1126,41 +1174,43 @@ Type for storing the vectors required by the in-place version of CRMR. The outer constructors - solver = CrmrSolver(n, m, S) + solver = CrmrSolver(m, n, S) solver = CrmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S - Aᵀr :: S + Aᴴr :: S r :: S q :: S - Mq :: S + Nq :: S s :: S stats :: SimpleStats{T} +end - function CrmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Aᵀr = S(undef, m) - r = S(undef, n) - q = S(undef, n) - Mq = S(undef, 0) - s = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats) - return solver - end +function CrmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Aᴴr = S(undef, n) + r = S(undef, m) + q = S(undef, m) + Nq = S(undef, 0) + s = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CrmrSolver{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats) + return solver +end - function CrmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrmrSolver(n, m, S) - end +function CrmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrmrSolver(m, n, S) end """ @@ -1168,15 +1218,17 @@ Type for storing the vectors required by the in-place version of LSLQ. The outer constructors - solver = LslqSolver(n, m, S) + solver = LslqSolver(m, n, S) solver = LslqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S w̄ :: S Mu :: S Av :: S @@ -1184,29 +1236,29 @@ mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: LSLQStats{T} +end - function LslqSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - w̄ = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats) - return solver - end +function LslqSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + w̄ = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], 0.0, "unknown") + solver = LslqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w̄, Mu, Av, u, v, err_vec, stats) + return solver +end - function LslqSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LslqSolver(n, m, S, window=window) - end +function LslqSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LslqSolver(m, n, S; window) end """ @@ -1214,15 +1266,17 @@ Type for storing the vectors required by the in-place version of LSQR. The outer constructors - solver = LsqrSolver(n, m, S) + solver = LsqrSolver(m, n, S) solver = LsqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S w :: S Mu :: S Av :: S @@ -1230,29 +1284,29 @@ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: SimpleStats{T} +end - function LsqrSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - w = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats) - return solver - end +function LsqrSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + w = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = LsqrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats) + return solver +end - function LsqrSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LsqrSolver(n, m, S, window=window) - end +function LsqrSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LsqrSolver(m, n, S; window) end """ @@ -1260,15 +1314,17 @@ Type for storing the vectors required by the in-place version of LSMR. The outer constructors - solver = LsmrSolver(n, m, S) + solver = LsmrSolver(m, n, S) solver = LsmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S h :: S hbar :: S Mu :: S @@ -1277,30 +1333,30 @@ mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: LsmrStats{T} +end - function LsmrSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - h = S(undef, m) - hbar = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats) - return solver - end +function LsmrSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + h = S(undef, n) + hbar = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), 0.0, "unknown") + solver = LsmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, h, hbar, Mu, Av, u, v, err_vec, stats) + return solver +end - function LsmrSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LsmrSolver(n, m, S, window=window) - end +function LsmrSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LsmrSolver(m, n, S; window) end """ @@ -1308,15 +1364,17 @@ Type for storing the vectors required by the in-place version of LNLQ. The outer constructors - solver = LnlqSolver(n, m, S) + solver = LnlqSolver(m, n, S) solver = LnlqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S y :: S w̄ :: S Mu :: S @@ -1325,30 +1383,30 @@ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S q :: S stats :: LNLQStats{T} +end - function LnlqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - y = S(undef, n) - w̄ = S(undef, n) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - q = S(undef, 0) - stats = LNLQStats(0, false, T[], false, T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats) - return solver - end +function LnlqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + y = S(undef, m) + w̄ = S(undef, m) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + q = S(undef, 0) + stats = LNLQStats(0, false, T[], false, T[], T[], 0.0, "unknown") + solver = LnlqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w̄, Mu, Av, u, v, q, stats) + return solver +end - function LnlqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - LnlqSolver(n, m, S) - end +function LnlqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + LnlqSolver(m, n, S) end """ @@ -1356,15 +1414,17 @@ Type for storing the vectors required by the in-place version of CRAIG. The outer constructors - solver = CraigSolver(n, m, S) + solver = CraigSolver(m, n, S) solver = CraigSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S y :: S w :: S Mu :: S @@ -1373,30 +1433,30 @@ mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S w2 :: S stats :: SimpleStats{T} +end - function CraigSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - y = S(undef, n) - w = S(undef, n) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - w2 = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats) - return solver - end +function CraigSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + y = S(undef, m) + w = S(undef, m) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + w2 = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CraigSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats) + return solver +end - function CraigSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CraigSolver(n, m, S) - end +function CraigSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CraigSolver(m, n, S) end """ @@ -1404,15 +1464,17 @@ Type for storing the vectors required by the in-place version of CRAIGMR. The outer constructors - solver = CraigmrSolver(n, m, S) + solver = CraigmrSolver(m, n, S) solver = CraigmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S d :: S y :: S Mu :: S @@ -1423,32 +1485,32 @@ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S q :: S stats :: SimpleStats{T} +end - function CraigmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - d = S(undef, m) - y = S(undef, n) - Mu = S(undef, n) - w = S(undef, n) - wbar = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - q = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats) - return solver - end +function CraigmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + d = S(undef, n) + y = S(undef, m) + Mu = S(undef, m) + w = S(undef, m) + wbar = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + q = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = CraigmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats) + return solver +end - function CraigmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CraigmrSolver(n, m, S) - end +function CraigmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CraigmrSolver(m, n, S) end """ @@ -1456,13 +1518,15 @@ Type for storing the vectors required by the in-place version of GMRES. The outer constructors - solver = GmresSolver(n, m, memory, S) + solver = GmresSolver(m, n, memory, S) solver = GmresSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S w :: S @@ -1476,31 +1540,85 @@ mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} warm_start :: Bool inner_iter :: Int stats :: SimpleStats{T} +end - function GmresSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - w = S(undef, n) - p = S(undef, 0) - q = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - c = Vector{T}(undef, memory) - s = Vector{FC}(undef, memory) - z = Vector{FC}(undef, memory) - R = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) - return solver - end +function GmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + p = S(undef, 0) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + R = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = GmresSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) + return solver +end - function GmresSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - GmresSolver(n, m, memory, S) - end +function GmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + GmresSolver(m, n, memory, S) +end + +""" +Type for storing the vectors required by the in-place version of FGMRES. + +The outer constructors + + solver = FgmresSolver(m, n, memory, S) + solver = FgmresSolver(A, b, memory = 20) + +may be used in order to create these vectors. +`memory` is set to `n` if the value given is larger than `n`. 
+""" +mutable struct FgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int + Δx :: S + x :: S + w :: S + q :: S + V :: Vector{S} + Z :: Vector{S} + c :: Vector{T} + s :: Vector{FC} + z :: Vector{FC} + R :: Vector{FC} + warm_start :: Bool + inner_iter :: Int + stats :: SimpleStats{T} +end + +function FgmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + Z = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + R = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = FgmresSolver{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats) + return solver +end + +function FgmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + FgmresSolver(m, n, memory, S) end """ @@ -1508,13 +1626,15 @@ Type for storing the vectors required by the in-place version of FOM. The outer constructors - solver = FomSolver(n, m, memory, S) + solver = FomSolver(m, n, memory, S) solver = FomSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S w :: S @@ -1526,30 +1646,30 @@ mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S} U :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function FomSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - w = S(undef, n) - p = S(undef, 0) - q = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - l = Vector{FC}(undef, memory) - z = Vector{FC}(undef, memory) - U = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats) - return solver - end +function FomSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + p = S(undef, 0) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + l = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + U = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = FomSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats) + return solver +end - function FomSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - FomSolver(n, m, memory, S) - end +function FomSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + FomSolver(m, n, memory, S) end """ @@ -1557,13 +1677,15 @@ Type for storing the vectors required by the in-place version of GPMR. The outer constructors - solver = GpmrSolver(n, m, memory, S) + solver = GpmrSolver(m, n, memory, S) solver = GpmrSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n + m` if the value given is larger than `n + m`. 
""" mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int wA :: S wB :: S dA :: S @@ -1582,45 +1704,38 @@ mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} R :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} - - function GpmrSolver(n, m, memory, S) - memory = min(n + m, memory) - FC = eltype(S) - T = real(FC) - wA = S(undef, 0) - wB = S(undef, 0) - dA = S(undef, n) - dB = S(undef, m) - Δx = S(undef, 0) - Δy = S(undef, 0) - x = S(undef, n) - y = S(undef, m) - q = S(undef, 0) - p = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - U = [S(undef, m) for i = 1 : memory] - gs = Vector{FC}(undef, 4 * memory) - gc = Vector{T}(undef, 4 * memory) - zt = Vector{FC}(undef, 2 * memory) - R = Vector{FC}(undef, memory * (2memory + 1)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) - return solver - end - - function GpmrSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - GpmrSolver(n, m, memory, S) - end end -""" - solve!(solver, args...; kwargs...) +function GpmrSolver(m, n, memory, S) + memory = min(n + m, memory) + FC = eltype(S) + T = real(FC) + wA = S(undef, 0) + wB = S(undef, 0) + dA = S(undef, m) + dB = S(undef, n) + Δx = S(undef, 0) + Δy = S(undef, 0) + x = S(undef, m) + y = S(undef, n) + q = S(undef, 0) + p = S(undef, 0) + V = S[S(undef, m) for i = 1 : memory] + U = S[S(undef, n) for i = 1 : memory] + gs = Vector{FC}(undef, 4 * memory) + gc = Vector{T}(undef, 4 * memory) + zt = Vector{FC}(undef, 2 * memory) + R = Vector{FC}(undef, memory * (2 * memory + 1)) + stats = SimpleStats(0, false, false, T[], T[], T[], 0.0, "unknown") + solver = GpmrSolver{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) + return solver +end -Use the in-place Krylov method associated to `solver`. -""" -function solve! 
end +function GpmrSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + GpmrSolver(m, n, memory, S) +end """ solution(solver) @@ -1674,59 +1789,64 @@ Return the number of operator-vector products with `A'` performed by the Krylov function Atprod end for (KS, fun, nsol, nA, nAt, warm_start) in [ - (LsmrSolver , :lsmr! , 1, 1, 1, false) - (CgsSolver , :cgs! , 1, 2, 0, true ) - (UsymlqSolver , :usymlq! , 1, 1, 1, true ) - (LnlqSolver , :lnlq! , 2, 1, 1, false) - (BicgstabSolver , :bicgstab! , 1, 2, 0, true ) - (CrlsSolver , :crls! , 1, 1, 1, false) - (LsqrSolver , :lsqr! , 1, 1, 1, false) - (MinresSolver , :minres! , 1, 1, 0, true ) - (CgneSolver , :cgne! , 1, 1, 1, false) - (DqgmresSolver , :dqgmres! , 1, 1, 0, true ) - (SymmlqSolver , :symmlq! , 1, 1, 0, true ) - (TrimrSolver , :trimr! , 2, 1, 1, true ) - (UsymqrSolver , :usymqr! , 1, 1, 1, true ) - (BilqrSolver , :bilqr! , 2, 1, 1, true ) - (CrSolver , :cr! , 1, 1, 0, true ) - (CraigmrSolver , :craigmr! , 2, 1, 1, false) - (TricgSolver , :tricg! , 2, 1, 1, true ) - (CraigSolver , :craig! , 2, 1, 1, false) - (DiomSolver , :diom! , 1, 1, 0, true ) - (LslqSolver , :lslq! , 1, 1, 1, false) - (TrilqrSolver , :trilqr! , 2, 1, 1, true ) - (CrmrSolver , :crmr! , 1, 1, 1, false) - (CgSolver , :cg! , 1, 1, 0, true ) - (CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false) - (CglsSolver , :cgls! , 1, 1, 1, false) - (CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true ) - (BilqSolver , :bilq! , 1, 1, 1, true ) - (MinresQlpSolver , :minres_qlp! , 1, 1, 0, true ) - (QmrSolver , :qmr! , 1, 1, 1, true ) - (GmresSolver , :gmres! , 1, 1, 0, true ) - (FomSolver , :fom! , 1, 1, 0, true ) - (GpmrSolver , :gpmr! , 2, 1, 0, true ) + (:LsmrSolver , :lsmr! , 1, 1, 1, false) + (:CgsSolver , :cgs! , 1, 2, 0, true ) + (:UsymlqSolver , :usymlq! , 1, 1, 1, true ) + (:LnlqSolver , :lnlq! , 2, 1, 1, false) + (:BicgstabSolver , :bicgstab! , 1, 2, 0, true ) + (:CrlsSolver , :crls! , 1, 1, 1, false) + (:LsqrSolver , :lsqr! 
, 1, 1, 1, false) + (:MinresSolver , :minres! , 1, 1, 0, true ) + (:CgneSolver , :cgne! , 1, 1, 1, false) + (:DqgmresSolver , :dqgmres! , 1, 1, 0, true ) + (:SymmlqSolver , :symmlq! , 1, 1, 0, true ) + (:TrimrSolver , :trimr! , 2, 1, 1, true ) + (:UsymqrSolver , :usymqr! , 1, 1, 1, true ) + (:BilqrSolver , :bilqr! , 2, 1, 1, true ) + (:CrSolver , :cr! , 1, 1, 0, true ) + (:CraigmrSolver , :craigmr! , 2, 1, 1, false) + (:TricgSolver , :tricg! , 2, 1, 1, true ) + (:CraigSolver , :craig! , 2, 1, 1, false) + (:DiomSolver , :diom! , 1, 1, 0, true ) + (:LslqSolver , :lslq! , 1, 1, 1, false) + (:TrilqrSolver , :trilqr! , 2, 1, 1, true ) + (:CrmrSolver , :crmr! , 1, 1, 1, false) + (:CgSolver , :cg! , 1, 1, 0, true ) + (:CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false) + (:CglsSolver , :cgls! , 1, 1, 1, false) + (:CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true ) + (:BilqSolver , :bilq! , 1, 1, 1, true ) + (:MinresQlpSolver , :minres_qlp! , 1, 1, 0, true ) + (:QmrSolver , :qmr! , 1, 1, 1, true ) + (:GmresSolver , :gmres! , 1, 1, 0, true ) + (:FgmresSolver , :fgmres! , 1, 1, 0, true ) + (:FomSolver , :fom! , 1, 1, 0, true ) + (:GpmrSolver , :gpmr! , 2, 1, 0, true ) ] @eval begin - @inline solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...) 
- @inline statistics(solver :: $KS) = solver.stats - @inline niterations(solver :: $KS) = solver.stats.niter - @inline Aprod(solver :: $KS) = $nA * solver.stats.niter - @inline Atprod(solver :: $KS) = $nAt * solver.stats.niter + size(solver :: $KS) = solver.m, solver.n + statistics(solver :: $KS) = solver.stats + niterations(solver :: $KS) = solver.stats.niter + Aprod(solver :: $KS) = $nA * solver.stats.niter + Atprod(solver :: $KS) = $nAt * solver.stats.niter if $KS == GpmrSolver - @inline Bprod(solver :: $KS) = solver.stats.niter + Bprod(solver :: $KS) = solver.stats.niter + end + nsolution(solver :: $KS) = $nsol + if $nsol == 1 + solution(solver :: $KS) = solver.x + solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.") + end + if $nsol == 2 + solution(solver :: $KS) = solver.x, solver.y + solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.") end - @inline nsolution(solver :: $KS) = $nsol - ($nsol == 1) && @inline solution(solver :: $KS) = solver.x - ($nsol == 2) && @inline solution(solver :: $KS) = solver.x, solver.y - ($nsol == 1) && @inline solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.") - ($nsol == 2) && @inline solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? 
solution(solver)[p] : error("solution(solver) has only two outputs.") if $KS ∈ (BilqrSolver, TrilqrSolver) - @inline issolved_primal(solver :: $KS) = solver.stats.solved_primal - @inline issolved_dual(solver :: $KS) = solver.stats.solved_dual - @inline issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver) + issolved_primal(solver :: $KS) = solver.stats.solved_primal + issolved_dual(solver :: $KS) = solver.stats.solved_dual + issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver) else - @inline issolved(solver :: $KS) = solver.stats.solved + issolved(solver :: $KS) = solver.stats.solved end if $warm_start if $KS in (BilqrSolver, TrilqrSolver, TricgSolver, TrimrSolver, GpmrSolver) @@ -1758,45 +1878,70 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [ end end +function ksizeof(attribute) + if isa(attribute, Vector{<:AbstractVector}) && !isempty(attribute) + # A vector of vectors is a vector of pointers in Julia. + # All vectors inside a vector have the same size in Krylov.jl + size_attribute = sizeof(attribute) + length(attribute) * ksizeof(attribute[1]) + else + size_attribute = sizeof(attribute) + end + return size_attribute +end + +function sizeof(stats_solver :: Union{KrylovStats, KrylovSolver}) + type = typeof(stats_solver) + nfields = fieldcount(type) + storage = 0 + for i = 1:nfields + field_i = getfield(stats_solver, i) + size_i = ksizeof(field_i) + storage += size_i + end + return storage +end + """ show(io, solver; show_stats=true) Statistics of `solver` are displayed if `show_stats` is set to true. 
""" -function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} +function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} workspace = typeof(solver) - name_solver = workspace.name.wrapper - l1 = max(length(string(name_solver)), 10) # length("warm_start") = 10 - l2 = length(string(S)) + 8 # length("Vector{}") = 8 + name_solver = string(workspace.name.name) + name_stats = string(typeof(solver.stats).name.name) + nbytes = sizeof(solver) + storage = format_bytes(nbytes) architecture = S <: Vector ? "CPU" : "GPU" - format = Printf.Format("│%$(l1)s│%$(l2)s│%18s│\n") - format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%18s│\n") - @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^18) - Printf.format(io, format, name_solver, "Precision: $FC", "Architecture: $architecture") - @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18) + l1 = max(length(name_solver), length(string(FC)) + 11) # length("Precision: ") = 11 + nchar = workspace <: Union{CgLanczosShiftSolver, FomSolver, DiomSolver, DqgmresSolver, GmresSolver, FgmresSolver, GpmrSolver} ? 
8 : 0 # length("Vector{}") = 8 + l2 = max(ndigits(solver.m) + 7, length(architecture) + 14, length(string(S)) + nchar) # length("nrows: ") = 7 and length("Architecture: ") = 14 + l2 = max(l2, length(name_stats) + 2 + length(string(T))) # length("{}") = 2 + l3 = max(ndigits(solver.n) + 7, length(storage) + 9) # length("Storage: ") = 9 and length("cols: ") = 7 + format = Printf.Format("│%$(l1)s│%$(l2)s│%$(l3)s│\n") + format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%$(l3)s│\n") + @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^l3) + Printf.format(io, format, "$(name_solver)", "nrows: $(solver.m)", "ncols: $(solver.n)") + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) + Printf.format(io, format, "Precision: $FC", "Architecture: $architecture","Storage: $storage") + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) Printf.format(io, format, "Attribute", "Type", "Size") - @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18) - for i=1:fieldcount(workspace)-1 # show stats seperately - type_i = fieldtype(workspace, i) + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) + for i=1:fieldcount(workspace) name_i = fieldname(workspace, i) - len = if type_i <: AbstractVector - field_i = getfield(solver, name_i) - ni = length(field_i) - if eltype(type_i) <: AbstractVector - "$(ni) x $(length(field_i[1]))" - else - length(field_i) - end - else - 0 - end - if (name_i in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV") - Printf.format(io, format2, string(name_i), type_i, len) + type_i = fieldtype(workspace, i) + field_i = getfield(solver, name_i) + size_i = ksizeof(field_i) + if (name_i::Symbol in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV") + (size_i ≠ 0) && Printf.format(io, format2, string(name_i), type_i, format_bytes(size_i)) else - Printf.format(io, format, string(name_i), type_i, len) + (size_i ≠ 0) && Printf.format(io, format, string(name_i), type_i, format_bytes(size_i)) end end - @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^18) - show_stats && show(io, solver.stats) + 
@printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^l3) + if show_stats + @printf(io, "\n") + show(io, solver.stats) + end return nothing end diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index a662fa0a0..ba217a597 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -1,3 +1,6 @@ +export KrylovStats, SimpleStats, LsmrStats, LanczosStats, LanczosShiftStats, +SymmlqStats, AdjointStats, LNLQStats, LSLQStats + "Abstract type for statistics returned by a solver" abstract type KrylovStats{T} end @@ -9,6 +12,7 @@ Type for statistics returned by the majority of Krylov solvers, the attributes a - residuals - Aresiduals - Acond +- timer - status """ mutable struct SimpleStats{T} <: KrylovStats{T} @@ -18,9 +22,16 @@ mutable struct SimpleStats{T} <: KrylovStats{T} residuals :: Vector{T} Aresiduals :: Vector{T} Acond :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: SimpleStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) + empty!(stats.Acond) +end + """ Type for statistics returned by LSMR. The attributes are: - niter @@ -31,6 +42,7 @@ Type for statistics returned by LSMR. 
The attributes are: - Acond - Anorm - xNorm +- timer - status """ mutable struct LsmrStats{T} <: KrylovStats{T} @@ -44,9 +56,15 @@ mutable struct LsmrStats{T} <: KrylovStats{T} Acond :: T Anorm :: T xNorm :: T + timer :: Float64 status :: String end +function reset!(stats :: LsmrStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) +end + """ Type for statistics returned by CG-LANCZOS, the attributes are: - niter @@ -55,6 +73,7 @@ Type for statistics returned by CG-LANCZOS, the attributes are: - indefinite - Anorm - Acond +- timer - status """ mutable struct LanczosStats{T} <: KrylovStats{T} @@ -64,9 +83,14 @@ mutable struct LanczosStats{T} <: KrylovStats{T} indefinite :: Bool Anorm :: T Acond :: T + timer :: Float64 status :: String end +function reset!(stats :: LanczosStats) + empty!(stats.residuals) +end + """ Type for statistics returned by CG-LANCZOS with shifts, the attributes are: - niter @@ -75,6 +99,7 @@ Type for statistics returned by CG-LANCZOS with shifts, the attributes are: - indefinite - Anorm - Acond +- timer - status """ mutable struct LanczosShiftStats{T} <: KrylovStats{T} @@ -84,6 +109,7 @@ mutable struct LanczosShiftStats{T} <: KrylovStats{T} indefinite :: BitVector Anorm :: T Acond :: T + timer :: Float64 status :: String end @@ -103,6 +129,7 @@ Type for statistics returned by SYMMLQ, the attributes are: - errorscg - Anorm - Acond +- timer - status """ mutable struct SymmlqStats{T} <: KrylovStats{T} @@ -114,9 +141,17 @@ mutable struct SymmlqStats{T} <: KrylovStats{T} errorscg :: Vector{Union{T, Missing}} Anorm :: T Acond :: T + timer :: Float64 status :: String end +function reset!(stats :: SymmlqStats) + empty!(stats.residuals) + empty!(stats.residualscg) + empty!(stats.errors) + empty!(stats.errorscg) +end + """ Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the attributes are: - niter @@ -124,6 +159,7 @@ Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the at - solved_dual - 
residuals_primal - residuals_dual +- timer - status """ mutable struct AdjointStats{T} <: KrylovStats{T} @@ -132,9 +168,15 @@ mutable struct AdjointStats{T} <: KrylovStats{T} solved_dual :: Bool residuals_primal :: Vector{T} residuals_dual :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: AdjointStats) + empty!(stats.residuals_primal) + empty!(stats.residuals_dual) +end + """ Type for statistics returned by the LNLQ method, the attributes are: - niter @@ -143,6 +185,7 @@ Type for statistics returned by the LNLQ method, the attributes are: - error_with_bnd - error_bnd_x - error_bnd_y +- timer - status """ mutable struct LNLQStats{T} <: KrylovStats{T} @@ -152,9 +195,16 @@ mutable struct LNLQStats{T} <: KrylovStats{T} error_with_bnd :: Bool error_bnd_x :: Vector{T} error_bnd_y :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: LNLQStats) + empty!(stats.residuals) + empty!(stats.error_bnd_x) + empty!(stats.error_bnd_y) +end + """ Type for statistics returned by the LSLQ method, the attributes are: - niter @@ -166,6 +216,7 @@ Type for statistics returned by the LSLQ method, the attributes are: - error_with_bnd - err_ubnds_lq - err_ubnds_cg +- timer - status """ mutable struct LSLQStats{T} <: KrylovStats{T} @@ -178,9 +229,18 @@ mutable struct LSLQStats{T} <: KrylovStats{T} error_with_bnd :: Bool err_ubnds_lq :: Vector{T} err_ubnds_cg :: Vector{T} + timer :: Float64 status :: String end +function reset!(stats :: LSLQStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) + empty!(stats.err_lbnds) + empty!(stats.err_ubnds_lq) + empty!(stats.err_ubnds_cg) +end + import Base.show special_fields = Dict( @@ -192,45 +252,28 @@ special_fields = Dict( :err_ubnds_cg => "error bound CG", ) -for f in ["Simple", "Lsmr", "Adjoint", "LNLQ", "LSLQ", "Lanczos", "Symmlq"] - T = Meta.parse("Krylov." 
* f * "Stats{S}") - - @eval function empty_field!(stats :: $T, i, ::Type{Vector{Si}}) where {S, Si} - statfield = getfield(stats, i) - empty!(statfield) - end - @eval empty_field!(stats :: $T, i, type) where S = stats - - @eval function reset!(stats :: $T) where S - nfield = length($T.types) - for i = 1 : nfield - type = fieldtype($T, i) - empty_field!(stats, i, type) +function show(io :: IO, stats :: KrylovStats) + kst = typeof(stats) + s = string(kst.name.name) * "\n" + nfield = fieldcount(kst) + for i = 1 : nfield + field = fieldname(kst, i) + field_name = if field ∈ keys(special_fields) + special_fields[field] + else + replace(string(field), "_" => " ") end - end -end - -for f in ["Simple", "Lsmr", "Lanczos", "LanczosShift", "Symmlq", "Adjoint", "LNLQ", "LSLQ"] - T = Meta.parse("Krylov." * f * "Stats{S}") - - @eval function show(io :: IO, stats :: $T) where S - s = $f * " stats\n" - nfield = length($T.types) - for i = 1 : nfield - field = fieldname($T, i) - field_name = if field ∈ keys(special_fields) - special_fields[field] - else - replace(string(field), "_" => " ") - end - s *= " " * field_name * ":" - statfield = getfield(stats, field) - if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat} - s *= @sprintf " %s\n" vec2str(statfield) - else - s *= @sprintf " %s\n" statfield - end + s *= " " * field_name * ":" + statfield = getfield(stats, field) + if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat} + s *= @sprintf " %s\n" vec2str(statfield) + elseif field_name == "timer" + (statfield < 1e-3) && (s *= @sprintf " %.2fμs\n" 1e6*statfield) + (1e-3 ≤ statfield < 1.00) && (s *= @sprintf " %.2fms\n" 1e3*statfield) + (statfield ≥ 1.00) && (s *= @sprintf " %.2fs\n" statfield) + else + s *= @sprintf " %s\n" statfield end - print(io, s) end + print(io, s) end diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl index 6f0c1c382..fb554395e 100644 --- a/src/krylov_utils.jl +++ b/src/krylov_utils.jl @@ 
-1,3 +1,8 @@ +export kstdout + +"Default I/O stream for all Krylov methods." +const kstdout = Core.stdout + """ FloatOrComplex{T} Union type of `T` and `Complex{T}` where T is an `AbstractFloat`. @@ -92,8 +97,8 @@ function sym_givens(a :: Complex{T}, b :: Complex{T}) where T <: AbstractFloat return (c, s, ρ) end -@inline sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b)) -@inline sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b) +sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b)) +sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b) """ roots = roots_quadratic(q₂, q₁, q₀; nitref) @@ -111,79 +116,97 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T; # Case where q(x) is linear. if q₂ == zero(T) if q₁ == zero(T) - root = [zero(T)] - q₀ == zero(T) || (root = T[]) + q₀ == zero(T) || error("The quadratic `q` doesn't have real roots.") + root = zero(T) else - root = [-q₀ / q₁] + root = -q₀ / q₁ end - return root + return (root, root) end # Case where q(x) is indeed quadratic. rhs = √eps(T) * q₁ * q₁ if abs(q₀ * q₂) > rhs ρ = q₁ * q₁ - 4 * q₂ * q₀ - ρ < 0 && return T[] + ρ < 0 && return error("The quadratic `q` doesn't have real roots.") d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2 - roots = [d / q₂, q₀ / d] + root1 = d / q₂ + root2 = q₀ / d else # Ill-conditioned quadratic. - roots = [-q₁ / q₂, zero(T)] + root1 = -q₁ / q₂ + root2 = zero(T) end # Perform a few Newton iterations to improve accuracy. 
- for k = 1 : 2 - root = roots[k] - for it = 1 : nitref - q = (q₂ * root + q₁) * root + q₀ - dq = 2 * q₂ * root + q₁ - dq == zero(T) && continue - root = root - q / dq - end - roots[k] = root + for it = 1 : nitref + q = (q₂ * root1 + q₁) * root1 + q₀ + dq = 2 * q₂ * root1 + q₁ + dq == zero(T) && continue + root1 = root1 - q / dq end - return roots -end + for it = 1 : nitref + q = (q₂ * root2 + q₁) * root2 + q₀ + dq = 2 * q₂ * root2 + q₁ + dq == zero(T) && continue + root2 = root2 - q / dq + end + return (root1, root2) +end """ - roots = to_boundary(x, d, radius; flip, xNorm2, dNorm2) - -Given a trust-region radius `radius`, a vector `x` lying inside the -trust-region and a direction `d`, return `σ1` and `σ2` such that - - ‖x + σi d‖ = radius, i = 1, 2 + s = vec2str(x; ndisp) -in the Euclidean norm. If known, ‖x‖² may be supplied in `xNorm2`. +Display an array in the form -If `flip` is set to `true`, `σ1` and `σ2` are computed such that + [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ] - ‖x - σi d‖ = radius, i = 1, 2. +with (ndisp - 1)/2 elements on each side. """ -function to_boundary(x :: Vector{T}, d :: Vector{T}, - radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number - radius > 0 || error("radius must be positive") - - # ‖d‖² σ² + 2 xᵀd σ + (‖x‖² - radius²). 
- xd = dot(x, d) - flip && (xd = -xd) - dNorm2 == zero(T) && (dNorm2 = dot(d, d)) - dNorm2 == zero(T) && error("zero direction") - xNorm2 == zero(T) && (xNorm2 = dot(x, x)) - (xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius)) - roots = roots_quadratic(dNorm2, 2 * xd, xNorm2 - radius * radius) - return roots # `σ1` and `σ2` +function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing} + n = length(x) + if n ≤ ndisp + ndisp = n + nside = n + else + nside = max(1, div(ndisp - 1, 2)) + end + s = "[" + i = 1 + while i ≤ nside + if x[i] !== missing + s *= @sprintf("%8.1e ", x[i]) + else + s *= " ✗✗✗✗ " + end + i += 1 + end + if i ≤ div(n, 2) + s *= "... " + end + i = max(i, n - nside + 1) + while i ≤ n + if x[i] !== missing + s *= @sprintf("%8.1e ", x[i]) + else + s *= " ✗✗✗✗ " + end + i += 1 + end + s *= "]" + return s end """ S = ktypeof(v) -Return a dense storage type `S` based on the type of `v`. +Return the most relevant storage type `S` based on the type of `v`. """ function ktypeof end -function ktypeof(v::S) where S <: DenseVector - return S +function ktypeof(v::S) where S <: AbstractVector + return S end function ktypeof(v::S) where S <: SparseVector @@ -195,90 +218,128 @@ function ktypeof(v::S) where S <: AbstractSparseVector return S.types[2] # return `CuVector` for a `CuSparseVector` end -function ktypeof(v::S) where S <: AbstractVector - T = eltype(S) - return Vector{T} # BlockArrays, FillArrays, etc... +function ktypeof(v::S) where S <: SubArray + vp = v.parent + if isa(vp, DenseMatrix) + M = typeof(vp) + return matrix_to_vector(M) # view of a row or a column of a matrix + else + return ktypeof(vp) # view of a vector + end end -function ktypeof(v::S) where S <: SubArray - return ktypeof(v.parent) +""" + M = vector_to_matrix(S) + +Return the dense matrix storage type `M` related to the dense vector storage type `S`. 
+""" +function vector_to_matrix(::Type{S}) where S <: DenseVector + T = hasproperty(S, :body) ? S.body : S + par = T.parameters + npar = length(par) + (2 ≤ npar ≤ 3) || error("Type $S is not supported.") + if npar == 2 + M = T.name.wrapper{par[1], 2} + else + M = T.name.wrapper{par[1], 2, par[3]} + end + return M +end + +""" + S = matrix_to_vector(M) + +Return the dense vector storage type `S` related to the dense matrix storage type `M`. +""" +function matrix_to_vector(::Type{M}) where M <: DenseMatrix + T = hasproperty(M, :body) ? M.body : M + par = T.parameters + npar = length(par) + (2 ≤ npar ≤ 3) || error("Type $M is not supported.") + if npar == 2 + S = T.name.wrapper{par[1], 1} + else + S = T.name.wrapper{par[1], 1, par[3]} + end + return S end """ v = kzeros(S, n) -Create an AbstractVector of storage type `S` of length `n` only composed of zero. +Create a vector of storage type `S` of length `n` only composed of zero. """ -@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) +kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) """ v = kones(S, n) -Create an AbstractVector of storage type `S` of length `n` only composed of one. +Create a vector of storage type `S` of length `n` only composed of one. """ -@inline kones(S, n) = fill!(S(undef, n), one(eltype(S))) +kones(S, n) = fill!(S(undef, n), one(eltype(S))) -@inline allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n)) +allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)::S) && (solver.:($v)::S = S(undef, n)) -@inline kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) +kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) -@inline mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? 
ldiv!(y, P, x) : mul!(y, P, x) +ktimer(start_time::UInt64) = (time_ns() - start_time) / 1e9 -@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) -@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) -@inline krylov_dot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = dot(x, y) +mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x) -@inline krylov_dotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = krylov_dot(n, x, dx, y, dy) -@inline krylov_dotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(krylov_dot(n, x, dx, y, dy)) +kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) +kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) +kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y) -@inline krylov_norm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) -@inline krylov_norm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: Number = norm(x) +kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy) +kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy)) -@inline krylov_scal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T 
<: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) -@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: Number = (x .*= s) -@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = krylov_scal!(n, Complex{T}(s), x, dx) +knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) +knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x) -@inline krylov_axpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) -@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpy!(s, x, y) -@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpy!(n, Complex{T}(s), x, dx, y, dy) +kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) +kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s) +kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx) -@inline krylov_axpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpby!(s, x, t, y) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, t, y, dy) -@inline 
krylov_axpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, s, x, dx, Complex{T}(t), y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) +kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy) -@inline krylov_copy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) -@inline krylov_copy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = copyto!(y, x) +kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy) +kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, 
Complex{T}(t), y, dy) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) -# the macros are just for readability, so we don't have to write the increments (always equal to 1) +kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) +kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x) +# the macros are just for readability, so we don't have to write the increments (always equal to 1) macro kdot(n, x, y) - return esc(:(krylov_dot($n, $x, 1, $y, 1))) + return esc(:(Krylov.kdot($n, $x, 1, $y, 1))) end macro kdotr(n, x, y) - return esc(:(krylov_dotr($n, $x, 1, $y, 1))) + return esc(:(Krylov.kdotr($n, $x, 1, $y, 1))) end macro knrm2(n, x) - return esc(:(krylov_norm2($n, $x, 1))) + return esc(:(Krylov.knrm2($n, $x, 1))) end macro kscal!(n, s, x) - return esc(:(krylov_scal!($n, $s, $x, 1))) + return esc(:(Krylov.kscal!($n, $s, $x, 1))) end macro kaxpy!(n, s, x, y) - return esc(:(krylov_axpy!($n, $s, $x, 1, $y, 1))) + return esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1))) end macro kaxpby!(n, s, x, t, y) - return esc(:(krylov_axpby!($n, $s, $x, 1, $t, $y, 1))) + return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1))) end macro kcopy!(n, x, y) - return esc(:(krylov_copy!($n, $x, 1, $y, 1))) + return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1))) end macro kswap(x, y) @@ -294,44 +355,48 @@ macro kref!(n, x, y, c, s) end """ - s = vec2str(x; ndisp) + roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2) -Display an array in the form +Given a trust-region radius `radius`, a vector `x` lying inside the +trust-region and a direction `d`, return `σ1` and `σ2` such that - [ -3.0e-01 -5.1e-01 1.9e-01 ... 
-2.3e-01 -4.4e-01 2.4e-01 ] + ‖x + σi d‖ = radius, i = 1, 2 -with (ndisp - 1)/2 elements on each side. +in the Euclidean norm. +`n` is the length of vectors `x` and `d`. +If known, ‖x‖² and ‖d‖² may be supplied with `xNorm2` and `dNorm2`. + +If `flip` is set to `true`, `σ1` and `σ2` are computed such that + + ‖x - σi d‖ = radius, i = 1, 2. """ -function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing} - n = length(x) - if n ≤ ndisp - ndisp = n - nside = n - else - nside = max(1, div(ndisp - 1, 2)) - end - s = "[" - i = 1 - while i ≤ nside - if x[i] !== missing - s *= @sprintf("%8.1e ", x[i]) - else - s *= " ✗✗✗✗ " - end - i += 1 - end - if i ≤ div(n, 2) - s *= "... " - end - i = max(i, n - nside + 1) - while i ≤ n - if x[i] !== missing - s *= @sprintf("%8.1e ", x[i]) - else - s *= " ✗✗✗✗ " - end - i += 1 - end - s *= "]" - return s +function to_boundary(n :: Int, x :: AbstractVector{FC}, d :: AbstractVector{FC}, radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + radius > 0 || error("radius must be positive") + + # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²). + rxd = @kdotr(n, x, d) + flip && (rxd = -rxd) + dNorm2 == zero(T) && (dNorm2 = @kdotr(n, d, d)) + dNorm2 == zero(T) && error("zero direction") + xNorm2 == zero(T) && (xNorm2 = @kdotr(n, x, x)) + radius2 = radius * radius + (xNorm2 ≤ radius2) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius2)) + + # q₂ = ‖d‖², q₁ = xᴴd + dᴴx, q₀ = ‖x‖² - Δ² + # ‖x‖² ≤ Δ² ⟹ (q₁)² - 4 * q₂ * q₀ ≥ 0 + roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius2) + return roots # `σ1` and `σ2` +end + +""" + arguments = extract_parameters(ex::Expr) + +Extract the arguments of an expression that is keyword parameter tuple. +Implementation suggested by Mitchell J. O'Sullivan (@mosullivan93). 
+""" +function extract_parameters(ex::Expr) + Meta.isexpr(ex, :tuple, 1) && + Meta.isexpr((@inbounds p = ex.args[1]), :parameters) && + all(Base.Docs.validcall, p.args) || throw(ArgumentError("Given expression is not a kw parameter tuple [e.g. :(; x)]: $ex")) + return p.args end diff --git a/src/lnlq.jl b/src/lnlq.jl index a1f890de2..f59f5daf4 100644 --- a/src/lnlq.jl +++ b/src/lnlq.jl @@ -9,9 +9,9 @@ # and is equivalent to applying the SYMMLQ method # to the linear system # -# AAᵀy = b with x = Aᵀy and can be reformulated as +# AAᴴy = b with x = Aᴴy and can be reformulated as # -# [ -I Aᵀ ][ x ] = [ 0 ] +# [ -I Aᴴ ][ x ] = [ 0 ] # [ A ][ y ] [ b ]. # # This method is based on the Golub-Kahan bidiagonalization process and is described in @@ -26,10 +26,14 @@ export lnlq, lnlq! """ (x, y, stats) = lnlq(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), σ::T=zero(T), - atol::T=√eps(T), rtol::T=√eps(T), etolx::T=√eps(T), etoly::T=√eps(T), itmax::Int=0, - transfer_to_craig::Bool=true, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + transfer_to_craig::Bool=true, + sqd::Bool=false, λ::T=zero(T), + σ::T=zero(T), utolx::T=√eps(T), + utoly::T=√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -38,17 +42,17 @@ Find the least-norm solution of the consistent linear system Ax + λ²y = b -using the LNLQ method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LNLQ method, where λ ≥ 0 is a regularization parameter. For a system in the form Ax = b, LNLQ method is equivalent to applying -SYMMLQ to AAᵀy = b and recovering x = Aᵀy but is more stable. +SYMMLQ to AAᴴy = b and recovering x = Aᴴy but is more stable. 
Note that y are the Lagrange multipliers of the least-norm problem minimize ‖x‖ s.t. Ax = b. If `λ > 0`, LNLQ solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -59,12 +63,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, LNLQ solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -75,12 +79,40 @@ In this case, `M` can still be specified and indicates the weighted norm in whic In this implementation, both the x and y-parts of the solution are returned. -`etolx` and `etoly` are tolerances on the upper bound of the distance to the solution ‖x-xₛ‖ and ‖y-yₛ‖, respectively. +`utolx` and `utoly` are tolerances on the upper bound of the distance to the solution ‖x-x*‖ and ‖y-y*‖, respectively. The bound is valid if λ>0 or σ>0 where σ should be strictly smaller than the smallest positive singular value. For instance σ:=(1-1e-7)σₘᵢₙ . -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `transfer_to_craig`: transfer from the LNLQ point to the CRAIG point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`; +* `utolx`: tolerance on the upper bound on the distance to the solution `‖x-x*‖`; +* `utoly`: tolerance on the upper bound on the distance to the solution `‖y-y*‖`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`LNLQStats`](@ref) structure. #### Reference @@ -88,12 +120,6 @@ and `false` otherwise. """ function lnlq end -function lnlq(A, b :: AbstractVector{FC}; kwargs...) 
where FC <: FloatOrComplex - solver = LnlqSolver(A, b) - lnlq!(solver, A, b; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = lnlq!(solver::LnlqSolver, A, b; kwargs...) @@ -103,389 +129,432 @@ See [`LnlqSolver`](@ref) for more details about the `solver`. """ function lnlq! end -function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), σ :: T=zero(T), - atol :: T=√eps(T), rtol :: T=√eps(T), etolx :: T=√eps(T), etoly :: T=√eps(T), itmax :: Int=0, - transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LNLQ: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - allocate_if(λ > 0, solver, :q, S, n) - x, Nv, Aᵀu, y, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w̄ - Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats - rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - # Set up parameter σₑₛₜ for the error estimate on x and y - σₑₛₜ = √(σ^2 + λ^2) - complex_error_bnd = false - - # Initial solutions (x₀, y₀) and residual norm ‖r₀‖. 
- x .= zero(FC) - y .= zero(FC) - - bNorm = @knrm2(m, b) - if bNorm == 0 - stats.niter = 0 - stats.solved = true - stats.error_with_bnd = false - history && push!(rNorms, bNorm) - stats.status = "x = 0 is a zero-residual solution" - return solver +def_args_lnlq = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lnlq = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_craig::Bool = true), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; σ::T = zero(T) ), + :(; utolx::T = √eps(T) ), + :(; utoly::T = √eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_lnlq = mapreduce(extract_parameters, vcat, def_kwargs_lnlq) + +args_lnlq = (:A, :b) +kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lnlq($(def_args_lnlq...); $(def_kwargs_lnlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LnlqSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lnlq!(solver, $(args_lnlq...); $(kwargs_lnlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - history && push!(rNorms, bNorm) - ε = atol + rtol * bNorm - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - - # Update iteration index - iter = iter + 1 - - # Initialize generalized Golub-Kahan bidiagonalization. - # β₁Mu₁ = b. 
- Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁ - βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M - if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, u) - MisI || @kscal!(m, one(FC) / βₖ, Mu) - end + function lnlq!(solver :: LnlqSolver{T,FC,S}, $(def_args_lnlq...); $(def_kwargs_lnlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LNLQ: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + allocate_if(λ > 0, solver, :q, S, n) + x, Nv, Aᴴu, y, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w̄ + Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats + rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + # Set up parameter σₑₛₜ for the error estimate on x and y + σₑₛₜ = √(σ^2 + λ^2) + complex_error_bnd = false + + # Initial solutions (x₀, y₀) and residual norm ‖r₀‖. 
+ x .= zero(FC) + y .= zero(FC) + + bNorm = @knrm2(m, b) + if bNorm == 0 + stats.niter = 0 + stats.solved = true + stats.error_with_bnd = false + history && push!(rNorms, bNorm) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver + end - # α₁Nv₁ = Aᵀu₁. - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁ - αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N - if αₖ ≠ 0 - @kscal!(n, one(FC) / αₖ, v) - NisI || @kscal!(n, one(FC) / αₖ, Nv) - end + history && push!(rNorms, bNorm) + ε = atol + rtol * bNorm - w̄ .= u # Direction w̄₁ - cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᵀ - sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᵀ - ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ - ηₖ = zero(FC) # Coefficient of M̅ₖ - - # Variable used for the regularization. - λₖ = λ # λ₁ = λ - cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ - cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁ - λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 - - # Initialize the regularization. - if λ > 0 - # k 2k k 2k k 2k - # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ] - # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ] - (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ) - - # q̄₁ = sp₁ * v₁ - @kscal!(n, spₖ, q) - else - αhatₖ = αₖ - end + iter = 0 + itmax == 0 && (itmax = m + n) - # Begin the LQ factorization of (Lₖ)ᵀ = M̅ₖQₖ. 
- # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ] - # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ] - # [ • • • • • • ] [ 0 • • • • ] - # [ • • • • • • ] = [ • • • • • • ] Qₖ - # [ • • • • 0 ] [ • • • • • • ] - # [ • • • βₖ] [ • • • • 0 ] - # [ 0 • • • • 0 αₖ] [ 0 • • • 0 ηₖ ϵbarₖ] - - ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁ - - # Hₖ = Bₖ(Lₖ)ᵀ = [ Lₖ(Lₖ)ᵀ ] ⟹ (Hₖ₋₁)ᵀ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ - # [ αₖβₖ₊₁(eₖ)ᵀ ] - # - # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ - # tₖ = (τ₁, •••, τₖ) - # z̅ₖ = (zₖ₋₁, ζbarₖ) = (ζ₁, •••, ζₖ₋₁, ζbarₖ) - - τₖ = βₖ / αhatₖ # τ₁ = β₁ / αhat₁ - ζbarₖ = τₖ / ϵbarₖ # ζbar₁ = τ₁ / ϵbar₁ - - # Stopping criterion. - solved_lq = solved_cg = false - tired = false - status = "unknown" - user_requested_exit = false - - if σₑₛₜ > 0 - τtildeₖ = βₖ / σₑₛₜ - ζtildeₖ = τtildeₖ / σₑₛₜ - err_x = τtildeₖ - err_y = ζtildeₖ - - solved_lq = err_x ≤ etolx || err_y ≤ etoly - history && push!(xNorms, err_x) - history && push!(yNorms, err_y) - - ρbar = -σₑₛₜ - csig = -one(T) - end + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) - while !(solved_lq || solved_cg || tired || user_requested_exit) + # Update iteration index + iter = iter + 1 - # Update of (xᵃᵘˣ)ₖ = Vₖtₖ - if λ > 0 - # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, τₖ * cpₖ, v, x) - if iter ≥ 2 - @kaxpy!(n, τₖ * spₖ, q, x) - # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ - @kaxpby!(n, spₖ, v, -cpₖ, q) - end - else - # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ - @kaxpy!(n, τₖ, v, x) + # Initialize generalized Golub-Kahan bidiagonalization. + # β₁Mu₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁ + βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M + if βₖ ≠ 0 + @kscal!(m, one(FC) / βₖ, u) + MisI || @kscal!(m, one(FC) / βₖ, Mu) end - # Continue the generalized Golub-Kahan bidiagonalization. 
- # AVₖ = MUₖ₊₁Bₖ - # AᵀUₖ₊₁ = NVₖ(Bₖ)ᵀ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᵀ = NVₖ₊₁(Lₖ₊₁)ᵀ - # - # [ α₁ 0 • • • • 0 ] - # [ β₂ α₂ • • ] - # [ 0 • • • • ] - # Lₖ = [ • • • • • • ] - # [ • • • • • • ] - # [ • • • • 0 ] - # [ 0 • • • 0 βₖ αₖ] - # - # Bₖ = [ Lₖ ] - # [ βₖ₊₁(eₖ)ᵀ ] - - # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -αₖ, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁ - βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M - if βₖ₊₁ ≠ 0 - @kscal!(m, one(FC) / βₖ₊₁, u) - MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu) + # α₁Nv₁ = Aᴴu₁. + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁ + αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N + if αₖ ≠ 0 + @kscal!(n, one(FC) / αₖ, v) + NisI || @kscal!(n, one(FC) / αₖ, Nv) end - # αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -βₖ₊₁, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁ - αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N - if αₖ₊₁ ≠ 0 - @kscal!(n, one(FC) / αₖ₊₁, v) - NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv) - end + w̄ .= u # Direction w̄₁ + cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ + sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ + ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ + ηₖ = zero(FC) # Coefficient of M̅ₖ + + # Variable used for the regularization. + λₖ = λ # λ₁ = λ + cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ + cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁ + λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0 - # Continue the regularization. + # Initialize the regularization. 
if λ > 0 # k 2k k 2k k 2k # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ] # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ] - βhatₖ₊₁ = cpₖ * βₖ₊₁ - θₖ₊₁ = spₖ * βₖ₊₁ + (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ) - # 2k 2k+1 2k 2k+1 2k 2k+1 - # k [ 0 0 ] [ -cdₖ sdₖ ] = [ 0 0 ] - # k+1 [ θₖ₊₁ λ ] [ sdₖ cdₖ ] [ 0 λₖ₊₁ ] - (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁) - - # qₖ ← sdₖ * q̄ₖ - @kscal!(n, sdₖ, q) - - # k+1 2k+1 k+1 2k+1 k+1 2k+1 - # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ] - # k+2 [ βₖ₊₂ 0 ] [ spₖ₊₁ -cpₖ₊₁ ] [ γₖ₊₂ θₖ₊₂ ] - (cpₖ₊₁, spₖ₊₁, αhatₖ₊₁) = sym_givens(αₖ₊₁, λₖ₊₁) + # q̄₁ = sp₁ * v₁ + @kscal!(n, spₖ, q) else - βhatₖ₊₁ = βₖ₊₁ - αhatₖ₊₁ = αₖ₊₁ + αhatₖ = αₖ end - if σₑₛₜ > 0 && !complex_error_bnd - μbar = -csig * αhatₖ - ρ = √(ρbar^2 + αhatₖ^2) - csig = ρbar / ρ - ssig = αhatₖ / ρ - ρbar = ssig * μbar + csig * σₑₛₜ - μbar = -csig * βhatₖ₊₁ - θ = βhatₖ₊₁ * csig / ρbar - ωdisc = σₑₛₜ^2 - σₑₛₜ * βhatₖ₊₁ * θ - if ωdisc < 0 - complex_error_bnd = true - else - ω = √ωdisc - τtildeₖ = - τₖ * βhatₖ₊₁ / ω - end + # Begin the LQ factorization of (Lₖ)ᴴ = M̅ₖQₖ. + # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ] + # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ] + # [ • • • • • • ] [ 0 • • • • ] + # [ • • • • • • ] = [ • • • • • • ] Qₖ + # [ • • • • 0 ] [ • • • • • • ] + # [ • • • βₖ] [ • • • • 0 ] + # [ 0 • • • • 0 αₖ] [ 0 • • • 0 ηₖ ϵbarₖ] - ρ = √(ρbar^2 + βhatₖ₊₁^2) - csig = ρbar / ρ - ssig = βhatₖ₊₁ / ρ - ρbar = ssig * μbar + csig * σₑₛₜ - end + ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁ - # Continue the LQ factorization of (Lₖ₊₁)ᵀ. 
- # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ] - # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁] - # [0 sₖ₊₁ -cₖ₊₁] + # Hₖ = Bₖ(Lₖ)ᴴ = [ Lₖ(Lₖ)ᴴ ] ⟹ (Hₖ₋₁)ᴴ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ + # [ αₖβₖ₊₁(eₖ)ᵀ ] + # + # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ + # tₖ = (τ₁, •••, τₖ) + # z̅ₖ = (zₖ₋₁, ζbarₖ) = (ζ₁, •••, ζₖ₋₁, ζbarₖ) - (cₖ₊₁, sₖ₊₁, ϵₖ) = sym_givens(ϵbarₖ, βhatₖ₊₁) - ηₖ₊₁ = αhatₖ₊₁ * sₖ₊₁ - ϵbarₖ₊₁ = - αhatₖ₊₁ * cₖ₊₁ + τₖ = βₖ / αhatₖ # τ₁ = β₁ / αhat₁ + ζbarₖ = τₖ / ϵbarₖ # ζbar₁ = τ₁ / ϵbar₁ - # Update solutions of Lₖ₊₁tₖ₊₁ = β₁e₁ and M̅ₖ₊₁z̅ₖ₊₁ = tₖ₊₁. - τₖ₊₁ = - βhatₖ₊₁ * τₖ / αhatₖ₊₁ - ζₖ = cₖ₊₁ * ζbarₖ - ζbarₖ₊₁ = (τₖ₊₁ - ηₖ₊₁ * ζₖ) / ϵbarₖ₊₁ + # Stopping criterion. + solved_lq = solved_cg = false + tired = false + status = "unknown" + user_requested_exit = false + overtimed = false - # Relations for the directions wₖ and w̄ₖ₊₁ - # [w̄ₖ uₖ₊₁] [cₖ₊₁ sₖ₊₁] = [wₖ w̄ₖ₊₁] → wₖ = cₖ₊₁ * w̄ₖ + sₖ₊₁ * uₖ₊₁ - # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁ + if σₑₛₜ > 0 + τtildeₖ = βₖ / σₑₛₜ + ζtildeₖ = τtildeₖ / σₑₛₜ + err_x = τtildeₖ + err_y = ζtildeₖ - # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ - @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y) - @kaxpy!(m, ζₖ * sₖ₊₁, u, y) + solved_lq = err_x ≤ utolx || err_y ≤ utoly + history && push!(xNorms, err_x) + history && push!(yNorms, err_y) - # Compute w̄ₖ₊₁ - @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄) + ρbar = -σₑₛₜ + csig = -one(T) + end - if σₑₛₜ > 0 && !complex_error_bnd - if transfer_to_craig - disc_x = τtildeₖ^2 - τₖ₊₁^2 - disc_x < 0 ? complex_error_bnd = true : err_x = √disc_x + while !(solved_lq || solved_cg || tired || user_requested_exit || overtimed) + + # Update of (xᵃᵘˣ)ₖ = Vₖtₖ + if λ > 0 + # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) + @kaxpy!(n, τₖ * cpₖ, v, x) + if iter ≥ 2 + @kaxpy!(n, τₖ * spₖ, q, x) + # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + @kaxpby!(n, spₖ, v, -cpₖ, q) + end else - disc_xL = τtildeₖ^2 - τₖ₊₁^2 + (τₖ₊₁ - ηₖ₊₁ * ζₖ)^2 - disc_xL < 0 ? 
complex_error_bnd = true : err_x = √disc_xL + # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ + @kaxpy!(n, τₖ, v, x) end - ηtildeₖ = ω * sₖ₊₁ - ϵtildeₖ = -ω * cₖ₊₁ - ζtildeₖ = (τtildeₖ - ηtildeₖ * ζₖ) / ϵtildeₖ - - if transfer_to_craig - disc_y = ζtildeₖ^2 - ζbarₖ₊₁^2 - disc_y < 0 ? complex_error_bnd = true : err_y = √disc_y - else - err_y = abs(ζtildeₖ) + + # Continue the generalized Golub-Kahan bidiagonalization. + # AVₖ = MUₖ₊₁Bₖ + # AᴴUₖ₊₁ = NVₖ(Bₖ)ᴴ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᴴ = NVₖ₊₁(Lₖ₊₁)ᴴ + # + # [ α₁ 0 • • • • 0 ] + # [ β₂ α₂ • • ] + # [ 0 • • • • ] + # Lₖ = [ • • • • • • ] + # [ • • • • • • ] + # [ • • • • 0 ] + # [ 0 • • • 0 βₖ αₖ] + # + # Bₖ = [ Lₖ ] + # [ βₖ₊₁(eₖ)ᵀ ] + + # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -αₖ, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁ + βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M + if βₖ₊₁ ≠ 0 + @kscal!(m, one(FC) / βₖ₊₁, u) + MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu) end - history && push!(xNorms, err_x) - history && push!(yNorms, err_y) - end + # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁ + αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N + if αₖ₊₁ ≠ 0 + @kscal!(n, one(FC) / αₖ₊₁, v) + NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv) + end - # Compute residual norm ‖(rᴸ)ₖ‖ = |αₖ| * √(|ϵbarₖζbarₖ|² + |βₖ₊₁sₖζₖ₋₁|²) - if iter == 1 - rNorm_lq = bNorm - else - rNorm_lq = abs(αhatₖ) * √(abs2(ϵbarₖ * ζbarₖ) + abs2(βhatₖ₊₁ * sₖ * ζₖ₋₁)) - end - history && push!(rNorms, rNorm_lq) + # Continue the regularization. 
+ if λ > 0 + # k 2k k 2k k 2k + # k [ αₖ λₖ ] [ cpₖ spₖ ] = [ αhatₖ 0 ] + # k+1 [ βₖ₊₁ 0 ] [ spₖ -cpₖ ] [ βhatₖ₊₁ θₖ₊₁ ] + βhatₖ₊₁ = cpₖ * βₖ₊₁ + θₖ₊₁ = spₖ * βₖ₊₁ + + # 2k 2k+1 2k 2k+1 2k 2k+1 + # k [ 0 0 ] [ -cdₖ sdₖ ] = [ 0 0 ] + # k+1 [ θₖ₊₁ λ ] [ sdₖ cdₖ ] [ 0 λₖ₊₁ ] + (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁) + + # qₖ ← sdₖ * q̄ₖ + @kscal!(n, sdₖ, q) + + # k+1 2k+1 k+1 2k+1 k+1 2k+1 + # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ] + # k+2 [ βₖ₊₂ 0 ] [ spₖ₊₁ -cpₖ₊₁ ] [ γₖ₊₂ θₖ₊₂ ] + (cpₖ₊₁, spₖ₊₁, αhatₖ₊₁) = sym_givens(αₖ₊₁, λₖ₊₁) + else + βhatₖ₊₁ = βₖ₊₁ + αhatₖ₊₁ = αₖ₊₁ + end - # Compute residual norm ‖(rᶜ)ₖ‖ = |βₖ₊₁ * τₖ| - if transfer_to_craig - rNorm_cg = abs(βhatₖ₊₁ * τₖ) - end + if σₑₛₜ > 0 && !complex_error_bnd + μbar = -csig * αhatₖ + ρ = √(ρbar^2 + αhatₖ^2) + csig = ρbar / ρ + ssig = αhatₖ / ρ + ρbar = ssig * μbar + csig * σₑₛₜ + μbar = -csig * βhatₖ₊₁ + θ = βhatₖ₊₁ * csig / ρbar + ωdisc = σₑₛₜ^2 - σₑₛₜ * βhatₖ₊₁ * θ + if ωdisc < 0 + complex_error_bnd = true + else + ω = √ωdisc + τtildeₖ = - τₖ * βhatₖ₊₁ / ω + end + + ρ = √(ρbar^2 + βhatₖ₊₁^2) + csig = ρbar / ρ + ssig = βhatₖ₊₁ / ρ + ρbar = ssig * μbar + csig * σₑₛₜ + end - # Update sₖ, cₖ, αₖ, βₖ, ηₖ, ϵbarₖ, τₖ, ζₖ₋₁ and ζbarₖ. - cₖ = cₖ₊₁ - sₖ = sₖ₊₁ - αₖ = αₖ₊₁ - αhatₖ = αhatₖ₊₁ - βₖ = βₖ₊₁ - ηₖ = ηₖ₊₁ - ϵbarₖ = ϵbarₖ₊₁ - τₖ = τₖ₊₁ - ζₖ₋₁ = ζₖ - ζbarₖ = ζbarₖ₊₁ - - # Update regularization variables. - if λ > 0 - cpₖ = cpₖ₊₁ - spₖ = spₖ₊₁ - end + # Continue the LQ factorization of (Lₖ₊₁)ᴴ. + # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ] + # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁] + # [0 sₖ₊₁ -cₖ₊₁] + + (cₖ₊₁, sₖ₊₁, ϵₖ) = sym_givens(ϵbarₖ, βhatₖ₊₁) + ηₖ₊₁ = αhatₖ₊₁ * sₖ₊₁ + ϵbarₖ₊₁ = - αhatₖ₊₁ * cₖ₊₁ + + # Update solutions of Lₖ₊₁tₖ₊₁ = β₁e₁ and M̅ₖ₊₁z̅ₖ₊₁ = tₖ₊₁. 
+ τₖ₊₁ = - βhatₖ₊₁ * τₖ / αhatₖ₊₁ + ζₖ = cₖ₊₁ * ζbarₖ + ζbarₖ₊₁ = (τₖ₊₁ - ηₖ₊₁ * ζₖ) / ϵbarₖ₊₁ + + # Relations for the directions wₖ and w̄ₖ₊₁ + # [w̄ₖ uₖ₊₁] [cₖ₊₁ sₖ₊₁] = [wₖ w̄ₖ₊₁] → wₖ = cₖ₊₁ * w̄ₖ + sₖ₊₁ * uₖ₊₁ + # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁ + + # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ + @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y) + @kaxpy!(m, ζₖ * sₖ₊₁, u, y) + + # Compute w̄ₖ₊₁ + @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄) + + if σₑₛₜ > 0 && !complex_error_bnd + if transfer_to_craig + disc_x = τtildeₖ^2 - τₖ₊₁^2 + disc_x < 0 ? complex_error_bnd = true : err_x = √disc_x + else + disc_xL = τtildeₖ^2 - τₖ₊₁^2 + (τₖ₊₁ - ηₖ₊₁ * ζₖ)^2 + disc_xL < 0 ? complex_error_bnd = true : err_x = √disc_xL + end + ηtildeₖ = ω * sₖ₊₁ + ϵtildeₖ = -ω * cₖ₊₁ + ζtildeₖ = (τtildeₖ - ηtildeₖ * ζₖ) / ϵtildeₖ + + if transfer_to_craig + disc_y = ζtildeₖ^2 - ζbarₖ₊₁^2 + disc_y < 0 ? complex_error_bnd = true : err_y = √disc_y + else + err_y = abs(ζtildeₖ) + end + + history && push!(xNorms, err_x) + history && push!(yNorms, err_y) + end - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - solved_lq = rNorm_lq ≤ ε - solved_cg = transfer_to_craig && rNorm_cg ≤ ε - if σₑₛₜ > 0 - if transfer_to_craig - solved_cg = solved_cg || err_x ≤ etolx || err_y ≤ etoly + # Compute residual norm ‖(rᴸ)ₖ‖ = |αₖ| * √(|ϵbarₖζbarₖ|² + |βₖ₊₁sₖζₖ₋₁|²) + if iter == 1 + rNorm_lq = bNorm else - solved_lq = solved_lq || err_x ≤ etolx || err_y ≤ etoly + rNorm_lq = abs(αhatₖ) * √(abs2(ϵbarₖ * ζbarₖ) + abs2(βhatₖ₊₁ * sₖ * ζₖ₋₁)) end - end - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) + history && push!(rNorms, rNorm_lq) - # Update iteration index. 
- iter = iter + 1 - end - (verbose > 0) && @printf("\n") + # Compute residual norm ‖(rᶜ)ₖ‖ = |βₖ₊₁ * τₖ| + if transfer_to_craig + rNorm_cg = abs(βhatₖ₊₁ * τₖ) + end - if solved_cg - if λ > 0 - # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, τₖ * cpₖ, v, x) - if iter ≥ 2 - @kaxpy!(n, τₖ * spₖ, q, x) + # Update sₖ, cₖ, αₖ, βₖ, ηₖ, ϵbarₖ, τₖ, ζₖ₋₁ and ζbarₖ. + cₖ = cₖ₊₁ + sₖ = sₖ₊₁ + αₖ = αₖ₊₁ + αhatₖ = αhatₖ₊₁ + βₖ = βₖ₊₁ + ηₖ = ηₖ₊₁ + ϵbarₖ = ϵbarₖ₊₁ + τₖ = τₖ₊₁ + ζₖ₋₁ = ζₖ + ζbarₖ = ζbarₖ₊₁ + + # Update regularization variables. + if λ > 0 + cpₖ = cpₖ₊₁ + spₖ = spₖ₊₁ end - else - # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ - @kaxpy!(n, τₖ, v, x) + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + solved_lq = rNorm_lq ≤ ε + solved_cg = transfer_to_craig && rNorm_cg ≤ ε + if σₑₛₜ > 0 + solved_lq = solved_lq || err_x ≤ utolx || err_y ≤ utoly + solved_cg = transfer_to_craig && (solved_cg || err_x ≤ utolx || err_y ≤ utoly) + end + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time)) + + # Update iteration index. 
+ iter = iter + 1 end - # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ - @kaxpy!(m, ζbarₖ, w̄, y) - else - if λ > 0 - # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x) - if iter ≥ 2 - @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x) + (verbose > 0) && @printf(iostream, "\n") + + if solved_cg + if λ > 0 + # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) + @kaxpy!(n, τₖ * cpₖ, v, x) + if iter ≥ 2 + @kaxpy!(n, τₖ * spₖ, q, x) + end + else + # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ + @kaxpy!(n, τₖ, v, x) end + # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ + @kaxpy!(m, ζbarₖ, w̄, y) else - # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ - @kaxpy!(n, ηₖ * ζₖ₋₁, v, x) + if λ > 0 + # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁) + @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x) + if iter ≥ 2 + @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x) + end + else + # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ + @kaxpy!(n, ηₖ * ζₖ₋₁, v, x) + end end - end - tired && (status = "maximum number of iterations exceeded") - solved_lq && (status = "solutions (xᴸ, yᴸ) good enough for the tolerances given") - solved_cg && (status = "solutions (xᶜ, yᶜ) good enough for the tolerances given") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved_lq || solved_cg - stats.error_with_bnd = complex_error_bnd - stats.status = status - return solver + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved_lq && (status = "solutions (xᴸ, yᴸ) good enough for the tolerances given") + solved_cg && (status = "solutions (xᶜ, yᶜ) good enough for the tolerances given") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved_lq || solved_cg + stats.error_with_bnd = complex_error_bnd + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/lslq.jl b/src/lslq.jl index 908de19c5..3a549207e 100644 --- a/src/lslq.jl +++ 
b/src/lslq.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSLQ is formally equivalent to applying SYMMLQ to the normal equations # but should be more stable. @@ -21,15 +21,17 @@ export lslq, lslq! - """ (x, stats) = lslq(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), - atol::T=√eps(T), btol::T=√eps(T), etol::T=√eps(T), - window::Int=5, utol::T=√eps(T), itmax::Int=0, - σ::T=zero(T), transfer_to_lsqr::Bool=false, - conlim::T=1/√eps(T), verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + window::Int=5, transfer_to_lsqr::Bool=false, + sqd::Bool=false, λ::T=zero(T), + σ::T=zero(T), etol::T=√eps(T), + utol::T=√eps(T), btol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -38,31 +40,17 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSLQ method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSLQ method, where λ ≥ 0 is a regularization parameter. LSLQ is formally equivalent to applying SYMMLQ to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb but is more stable. 
-#### Main features - -* the solution estimate is updated along orthogonal directions -* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing -* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing -* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error) -* if `A` is rank deficient, identify the minimum least-squares solution - -#### Optional arguments - -* `M`: a symmetric and positive definite dual preconditioner -* `N`: a symmetric and positive definite primal preconditioner -* `sqd` indicates that we are solving a symmetric and quasi-definite system with `λ=1` - If `λ > 0`, we solve the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -72,39 +60,61 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSLQ is then equivalent to applying SYMMLQ to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSLQ is then equivalent to applying SYMMLQ to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. 
-* `λ` is a regularization parameter (see the problem statement above) -* `σ` is an underestimate of the smallest nonzero singular value of `A`---setting `σ` too large will result in an error in the course of the iterations -* `atol` is a stopping tolerance based on the residual -* `btol` is a stopping tolerance used to detect zero-residual problems -* `etol` is a stopping tolerance based on the lower bound on the error -* `window` is the number of iterations used to accumulate a lower bound on the error -* `utol` is a stopping tolerance based on the upper bound on the error -* `transfer_to_lsqr` return the CG solution estimate (i.e., the LSQR point) instead of the LQ estimate -* `itmax` is the maximum number of iterations (0 means no imposed limit) -* `conlim` is the limit on the estimated condition number of `A` beyond which the solution will be abandoned -* `verbose` determines verbosity. - -#### Return values +#### Main features -`lslq` returns the tuple `(x, stats)` where +* the solution estimate is updated along orthogonal directions +* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing +* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing +* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error) +* if `A` is rank deficient, identify the minimum least-squares solution -* `x` is the LQ solution estimate -* `stats` collects other statistics on the run in a LSLQStats +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`; +* `etol`: stopping tolerance based on the lower bound on the error; +* `utol`: stopping tolerance based on the upper bound on the error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LSLQStats`](@ref) structure. 
* `stats.err_lbnds` is a vector of lower bounds on the LQ error---the vector is empty if `window` is set to zero * `stats.err_ubnds_lq` is a vector of upper bounds on the LQ error---the vector is empty if `σ == 0` is left at zero @@ -116,8 +126,8 @@ In this case, `N` can still be specified and indicates the weighted norm in whic The iterations stop as soon as one of the following conditions holds true: * the optimality residual is sufficiently small (`stats.status = "found approximate minimum least-squares solution"`) in the sense that either - * ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ atol, or - * 1 + ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ 1 + * ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ atol, or + * 1 + ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ 1 * an approximate zero-residual solution has been found (`stats.status = "found approximate zero-residual solution"`) in the sense that either * ‖r‖ / ‖b‖ ≤ btol + atol ‖A‖ * ‖xᴸ‖ / ‖b‖, or * 1 + ‖r‖ / ‖b‖ ≤ 1 @@ -127,9 +137,6 @@ The iterations stop as soon as one of the following conditions holds true: * the lower bound on the LQ forward error is less than etol * ‖xᴸ‖ * the upper bound on the CG forward error is less than utol * ‖xᶜ‖ -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. - #### References * R. Estrin, D. Orban and M. A. Saunders, [*Euclidean-norm error bounds for SYMMLQ and CG*](https://doi.org/10.1137/16M1094816), SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019. @@ -137,12 +144,6 @@ and `false` otherwise. """ function lslq end -function lslq(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = LslqSolver(A, b, window=window) - lslq!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = lslq!(solver::LslqSolver, A, b; kwargs...) @@ -152,315 +153,363 @@ See [`LslqSolver`](@ref) for more details about the `solver`. """ function lslq! 
end -function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T), - utol :: T=√eps(T), itmax :: Int=0, σ :: T=zero(T), - transfer_to_lsqr :: Bool=false, conlim :: T=1/√eps(T), - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSLQ: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₙ and N = Iₘ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.w̄ - Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats - rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds - err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - λ² = λ * λ - ctol = conlim > 0 ? 1/conlim : zero(T) - - x .= zero(FC) # LSLQ point - - # Initialize Golub-Kahan process. - # β₁ M u₁ = b. 
- Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.error_with_bnd = false - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a zero-residual solution" - return solver - end - β = β₁ - - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) # = α₁ - - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.error_with_bnd = false - history && push!(rNorms, β₁) - history && push!(ArNorms, zero(T)) - stats.status = "x = 0 is a minimum least-squares solution" - return solver +def_args_lslq = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lslq = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_lsqr::Bool = false), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; σ::T = zero(T) ), + :(; etol::T = √eps(T) ), + :(; utol::T = √eps(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_lslq = mapreduce(extract_parameters, vcat, def_kwargs_lslq) + +args_lslq = (:A, :b) +kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lslq($(def_args_lslq...); window :: Int=5, $(def_kwargs_lslq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LslqSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lslq!(solver, $(args_lslq...); 
$(kwargs_lslq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - - Anorm = α - Anorm² = α * α - - # condition number estimate - σmax = zero(T) - σmin = Inf - Acond = zero(T) - - xlqNorm = zero(T) - xlqNorm² = zero(T) - xcgNorm = zero(T) - xcgNorm² = zero(T) - - w̄ .= v # w̄₁ = v₁ - - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - complex_error_bnd = false - - # Initialize other constants. - αL = α - βL = β - ρ̄ = -σ - γ̄ = α - ψ = β₁ - c = -one(T) - s = zero(T) - δ = -one(T) - τ = α * β₁ - ζ = zero(T) - ζ̄ = zero(T) - ζ̃ = zero(T) - csig = -one(T) - - rNorm = β₁ - history && push!(rNorms, rNorm) - ArNorm = α * β - history && push!(ArNorms, ArNorm) - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm) - - status = "unknown" - solved = solved_mach = solved_lim = (rNorm ≤ atol) - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = false - fwd_err_lbnd = false - fwd_err_ubnd = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - - # Generate next Golub-Kahan vectors. - # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - - # 2. 
αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - end - # rotate out regularization term if present - αL = α - βL = β - if λ ≠ 0 - (cL, sL, βL) = sym_givens(β, λ) - αL = cL * α + function lslq!(solver :: LslqSolver{T,FC,S}, $(def_args_lslq...); $(def_kwargs_lslq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - # the rotation updates the next regularization parameter - λ = sqrt(λ² + (sL * α)^2) - end - Anorm² = Anorm² + αL * αL + βL * βL # = ‖Lₖ‖² - Anorm = sqrt(Anorm²) - end + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LSLQ: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₙ and N = Iₘ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + x, Nv, Aᴴu, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.w̄ + Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats + rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds + err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? 
Nv : solver.v - # Continue QR factorization of Bₖ - # - # k k+1 k k+1 k k+1 - # k [ c' s' ] [ γ̄ ] = [ γ δ ] - # k+1 [ s' -c' ] [ β α⁺ ] [ γ̄ ] - (cp, sp, γ) = sym_givens(γ̄, βL) - τ = -τ * δ / γ # forward substitution for t - δ = sp * αL - γ̄ = -cp * αL - - if σ > 0 && !complex_error_bnd - # Continue QR factorization for error estimate - μ̄ = -csig * γ - (csig, ssig, ρ) = sym_givens(ρ̄, γ) - ρ̄ = ssig * μ̄ + csig * σ - μ̄ = -csig * δ - - # determine component of eigenvector and Gauss-Radau parameter - h = δ * csig / ρ̄ - disc = σ * (σ - δ * h) - disc < 0 ? complex_error_bnd = true : ω = sqrt(disc) - (csig, ssig, ρ) = sym_givens(ρ̄, δ) - ρ̄ = ssig * μ̄ + csig * σ + λ² = λ * λ + ctol = conlim > 0 ? 1/conlim : zero(T) + + x .= zero(FC) # LSLQ point + + # Initialize Golub-Kahan process. + # β₁ M u₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) + β₁ = sqrt(@kdotr(m, u, Mu)) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.error_with_bnd = false + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + return solver end + β = β₁ + + @kscal!(m, one(FC)/β₁, u) + MisI || @kscal!(m, one(FC)/β₁, Mu) + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) # = α₁ + + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.error_with_bnd = false + history && push!(rNorms, β₁) + history && push!(ArNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver + end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) - # Continue LQ factorization of Rₖ - ϵ̄ = -γ * c - η = γ * s - (c, s, ϵ) = sym_givens(ϵ̄, δ) + Anorm = α + Anorm² = α * α # condition number estimate - # the QLP factorization suggests that the diagonal of M̄ 
approximates - # the singular values of B. - σmax = max(σmax, ϵ, abs(ϵ̄)) - σmin = min(σmin, ϵ, abs(ϵ̄)) - Acond = σmax / σmin - - # forward substitution for z, ζ̄ - ζold = ζ - ζ = (τ - ζ * η) / ϵ - ζ̄ = ζ / c - - # residual norm estimate - rNorm = sqrt((ψ * cp - ζold * η)^2 + (ψ * sp)^2) + σmax = zero(T) + σmin = Inf + Acond = zero(T) + + xlqNorm = zero(T) + xlqNorm² = zero(T) + xcgNorm = zero(T) + xcgNorm² = zero(T) + + w̄ .= v # w̄₁ = v₁ + + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) + complex_error_bnd = false + + # Initialize other constants. + αL = α + βL = β + ρ̄ = -σ + γ̄ = α + ψ = β₁ + c = -one(T) + s = zero(T) + δ = -one(T) + τ = α * β₁ + ζ = zero(T) + ζ̄ = zero(T) + ζ̃ = zero(T) + csig = -one(T) + + rNorm = β₁ history && push!(rNorms, rNorm) - - ArNorm = sqrt((γ * ϵ * ζ)^2 + (δ * η * ζold)^2) + ArNorm = α * β history && push!(ArNorms, ArNorm) - # Compute ψₖ - ψ = ψ * sp + iter = 0 + itmax == 0 && (itmax = m + n) - # Compute ‖x_cg‖₂ - xcgNorm² = xlqNorm² + ζ̄ * ζ̄ + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm, ktimer(start_time)) - if σ > 0 && iter > 0 && !complex_error_bnd - disc = ζ̃ * ζ̃ - ζ̄ * ζ̄ - if disc < 0 - complex_error_bnd = true - else - err_ubnd_cg = sqrt(disc) - history && push!(err_ubnds_cg, err_ubnd_cg) - fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²) + status = "unknown" + ε = atol + rtol * β₁ + solved = solved_mach = solved_lim = (rNorm ≤ ε) + tired = iter ≥ itmax + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid = zero_resid_mach = zero_resid_lim = false + fwd_err_lbnd = false + fwd_err_ubnd = false + user_requested_exit = false + overtimed = false + + while ! 
(solved || tired || ill_cond || user_requested_exit || overtimed) + + # Generate next Golub-Kahan vectors. + # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + end + + # rotate out regularization term if present + αL = α + βL = β + if λ ≠ 0 + (cL, sL, βL) = sym_givens(β, λ) + αL = cL * α + + # the rotation updates the next regularization parameter + λ = sqrt(λ² + (sL * α)^2) + end + Anorm² = Anorm² + αL * αL + βL * βL # = ‖Lₖ‖² + Anorm = sqrt(Anorm²) end - end - test1 = rNorm / β₁ - test2 = ArNorm / (Anorm * rNorm) - test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xlqNorm / β₁) - rtol = btol + atol * Anorm * xlqNorm / β₁ + # Continue QR factorization of Bₖ + # + # k k+1 k k+1 k k+1 + # k [ c' s' ] [ γ̄ ] = [ γ δ ] + # k+1 [ s' -c' ] [ β α⁺ ] [ γ̄ ] + (cp, sp, γ) = sym_givens(γ̄, βL) + τ = -τ * δ / γ # forward substitution for t + δ = sp * αL + γ̄ = -cp * αL + + if σ > 0 && !complex_error_bnd + # Continue QR factorization for error estimate + μ̄ = -csig * γ + (csig, ssig, ρ) = sym_givens(ρ̄, γ) + ρ̄ = ssig * μ̄ + csig * σ + μ̄ = -csig * δ + + # determine component of eigenvector and Gauss-Radau parameter + h = δ * csig / ρ̄ + disc = σ * (σ - δ * h) + disc < 0 ? 
complex_error_bnd = true : ω = sqrt(disc) + (csig, ssig, ρ) = sym_givens(ρ̄, δ) + ρ̄ = ssig * μ̄ + csig * σ + end - # update LSLQ point for next iteration - @kaxpy!(n, c * ζ, w̄, x) - @kaxpy!(n, s * ζ, v, x) + # Continue LQ factorization of Rₖ + ϵ̄ = -γ * c + η = γ * s + (c, s, ϵ) = sym_givens(ϵ̄, δ) + + # condition number estimate + # the QLP factorization suggests that the diagonal of M̄ approximates + # the singular values of B. + σmax = max(σmax, ϵ, abs(ϵ̄)) + σmin = min(σmin, ϵ, abs(ϵ̄)) + Acond = σmax / σmin + + # forward substitution for z, ζ̄ + ζold = ζ + ζ = (τ - ζ * η) / ϵ + ζ̄ = ζ / c + + # residual norm estimate + rNorm = sqrt((ψ * cp - ζold * η)^2 + (ψ * sp)^2) + history && push!(rNorms, rNorm) + + ArNorm = sqrt((γ * ϵ * ζ)^2 + (δ * η * ζold)^2) + history && push!(ArNorms, ArNorm) + + # Compute ψₖ + ψ = ψ * sp + + # Compute ‖x_cg‖₂ + xcgNorm² = xlqNorm² + ζ̄ * ζ̄ + + if σ > 0 && iter > 0 && !complex_error_bnd + disc = ζ̃ * ζ̃ - ζ̄ * ζ̄ + if disc < 0 + complex_error_bnd = true + else + err_ubnd_cg = sqrt(disc) + history && push!(err_ubnds_cg, err_ubnd_cg) + fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²) + end + end - # compute w̄ - @kaxpby!(n, -c, v, s, w̄) + test1 = rNorm + test2 = ArNorm / (Anorm * rNorm) + test3 = 1 / Acond + t1 = test1 / (one(T) + Anorm * xlqNorm) + tol = btol + atol * Anorm * xlqNorm / β₁ - xlqNorm² += ζ * ζ - xlqNorm = sqrt(xlqNorm²) + # update LSLQ point for next iteration + @kaxpy!(n, c * ζ, w̄, x) + @kaxpy!(n, s * ζ, v, x) - # check stopping condition based on forward error lower bound - err_vec[mod(iter, window) + 1] = ζ - if iter ≥ window - err_lbnd = norm(err_vec) - history && push!(err_lbnds, err_lbnd) - fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm - end + # compute w̄ + @kaxpby!(n, -c, v, s, w̄) - # compute LQ forward error upper bound - if σ > 0 && !complex_error_bnd - η̃ = ω * s - ϵ̃ = -ω * c - τ̃ = -τ * δ / ω - ζ̃ = (τ̃ - ζ * η̃) / ϵ̃ - history && push!(err_ubnds_lq, abs(ζ̃ )) - end + xlqNorm² += ζ * ζ + xlqNorm = 
sqrt(xlqNorm²) - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + test3 ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + t1 ≤ one(T)) + # check stopping condition based on forward error lower bound + err_vec[mod(iter, window) + 1] = ζ + if iter ≥ window + err_lbnd = @knrm2(window, err_vec) + history && push!(err_lbnds, err_lbnd) + fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm + end - # Stopping conditions based on user-provided tolerances. - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - ill_cond_lim = (test3 ≤ ctol) - solved_lim = (test2 ≤ atol) - zero_resid_lim = (test1 ≤ rtol) + # compute LQ forward error upper bound + if σ > 0 && !complex_error_bnd + η̃ = ω * s + ϵ̃ = -ω * c + τ̃ = -τ * δ / ω + ζ̃ = (τ̃ - ζ * η̃) / ϵ̃ + history && push!(err_ubnds_lq, abs(ζ̃ )) + end - ill_cond = ill_cond_mach || ill_cond_lim - zero_resid = zero_resid_mach || zero_resid_lim - solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + test3 ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + t1 ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. 
+ user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + ill_cond_lim = (test3 ≤ ctol) + solved_lim = (test2 ≤ atol) + zero_resid_lim = (test1 ≤ ε) + + ill_cond = ill_cond_mach || ill_cond_lim + zero_resid = zero_resid_mach || zero_resid_lim + solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + iter = iter + 1 + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") - iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm) - end - (verbose > 0) && @printf("\n") + if transfer_to_lsqr # compute LSQR point + @kaxpy!(n, ζ̄ , w̄, x) + end - if transfer_to_lsqr # compute LSQR point - @kaxpy!(n, ζ̄ , w̄, x) + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err_lbnd && (status = "forward error lower bound small enough") + fwd_err_ubnd && (status = "forward error upper bound small enough") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.error_with_bnd = complex_error_bnd + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for 
this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - fwd_err_lbnd && (status = "forward error lower bound small enough") - fwd_err_ubnd && (status = "forward error upper bound small enough") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !zero_resid - stats.error_with_bnd = complex_error_bnd - stats.status = status - return solver end diff --git a/src/lsmr.jl b/src/lsmr.jl index f4d8349d1..085d941db 100644 --- a/src/lsmr.jl +++ b/src/lsmr.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSMR is formally equivalent to applying MINRES to the normal equations # but should be more stable. It is also formally equivalent to CRLS though @@ -24,17 +24,16 @@ export lsmr, lsmr! - """ (x, stats) = lsmr(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), + M=I, N=I, ldiv::Bool=false, + window::Int=5, sqd::Bool=false, λ::T=zero(T), + radius::T=zero(T), etol::T=√eps(T), axtol::T=√eps(T), btol::T=√eps(T), - atol::T=zero(T), rtol::T=zero(T), - etol::T=√eps(T), window::Int=5, - itmax::Int=0, conlim::T=1/√eps(T), - radius::T=zero(T), verbose::Int=0, - history::Bool=false, ldiv::Bool=false, - callback=solver->false) + conlim::T=1/√eps(T), atol::T=zero(T), + rtol::T=zero(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -43,24 +42,24 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSMR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSMR method, where λ ≥ 0 is a regularization parameter. 
LSMR is formally equivalent to applying MINRES to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb (and therefore to CRLS) but is more stable. -LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. +LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. It is formally equivalent to CRLS, though can be substantially more accurate. LSMR can be also used to find a null vector of a singular matrix A -by solving the problem `min ‖Aᵀx - b‖` with any nonzero vector `b`. -At a minimizer, the residual vector `r = b - Aᵀx` will satisfy `Ar = 0`. +by solving the problem `min ‖Aᴴx - b‖` with any nonzero vector `b`. +At a minimizer, the residual vector `r = b - Aᴴx` will satisfy `Ar = 0`. If `λ > 0`, we solve the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -70,23 +69,52 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSMR is then equivalent to applying MINRES to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSMR is then equivalent to applying MINRES to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. 
`r` can be recovered by computing `E⁻¹(b - Ax)`. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `etol`: stopping tolerance based on the lower bound on the error; +* `axtol`: tolerance on the backward error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LsmrStats`](@ref) structure. #### Reference @@ -94,12 +122,6 @@ and `false` otherwise. """ function lsmr end -function lsmr(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = LsmrSolver(A, b, window=window) - lsmr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = lsmr!(solver::LsmrSolver, A, b; kwargs...) @@ -109,274 +131,320 @@ See [`LsmrSolver`](@ref) for more details about the `solver`. """ function lsmr! end -function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSMR: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₙ and N = Iₘ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. 
- allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, h, hbar = solver.x, solver.Nv, solver.Aᵀu, solver.h, solver.hbar - Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - ctol = conlim > 0 ? 1/conlim : zero(T) - x .= zero(FC) - - # Initialize Golub-Kahan process. - # β₁ M u₁ = b. - Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - return solver - end - β = β₁ - - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - - ζbar = α * β - αbar = α - ρ = one(T) - ρbar = one(T) - cbar = one(T) - sbar = zero(T) - - # Initialize variables for estimation of ‖r‖. - βdd = β - βd = zero(T) - ρdold = one(T) - τtildeold = zero(T) - θtilde = zero(T) - ζ = zero(T) - d = zero(T) - - # Initialize variables for estimation of ‖A‖, cond(A) and xNorm. - Anorm² = α * α - maxrbar = zero(T) - minrbar = min(floatmax(T), T(1.0e+100)) - Acond = maxrbar / minrbar - Anorm = sqrt(Anorm²) - xNorm = zero(T) - - # Items for use in stopping rules. - ctol = conlim > 0 ? 
1 / conlim : zero(T) - rNorm = β - history && push!(rNorms, rNorm) - ArNorm = ArNorm0 = α * β - history && push!(ArNorms, ArNorm) - - xENorm² = zero(T) - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²) - - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a minimum least-squares solution" - return solver +def_args_lsmr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lsmr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; radius::T = zero(T) ), + :(; etol::T = √eps(T) ), + :(; axtol::T = √eps(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = zero(T) ), + :(; rtol::T = zero(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_lsmr = mapreduce(extract_parameters, vcat, def_kwargs_lsmr) + +args_lsmr = (:A, :b) +kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lsmr($(def_args_lsmr...); window :: Int=5, $(def_kwargs_lsmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LsmrSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lsmr!(solver, $(args_lsmr...); $(kwargs_lsmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - - h .= 
v - hbar .= zero(FC) - - status = "unknown" - on_boundary = false - solved = solved_mach = solved_lim = (rNorm ≤ axtol) - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = false - fwd_err = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - iter = iter + 1 - - # Generate next Golub-Kahan vectors. - # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - end - end - # Continue QR factorization - (chat, shat, αhat) = sym_givens(αbar, λ) - - ρold = ρ - (c, s, ρ) = sym_givens(αhat, β) - θnew = s * α - αbar = c * α - - ρbarold = ρbar - ζold = ζ - θbar = sbar * ρ - ρtemp = cbar * ρ - (cbar, sbar, ρbar) = sym_givens(ρtemp, θnew) - ζ = cbar * ζbar - ζbar = -sbar * ζbar - - xENorm² = xENorm² + ζ * ζ - err_vec[mod(iter, window) + 1] = ζ - iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) - - # Update h, hbar and x. - δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁) - @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁ - - # if a trust-region constraint is given, compute step to the boundary - # the step ϕ/ρ is not necessarily positive - σ = ζ / (ρ * ρbar) - if radius > 0 - t1, t2 = to_boundary(x, hbar, radius) - tmax, tmin = max(t1, t2), min(t1, t2) - on_boundary = σ > tmax || σ < tmin - σ = σ > 0 ? 
min(σ, tmax) : max(σ, tmin) - end + function lsmr!(solver :: LsmrSolver{T,FC,S}, $(def_args_lsmr...); $(def_kwargs_lsmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ - @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax - # Estimate ‖r‖. - βacute = chat * βdd - βcheck = -shat * βdd + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LSMR: system of %d equations in %d variables\n", m, n) - βhat = c * βacute - βdd = -s * βacute + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) - θtildeold = θtilde - (ctildeold, stildeold, ρtildeold) = sym_givens(ρdold, θbar) - θtilde = stildeold * ρbar - ρdold = ctildeold * ρbar - βd = -stildeold * βd + ctildeold * βhat + # Tests M = Iₙ and N = Iₘ + MisI = (M === I) + NisI = (N === I) - τtildeold = (ζold - θtildeold * τtildeold) / ρtildeold - τd = (ζ - θtilde * τtildeold) / ρdold - d = d + βcheck * βcheck - rNorm = sqrt(d + (βd - τd)^2 + βdd * βdd) - history && push!(rNorms, rNorm) + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' - # Estimate ‖A‖. - Anorm² += β * β - Anorm = sqrt(Anorm²) - Anorm² += α * α + # Set up workspace. 
+ allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + x, Nv, Aᴴu, h, hbar = solver.x, solver.Nv, solver.Aᴴu, solver.h, solver.hbar + Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v - # Estimate cond(A). - maxrbar = max(maxrbar, ρbarold) - iter > 1 && (minrbar = min(minrbar, ρbarold)) - Acond = max(maxrbar, ρtemp) / min(minrbar, ρtemp) + ctol = conlim > 0 ? 1/conlim : zero(T) + x .= zero(FC) - # Test for convergence. - ArNorm = abs(ζbar) + # Initialize Golub-Kahan process. + # β₁ M u₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) + β₁ = sqrt(@kdotr(m, u, Mu)) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + return solver + end + β = β₁ + + @kscal!(m, one(FC)/β₁, u) + MisI || @kscal!(m, one(FC)/β₁, Mu) + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + + ζbar = α * β + αbar = α + ρ = one(T) + ρbar = one(T) + cbar = one(T) + sbar = zero(T) + + # Initialize variables for estimation of ‖r‖. + βdd = β + βd = zero(T) + ρdold = one(T) + τtildeold = zero(T) + θtilde = zero(T) + ζ = zero(T) + d = zero(T) + + # Initialize variables for estimation of ‖A‖, cond(A) and xNorm. + Anorm² = α * α + maxrbar = zero(T) + minrbar = min(floatmax(T), T(1.0e+100)) + Acond = maxrbar / minrbar + Anorm = sqrt(Anorm²) + xNorm = zero(T) + + # Items for use in stopping rules. + ctol = conlim > 0 ? 
1 / conlim : zero(T) + rNorm = β + history && push!(rNorms, rNorm) + ArNorm = ArNorm0 = α * β history && push!(ArNorms, ArNorm) - xNorm = @knrm2(n, x) - test1 = rNorm / β₁ - test2 = ArNorm / (Anorm * rNorm) - test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xNorm / β₁) - rNormtol = btol + axtol * Anorm * xNorm / β₁ + xENorm² = zero(T) + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) + iter = 0 + itmax == 0 && (itmax = m + n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, β₁, α, β₁, α, 0, 1, Anorm², ktimer(start_time)) + + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver + end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + test3 ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + t1 ≤ one(T)) + h .= v + hbar .= zero(FC) - # Stopping conditions based on user-provided tolerances. 
- user_requested_exit = callback(solver) :: Bool + status = "unknown" + on_boundary = false + solved = solved_mach = solved_lim = (rNorm ≤ axtol) tired = iter ≥ itmax - ill_cond_lim = (test3 ≤ ctol) - solved_lim = (test2 ≤ axtol) - solved_opt = ArNorm ≤ atol + rtol * ArNorm0 - zero_resid_lim = (test1 ≤ rNormtol) - iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) - - ill_cond = ill_cond_mach | ill_cond_lim - zero_resid = zero_resid_mach | zero_resid_lim - solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid = zero_resid_mach = zero_resid_lim = false + fwd_err = false + user_requested_exit = false + overtimed = false + + while ! (solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Golub-Kahan vectors. + # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + end + end + + # Continue QR factorization + (chat, shat, αhat) = sym_givens(αbar, λ) + + ρold = ρ + (c, s, ρ) = sym_givens(αhat, β) + θnew = s * α + αbar = c * α + + ρbarold = ρbar + ζold = ζ + θbar = sbar * ρ + ρtemp = cbar * ρ + (cbar, sbar, ρbar) = sym_givens(ρtemp, θnew) + ζ = cbar * ζbar + ζbar = -sbar * ζbar + + xENorm² = xENorm² + ζ * ζ + err_vec[mod(iter, window) + 1] = ζ + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + + # Update h, hbar and x. 
+ δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁) + @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁ + + # if a trust-region constraint is given, compute step to the boundary + # the step ϕ/ρ is not necessarily positive + σ = ζ / (ρ * ρbar) + if radius > 0 + t1, t2 = to_boundary(n, x, hbar, radius) + tmax, tmin = max(t1, t2), min(t1, t2) + on_boundary = σ > tmax || σ < tmin + σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) + end + + @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ + @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ + + # Estimate ‖r‖. + βacute = chat * βdd + βcheck = -shat * βdd + + βhat = c * βacute + βdd = -s * βacute + + θtildeold = θtilde + (ctildeold, stildeold, ρtildeold) = sym_givens(ρdold, θbar) + θtilde = stildeold * ρbar + ρdold = ctildeold * ρbar + βd = -stildeold * βd + ctildeold * βhat + + τtildeold = (ζold - θtildeold * τtildeold) / ρtildeold + τd = (ζ - θtilde * τtildeold) / ρdold + d = d + βcheck * βcheck + rNorm = sqrt(d + (βd - τd)^2 + βdd * βdd) + history && push!(rNorms, rNorm) + + # Estimate ‖A‖. + Anorm² += β * β + Anorm = sqrt(Anorm²) + Anorm² += α * α + + # Estimate cond(A). + maxrbar = max(maxrbar, ρbarold) + iter > 1 && (minrbar = min(minrbar, ρbarold)) + Acond = max(maxrbar, ρtemp) / min(minrbar, ρtemp) + + # Test for convergence. + ArNorm = abs(ζbar) + history && push!(ArNorms, ArNorm) + xNorm = @knrm2(n, x) + + test1 = rNorm / β₁ + test2 = ArNorm / (Anorm * rNorm) + test3 = 1 / Acond + t1 = test1 / (one(T) + Anorm * xNorm / β₁) + rNormtol = btol + axtol * Anorm * xNorm / β₁ + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. 
+ ill_cond_mach = (one(T) + test3 ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + t1 ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + ill_cond_lim = (test3 ≤ ctol) + solved_lim = (test2 ≤ axtol) + solved_opt = ArNorm ≤ atol + rtol * ArNorm0 + zero_resid_lim = (test1 ≤ rNormtol) + iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) + + ill_cond = ill_cond_mach || ill_cond_lim + zero_resid = zero_resid_mach || zero_resid_lim + solved = solved_mach || solved_lim || solved_opt || zero_resid || fwd_err || on_boundary + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err && (status = "truncated forward error small enough") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.residual = rNorm + stats.Aresidual = ArNorm + stats.Acond = Acond + stats.Anorm = Anorm + stats.xNorm = xNorm + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - 
zero_resid && (status = "found approximate zero-residual solution") - fwd_err && (status = "truncated forward error small enough") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.residual = rNorm - stats.Aresidual = ArNorm - stats.Acond = Acond - stats.Anorm = Anorm - stats.xNorm = xNorm - stats.niter = iter - stats.solved = solved - stats.inconsistent = !zero_resid - stats.status = status - return solver end diff --git a/src/lsqr.jl b/src/lsqr.jl index dd3779dce..fe7acc37c 100644 --- a/src/lsqr.jl +++ b/src/lsqr.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSQR is formally equivalent to applying the conjugate gradient method # to the normal equations but should be more stable. It is also formally @@ -24,16 +24,16 @@ export lsqr, lsqr! - """ (x, stats) = lsqr(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), + M=I, N=I, ldiv::Bool=false, + window::Int=5, sqd::Bool=false, λ::T=zero(T), + radius::T=zero(T), etol::T=√eps(T), axtol::T=√eps(T), btol::T=√eps(T), - atol::T=zero(T), rtol::T=zero(T), - etol::T=√eps(T), window::Int=5, - itmax::Int=0, conlim::T=1/√eps(T), - radius::T=zero(T), verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + conlim::T=1/√eps(T), atol::T=zero(T), + rtol::T=zero(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,20 +42,20 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSQR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSQR method, where λ ≥ 0 is a regularization parameter. 
LSQR is formally equivalent to applying CG to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb (and therefore to CGLS) but is more stable. -LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂. It is formally equivalent to CGLS, though can be slightly more accurate. If `λ > 0`, LSQR solves the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -65,23 +65,52 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSQR is then equivalent to applying CG to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSQR is then equivalent to applying CG to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `etol`: stopping tolerance based on the lower bound on the error; +* `axtol`: tolerance on the backward error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -89,12 +118,6 @@ and `false` otherwise. 
""" function lsqr end -function lsqr(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = LsqrSolver(A, b, window=window) - lsqr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = lsqr!(solver::LsqrSolver, A, b; kwargs...) @@ -104,263 +127,309 @@ See [`LsqrSolver`](@ref) for more details about the `solver`. """ function lsqr! end -function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSQR: system of %d equations in %d variables\n", m, n) - - # Check sqd and λ parameters - sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") - sqd && (λ = one(T)) - - # Tests M = Iₙ and N = Iₘ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :u, S, m) - allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, w = solver.x, solver.Nv, solver.Aᵀu, solver.w - Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats - rNorms, ArNorms = stats.residuals, stats.Aresiduals - reset!(stats) - u = MisI ? Mu : solver.u - v = NisI ? Nv : solver.v - - λ² = λ * λ - ctol = conlim > 0 ? 1/conlim : zero(T) - x .= zero(FC) - - # Initialize Golub-Kahan process. - # β₁ M u₁ = b. 
- Mu .= b - MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, zero(T)) - history && push!(ArNorms, zero(T)) - return solver +def_args_lsqr = (:(A ), + :(b::AbstractVector{FC})) + +def_kwargs_lsqr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; sqd::Bool = false ), + :(; λ::T = zero(T) ), + :(; radius::T = zero(T) ), + :(; etol::T = √eps(T) ), + :(; axtol::T = √eps(T) ), + :(; btol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; atol::T = zero(T) ), + :(; rtol::T = zero(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_lsqr = mapreduce(extract_parameters, vcat, def_kwargs_lsqr) + +args_lsqr = (:A, :b) +kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function lsqr($(def_args_lsqr...); window :: Int=5, $(def_kwargs_lsqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = LsqrSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + lsqr!(solver, $(args_lsqr...); $(kwargs_lsqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - β = β₁ - - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu - NisI || mulorldiv!(v, N, Nv, ldiv) - Anorm² = @kdotr(n, v, Nv) - Anorm = sqrt(Anorm²) - α = Anorm - Acond = zero(T) - xNorm = zero(T) - xNorm² = zero(T) - dNorm² = zero(T) - c2 = -one(T) - s2 = zero(T) - z = zero(T) - - xENorm² = zero(T) - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - - iter = 0 - itmax == 0 && (itmax = m + n) - - (verbose > 0) && @printf("%5s 
%7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᵀr‖", "compat", "backwrd", "‖A‖", "κ(A)") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond) - - rNorm = β₁ - r1Norm = rNorm - r2Norm = rNorm - res2 = zero(T) - history && push!(rNorms, r2Norm) - ArNorm = ArNorm0 = α * β - history && push!(ArNorms, ArNorm) - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - if α == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a minimum least-squares solution" - return solver - end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - w .= v - - # Initialize other constants. - ϕbar = β₁ - ρbar = α - - status = "unknown" - on_boundary = false - solved_lim = ArNorm / (Anorm * rNorm) ≤ axtol - solved_mach = one(T) + ArNorm / (Anorm * rNorm) ≤ one(T) - solved = solved_mach | solved_lim - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid_lim = rNorm / β₁ ≤ axtol - zero_resid_mach = one(T) + rNorm / β₁ ≤ one(T) - zero_resid = zero_resid_mach | zero_resid_lim - fwd_err = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - iter = iter + 1 - - # Generate next Golub-Kahan vectors. - # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ - mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) - MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) - if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) - Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖² - λ > 0 && (Anorm² += λ²) - - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) - NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) - if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - end - end - # Continue QR factorization - # 1. Eliminate the regularization parameter. 
- (c1, s1, ρbar1) = sym_givens(ρbar, λ) - ψ = s1 * ϕbar - ϕbar = c1 * ϕbar - - # 2. Eliminate β. - # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : - # [ β 0 ] [ 0 ζbar ] - # - # k k+1 k k+1 k k+1 - # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] - # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] - # - # so that we obtain - # - # [ c s ] [ ζbar ] = [ ζ ] - # [ s -c ] [ 0 ] [ ζbar⁺ ] - (c, s, ρ) = sym_givens(ρbar1, β) - ϕ = c * ϕbar - ϕbar = s * ϕbar - - xENorm² = xENorm² + ϕ * ϕ - err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = norm(err_vec)) - - τ = s * ϕ - θ = s * α - ρbar = -c * α - dNorm² += @kdotr(n, w, w) / ρ^2 - - # if a trust-region constraint is give, compute step to the boundary - # the step ϕ/ρ is not necessarily positive - σ = ϕ / ρ - if radius > 0 - t1, t2 = to_boundary(x, w, radius) - tmax, tmin = max(t1, t2), min(t1, t2) - on_boundary = σ > tmax || σ < tmin - σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) + function lsqr!(solver :: LsqrSolver{T,FC,S}, $(def_args_lsqr...); $(def_kwargs_lsqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "LSQR: system of %d equations in %d variables\n", m, n) + + # Check sqd and λ parameters + sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") + sqd && (λ = one(T)) + + # Tests M = Iₙ and N = Iₘ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ allocate_if(!MisI, solver, :u, S, m) + allocate_if(!NisI, solver, :v, S, n) + x, Nv, Aᴴu, w = solver.x, solver.Nv, solver.Aᴴu, solver.w + Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats + rNorms, ArNorms = stats.residuals, stats.Aresiduals + reset!(stats) + u = MisI ? Mu : solver.u + v = NisI ? Nv : solver.v + + λ² = λ * λ + ctol = conlim > 0 ? 1/conlim : zero(T) + x .= zero(FC) + + # Initialize Golub-Kahan process. + # β₁ M u₁ = b. + Mu .= b + MisI || mulorldiv!(u, M, Mu, ldiv) + β₁ = sqrt(@kdotr(m, u, Mu)) + if β₁ == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, zero(T)) + history && push!(ArNorms, zero(T)) + return solver end - - @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w - @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w - - # Use a plane rotation on the right to eliminate the super-diagonal - # element (θ) of the upper-bidiagonal matrix. - # Use the result to estimate norm(x). 
- δ = s2 * ρ - γbar = -c2 * ρ - rhs = ϕ - δ * z - zbar = rhs / γbar - xNorm = sqrt(xNorm² + zbar * zbar) - (c2, s2, γ) = sym_givens(γbar, θ) - z = rhs / γ - xNorm² += z * z - + β = β₁ + + @kscal!(m, one(FC)/β₁, u) + MisI || @kscal!(m, one(FC)/β₁, Mu) + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu + NisI || mulorldiv!(v, N, Nv, ldiv) + Anorm² = @kdotr(n, v, Nv) Anorm = sqrt(Anorm²) - Acond = Anorm * sqrt(dNorm²) - res1 = ϕbar * ϕbar - res2 += ψ * ψ - rNorm = sqrt(res1 + res2) - - ArNorm = α * abs(τ) - history && push!(ArNorms, ArNorm) - - r1sq = rNorm * rNorm - λ² * xNorm² - r1Norm = sqrt(abs(r1sq)) - r1sq < 0 && (r1Norm = -r1Norm) + α = Anorm + Acond = zero(T) + xNorm = zero(T) + xNorm² = zero(T) + dNorm² = zero(T) + c2 = -one(T) + s2 = zero(T) + z = zero(T) + + xENorm² = zero(T) + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) + + iter = 0 + itmax == 0 && (itmax = m + n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %7s %7s %7s %7s %5s\n", "k", "α", "β", "‖r‖", "‖Aᴴr‖", "compat", "backwrd", "‖A‖", "κ(A)", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond, ktimer(start_time)) + + rNorm = β₁ + r1Norm = rNorm r2Norm = rNorm + res2 = zero(T) history && push!(rNorms, r2Norm) - - test1 = rNorm / β₁ - test2 = ArNorm / (Anorm * rNorm) - test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xNorm / β₁) - rNormtol = btol + axtol * Anorm * xNorm / β₁ - - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond) - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + test3 ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + t1 ≤ one(T)) - - # Stopping conditions based on user-provided tolerances. 
- user_requested_exit = callback(solver) :: Bool + ArNorm = ArNorm0 = α * β + history && push!(ArNorms, ArNorm) + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + if α == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a minimum least-squares solution" + return solver + end + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + w .= v + + # Initialize other constants. + ϕbar = β₁ + ρbar = α + + status = "unknown" + on_boundary = false + solved_lim = ArNorm / (Anorm * rNorm) ≤ axtol + solved_mach = one(T) + ArNorm / (Anorm * rNorm) ≤ one(T) + solved = solved_mach | solved_lim tired = iter ≥ itmax - ill_cond_lim = (test3 ≤ ctol) - solved_lim = (test2 ≤ axtol) - solved_opt = ArNorm ≤ atol + rtol * ArNorm0 - zero_resid_lim = (test1 ≤ rNormtol) - iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) - - ill_cond = ill_cond_mach | ill_cond_lim + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid_lim = rNorm / β₁ ≤ axtol + zero_resid_mach = one(T) + rNorm / β₁ ≤ one(T) zero_resid = zero_resid_mach | zero_resid_lim - solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary + fwd_err = false + user_requested_exit = false + overtimed = false + + while ! (solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Golub-Kahan vectors. + # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ + mul!(Av, A, v) + @kaxpby!(m, one(FC), Av, -α, Mu) + MisI || mulorldiv!(u, M, Mu, ldiv) + β = sqrt(@kdotr(m, u, Mu)) + if β ≠ 0 + @kscal!(m, one(FC)/β, u) + MisI || @kscal!(m, one(FC)/β, Mu) + Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖² + λ > 0 && (Anorm² += λ²) + + # 2. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + NisI || mulorldiv!(v, N, Nv, ldiv) + α = sqrt(@kdotr(n, v, Nv)) + if α ≠ 0 + @kscal!(n, one(FC)/α, v) + NisI || @kscal!(n, one(FC)/α, Nv) + end + end + + # Continue QR factorization + # 1. Eliminate the regularization parameter. + (c1, s1, ρbar1) = sym_givens(ρbar, λ) + ψ = s1 * ϕbar + ϕbar = c1 * ϕbar + + # 2. Eliminate β. + # Q [ Lₖ β₁ e₁ ] = [ Rₖ zₖ ] : + # [ β 0 ] [ 0 ζbar ] + # + # k k+1 k k+1 k k+1 + # k [ c s ] [ ρbar ] = [ ρ θ⁺ ] + # k+1 [ s -c ] [ β α⁺ ] [ ρbar⁺ ] + # + # so that we obtain + # + # [ c s ] [ ζbar ] = [ ζ ] + # [ s -c ] [ 0 ] [ ζbar⁺ ] + (c, s, ρ) = sym_givens(ρbar1, β) + ϕ = c * ϕbar + ϕbar = s * ϕbar + + xENorm² = xENorm² + ϕ * ϕ + err_vec[mod(iter, window) + 1] = ϕ + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + + τ = s * ϕ + θ = s * α + ρbar = -c * α + dNorm² += @kdotr(n, w, w) / ρ^2 + + # if a trust-region constraint is given, compute step to the boundary + # the step ϕ/ρ is not necessarily positive + σ = ϕ / ρ + if radius > 0 + t1, t2 = to_boundary(n, x, w, radius) + tmax, tmin = max(t1, t2), min(t1, t2) + on_boundary = σ > tmax || σ < tmin + σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) + end + + @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w + @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w + + # Use a plane rotation on the right to eliminate the super-diagonal + # element (θ) of the upper-bidiagonal matrix. + # Use the result to estimate norm(x). 
+ δ = s2 * ρ + γbar = -c2 * ρ + rhs = ϕ - δ * z + zbar = rhs / γbar + xNorm = sqrt(xNorm² + zbar * zbar) + (c2, s2, γ) = sym_givens(γbar, θ) + z = rhs / γ + xNorm² += z * z + + Anorm = sqrt(Anorm²) + Acond = Anorm * sqrt(dNorm²) + res1 = ϕbar * ϕbar + res2 += ψ * ψ + rNorm = sqrt(res1 + res2) + + ArNorm = α * abs(τ) + history && push!(ArNorms, ArNorm) + + r1sq = rNorm * rNorm - λ² * xNorm² + r1Norm = sqrt(abs(r1sq)) + r1sq < 0 && (r1Norm = -r1Norm) + r2Norm = rNorm + history && push!(rNorms, r2Norm) + + test1 = rNorm / β₁ + test2 = ArNorm / (Anorm * rNorm) + test3 = 1 / Acond + t1 = test1 / (one(T) + Anorm * xNorm / β₁) + rNormtol = btol + axtol * Anorm * xNorm / β₁ + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond, ktimer(start_time)) + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + test3 ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + t1 ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. 
+ user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + ill_cond_lim = (test3 ≤ ctol) + solved_lim = (test2 ≤ axtol) + solved_opt = ArNorm ≤ atol + rtol * ArNorm0 + zero_resid_lim = (test1 ≤ rNormtol) + iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) + + ill_cond = ill_cond_mach || ill_cond_lim + zero_resid = zero_resid_mach || zero_resid_lim + solved = solved_mach || solved_lim || solved_opt || zero_resid || fwd_err || on_boundary + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err && (status = "truncated forward error small enough") + on_boundary && (status = "on trust-region boundary") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - fwd_err && (status = "truncated forward error small enough") - on_boundary && (status = "on trust-region boundary") - user_requested_exit && (status = "user-requested exit") - - # Update stats - stats.niter = iter - stats.solved = solved - 
stats.inconsistent = !zero_resid - stats.status = status - return solver end diff --git a/src/minres.jl b/src/minres.jl index cbaefee9f..8e6659472 100644 --- a/src/minres.jl +++ b/src/minres.jl @@ -3,7 +3,7 @@ # # minimize ‖Ax - b‖₂ # -# where A is square and symmetric. +# where A is Hermitian. # # MINRES is formally equivalent to applying the conjugate residuals method # to Ax = b when A is positive definite, but is more general and also applies @@ -21,20 +21,22 @@ export minres, minres! - """ (x, stats) = minres(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T)/100, - rtol::T=√eps(T)/100, ratol :: T=zero(T), - rrtol :: T=zero(T), etol::T=√eps(T), - window::Int=5, itmax::Int=0, - conlim::T=1/√eps(T), verbose::Int=0, - history::Bool=false, ldiv::Bool=false, - callback=solver->false) + M=I, ldiv::Bool=false, window::Int=5, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), etol::T=√eps(T), + conlim::T=1/√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = minres(A, b, x0::AbstractVector; kwargs...) + +MINRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + Solve the shifted linear least-squares problem minimize ‖b - (A + λI)x‖₂² @@ -43,26 +45,45 @@ or the shifted linear system (A + λI) x = b -using the MINRES method, where λ ≥ 0 is a shift parameter, -where A is square and symmetric. +of size n using the MINRES method, where λ ≥ 0 is a shift parameter, +where A is Hermitian. MINRES is formally equivalent to applying CR to Ax=b when A is positive definite, but is typically more stable and also applies to the case where A is indefinite. -MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. +MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. 
+ +#### Input arguments -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. -MINRES can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = minres(A, b, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `etol`: stopping tolerance based on the lower bound on the error; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
+ +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -70,18 +91,6 @@ and `false` otherwise. """ function minres end -function minres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = MinresSolver(A, b, window=window) - minres!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function minres(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = MinresSolver(A, b, window=window) - minres!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = minres!(solver::MinresSolver, A, b; kwargs...) solver = minres!(solver::MinresSolver, A, b, x0; kwargs...) @@ -92,257 +101,306 @@ See [`MinresSolver`](@ref) for more details about the `solver`. """ function minres! end -function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - minres!(solver, A, b; kwargs...) - return solver -end - -function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100, - ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T), - itmax :: Int=0, conlim :: T=1/√eps(T), verbose :: Int=0, - history :: Bool=false, ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("MINRES: system of size %d\n", n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. 
- allocate_if(!MisI, solver, :v, S, n) - Δx, x, r1, r2, w1, w2, y = solver.Δx, solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.y - err_vec, stats = solver.err_vec, solver.stats - warm_start = solver.warm_start - rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond - reset!(stats) - v = MisI ? r2 : solver.v - - ϵM = eps(T) - ctol = conlim > 0 ? 1 / conlim : zero(T) - - # Initial solution x₀ - x .= zero(FC) - - if warm_start - mul!(r1, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1) - @kaxpby!(n, one(FC), b, -one(FC), r1) - else - r1 .= b +def_args_minres = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_minres = (:(x0::AbstractVector),) + +def_kwargs_minres = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; etol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_minres = mapreduce(extract_parameters, vcat, def_kwargs_minres) + +args_minres = (:A, :b) +optargs_minres = (:x0,) +kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function minres($(def_args_minres...), $(def_optargs_minres...); window :: Int=5, $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresSolver(A, b; window) + warm_start!(solver, $(optargs_minres...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres!(solver, $(args_minres...); $(kwargs_minres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initialize Lanczos process. - # β₁ M v₁ = b. 
- r2 .= r1 - MisI || mulorldiv!(v, M, r1, ldiv) - β₁ = @kdotr(m, r1, v) - β₁ < 0 && error("Preconditioner is not positive definite") - if β₁ == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - history && push!(rNorms, β₁) - history && push!(ArNorms, zero(T)) - history && push!(Aconds, zero(T)) - solver.warm_start = false - return solver + function minres($(def_args_minres...); window :: Int=5, $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres!(solver, $(args_minres...); $(kwargs_minres...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - β₁ = sqrt(β₁) - β = β₁ - - oldβ = zero(T) - δbar = zero(T) - ϵ = zero(T) - rNorm = β₁ - history && push!(rNorms, β₁) - ϕbar = β₁ - rhs1 = β₁ - rhs2 = zero(T) - γmax = zero(T) - γmin = T(Inf) - cs = -one(T) - sn = zero(T) - w1 .= zero(FC) - w2 .= zero(FC) - - ANorm² = zero(T) - ANorm = zero(T) - Acond = zero(T) - history && push!(Aconds, Acond) - ArNorm = zero(T) - history && push!(ArNorms, ArNorm) - xNorm = zero(T) - - xENorm² = zero(T) - err_lbnd = zero(T) - window = length(err_vec) - err_vec .= zero(T) - - iter = 0 - itmax == 0 && (itmax = 2*n) - - (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond) - - tol = atol + rtol * β₁ - rNormtol = ratol + rrtol * β₁ - stats.status = "unknown" - solved = solved_mach = solved_lim = (rNorm ≤ rtol) - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ tol) - fwd_err = false - user_requested_exit = false - - while !(solved || tired || 
ill_cond || user_requested_exit) - iter = iter + 1 - - # Generate next Lanczos vector. - mul!(y, A, v) - λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v) - @kscal!(n, one(FC) / β, y) - iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1) - - α = real((@kdot(n, v, y) / β)) - @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2 - - # Compute w. - δ = cs * δbar + sn * α - if iter == 1 - w = w2 - else - iter ≥ 3 && @kscal!(n, -ϵ, w1) - w = w1 - @kaxpy!(n, -δ, w2, w) - end - @kaxpy!(n, one(FC) / β, v, w) - - @. r1 = r2 - @. r2 = y - MisI || mulorldiv!(v, M, r2, ldiv) - oldβ = β - β = @kdotr(n, r2, v) - β < 0 && error("Preconditioner is not positive definite") - β = sqrt(β) - ANorm² = ANorm² + α * α + oldβ * oldβ + β * β - - # Apply rotation to obtain - # [ δₖ ϵₖ₊₁ ] = [ cs sn ] [ δbarₖ 0 ] - # [ γbar δbarₖ₊₁ ] [ sn -cs ] [ αₖ βₖ₊₁ ] - γbar = sn * δbar - cs * α - ϵ = sn * β - δbar = -cs * β - root = sqrt(γbar * γbar + δbar * δbar) - ArNorm = ϕbar * root # = ‖Aᵀrₖ₋₁‖ - history && push!(ArNorms, ArNorm) - - # Compute the next plane rotation. - γ = sqrt(γbar * γbar + β * β) - γ = max(γ, ϵM) - cs = γbar / γ - sn = β / γ - ϕ = cs * ϕbar - ϕbar = sn * ϕbar - - # Final update of w. - @kscal!(n, one(FC) / γ, w) - # Update x. - @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w - xENorm² = xENorm² + ϕ * ϕ + function minres!(solver :: MinresSolver{T,FC,S}, $(def_args_minres...); $(def_kwargs_minres...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} - # Update directions for x. - if iter ≥ 2 - @kswap(w1, w2) - end + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax - # Compute lower bound on forward error. 
- err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = norm(err_vec)) + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "MINRES: system of size %d\n", n) - γmax = max(γmax, γ) - γmin = min(γmin, γ) - ζ = rhs1 / γ - rhs1 = rhs2 - δ * ζ - rhs2 = -ϵ * ζ + # Tests M = Iₙ + MisI = (M === I) - # Estimate various norms. - ANorm = sqrt(ANorm²) - xNorm = @knrm2(n, x) - ϵA = ANorm * ϵM - ϵx = ANorm * xNorm * ϵM - ϵr = ANorm * xNorm * rtol - d = γbar - d == 0 && (d = ϵA) + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") - rNorm = ϕbar + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + Δx, x, r1, r2, w1, w2, y = solver.Δx, solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.y + err_vec, stats = solver.err_vec, solver.stats + warm_start = solver.warm_start + rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond + reset!(stats) + v = MisI ? r2 : solver.v - test1 = rNorm / (ANorm * xNorm) - test2 = root / ANorm - history && push!(rNorms, rNorm) + ϵM = eps(T) + ctol = conlim > 0 ? 1 / conlim : zero(T) - Acond = γmax / γmin - history && push!(Aconds, Acond) + # Initial solution x₀ + x .= zero(FC) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2) + if warm_start + mul!(r1, A, Δx) + (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1) + @kaxpby!(n, one(FC), b, -one(FC), r1) + else + r1 .= b + end - if iter == 1 && β / β₁ ≤ 10 * ϵM - # Aᵀb = 0 so x = 0 is a minimum least-squares solution + # Initialize Lanczos process. + # β₁ M v₁ = b. 
+ r2 .= r1 + MisI || mulorldiv!(v, M, r1, ldiv) + β₁ = @kdotr(m, r1, v) + β₁ < 0 && error("Preconditioner is not positive definite") + if β₁ == 0 stats.niter = 0 - stats.solved, stats.inconsistent = true, true - stats.status = "x is a minimum least-squares solution" + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + history && push!(rNorms, β₁) + history && push!(ArNorms, zero(T)) + history && push!(Aconds, zero(T)) solver.warm_start = false return solver end + β₁ = sqrt(β₁) + β = β₁ - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) - solved_mach = (one(T) + test2 ≤ one(T)) - zero_resid_mach = (one(T) + test1 ≤ one(T)) - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - # solved_mach = (ϵx ≥ β₁) - - # Stopping conditions based on user-provided tolerances. - tired = iter ≥ itmax - ill_cond_lim = (one(T) / Acond ≤ ctol) - solved_lim = (test2 ≤ tol) - zero_resid_lim = (test1 ≤ tol) - resid_decrease_lim = (rNorm ≤ rNormtol) - iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) - - user_requested_exit = callback(solver) :: Bool - zero_resid = zero_resid_mach | zero_resid_lim - resid_decrease = resid_decrease_mach | resid_decrease_lim - ill_cond = ill_cond_mach | ill_cond_lim - solved = solved_mach | solved_lim | zero_resid | fwd_err | resid_decrease + oldβ = zero(T) + δbar = zero(T) + ϵ = zero(T) + rNorm = β₁ + history && push!(rNorms, β₁) + ϕbar = β₁ + rhs1 = β₁ + rhs2 = zero(T) + γmax = zero(T) + γmin = T(Inf) + cs = -one(T) + sn = zero(T) + w1 .= zero(FC) + w2 .= zero(FC) + + ANorm² = zero(T) + ANorm = zero(T) + Acond = zero(T) + history && push!(Aconds, Acond) + ArNorm = zero(T) + history && push!(ArNorms, ArNorm) + xNorm = zero(T) + + xENorm² = zero(T) + err_lbnd = zero(T) + window = length(err_vec) + err_vec .= zero(T) + + iter = 0 + itmax 
== 0 && (itmax = 2*n) + + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s %5s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7s %7s %.2fs\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗", ktimer(start_time)) + + ε = atol + rtol * β₁ + solved = solved_mach = solved_lim = (rNorm ≤ rtol) + tired = iter ≥ itmax + ill_cond = ill_cond_mach = ill_cond_lim = false + zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ ε) + fwd_err = false + user_requested_exit = false + overtimed = false + + while !(solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Generate next Lanczos vector. + mul!(y, A, v) + λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v) + @kscal!(n, one(FC) / β, y) + iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1) + + α = real((@kdot(n, v, y) / β)) + @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2 + + # Compute w. + δ = cs * δbar + sn * α + if iter == 1 + w = w2 + else + iter ≥ 3 && @kscal!(n, -ϵ, w1) + w = w1 + @kaxpy!(n, -δ, w2, w) + end + @kaxpy!(n, one(FC) / β, v, w) + + @. r1 = r2 + @. r2 = y + MisI || mulorldiv!(v, M, r2, ldiv) + oldβ = β + β = @kdotr(n, r2, v) + β < 0 && error("Preconditioner is not positive definite") + β = sqrt(β) + ANorm² = ANorm² + α * α + oldβ * oldβ + β * β + + # Apply rotation to obtain + # [ δₖ ϵₖ₊₁ ] = [ cs sn ] [ δbarₖ 0 ] + # [ γbar δbarₖ₊₁ ] [ sn -cs ] [ αₖ βₖ₊₁ ] + γbar = sn * δbar - cs * α + ϵ = sn * β + δbar = -cs * β + root = sqrt(γbar * γbar + δbar * δbar) + ArNorm = ϕbar * root # = ‖Aᴴrₖ₋₁‖ + history && push!(ArNorms, ArNorm) + + # Compute the next plane rotation. + γ = sqrt(γbar * γbar + β * β) + γ = max(γ, ϵM) + cs = γbar / γ + sn = β / γ + ϕ = cs * ϕbar + ϕbar = sn * ϕbar + + # Final update of w. + @kscal!(n, one(FC) / γ, w) + + # Update x. 
+ @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w + xENorm² = xENorm² + ϕ * ϕ + + # Update directions for x. + if iter ≥ 2 + @kswap(w1, w2) + end + + # Compute lower bound on forward error. + err_vec[mod(iter, window) + 1] = ϕ + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + + γmax = max(γmax, γ) + γmin = min(γmin, γ) + ζ = rhs1 / γ + rhs1 = rhs2 - δ * ζ + rhs2 = -ϵ * ζ + + # Estimate various norms. + ANorm = sqrt(ANorm²) + xNorm = @knrm2(n, x) + ϵA = ANorm * ϵM + ϵx = ANorm * xNorm * ϵM + ϵr = ANorm * xNorm * rtol + d = γbar + d == 0 && (d = ϵA) + + rNorm = ϕbar + + test1 = rNorm / (ANorm * xNorm) + test2 = root / ANorm + history && push!(rNorms, rNorm) + + Acond = γmax / γmin + history && push!(Aconds, Acond) + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2, ktimer(start_time)) + + if iter == 1 && β / β₁ ≤ 10 * ϵM + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + stats.niter = 1 + stats.solved, stats.inconsistent = true, true + stats.timer = ktimer(start_time) + stats.status = "x is a minimum least-squares solution" + solver.warm_start = false + return solver + end + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) + solved_mach = (one(T) + test2 ≤ one(T)) + zero_resid_mach = (one(T) + test1 ≤ one(T)) + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + # solved_mach = (ϵx ≥ β₁) + + # Stopping conditions based on user-provided tolerances. 
+ tired = iter ≥ itmax + ill_cond_lim = (one(T) / Acond ≤ ctol) + solved_lim = (test2 ≤ ε) + zero_resid_lim = MisI && (test1 ≤ eps(T)) + resid_decrease_lim = (rNorm ≤ ε) + iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) + + user_requested_exit = callback(solver) :: Bool + zero_resid = zero_resid_mach || zero_resid_lim + resid_decrease = resid_decrease_mach || resid_decrease_lim + ill_cond = ill_cond_mach || ill_cond_lim + solved = solved_mach || solved_lim || zero_resid || fwd_err || resid_decrease + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + fwd_err && (status = "truncated forward error small enough") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !zero_resid + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - fwd_err && (status = "truncated forward error small enough") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, 
one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !zero_resid - stats.status = status - return solver end diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl index bbfbf856b..5bc3399eb 100644 --- a/src/minres_qlp.jl +++ b/src/minres_qlp.jl @@ -18,30 +18,53 @@ export minres_qlp, minres_qlp! """ (x, stats) = minres_qlp(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - ctol::T=√eps(T), λ::T=zero(T), itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, Artol::T=√eps(T), + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = minres_qlp(A, b, x0::AbstractVector; kwargs...) + +MINRES-QLP can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + MINRES-QLP is the only method based on the Lanczos process that returns the minimum-norm -solution on singular inconsistent systems (A + λI)x = b, where λ is a shift parameter. +solution on singular inconsistent systems (A + λI)x = b of size n, where λ is a shift parameter. It is significantly more complex but can be more reliable than MINRES when A is ill-conditioned. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. M also indicates the weighted norm in which residuals are measured. -MINRES-QLP can be warm-started from an initial guess `x0` with the method +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument - (x, stats) = minres_qlp(A, b, x0; kwargs...) 
+* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `Artol`: relative stopping tolerance based on the Aᴴ-residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -51,18 +74,6 @@ and `false` otherwise. """ function minres_qlp end -function minres_qlp(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = MinresQlpSolver(A, b) - minres_qlp!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function minres_qlp(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = MinresQlpSolver(A, b) - minres_qlp!(solver, A, b; kwargs...) 
- return (solver.x, solver.stats) -end - """ solver = minres_qlp!(solver::MinresQlpSolver, A, b; kwargs...) solver = minres_qlp!(solver::MinresQlpSolver, A, b, x0; kwargs...) @@ -73,365 +84,414 @@ See [`MinresQlpSolver`](@ref) for more details about the `solver`. """ function minres_qlp! end -function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - minres_qlp!(solver, A, b; kwargs...) - return solver -end - -function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - ctol :: T=√eps(T), λ ::T=zero(T), itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("MINRES-QLP: system of size %d\n", n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :vₖ, S, n) - wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ - Δx, x, p, stats = solver.Δx, solver.x, solver.p, solver.stats - warm_start = solver.warm_start - rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond - reset!(stats) - vₖ = MisI ? M⁻¹vₖ : solver.vₖ - vₖ₊₁ = MisI ? 
p : M⁻¹vₖ₋₁ - - # Initial solution x₀ - x .= zero(FC) - - if warm_start - mul!(M⁻¹vₖ, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ) - @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ) - else - M⁻¹vₖ .= b +def_args_minres_qlp = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_minres_qlp = (:(x0::AbstractVector),) + +def_kwargs_minres_qlp = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; λ::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; Artol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_minres_qlp = mapreduce(extract_parameters, vcat, def_kwargs_minres_qlp) + +args_minres_qlp = (:A, :b) +optargs_minres_qlp = (:x0,) +kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function minres_qlp($(def_args_minres_qlp...), $(def_optargs_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresQlpSolver(A, b) + warm_start!(solver, $(optargs_minres_qlp...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres_qlp!(solver, $(args_minres_qlp...); $(kwargs_minres_qlp...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # β₁v₁ = Mb - MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ)) - if βₖ ≠ 0 - @kscal!(n, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(n, one(FC) / βₖ, vₖ) + function minres_qlp($(def_args_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = MinresQlpSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + minres_qlp!(solver, $(args_minres_qlp...); $(kwargs_minres_qlp...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - rNorm = βₖ - 
ANorm² = zero(T) - ANorm = zero(T) - μmin = zero(T) - μmax = zero(T) - Acond = zero(T) - history && push!(rNorms, rNorm) - history && push!(Aconds, Acond) - if rNorm == 0 - stats.niter = 0 - stats.solved, stats.inconsistent = true, false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver - end - - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - κ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗") - - # Set up workspace. - M⁻¹vₖ₋₁ .= zero(FC) - ζbarₖ = βₖ - ξₖ₋₁ = zero(T) - τₖ₋₂ = τₖ₋₁ = τₖ = zero(T) - ψbarₖ₋₂ = zero(T) - μbisₖ₋₂ = μbarₖ₋₁ = zero(T) - wₖ₋₁ .= zero(FC) - wₖ .= zero(FC) - cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ - sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - solved = zero_resid = zero_resid_lim = rNorm ≤ ε - zero_resid_mach = false - inconsistent = false - ill_cond_mach = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || inconsistent || ill_cond_mach || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the preconditioned Lanczos process. 
- # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ - # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁ - - mul!(p, A, vₖ) # p ← Avₖ - if λ ≠ 0 - @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ - end - - if iter ≥ 2 - @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁ - end - - αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩ - - @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ - - MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ - - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p)) - - # βₖ₊₁.ₖ ≠ 0 - if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - MisI || @kscal!(m, one(FC) / βₖ₊₁, p) - end - - ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁ - - # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # - # [ α₁ β₂ 0 • • • 0 ] [ λ₁ γ₁ ϵ₁ 0 • • 0 ] - # [ β₂ α₂ β₃ • • ] [ 0 λ₂ γ₂ • • • ] - # [ 0 • • • • • ] [ • • λ₃ • • • • ] - # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] - # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] - # [ • • • • βₖ ] [ • • • γₖ₋₁] - # [ • • βₖ αₖ ] [ 0 • • • • 0 λₖ ] - # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] - # - # If k = 1, we don't have any previous reflexion. - # If k = 2, we apply the last reflexion. - # If k ≥ 3, we only apply the two previous reflexions. - - # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ - if iter ≥ 3 - # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] - # [sₖ₋₂ -cₖ₋₂] [βₖ] [γbarₖ₋₁] - ϵₖ₋₂ = sₖ₋₂ * βₖ - γbarₖ₋₁ = -cₖ₋₂ * βₖ - end - # Apply previous Givens reflections Qₖ₋₁.ₖ - if iter ≥ 2 - iter == 2 && (γbarₖ₋₁ = βₖ) - # [cₖ₋₁ sₖ₋₁] [γbarₖ₋₁] = [γₖ₋₁ ] - # [sₖ₋₁ -cₖ₋₁] [ αₖ ] [λbarₖ] - γₖ₋₁ = cₖ₋₁ * γbarₖ₋₁ + sₖ₋₁ * αₖ - λbarₖ = sₖ₋₁ * γbarₖ₋₁ - cₖ₋₁ * αₖ - end - iter == 1 && (λbarₖ = αₖ) - - # Compute and apply current Givens reflection Qₖ.ₖ₊₁ - # [cₖ sₖ] [λbarₖ] = [λₖ] - # [sₖ -cₖ] [βₖ₊₁ ] [0 ] - (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁) - - # Compute [ zₖ ] = (Qₖ)ᵀβ₁e₁ - # [ζbarₖ₊₁] - # - # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] - # [sₖ -cₖ] [ 0 ] [ζbarₖ₊₁] - ζₖ = cₖ * ζbarₖ - ζbarₖ₊₁ = sₖ * ζbarₖ - - # Update the LQ factorization of Rₖ = LₖPₖ. 
- # [ λ₁ γ₁ ϵ₁ 0 • • 0 ] [ μ₁ 0 • • • • 0 ] - # [ 0 λ₂ γ₂ • • • ] [ ψ₁ μ₂ • • ] - # [ • • λ₃ • • • • ] [ ρ₁ ψ₂ μ₃ • • ] - # [ • • • • • 0 ] = [ 0 • • • • • ] Pₖ - # [ • • • • ϵₖ₋₂] [ • • • • μₖ₋₂ • • ] - # [ • • • γₖ₋₁] [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] - # [ 0 • • • • 0 λₖ ] [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] - - if iter == 1 - μbarₖ = λₖ - elseif iter == 2 - # [μbar₁ γ₁] [cp₂ sp₂] = [μbis₁ 0 ] - # [ 0 λ₂] [sp₂ -cp₂] [ψbar₁ μbar₂] - (cpₖ, spₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, γₖ₋₁) - ψbarₖ₋₁ = spₖ * λₖ - μbarₖ = -cpₖ * λₖ - else - # [μbisₖ₋₂ 0 ϵₖ₋₂] [cpₖ 0 spₖ] [μₖ₋₂ 0 0 ] - # [ψbarₖ₋₂ μbarₖ₋₁ γₖ₋₁] [ 0 1 0 ] = [ψₖ₋₂ μbarₖ₋₁ θₖ] - # [ 0 0 λₖ ] [spₖ 0 -cpₖ] [ρₖ₋₂ 0 ηₖ] - (cpₖ, spₖ, μₖ₋₂) = sym_givens(μbisₖ₋₂, ϵₖ₋₂) - ψₖ₋₂ = cpₖ * ψbarₖ₋₂ + spₖ * γₖ₋₁ - θₖ = spₖ * ψbarₖ₋₂ - cpₖ * γₖ₋₁ - ρₖ₋₂ = spₖ * λₖ - ηₖ = -cpₖ * λₖ - - # [μₖ₋₂ 0 0 ] [1 0 0 ] [μₖ₋₂ 0 0 ] - # [ψₖ₋₂ μbarₖ₋₁ θₖ] [0 cdₖ sdₖ] = [ψₖ₋₂ μbisₖ₋₁ 0 ] - # [ρₖ₋₂ 0 ηₖ] [0 sdₖ -cdₖ] [ρₖ₋₂ ψbarₖ₋₁ μbarₖ] - (cdₖ, sdₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, θₖ) - ψbarₖ₋₁ = sdₖ * ηₖ - μbarₖ = -cdₖ * ηₖ - end - - # Compute Lₖtₖ = zₖ - # [ μ₁ 0 • • • • 0 ] [τ₁] [ζ₁] - # [ ψ₁ μ₂ • • ] [τ₂] [ζ₂] - # [ ρ₁ ψ₂ μ₃ • • ] [τ₃] [ζ₃] - # [ 0 • • • • • ] [••] = [••] - # [ • • • • μₖ₋₂ • • ] [••] [••] - # [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] [••] [••] - # [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] [τₖ] [ζₖ] - if iter == 1 - τₖ = ζₖ / μbarₖ - elseif iter == 2 - τₖ₋₁ = τₖ - τₖ₋₁ = τₖ₋₁ * μbarₖ₋₁ / μbisₖ₋₁ - ξₖ = ζₖ - τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, $(def_args_minres_qlp...); $(def_kwargs_minres_qlp...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && 
@printf(iostream, "MINRES-QLP: system of size %d\n", n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :vₖ, S, n) + wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ + Δx, x, p, stats = solver.Δx, solver.x, solver.p, solver.stats + warm_start = solver.warm_start + rNorms, ArNorms, Aconds = stats.residuals, stats.Aresiduals, stats.Acond + reset!(stats) + vₖ = MisI ? M⁻¹vₖ : solver.vₖ + vₖ₊₁ = MisI ? p : M⁻¹vₖ₋₁ + + # Initial solution x₀ + x .= zero(FC) + + if warm_start + mul!(M⁻¹vₖ, A, Δx) + (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ) + @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ) else - τₖ₋₂ = τₖ₋₁ - τₖ₋₂ = τₖ₋₂ * μbisₖ₋₂ / μₖ₋₂ - τₖ₋₁ = (ξₖ₋₁ - ψₖ₋₂ * τₖ₋₂) / μbisₖ₋₁ - ξₖ = ζₖ - ρₖ₋₂ * τₖ₋₂ - τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + M⁻¹vₖ .= b end - # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᵀ - if iter == 1 - # w̅₁ = v₁ - @. wₖ = vₖ - elseif iter == 2 - # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ - # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ - @kswap(wₖ₋₁, wₖ) - @. 
wₖ = spₖ * wₖ₋₁ - cpₖ * vₖ - @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁) - else - # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ - # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) - # [spₖ 0 -cpₖ] [0 sdₖ -cdₖ] ⟷ w̄ₖ = sdₖ * w̄ₖ₋₁ - cdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) - ẘₖ₋₂ = wₖ₋₁ - w̄ₖ₋₁ = wₖ - # Update the solution x - @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x) - @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x) - # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ - @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂) - wₐᵤₓ = ẘₖ₋₂ - # Compute ẘₖ₋₁ and w̄ₖ - @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ) - @kswap(wₖ₋₁, wₖ) + # β₁v₁ = Mb + MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) + βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ)) + if βₖ ≠ 0 + @kscal!(n, one(FC) / βₖ, M⁻¹vₖ) + MisI || @kscal!(n, one(FC) / βₖ, vₖ) end - # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ - MisI || (vₖ .= vₖ₊₁) - M⁻¹vₖ₋₁ .= M⁻¹vₖ - M⁻¹vₖ .= p - - # Update ‖rₖ‖ estimate - # ‖ rₖ ‖ = |ζbarₖ₊₁| - rNorm = abs(ζbarₖ₊₁) + rNorm = βₖ + ANorm² = zero(T) + ANorm = zero(T) + μmin = zero(T) + μmax = zero(T) + Acond = zero(T) history && push!(rNorms, rNorm) - - # Update ‖Arₖ₋₁‖ estimate - # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²) - ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁)) - iter == 1 && (κ = atol + ctol * ArNorm) - history && push!(ArNorms, ArNorm) - - ANorm = sqrt(ANorm²) - # estimate A condition number - abs_μbarₖ = abs(μbarₖ) - if iter == 1 - μmin = abs_μbarₖ - μmax = abs_μbarₖ - elseif iter == 2 - μmax = max(μmax, μbisₖ₋₁, abs_μbarₖ) - μmin = min(μmin, μbisₖ₋₁, abs_μbarₖ) - else - μmax = max(μmax, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) - μmin = min(μmin, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) - end - Acond = μmax / μmin history && push!(Aconds, Acond) - xNorm = @knrm2(n, x) - backward = rNorm / (ANorm * xNorm) - - # Update stopping criterion. - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) - resid_decrease_mach = (one(T) + rNorm ≤ one(T)) - zero_resid_mach = (one(T) + backward ≤ one(T)) + if rNorm == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end - # Stopping conditions based on user-provided tolerances. + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + κ = zero(T) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %7s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗", ktimer(start_time)) + + # Set up workspace. + M⁻¹vₖ₋₁ .= zero(FC) + ζbarₖ = βₖ + ξₖ₋₁ = zero(T) + τₖ₋₂ = τₖ₋₁ = τₖ = zero(T) + ψbarₖ₋₂ = zero(T) + μbisₖ₋₂ = μbarₖ₋₁ = zero(T) + wₖ₋₁ .= zero(FC) + wₖ .= zero(FC) + cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ + sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ + + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) + + # Stopping criterion. 
+ breakdown = false + solved = zero_resid = zero_resid_lim = rNorm ≤ ε + zero_resid_mach = false + inconsistent = false + ill_cond_mach = false tired = iter ≥ itmax - resid_decrease_lim = (rNorm ≤ ε) - zero_resid_lim = (backward ≤ ε) - breakdown = βₖ₊₁ ≤ btol - - user_requested_exit = callback(solver) :: Bool - zero_resid = zero_resid_mach | zero_resid_lim - resid_decrease = resid_decrease_mach | resid_decrease_lim - solved = resid_decrease | zero_resid - inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ ctol) || (breakdown && !solved) + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || inconsistent || ill_cond_mach || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the preconditioned Lanczos process. + # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ + # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁ + + mul!(p, A, vₖ) # p ← Avₖ + if λ ≠ 0 + @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ + end + + if iter ≥ 2 + @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁ + end + + αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩ + + @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ + + MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ + + βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p)) + + # βₖ₊₁.ₖ ≠ 0 + if βₖ₊₁ > btol + @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + MisI || @kscal!(m, one(FC) / βₖ₊₁, p) + end + + ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁ + + # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. + # [ Oᵀ ] + # + # [ α₁ β₂ 0 • • • 0 ] [ λ₁ γ₁ ϵ₁ 0 • • 0 ] + # [ β₂ α₂ β₃ • • ] [ 0 λ₂ γ₂ • • • ] + # [ 0 • • • • • ] [ • • λ₃ • • • • ] + # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] + # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] + # [ • • • • βₖ ] [ • • • γₖ₋₁] + # [ • • βₖ αₖ ] [ 0 • • • • 0 λₖ ] + # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] + # + # If k = 1, we don't have any previous reflexion. + # If k = 2, we apply the last reflexion. + # If k ≥ 3, we only apply the two previous reflexions. 
+ + # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ + if iter ≥ 3 + # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] + # [sₖ₋₂ -cₖ₋₂] [βₖ] [γbarₖ₋₁] + ϵₖ₋₂ = sₖ₋₂ * βₖ + γbarₖ₋₁ = -cₖ₋₂ * βₖ + end + # Apply previous Givens reflections Qₖ₋₁.ₖ + if iter ≥ 2 + iter == 2 && (γbarₖ₋₁ = βₖ) + # [cₖ₋₁ sₖ₋₁] [γbarₖ₋₁] = [γₖ₋₁ ] + # [sₖ₋₁ -cₖ₋₁] [ αₖ ] [λbarₖ] + γₖ₋₁ = cₖ₋₁ * γbarₖ₋₁ + sₖ₋₁ * αₖ + λbarₖ = sₖ₋₁ * γbarₖ₋₁ - cₖ₋₁ * αₖ + end + iter == 1 && (λbarₖ = αₖ) + + # Compute and apply current Givens reflection Qₖ.ₖ₊₁ + # [cₖ sₖ] [λbarₖ] = [λₖ] + # [sₖ -cₖ] [βₖ₊₁ ] [0 ] + (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁) + + # Compute [ zₖ ] = (Qₖ)ᴴβ₁e₁ + # [ζbarₖ₊₁] + # + # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] + # [sₖ -cₖ] [ 0 ] [ζbarₖ₊₁] + ζₖ = cₖ * ζbarₖ + ζbarₖ₊₁ = sₖ * ζbarₖ + + # Update the LQ factorization of Rₖ = LₖPₖ. + # [ λ₁ γ₁ ϵ₁ 0 • • 0 ] [ μ₁ 0 • • • • 0 ] + # [ 0 λ₂ γ₂ • • • ] [ ψ₁ μ₂ • • ] + # [ • • λ₃ • • • • ] [ ρ₁ ψ₂ μ₃ • • ] + # [ • • • • • 0 ] = [ 0 • • • • • ] Pₖ + # [ • • • • ϵₖ₋₂] [ • • • • μₖ₋₂ • • ] + # [ • • • γₖ₋₁] [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] + # [ 0 • • • • 0 λₖ ] [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] + + if iter == 1 + μbarₖ = λₖ + elseif iter == 2 + # [μbar₁ γ₁] [cp₂ sp₂] = [μbis₁ 0 ] + # [ 0 λ₂] [sp₂ -cp₂] [ψbar₁ μbar₂] + (cpₖ, spₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, γₖ₋₁) + ψbarₖ₋₁ = spₖ * λₖ + μbarₖ = -cpₖ * λₖ + else + # [μbisₖ₋₂ 0 ϵₖ₋₂] [cpₖ 0 spₖ] [μₖ₋₂ 0 0 ] + # [ψbarₖ₋₂ μbarₖ₋₁ γₖ₋₁] [ 0 1 0 ] = [ψₖ₋₂ μbarₖ₋₁ θₖ] + # [ 0 0 λₖ ] [spₖ 0 -cpₖ] [ρₖ₋₂ 0 ηₖ] + (cpₖ, spₖ, μₖ₋₂) = sym_givens(μbisₖ₋₂, ϵₖ₋₂) + ψₖ₋₂ = cpₖ * ψbarₖ₋₂ + spₖ * γₖ₋₁ + θₖ = spₖ * ψbarₖ₋₂ - cpₖ * γₖ₋₁ + ρₖ₋₂ = spₖ * λₖ + ηₖ = -cpₖ * λₖ + + # [μₖ₋₂ 0 0 ] [1 0 0 ] [μₖ₋₂ 0 0 ] + # [ψₖ₋₂ μbarₖ₋₁ θₖ] [0 cdₖ sdₖ] = [ψₖ₋₂ μbisₖ₋₁ 0 ] + # [ρₖ₋₂ 0 ηₖ] [0 sdₖ -cdₖ] [ρₖ₋₂ ψbarₖ₋₁ μbarₖ] + (cdₖ, sdₖ, μbisₖ₋₁) = sym_givens(μbarₖ₋₁, θₖ) + ψbarₖ₋₁ = sdₖ * ηₖ + μbarₖ = -cdₖ * ηₖ + end + + # Compute Lₖtₖ = zₖ + # [ μ₁ 0 • • • • 0 ] [τ₁] [ζ₁] + # [ ψ₁ μ₂ • • ] [τ₂] [ζ₂] + # [ ρ₁ ψ₂ μ₃ • • ] [τ₃] [ζ₃] + # [ 0 • • • • • ] [••] = [••] + # [ • • • 
• μₖ₋₂ • • ] [••] [••] + # [ • • • ψₖ₋₂ μbisₖ₋₁ 0 ] [••] [••] + # [ 0 • • 0 ρₖ₋₂ ψbarₖ₋₁ μbarₖ] [τₖ] [ζₖ] + if iter == 1 + τₖ = ζₖ / μbarₖ + elseif iter == 2 + τₖ₋₁ = τₖ + τₖ₋₁ = τₖ₋₁ * μbarₖ₋₁ / μbisₖ₋₁ + ξₖ = ζₖ + τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + else + τₖ₋₂ = τₖ₋₁ + τₖ₋₂ = τₖ₋₂ * μbisₖ₋₂ / μₖ₋₂ + τₖ₋₁ = (ξₖ₋₁ - ψₖ₋₂ * τₖ₋₂) / μbisₖ₋₁ + ξₖ = ζₖ - ρₖ₋₂ * τₖ₋₂ + τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ + end + + # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ + if iter == 1 + # w̅₁ = v₁ + @. wₖ = vₖ + elseif iter == 2 + # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ + # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ + @kswap(wₖ₋₁, wₖ) + @. wₖ = spₖ * wₖ₋₁ - cpₖ * vₖ + @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁) + else + # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ + # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) + # [spₖ 0 -cpₖ] [0 sdₖ -cdₖ] ⟷ w̄ₖ = sdₖ * w̄ₖ₋₁ - cdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) + ẘₖ₋₂ = wₖ₋₁ + w̄ₖ₋₁ = wₖ + # Update the solution x + @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x) + @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x) + # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ + @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂) + wₐᵤₓ = ẘₖ₋₂ + # Compute ẘₖ₋₁ and w̄ₖ + @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ) + @kswap(wₖ₋₁, wₖ) + end + + # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ + MisI || (vₖ .= vₖ₊₁) + M⁻¹vₖ₋₁ .= M⁻¹vₖ + M⁻¹vₖ .= p + + # Update ‖rₖ‖ estimate + # ‖ rₖ ‖ = |ζbarₖ₊₁| + rNorm = abs(ζbarₖ₊₁) + history && push!(rNorms, rNorm) + + # Update ‖Arₖ₋₁‖ estimate + # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²) + ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁)) + iter == 1 && (κ = atol + Artol * ArNorm) + history && push!(ArNorms, ArNorm) + + ANorm = sqrt(ANorm²) + # estimate A condition number + abs_μbarₖ = abs(μbarₖ) + if iter == 1 + μmin = abs_μbarₖ + μmax = abs_μbarₖ + elseif iter == 2 + μmax = max(μmax, μbisₖ₋₁, abs_μbarₖ) + μmin = min(μmin, μbisₖ₋₁, abs_μbarₖ) + else + μmax = max(μmax, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) + μmin 
= min(μmin, μₖ₋₂, μbisₖ₋₁, abs_μbarₖ) + end + Acond = μmax / μmin + history && push!(Aconds, Acond) + xNorm = @knrm2(n, x) + backward = rNorm / (ANorm * xNorm) + + # Update stopping criterion. + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) + resid_decrease_mach = (one(T) + rNorm ≤ one(T)) + zero_resid_mach = (one(T) + backward ≤ one(T)) + + # Stopping conditions based on user-provided tolerances. + tired = iter ≥ itmax + resid_decrease_lim = (rNorm ≤ ε) + zero_resid_lim = MisI && (backward ≤ eps(T)) + breakdown = βₖ₊₁ ≤ btol + + user_requested_exit = callback(solver) :: Bool + zero_resid = zero_resid_mach | zero_resid_lim + resid_decrease = resid_decrease_mach | resid_decrease_lim + solved = resid_decrease | zero_resid + inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ Artol) || (breakdown && !solved) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + # Update variables + if iter ≥ 2 + sₖ₋₂ = sₖ₋₁ + cₖ₋₂ = cₖ₋₁ + ξₖ₋₁ = ξₖ + μbisₖ₋₂ = μbisₖ₋₁ + ψbarₖ₋₂ = ψbarₖ₋₁ + end + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + μbarₖ₋₁ = μbarₖ + ζbarₖ = ζbarₖ₊₁ + βₖ = βₖ₊₁ + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e %.2fs\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") - # Update variables + # Finalize the update of x if iter ≥ 2 - sₖ₋₂ = sₖ₋₁ - cₖ₋₂ = cₖ₋₁ - ξₖ₋₁ = ξₖ - μbisₖ₋₂ = μbisₖ₋₁ - ψbarₖ₋₂ = ψbarₖ₋₁ + @kaxpy!(n, τₖ₋₁, wₖ₋₁, x) + end + if !inconsistent + @kaxpy!(n, τₖ, wₖ, x) end - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - μbarₖ₋₁ = μbarₖ - ζbarₖ = ζbarₖ₊₁ - βₖ = βₖ₊₁ - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward) - end - (verbose > 0) && @printf("\n") - # Finalize the update of x - if iter ≥ 2 - @kaxpy!(n, τₖ₋₁, wₖ₋₁, 
x) - end - if !inconsistent - @kaxpy!(n, τₖ, wₖ, x) - end + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + inconsistent && (status = "found approximate minimum least-squares solution") + zero_resid && (status = "found approximate zero-residual solution") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - inconsistent && (status = "found approximate minimum least-squares solution") - zero_resid && (status = "found approximate zero-residual solution") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver + end end diff --git a/src/qmr.jl b/src/qmr.jl index eb4a4eb46..995392f0c 100644 --- a/src/qmr.jl +++ b/src/qmr.jl @@ -21,28 +21,49 @@ export qmr, qmr! 
""" - (x, stats) = qmr(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + (x, stats) = qmr(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, timemax::Float64=Inf, verbose::Int=0, + history::Bool=false, callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the square linear system Ax = b using the QMR method. + (x, stats) = qmr(A, b, x0::AbstractVector; kwargs...) + +QMR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using QMR. QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. -When `A` is symmetric and `b = c`, QMR is equivalent to MINRES. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. +When `A` is Hermitian and `b = c`, QMR is equivalent to MINRES. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -QMR can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = qmr(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -52,18 +73,6 @@ and `false` otherwise. """ function qmr end -function qmr(A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = QmrSolver(A, b) - qmr!(solver, A, b, x0; kwargs...) - return (solver.x, solver.stats) -end - -function qmr(A, b :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = QmrSolver(A, b) - qmr!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = qmr!(solver::QmrSolver, A, b; kwargs...) solver = qmr!(solver::QmrSolver, A, b, x0; kwargs...) @@ -74,253 +83,301 @@ See [`QmrSolver`](@ref) for more details about the `solver`. """ function qmr! end -function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - qmr!(solver, A, b; kwargs...) 
- return solver -end - -function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("QMR: system of size %d\n", n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p - Δx, x, wₖ₋₂, wₖ₋₁, stats = solver.Δx, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_qmr = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_qmr = (:(x0::AbstractVector),) + +def_kwargs_qmr = (:(; c::AbstractVector{FC} = b ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_qmr = mapreduce(extract_parameters, vcat, def_kwargs_qmr) + +args_qmr = (:A, :b) +optargs_qmr = (:x0,) +kwargs_qmr = (:c, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function qmr($(def_args_qmr...), $(def_optargs_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = QmrSolver(A, b) + warm_start!(solver, $(optargs_qmr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + qmr!(solver, $(args_qmr...); $(kwargs_qmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ - - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function qmr($(def_args_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = QmrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + qmr!(solver, $(args_qmr...); $(kwargs_qmr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = 2*n) - - ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - - # Initialize the Lanczos biorthogonalization process. - cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩ - if cᵗb == 0 - stats.niter = 0 - stats.solved = false - stats.inconsistent = false - stats.status = "Breakdown bᵀc = 0" - solver.warm_start = false - return solver - end - - βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀) - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ - cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ - sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ - wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ - ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ - τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate - - # Stopping criterion. - solved = rNorm ≤ ε - breakdown = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the Lanczos biorthogonalization process. 
- # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ - - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ - - # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] - # [ 0 • • • • • ] [ • • δ₃ • • • • ] - # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] - # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] - # [ • • • • γₖ ] [ • • • λₖ₋₁] - # [ • • βₖ αₖ ] [ • • δₖ ] - # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] - # - # If k = 1, we don't have any previous reflexion. - # If k = 2, we apply the last reflexion. - # If k ≥ 3, we only apply the two previous reflexions. - - # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ - if iter ≥ 3 - # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] - # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] - ϵₖ₋₂ = sₖ₋₂ * γₖ - λbarₖ₋₁ = -cₖ₋₂ * γₖ + function qmr!(solver :: QmrSolver{T,FC,S}, $(def_args_qmr...); $(def_kwargs_qmr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "QMR: system of size %d\n", n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p + Δx, x, wₖ₋₂, wₖ₋₁, stats = solver.Δx, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Apply previous Givens reflections Qₖ₋₁.ₖ - if iter ≥ 2 - iter == 2 && (λbarₖ₋₁ = γₖ) - # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] - # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] - λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ - δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ - - # Update sₖ₋₂ and cₖ₋₂. - sₖ₋₂ = sₖ₋₁ - cₖ₋₂ = cₖ₋₁ - end + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ - # Compute and apply current Givens reflection Qₖ.ₖ₊₁ - iter == 1 && (δbarₖ = αₖ) - # [cₖ sₖ] [δbarₖ] = [δₖ] - # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] - (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) - - # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] - # [ 0 ] - # - # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] - # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] - ζₖ = cₖ * ζbarₖ - ζbarₖ₊₁ = conj(sₖ) * ζbarₖ - - # Update sₖ₋₁ and cₖ₋₁. - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - - # Compute the direction wₖ, the last column of Wₖ = Vₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Vₖ)ᵀ. - # w₁ = v₁ / δ₁ - if iter == 1 - wₖ = wₖ₋₁ - @kaxpy!(n, one(FC), vₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # w₂ = (v₂ - λ₁w₁) / δ₂ - if iter == 2 - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), vₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ - if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), vₖ, wₖ) - @. 
wₖ = wₖ / δₖ + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Compute solution xₖ. - # xₖ ← xₖ₋₁ + ζₖ * wₖ - @kaxpy!(n, ζₖ, wₖ, x) - - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if pᵗq ≠ zero(FC) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + iter = 0 + itmax == 0 && (itmax = 2*n) + + ε = atol + rtol * rNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + + # Initialize the Lanczos biorthogonalization process. + cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + if cᴴb == 0 + stats.niter = 0 + stats.solved = false + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "Breakdown bᴴc = 0" + solver.warm_start = false + return solver end - # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖² - τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ) + βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ + cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ + sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ + wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ + ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ + τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate + + # Stopping criterion. 
+ solved = rNorm ≤ ε + breakdown = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the Lanczos biorthogonalization process. + # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ + + @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + + αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + + @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + + # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. + # [ Oᵀ ] + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] + # [ 0 • • • • • ] [ • • δ₃ • • • • ] + # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] + # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] + # [ • • • • γₖ ] [ • • • λₖ₋₁] + # [ • • βₖ αₖ ] [ • • δₖ ] + # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] + # + # If k = 1, we don't have any previous reflexion. + # If k = 2, we apply the last reflexion. + # If k ≥ 3, we only apply the two previous reflexions. + + # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ + if iter ≥ 3 + # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] + # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] + ϵₖ₋₂ = sₖ₋₂ * γₖ + λbarₖ₋₁ = -cₖ₋₂ * γₖ + end + + # Apply previous Givens reflections Qₖ₋₁.ₖ + if iter ≥ 2 + iter == 2 && (λbarₖ₋₁ = γₖ) + # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] + # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] + λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ + δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ + + # Update sₖ₋₂ and cₖ₋₂. 
+ sₖ₋₂ = sₖ₋₁ + cₖ₋₂ = cₖ₋₁ + end + + # Compute and apply current Givens reflection Qₖ.ₖ₊₁ + iter == 1 && (δbarₖ = αₖ) + # [cₖ sₖ] [δbarₖ] = [δₖ] + # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] + (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) + + # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] + # [ 0 ] + # + # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] + # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] + ζₖ = cₖ * ζbarₖ + ζbarₖ₊₁ = conj(sₖ) * ζbarₖ + + # Update sₖ₋₁ and cₖ₋₁. + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + + # Compute the direction wₖ, the last column of Wₖ = Vₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Vₖ)ᵀ. + # w₁ = v₁ / δ₁ + if iter == 1 + wₖ = wₖ₋₁ + @kaxpy!(n, one(FC), vₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # w₂ = (v₂ - λ₁w₁) / δ₂ + if iter == 2 + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), vₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ + if iter ≥ 3 + @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), vₖ, wₖ) + @. wₖ = wₖ / δₖ + end + + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + ζₖ * wₖ + @kaxpy!(n, ζₖ, wₖ, x) + + # Compute vₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if pᴴq ≠ zero(FC) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p + end + + # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖² + τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ) + + # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁ + rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁ + history && push!(rNorms, rNorm) + + # Update directions for x. + if iter ≥ 2 + @kswap(wₖ₋₂, wₖ₋₁) + end + + # Update ζbarₖ, βₖ, γₖ and τₖ. + ζbarₖ = ζbarₖ₊₁ + βₖ = βₖ₊₁ + γₖ = γₖ₊₁ + τₖ = τₖ₊₁ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + breakdown = !solved && (pᴴq == 0) + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") - # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁ - rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁ - history && push!(rNorms, rNorm) + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update directions for x. - if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) - end + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Update ζbarₖ, βₖ, γₖ and τₖ. - ζbarₖ = ζbarₖ₊₁ - βₖ = βₖ₊₁ - γₖ = γₖ₊₁ - τₖ = τₖ₊₁ - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. 
- user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - breakdown = !solved && (pᵗq == 0) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/symmlq.jl b/src/symmlq.jl index 7b889c715..604698525 100644 --- a/src/symmlq.jl +++ b/src/symmlq.jl @@ -1,5 +1,5 @@ # An implementation of SYMMLQ for the solution of the -# linear system Ax = b, where A is square and symmetric. +# linear system Ax = b, where A is Hermitian. # # This implementation follows the original implementation by # Michael Saunders described in @@ -11,38 +11,63 @@ export symmlq, symmlq! 
- """ - (x, stats) = symmlq(A, b::AbstractVector{FC}; window::Int=0, - M=I, λ::T=zero(T), transfer_to_cg::Bool=true, - λest::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - etol::T=√eps(T), itmax::Int=0, conlim::T=1/√eps(T), - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = symmlq(A, b::AbstractVector{FC}; + M=I, ldiv::Bool=false, window::Int=5, + transfer_to_cg::Bool=true, λ::T=zero(T), + λest::T=zero(T), etol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = symmlq(A, b, x0::AbstractVector; kwargs...) + +SYMMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above + Solve the shifted linear system (A + λI) x = b -using the SYMMLQ method, where λ is a shift parameter, -and A is square and symmetric. +of size n using the SYMMLQ method, where λ is a shift parameter, and A is Hermitian. + +SYMMLQ produces monotonic errors ‖x* - x‖₂. -SYMMLQ produces monotonic errors ‖x*-x‖₂. +#### Input arguments -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. -SYMMLQ can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = symmlq(A, b, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `transfer_to_cg`: transfer from the SYMMLQ point to the CG point, when it exists. The transfer is based on the residual norm; +* `λ`: regularization parameter; +* `λest`: positive strict lower bound on the smallest eigenvalue `λₘᵢₙ` when solving a positive-definite system, such as `λest = (1-10⁻⁷)λₘᵢₙ`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `etol`: stopping tolerance based on the lower bound on the error; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SymmlqStats`](@ref) structure. #### Reference @@ -50,18 +75,6 @@ and `false` otherwise. """ function symmlq end -function symmlq(A, b :: AbstractVector{FC}, x0 :: AbstractVector; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = SymmlqSolver(A, b, window=window) - symmlq!(solver, A, b, x0; kwargs...) 
- return (solver.x, solver.stats) -end - -function symmlq(A, b :: AbstractVector{FC}; window :: Int=5, kwargs...) where FC <: FloatOrComplex - solver = SymmlqSolver(A, b, window=window) - symmlq!(solver, A, b; kwargs...) - return (solver.x, solver.stats) -end - """ solver = symmlq!(solver::SymmlqSolver, A, b; kwargs...) solver = symmlq!(solver::SymmlqSolver, A, b, x0; kwargs...) @@ -72,182 +85,125 @@ See [`SymmlqSolver`](@ref) for more details about the `solver`. """ function symmlq! end -function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - symmlq!(solver, A, b; kwargs...) - return solver -end - -function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), transfer_to_cg :: Bool=true, - λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - m == n || error("System must be square") - length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n) - - # Tests M = Iₙ - MisI = (M === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - - # Set up workspace. - allocate_if(!MisI, solver, :v, S, n) - x, Mvold, Mv, Mv_next, w̅ = solver.x, solver.Mvold, solver.Mv, solver.Mv_next, solver.w̅ - Δx, clist, zlist, sprod, stats = solver.Δx, solver.clist, solver.zlist, solver.sprod, solver.stats - warm_start = solver.warm_start - rNorms, rcgNorms = stats.residuals, stats.residualscg - errors, errorscg = stats.errors, stats.errorscg - reset!(stats) - v = MisI ? Mv : solver.v - vold = MisI ? 
Mvold : solver.v - - ϵM = eps(T) - ctol = conlim > 0 ? 1 / conlim : zero(T) - - # Initial solution x₀ - x .= zero(FC) - - if warm_start - mul!(Mvold, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold) - @kaxpby!(n, one(FC), b, -one(FC), Mvold) - else - Mvold .= b +def_args_symmlq = (:(A ), + :(b::AbstractVector{FC})) + +def_optargs_symmlq = (:(x0::AbstractVector),) + +def_kwargs_symmlq = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; transfer_to_cg::Bool = true), + :(; λ::T = zero(T) ), + :(; λest::T = zero(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; etol::T = √eps(T) ), + :(; conlim::T = 1/√eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_symmlq = mapreduce(extract_parameters, vcat, def_kwargs_symmlq) + +args_symmlq = (:A, :b) +optargs_symmlq = (:x0,) +kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, :conlim, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function symmlq($(def_args_symmlq...), $(def_optargs_symmlq...); window :: Int=5, $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = SymmlqSolver(A, b; window) + warm_start!(solver, $(optargs_symmlq...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + symmlq!(solver, $(args_symmlq...); $(kwargs_symmlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initialize Lanczos process. - # β₁ M v₁ = b. 
- MisI || mulorldiv!(vold, M, Mvold, ldiv) - β₁ = @kdotr(m, vold, Mvold) - if β₁ == 0 - stats.niter = 0 - stats.solved = true - stats.Anorm = T(NaN) - stats.Acond = T(NaN) - history && push!(rNorms, zero(T)) - history && push!(rcgNorms, zero(T)) - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function symmlq($(def_args_symmlq...); window :: Int=5, $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = SymmlqSolver(A, b; window) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + symmlq!(solver, $(args_symmlq...); $(kwargs_symmlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - β₁ = sqrt(β₁) - β = β₁ - @kscal!(m, one(FC) / β, vold) - MisI || @kscal!(m, one(FC) / β, Mvold) - - w̅ .= vold - - mul!(Mv, A, vold) - α = @kdotr(m, vold, Mv) + λ - @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold - MisI || mulorldiv!(v, M, Mv, ldiv) - β = @kdotr(m, v, Mv) - β < 0 && error("Preconditioner is not positive definite") - β = sqrt(β) - @kscal!(m, one(FC) / β, v) - MisI || @kscal!(m, one(FC) / β, Mv) - - # Start QR factorization - γbar = α - δbar = β - ϵold = zero(T) - cold = one(T) - sold = zero(T) - - ηold = zero(T) - η = β₁ - ζold = zero(T) - - ANorm² = α * α + β * β - - γmax = T(-Inf) - γmin = T(Inf) - ANorm = zero(T) - Acond = zero(T) - - xNorm = zero(T) - rNorm = β₁ - history && push!(rNorms, rNorm) - - if γbar ≠ 0 - ζbar = η / γbar - xcgNorm = abs(ζbar) - rcgNorm = β₁ * abs(ζbar) - history && push!(rcgNorms, rcgNorm) - else - history && push!(rcgNorms, missing) - end - - err = T(Inf) - errcg = T(Inf) - window = length(clist) - clist .= zero(T) - zlist .= zero(T) - sprod .= one(T) - - if λest ≠ 0 - # Start QR factorization of Tₖ - λest I - ρbar = α - λest - σbar = β - ρ = sqrt(ρbar * ρbar + β * β) - cwold = -one(T) - cw = ρbar / ρ - sw = β / ρ - - history && push!(errors, abs(β₁/λest)) - if γbar ≠ 0 - history && 
push!(errorscg, sqrt(errors[1]^2 - ζbar^2)) + function symmlq!(solver :: SymmlqSolver{T,FC,S}, $(def_args_symmlq...); $(def_kwargs_symmlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "SYMMLQ: system of size %d\n", n) + + # Tests M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + x, Mvold, Mv, Mv_next, w̅ = solver.x, solver.Mvold, solver.Mv, solver.Mv_next, solver.w̅ + Δx, clist, zlist, sprod, stats = solver.Δx, solver.clist, solver.zlist, solver.sprod, solver.stats + warm_start = solver.warm_start + rNorms, rcgNorms = stats.residuals, stats.residualscg + errors, errorscg = stats.errors, stats.errorscg + reset!(stats) + v = MisI ? Mv : solver.v + vold = MisI ? Mvold : solver.v + + ϵM = eps(T) + ctol = conlim > 0 ? 
1 / conlim : zero(T) + + # Initial solution x₀ + x .= zero(FC) + + if warm_start + mul!(Mvold, A, Δx) + (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold) + @kaxpby!(n, one(FC), b, -one(FC), Mvold) else - history && push!(errorscg, missing) + Mvold .= b end - end - iter = 0 - itmax == 0 && (itmax = 2 * n) - - (verbose > 0) && @printf("%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond) - - tol = atol + rtol * β₁ - status = "unknown" - solved_lq = solved_mach = solved_lim = (rNorm ≤ tol) - solved_cg = (γbar ≠ 0) && transfer_to_cg && rcgNorm ≤ tol - tired = iter ≥ itmax - ill_cond = ill_cond_mach = ill_cond_lim = false - solved = zero_resid = solved_lq || solved_cg - fwd_err = false - user_requested_exit = false - - while ! (solved || tired || ill_cond || user_requested_exit) - iter = iter + 1 - - # Continue QR factorization - (c, s, γ) = sym_givens(γbar, β) - - # Update SYMMLQ point - ηold = η - ζ = ηold / γ - @kaxpy!(n, c * ζ, w̅, x) - @kaxpy!(n, s * ζ, v, x) - # Update w̅ - @kaxpby!(n, -c, v, s, w̅) - - # Generate next Lanczos vector - oldβ = β - mul!(Mv_next, A, v) - α = @kdotr(m, v, Mv_next) + λ - @kaxpy!(m, -oldβ, Mvold, Mv_next) - @. Mvold = Mv - @kaxpy!(m, -α, Mv, Mv_next) - @. Mv = Mv_next + # Initialize Lanczos process. + # β₁ M v₁ = b. 
+ MisI || mulorldiv!(vold, M, Mvold, ldiv) + β₁ = @kdotr(m, vold, Mvold) + if β₁ == 0 + stats.niter = 0 + stats.solved = true + stats.Anorm = T(NaN) + stats.Acond = T(NaN) + history && push!(rNorms, zero(T)) + history && push!(rcgNorms, zero(T)) + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + β₁ = sqrt(β₁) + β = β₁ + @kscal!(m, one(FC) / β, vold) + MisI || @kscal!(m, one(FC) / β, Mvold) + + w̅ .= vold + + mul!(Mv, A, vold) + α = @kdotr(m, vold, Mv) + λ + @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold MisI || mulorldiv!(v, M, Mv, ldiv) β = @kdotr(m, v, Mv) β < 0 && error("Preconditioner is not positive definite") @@ -255,148 +211,259 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC) / β, v) MisI || @kscal!(m, one(FC) / β, Mv) - # Continue A norm estimate - ANorm² = ANorm² + α * α + oldβ * oldβ + β * β + # Start QR factorization + γbar = α + δbar = β + ϵold = zero(T) + cold = one(T) + sold = zero(T) - if λest ≠ 0 - η = -oldβ * oldβ * cwold / ρbar - ω = λest + η - ψ = c * δbar + s * ω - ωbar = s * δbar - c * ω - end + ηold = zero(T) + η = β₁ + ζold = zero(T) + + ANorm² = α * α + β * β - # Continue QR factorization - δ = δbar * c + α * s - γbar = δbar * s - α * c - ϵ = β * s - δbar = -β * c - η = -ϵold * ζold - δ * ζ + γmax = T(-Inf) + γmin = T(Inf) + ANorm = zero(T) + Acond = zero(T) - rNorm = sqrt(γ * γ * ζ * ζ + ϵold * ϵold * ζold * ζold) - xNorm = xNorm + ζ * ζ + xNorm = zero(T) + rNorm = β₁ history && push!(rNorms, rNorm) if γbar ≠ 0 ζbar = η / γbar - rcgNorm = β * abs(s * ζ - c * ζbar) - xcgNorm = xNorm + ζbar * ζbar + xcgNorm = abs(ζbar) + rcgNorm = β₁ * abs(ζbar) history && push!(rcgNorms, rcgNorm) else history && push!(rcgNorms, missing) end - if window > 0 && λest ≠ 0 - if iter < window && window > 1 - for i = iter+1 : window - sprod[i] = s * sprod[i] - end - end + err = T(Inf) + errcg = T(Inf) - ix = ((iter-1) % 
window) + 1 - clist[ix] = c - zlist[ix] = ζ + window = length(clist) + clist .= zero(T) + zlist .= zero(T) + sprod .= one(T) - if iter ≥ window - jx = mod(iter, window) + 1 - zetabark = zlist[jx] / clist[jx] + if λest ≠ 0 + # Start QR factorization of Tₖ - λest I + ρbar = α - λest + σbar = β + ρ = sqrt(ρbar * ρbar + β * β) + cwold = -one(T) + cw = ρbar / ρ + sw = β / ρ - if γbar ≠ 0 - theta = abs(sum(clist[i] * sprod[i] * zlist[i] for i = 1 : window)) - theta = zetabark * theta + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2 - history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta))) - else - history && (errorscg[iter-window+1] = missing) - end + history && push!(errors, abs(β₁/λest)) + if γbar ≠ 0 + history && push!(errorscg, sqrt(errors[1]^2 - ζbar^2)) + else + history && push!(errorscg, missing) end + end + + iter = 0 + itmax == 0 && (itmax = 2 * n) - ix = (iter % window) + 1 - if iter ≥ window && window > 1 - sprod .= sprod ./ sprod[(ix % window) + 1] - sprod[ix] = sprod[mod(ix-2, window)+1] * s + (verbose > 0) && @printf(iostream, "%5s %7s %7s %8s %8s %7s %7s %7s %5s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7s %.2fs\n", iter, rNorm, β, cold, sold, ANorm, Acond, "✗ ✗ ✗ ✗", ktimer(start_time)) + + tol = atol + rtol * β₁ + status = "unknown" + solved_lq = solved_mach = solved_lim = (rNorm ≤ tol) + solved_cg = (γbar ≠ 0) && transfer_to_cg && rcgNorm ≤ tol + tired = iter ≥ itmax + ill_cond = ill_cond_mach = ill_cond_lim = false + solved = zero_resid = solved_lq || solved_cg + fwd_err = false + user_requested_exit = false + overtimed = false + + while ! 
(solved || tired || ill_cond || user_requested_exit || overtimed) + iter = iter + 1 + + # Continue QR factorization + (c, s, γ) = sym_givens(γbar, β) + + # Update SYMMLQ point + ηold = η + ζ = ηold / γ + @kaxpy!(n, c * ζ, w̅, x) + @kaxpy!(n, s * ζ, v, x) + # Update w̅ + @kaxpby!(n, -c, v, s, w̅) + + # Generate next Lanczos vector + oldβ = β + mul!(Mv_next, A, v) + α = @kdotr(m, v, Mv_next) + λ + @kaxpy!(m, -oldβ, Mvold, Mv_next) + @. Mvold = Mv + @kaxpy!(m, -α, Mv, Mv_next) + @. Mv = Mv_next + MisI || mulorldiv!(v, M, Mv, ldiv) + β = @kdotr(m, v, Mv) + β < 0 && error("Preconditioner is not positive definite") + β = sqrt(β) + @kscal!(m, one(FC) / β, v) + MisI || @kscal!(m, one(FC) / β, Mv) + + # Continue A norm estimate + ANorm² = ANorm² + α * α + oldβ * oldβ + β * β + + if λest ≠ 0 + η = -oldβ * oldβ * cwold / ρbar + ω = λest + η + ψ = c * δbar + s * ω + ωbar = s * δbar - c * ω end - end - if λest ≠ 0 - err = abs((ϵold * ζold + ψ * ζ) / ωbar) - history && push!(errors, err) + # Continue QR factorization + δ = δbar * c + α * s + γbar = δbar * s - α * c + ϵ = β * s + δbar = -β * c + η = -ϵold * ζold - δ * ζ + + rNorm = sqrt(γ * γ * ζ * ζ + ϵold * ϵold * ζold * ζold) + xNorm = xNorm + ζ * ζ + history && push!(rNorms, rNorm) if γbar ≠ 0 - errcg = sqrt(abs(err * err - ζbar * ζbar)) - history && push!(errorscg, errcg) + ζbar = η / γbar + rcgNorm = β * abs(s * ζ - c * ζbar) + xcgNorm = xNorm + ζbar * ζbar + history && push!(rcgNorms, rcgNorm) else - history && push!(errorscg, missing) + history && push!(rcgNorms, missing) end - ρbar = sw * σbar - cw * (α - λest) - σbar = -cw * β - ρ = sqrt(ρbar * ρbar + β * β) + if window > 0 && λest ≠ 0 + if iter < window && window > 1 + for i = iter+1 : window + sprod[i] = s * sprod[i] + end + end + + ix = ((iter-1) % window) + 1 + clist[ix] = c + zlist[ix] = ζ + + if iter ≥ window + jx = mod(iter, window) + 1 + zetabark = zlist[jx] / clist[jx] + + if γbar ≠ 0 + theta = zero(T) + for i = 1 : window + theta += clist[i] * sprod[i] * 
zlist[i] + end + theta = zetabark * abs(theta) + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2 + history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta))) + else + history && (errorscg[iter-window+1] = missing) + end + end - cwold = cw + ix = (iter % window) + 1 + if iter ≥ window && window > 1 + sprod .= sprod ./ sprod[(ix % window) + 1] + sprod[ix] = sprod[mod(ix-2, window)+1] * s + end + end - cw = ρbar / ρ - sw = β / ρ - end + if λest ≠ 0 + err = abs((ϵold * ζold + ψ * ζ) / ωbar) + history && push!(errors, err) + + if γbar ≠ 0 + errcg = sqrt(abs(err * err - ζbar * ζbar)) + history && push!(errorscg, errcg) + else + history && push!(errorscg, missing) + end - # TODO: Use γ or γbar? - γmax = max(γmax, γ) - γmin = min(γmin, γ) + ρbar = sw * σbar - cw * (α - λest) + σbar = -cw * β + ρ = sqrt(ρbar * ρbar + β * β) - Acond = γmax / γmin - ANorm = sqrt(ANorm²) - test1 = rNorm / (ANorm * xNorm) + cwold = cw - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1) + cw = ρbar / ρ + sw = β / ρ + end - # Reset variables - ϵold = ϵ - ζold = ζ - cold = c + # TODO: Use γ or γbar? + γmax = max(γmax, γ) + γmin = min(γmin, γ) + + Acond = γmax / γmin + ANorm = sqrt(ANorm²) + test1 = rNorm / (ANorm * xNorm) + + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, β, c, s, ANorm, Acond, test1, ktimer(start_time)) + + # Reset variables + ϵold = ϵ + ζold = ζ + cold = c + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (one(T) + rNorm ≤ one(T)) + ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) + zero_resid_mach = (one(T) + test1 ≤ one(T)) + # solved_mach = (ϵx ≥ β₁) + + # Stopping conditions based on user-provided tolerances. 
+ tired = iter ≥ itmax + ill_cond_lim = (one(T) / Acond ≤ ctol) + zero_resid_lim = (test1 ≤ tol) + fwd_err = (err ≤ etol) || ((γbar ≠ 0) && (errcg ≤ etol)) + solved_lq = rNorm ≤ tol + solved_cg = transfer_to_cg && (γbar ≠ 0) && rcgNorm ≤ tol + + user_requested_exit = callback(solver) :: Bool + zero_resid = solved_lq || solved_cg + ill_cond = ill_cond_mach || ill_cond_lim + solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (one(T) + rNorm ≤ one(T)) - ill_cond_mach = (one(T) + one(T) / Acond ≤ one(T)) - zero_resid_mach = (one(T) + test1 ≤ one(T)) - # solved_mach = (ϵx ≥ β₁) + # Compute CG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ + if solved_cg + @kaxpy!(m, ζbar, w̅, x) + end - # Stopping conditions based on user-provided tolerances. 
- tired = iter ≥ itmax - ill_cond_lim = (one(T) / Acond ≤ ctol) - zero_resid_lim = (test1 ≤ tol) - fwd_err = (err ≤ etol) || ((γbar ≠ 0) && (errcg ≤ etol)) - solved_lq = rNorm ≤ tol - solved_cg = transfer_to_cg && (γbar ≠ 0) && rcgNorm ≤ tol - - user_requested_exit = callback(solver) :: Bool - zero_resid = solved_lq || solved_cg - ill_cond = ill_cond_mach || ill_cond_lim - solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach - end - (verbose > 0) && @printf("\n") + # Termination status + tired && (status = "maximum number of iterations exceeded") + ill_cond_mach && (status = "condition number seems too large for this machine") + ill_cond_lim && (status = "condition number exceeds tolerance") + solved && (status = "found approximate solution") + solved_lq && (status = "solution xᴸ good enough given atol and rtol") + solved_cg && (status = "solution xᶜ good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Compute CG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ - if solved_cg - @kaxpy!(m, ζbar, w̅, x) + # Update stats + stats.niter = iter + stats.solved = solved + stats.Anorm = ANorm + stats.Acond = Acond + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - ill_cond_mach && (status = "condition number seems too large for this machine") - ill_cond_lim && (status = "condition number exceeds tolerance") - solved && (status = "found approximate solution") - solved_lq && (status = "solution xᴸ good enough given atol and rtol") - solved_cg && (status = "solution xᶜ good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - 
stats.niter = iter - stats.solved = solved - stats.Anorm = ANorm - stats.Acond = Acond - stats.status = status - return solver end diff --git a/src/tricg.jl b/src/tricg.jl index 5acff2d52..8250e6dfc 100644 --- a/src/tricg.jl +++ b/src/tricg.jl @@ -13,30 +13,32 @@ export tricg, tricg! """ (x, y, stats) = tricg(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - spd::Bool=false, snd::Bool=false, flip::Bool=false, - τ::T=one(T), ν::T=-one(T), itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + spd::Bool=false, snd::Bool=false, + flip::Bool=false, τ::T=one(T), + ν::T=-one(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -TriCG solves the symmetric linear system + (x, y, stats) = tricg(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriCG can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + +Given a matrix `A` of dimension m × n, TriCG solves the Hermitian linear system [ τE A ] [ x ] = [ b ] - [ Aᵀ νF ] [ y ] [ c ], + [ Aᴴ νF ] [ y ] [ c ], -where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0. +of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0. `b` and `c` must both be nonzero. TriCG could breakdown if `τ = 0` or `ν = 0`. It's recommended to use TriMR in these cases. -By default, TriCG solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1. -If `flip = true`, TriCG solves another known variant of SQD systems where τ = -1 and ν = 1. -If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved. 
-If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved. -`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems. +By default, TriCG solves Hermitian and quasi-definite linear systems with τ = 1 and ν = -1. TriCG is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. @@ -50,17 +52,40 @@ It's the Euclidean norm when `M` and `N` are identity operators. TriCG stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -TriCG can be warm-started from initial guesses `x0` and `y0` with the method +#### Optional arguments - (x, y, stats) = tricg(A, b, c, x0, y0; kwargs...) +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear system; +* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems; +* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems; +* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -68,18 +93,6 @@ and `false` otherwise. """ function tricg end -function tricg(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = TricgSolver(A, b) - tricg!(solver, A, b, c, x0, y0; kwargs...) 
- return (solver.x, solver.y, solver.stats) -end - -function tricg(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = TricgSolver(A, b) - tricg!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = tricg!(solver::TricgSolver, A, b, c; kwargs...) solver = tricg!(solver::TricgSolver, A, b, c, x0, y0; kwargs...) @@ -90,322 +103,374 @@ See [`TricgSolver`](@ref) for more details about the `solver`. """ function tricg! end -function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - tricg!(solver, A, b, c; kwargs...) - return solver -end - -function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TriCG: system of %d equations in %d variables\n", m+n, m+n) - - # Check flip, spd and snd parameters - spd && flip && error("The matrix cannot be SPD and SQD") - snd && flip && error("The matrix cannot be SND and SQD") - spd && snd && error("The matrix cannot be SPD and SND") - - # Check M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Determine τ and ν associated to SQD, SPD or SND systems. 
- flip && (τ = -one(T) ; ν = one(T)) - spd && (τ = one(T) ; ν = one(T)) - snd && (τ = -one(T) ; ν = -one(T)) - - warm_start = solver.warm_start - warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") - warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :vₖ, S, m) - allocate_if(!NisI, solver, :uₖ, S, n) - Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p - Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q - gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ - vₖ = MisI ? M⁻¹vₖ : solver.vₖ - uₖ = NisI ? N⁻¹uₖ : solver.uₖ - vₖ₊₁ = MisI ? q : vₖ - uₖ₊₁ = NisI ? p : uₖ - b₀ = warm_start ? q : b - c₀ = warm_start ? p : c - - stats = solver.stats - rNorms = stats.residuals - reset!(stats) - - # Initial solutions x₀ and y₀. - xₖ .= zero(FC) - yₖ .= zero(FC) - - iter = 0 - itmax == 0 && (itmax = m+n) - - # Initialize preconditioned orthogonal tridiagonalization process. 
- M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 - N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 - - # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] - # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ] - if warm_start - mul!(b₀, A, Δy) - (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) - @kaxpby!(m, one(FC), b, -one(FC), b₀) - mul!(c₀, Aᵀ, Δx) - (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) - @kaxpby!(n, one(FC), c, -one(FC), c₀) - end - - # β₁Ev₁ = b ↔ β₁v₁ = Mb - M⁻¹vₖ .= b₀ - MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E - if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(m, one(FC) / βₖ, vₖ) - else - error("b must be nonzero") +def_args_tricg = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_tricg = (:(x0::AbstractVector), + :(y0::AbstractVector)) + +def_kwargs_tricg = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; spd::Bool = false ), + :(; snd::Bool = false ), + :(; flip::Bool = false ), + :(; τ::T = one(T) ), + :(; ν::T = -one(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_tricg = mapreduce(extract_parameters, vcat, def_kwargs_tricg) + +args_tricg = (:A, :b, :c) +optargs_tricg = (:x0, :y0) +kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function tricg($(def_args_tricg...), $(def_optargs_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TricgSolver(A, b) + warm_start!(solver, $(optargs_tricg...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + tricg!(solver, $(args_tricg...); $(kwargs_tricg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # γ₁Fu₁ = c ↔ γ₁u₁ = Nc - N⁻¹uₖ .= c₀ - NisI || 
mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) - γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F - if γₖ ≠ 0 - @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) - NisI || @kscal!(n, one(FC) / γₖ, uₖ) - else - error("c must be nonzero") + function tricg($(def_args_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TricgSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + tricg!(solver, $(args_tricg...); $(kwargs_tricg...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ - gx₂ₖ₋₁ .= zero(FC) - gy₂ₖ₋₁ .= zero(FC) - gx₂ₖ .= zero(FC) - gy₂ₖ .= zero(FC) - - # Compute ‖r₀‖² = (γ₁)² + (β₁)² - rNorm = sqrt(γₖ^2 + βₖ^2) - history && push!(rNorms, rNorm) - ε = atol + rtol * rNorm - - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) - - # Set up workspace. - d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T) - π₂ₖ₋₃ = π₂ₖ₋₂ = zero(FC) - δₖ₋₁ = zero(FC) - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the orthogonal tridiagonalization process. 
- # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ - - if iter ≥ 2 - @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ - @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ - end - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ - @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ - - # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ - M⁻¹vₖ₋₁ .= M⁻¹vₖ - N⁻¹uₖ₋₁ .= N⁻¹uₖ - - # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] - # [0 u₁ ••• 0 uₖ] - # - # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ - # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ] - # - # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ - # [ Aᵀ νF ] [ 0 F ] - # - # TriCG subproblem : (Wₖ)ᵀ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂ - # - # Update the LDLᵀ factorization of Sₖ.ₖ. - # - # [ τ α₁ γ₂ 0 • • • • 0 ] - # [ ᾱ₁ ν β₂ • • ] - # [ β₂ τ α₂ γ₃ • • ] - # [ γ₂ ᾱ₂ ν β₃ • • ] - # [ 0 β₃ • • • • • ] - # [ • • γ₃ • • • 0 ] - # [ • • • • • γₖ ] - # [ • • • • • βₖ ] - # [ • • βₖ τ αₖ ] - # [ 0 • • • • 0 γₖ ᾱₖ ν ] - if iter == 1 - d₂ₖ₋₁ = τ - δₖ = conj(αₖ) / d₂ₖ₋₁ - d₂ₖ = ν - abs2(δₖ) * d₂ₖ₋₁ - else - σₖ = βₖ / d₂ₖ₋₂ - ηₖ = γₖ / d₂ₖ₋₃ - λₖ = -(ηₖ * conj(δₖ₋₁) * d₂ₖ₋₃) / d₂ₖ₋₂ - d₂ₖ₋₁ = τ - abs2(σₖ) * d₂ₖ₋₂ - δₖ = (conj(αₖ) - λₖ * conj(σₖ) * d₂ₖ₋₂) / d₂ₖ₋₁ - d₂ₖ = ν - abs2(ηₖ) * d₂ₖ₋₃ - abs2(λₖ) * d₂ₖ₋₂ - abs2(δₖ) * d₂ₖ₋₁ + function tricg!(solver :: TricgSolver{T,FC,S}, $(def_args_tricg...); $(def_kwargs_tricg...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && 
@printf(iostream, "TriCG: system of %d equations in %d variables\n", m+n, m+n) + + # Check flip, spd and snd parameters + spd && flip && error("The matrix cannot be SPD and SQD") + snd && flip && error("The matrix cannot be SND and SQD") + spd && snd && error("The matrix cannot be SPD and SND") + + # Check M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Determine τ and ν associated to SQD, SPD or SND systems. + flip && (τ = -one(T) ; ν = one(T)) + spd && (τ = one(T) ; ν = one(T)) + snd && (τ = -one(T) ; ν = -one(T)) + + warm_start = solver.warm_start + warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") + warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :vₖ, S, m) + allocate_if(!NisI, solver, :uₖ, S, n) + Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p + Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q + gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ + vₖ = MisI ? M⁻¹vₖ : solver.vₖ + uₖ = NisI ? N⁻¹uₖ : solver.uₖ + vₖ₊₁ = MisI ? q : vₖ + uₖ₊₁ = NisI ? p : uₖ + b₀ = warm_start ? q : b + c₀ = warm_start ? p : c + + stats = solver.stats + rNorms = stats.residuals + reset!(stats) + + # Initial solutions x₀ and y₀. + xₖ .= zero(FC) + yₖ .= zero(FC) + + iter = 0 + itmax == 0 && (itmax = m+n) + + # Initialize preconditioned orthogonal tridiagonalization process. 
+ M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 + N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 + + # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] + # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] + if warm_start + mul!(b₀, A, Δy) + (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) + @kaxpby!(m, one(FC), b, -one(FC), b₀) + mul!(c₀, Aᴴ, Δx) + (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) + @kaxpby!(n, one(FC), c, -one(FC), c₀) end - # Solve LₖDₖpₖ = (β₁e₁ + γ₁e₂) - # - # [ 1 0 • • • • • • • 0 ] [ d₁ ] [ β₁ ] - # [ δ₁ 1 • • ] [ d₂ ] [ γ₁ ] - # [ σ₂ 1 • • ] [ • ] [ 0 ] - # [ η₂ λ₂ δ₂ 1 • • ] [ • ] [ • ] - # [ 0 σ₃ 1 • • ] [ • ] zₖ = [ • ] - # [ • • η₃ λ₃ δ₃ 1 • • ] [ • ] [ • ] - # [ • • • • • • ] [ • ] [ • ] - # [ • • • • • • • • ] [ • ] [ • ] - # [ • • σₖ 1 0 ] [ d₂ₖ₋₁ ] [ • ] - # [ 0 • • • • 0 ηₖ λₖ δₖ 1 ] [ d₂ₖ] [ 0 ] - if iter == 1 - π₂ₖ₋₁ = βₖ / d₂ₖ₋₁ - π₂ₖ = (γₖ - δₖ * βₖ) / d₂ₖ + # β₁Ev₁ = b ↔ β₁v₁ = Mb + M⁻¹vₖ .= b₀ + MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) + βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E + if βₖ ≠ 0 + @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) + MisI || @kscal!(m, one(FC) / βₖ, vₖ) else - π₂ₖ₋₁ = -(σₖ * d₂ₖ₋₂ * π₂ₖ₋₂) / d₂ₖ₋₁ - π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ + error("b must be nonzero") end - # Solve Gₖ = Wₖ(Lₖ)⁻ᵀ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ. - if iter == 1 - # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] - # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ] - @. gx₂ₖ₋₁ = vₖ - @. gx₂ₖ = - conj(δₖ) * gx₂ₖ₋₁ - @. gy₂ₖ = uₖ + # γ₁Fu₁ = c ↔ γ₁u₁ = Nc + N⁻¹uₖ .= c₀ + NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) + γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F + if γₖ ≠ 0 + @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) + NisI || @kscal!(n, one(FC) / γₖ, uₖ) else - # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ] - # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ] - # [ gx₂ₖ₋₁ gy₂ₖ₋₁ ] - # [ gx₂ₖ gy₂ₖ ] - @. gx₂ₖ₋₁ = conj(ηₖ) * gx₂ₖ₋₁ + conj(λₖ) * gx₂ₖ - @. gy₂ₖ₋₁ = conj(ηₖ) * gy₂ₖ₋₁ + conj(λₖ) * gy₂ₖ - - @. gx₂ₖ = vₖ - conj(σₖ) * gx₂ₖ - @. gy₂ₖ = - conj(σₖ) * gy₂ₖ - - @. gx₂ₖ₋₁ = - gx₂ₖ₋₁ - conj(δₖ) * gx₂ₖ - @. 
gy₂ₖ₋₁ = uₖ - gy₂ₖ₋₁ - conj(δₖ) * gy₂ₖ - - # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁ - @kswap(gx₂ₖ₋₁, gx₂ₖ) - @kswap(gy₂ₖ₋₁, gy₂ₖ) + error("c must be nonzero") end - # Update xₖ = Gxₖ * pₖ - @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) - @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) - - # Update yₖ = Gyₖ * pₖ - @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) - @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) - - # Compute vₖ₊₁ and uₖ₊₁ - MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ - NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ + # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ + gx₂ₖ₋₁ .= zero(FC) + gy₂ₖ₋₁ .= zero(FC) + gx₂ₖ .= zero(FC) + gy₂ₖ .= zero(FC) - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E - γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F - - # βₖ₊₁ ≠ 0 - if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, q) - MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - end + # Compute ‖r₀‖² = (γ₁)² + (β₁)² + rNorm = sqrt(γₖ^2 + βₖ^2) + history && push!(rNorms, rNorm) + ε = atol + rtol * rNorm - # γₖ₊₁ ≠ 0 - if γₖ₊₁ > btol - @kscal!(n, one(FC) / γₖ₊₁, p) - NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) - end + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ, γₖ, ktimer(start_time)) - # Update M⁻¹vₖ and N⁻¹uₖ - M⁻¹vₖ .= q - N⁻¹uₖ .= p + # Set up workspace. + d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T) + π₂ₖ₋₃ = π₂ₖ₋₂ = zero(FC) + δₖ₋₁ = zero(FC) - # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|² - ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ - ζ₂ₖ = π₂ₖ - rNorm = sqrt(abs2(γₖ₊₁ * ζ₂ₖ₋₁) + abs2(βₖ₊₁ * ζ₂ₖ)) - history && push!(rNorms, rNorm) + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - # Update βₖ, γₖ, π₂ₖ₋₃, π₂ₖ₋₂, d₂ₖ₋₃, d₂ₖ₋₂, δₖ₋₁, vₖ, uₖ. - βₖ = βₖ₊₁ - γₖ = γₖ₊₁ - π₂ₖ₋₃ = π₂ₖ₋₁ - π₂ₖ₋₂ = π₂ₖ - d₂ₖ₋₃ = d₂ₖ₋₁ - d₂ₖ₋₂ = d₂ₖ - δₖ₋₁ = δₖ - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. 
- resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol - solved = resid_decrease_lim || resid_decrease_mach + # Stopping criterion. + breakdown = false + solved = rNorm ≤ ε tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the orthogonal tridiagonalization process. + # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ + + if iter ≥ 2 + @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ + @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + end + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ + @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + + # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ + M⁻¹vₖ₋₁ .= M⁻¹vₖ + N⁻¹uₖ₋₁ .= N⁻¹uₖ + + # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] + # [0 u₁ ••• 0 uₖ] + # + # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ + # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ] + # + # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ + # [ Aᴴ νF ] [ 0 F ] + # + # TriCG subproblem : (Wₖ)ᴴ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂ + # + # Update the LDLᴴ factorization of Sₖ.ₖ. 
+ # + # [ τ α₁ γ₂ 0 • • • • 0 ] + # [ ᾱ₁ ν β₂ • • ] + # [ β₂ τ α₂ γ₃ • • ] + # [ γ₂ ᾱ₂ ν β₃ • • ] + # [ 0 β₃ • • • • • ] + # [ • • γ₃ • • • 0 ] + # [ • • • • • γₖ ] + # [ • • • • • βₖ ] + # [ • • βₖ τ αₖ ] + # [ 0 • • • • 0 γₖ ᾱₖ ν ] + if iter == 1 + d₂ₖ₋₁ = τ + δₖ = conj(αₖ) / d₂ₖ₋₁ + d₂ₖ = ν - abs2(δₖ) * d₂ₖ₋₁ + else + σₖ = βₖ / d₂ₖ₋₂ + ηₖ = γₖ / d₂ₖ₋₃ + λₖ = -(ηₖ * conj(δₖ₋₁) * d₂ₖ₋₃) / d₂ₖ₋₂ + d₂ₖ₋₁ = τ - abs2(σₖ) * d₂ₖ₋₂ + δₖ = (conj(αₖ) - λₖ * conj(σₖ) * d₂ₖ₋₂) / d₂ₖ₋₁ + d₂ₖ = ν - abs2(ηₖ) * d₂ₖ₋₃ - abs2(λₖ) * d₂ₖ₋₂ - abs2(δₖ) * d₂ₖ₋₁ + end + + # Solve LₖDₖpₖ = (β₁e₁ + γ₁e₂) + # + # [ 1 0 • • • • • • • 0 ] [ d₁ ] [ β₁ ] + # [ δ₁ 1 • • ] [ d₂ ] [ γ₁ ] + # [ σ₂ 1 • • ] [ • ] [ 0 ] + # [ η₂ λ₂ δ₂ 1 • • ] [ • ] [ • ] + # [ 0 σ₃ 1 • • ] [ • ] zₖ = [ • ] + # [ • • η₃ λ₃ δ₃ 1 • • ] [ • ] [ • ] + # [ • • • • • • ] [ • ] [ • ] + # [ • • • • • • • • ] [ • ] [ • ] + # [ • • σₖ 1 0 ] [ d₂ₖ₋₁ ] [ • ] + # [ 0 • • • • 0 ηₖ λₖ δₖ 1 ] [ d₂ₖ] [ 0 ] + if iter == 1 + π₂ₖ₋₁ = βₖ / d₂ₖ₋₁ + π₂ₖ = (γₖ - δₖ * βₖ) / d₂ₖ + else + π₂ₖ₋₁ = -(σₖ * d₂ₖ₋₂ * π₂ₖ₋₂) / d₂ₖ₋₁ + π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ + end + + # Solve Gₖ = Wₖ(Lₖ)⁻ᴴ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ. + if iter == 1 + # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] + # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ] + @. gx₂ₖ₋₁ = vₖ + @. gx₂ₖ = - conj(δₖ) * gx₂ₖ₋₁ + @. gy₂ₖ = uₖ + else + # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ] + # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ] + # [ gx₂ₖ₋₁ gy₂ₖ₋₁ ] + # [ gx₂ₖ gy₂ₖ ] + @. gx₂ₖ₋₁ = conj(ηₖ) * gx₂ₖ₋₁ + conj(λₖ) * gx₂ₖ + @. gy₂ₖ₋₁ = conj(ηₖ) * gy₂ₖ₋₁ + conj(λₖ) * gy₂ₖ + + @. gx₂ₖ = vₖ - conj(σₖ) * gx₂ₖ + @. gy₂ₖ = - conj(σₖ) * gy₂ₖ + + @. gx₂ₖ₋₁ = - gx₂ₖ₋₁ - conj(δₖ) * gx₂ₖ + @. 
gy₂ₖ₋₁ = uₖ - gy₂ₖ₋₁ - conj(δₖ) * gy₂ₖ + + # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁ + @kswap(gx₂ₖ₋₁, gx₂ₖ) + @kswap(gy₂ₖ₋₁, gy₂ₖ) + end + + # Update xₖ = Gxₖ * pₖ + @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) + @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) + + # Update yₖ = Gyₖ * pₖ + @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) + @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) + + # Compute vₖ₊₁ and uₖ₊₁ + MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ + NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ + + βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E + γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F + + # βₖ₊₁ ≠ 0 + if βₖ₊₁ > btol + @kscal!(m, one(FC) / βₖ₊₁, q) + MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + end + + # γₖ₊₁ ≠ 0 + if γₖ₊₁ > btol + @kscal!(n, one(FC) / γₖ₊₁, p) + NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) + end + + # Update M⁻¹vₖ and N⁻¹uₖ + M⁻¹vₖ .= q + N⁻¹uₖ .= p + + # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|² + ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ + ζ₂ₖ = π₂ₖ + rNorm = sqrt(abs2(γₖ₊₁ * ζ₂ₖ₋₁) + abs2(βₖ₊₁ * ζ₂ₖ)) + history && push!(rNorms, rNorm) + + # Update βₖ, γₖ, π₂ₖ₋₃, π₂ₖ₋₂, d₂ₖ₋₃, d₂ₖ₋₂, δₖ₋₁, vₖ, uₖ. + βₖ = βₖ₊₁ + γₖ = γₖ₊₁ + π₂ₖ₋₃ = π₂ₖ₋₁ + π₂ₖ₋₂ = π₂ₖ + d₂ₖ₋₃ = d₂ₖ₋₁ + d₂ₖ₋₂ = d₂ₖ + δₖ₋₁ = δₖ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ₊₁, γₖ₊₁, ktimer(start_time)) + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "inconsistent linear system") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(m, one(FC), Δx, xₖ) + warm_start && @kaxpy!(n, one(FC), Δy, yₖ) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !solved && breakdown + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "inconsistent linear system") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(m, one(FC), Δx, xₖ) - warm_start && @kaxpy!(n, one(FC), Δy, yₖ) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !solved && breakdown - stats.status = status - return solver end diff --git a/src/trilqr.jl b/src/trilqr.jl index edcb4c9b9..2b584c216 100644 --- a/src/trilqr.jl +++ b/src/trilqr.jl @@ -1,5 +1,5 @@ # An implementation of TRILQR for the solution of square or -# rectangular consistent linear adjoint systems Ax = b and Aᵀy = c. +# rectangular consistent linear adjoint systems Ax = b and Aᴴy = c. 
# # This method is described in # @@ -14,32 +14,54 @@ export trilqr, trilqr! """ (x, y, stats) = trilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_usymcg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, y, stats) = trilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + Combine USYMLQ and USYMQR to solve adjoint systems. [0 A] [y] = [b] - [Aᵀ 0] [x] [c] + [Aᴴ 0] [x] [c] + +USYMLQ is used for solving primal system `Ax = b` of size m × n. +USYMQR is used for solving dual system `Aᴴy = c` of size n × m. + +#### Input arguments -USYMLQ is used for solving primal system `Ax = b`. -USYMQR is used for solving dual system `Aᵀy = c`. +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -An option gives the possibility of transferring from the USYMLQ point to the -USYMCG point, when it exists. The transfer is based on the residual norm. +#### Optional arguments -TriLQR can be warm-started from initial guesses `x0` and `y0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x; +* `y0`: a vector of length m that represents an initial guess of the solution y. - (x, y, stats) = trilqr(A, b, c, x0, y0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. 
The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure. #### Reference @@ -47,18 +69,6 @@ and `false` otherwise. """ function trilqr end -function trilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = TrilqrSolver(A, b) - trilqr!(solver, A, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function trilqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = TrilqrSolver(A, b) - trilqr!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = trilqr!(solver::TrilqrSolver, A, b, c; kwargs...) solver = trilqr!(solver::TrilqrSolver, A, b, c, x0, y0; kwargs...) @@ -69,349 +79,396 @@ See [`TrilqrSolver`](@ref) for more details about the `solver`. """ function trilqr! 
end -function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - trilqr!(solver, A, b, c; kwargs...) - return solver -end +def_args_trilqr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_trilqr = (:(x0::AbstractVector), + :(y0::AbstractVector)) + +def_kwargs_trilqr = (:(; transfer_to_usymcg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_trilqr = mapreduce(extract_parameters, vcat, def_kwargs_trilqr) + +args_trilqr = (:A, :b, :c) +optargs_trilqr = (:x0, :y0) +kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function trilqr($(def_args_trilqr...), $(def_optargs_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrilqrSolver(A, b) + warm_start!(solver, $(optargs_trilqr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trilqr!(solver, $(args_trilqr...); $(kwargs_trilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) + end -function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TRILQR: 
primal system of %d equations in %d variables\n", m, n) - (verbose > 0) && @printf("TRILQR: dual system of %d equations in %d variables\n", n, m) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats - vₖ₋₁, vₖ, q, t, wₖ₋₃, wₖ₋₂ = solver.vₖ₋₁, solver.vₖ, solver.q, solver.y, solver.wₖ₋₃, solver.wₖ₋₂ - Δx, Δy, warm_start = solver.Δx, solver.Δy, solver.warm_start - rNorms, sNorms = stats.residuals_primal, stats.residuals_dual - reset!(stats) - r₀ = warm_start ? q : b - s₀ = warm_start ? p : c - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) - mul!(s₀, Aᵀ, Δy) - @kaxpby!(n, one(FC), c, -one(FC), s₀) + function trilqr($(def_args_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrilqrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trilqr!(solver, $(args_trilqr...); $(kwargs_trilqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # Initial solution x₀ and residual r₀ = b - Ax₀. - x .= zero(FC) # x₀ - bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖ - - # Initial solution y₀ and residual s₀ = c - Aᵀy₀. - t .= zero(FC) # t₀ - cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ - - iter = 0 - itmax == 0 && (itmax = m+n) - - history && push!(rNorms, bNorm) - history && push!(sNorms, cNorm) - εL = atol + rtol * bNorm - εQ = atol + rtol * cNorm - ξ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm) - - # Set up workspace. 
- βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ - γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖ - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= s₀ ./ γₖ # u₁ = (c - Aᵀy₀) / γ₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations - ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ - ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ - - # Stopping criterion. - inconsistent = false - solved_lq = bNorm == 0 - solved_lq_tol = solved_lq_mach = false - solved_cg = solved_cg_tol = solved_cg_mach = false - solved_primal = solved_lq || solved_cg - solved_qr_tol = solved_qr_mach = false - solved_dual = cNorm == 0 - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !((solved_primal && solved_dual) || tired || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the SSY tridiagonalization process. - # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ - - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. 
- # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function trilqr!(solver :: TrilqrSolver{T,FC,S}, $(def_args_trilqr...); $(def_kwargs_trilqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "TRILQR: primal system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "TRILQR: dual system of %d equations in %d variables\n", n, m) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. 
+ uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats + vₖ₋₁, vₖ, q, t, wₖ₋₃, wₖ₋₂ = solver.vₖ₋₁, solver.vₖ, solver.q, solver.y, solver.wₖ₋₃, solver.wₖ₋₂ + Δx, Δy, warm_start = solver.Δx, solver.Δy, solver.warm_start + rNorms, sNorms = stats.residuals_primal, stats.residuals_dual + reset!(stats) + r₀ = warm_start ? q : b + s₀ = warm_start ? p : c + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) + mul!(s₀, Aᴴ, Δy) + @kaxpby!(n, one(FC), c, -one(FC), s₀) end - if !solved_primal - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ - end + # Initial solution x₀ and residual r₀ = b - Ax₀. + x .= zero(FC) # x₀ + bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖ + + # Initial solution y₀ and residual s₀ = c - Aᴴy₀. + t .= zero(FC) # t₀ + cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ + + iter = 0 + itmax == 0 && (itmax = m+n) + + history && push!(rNorms, bNorm) + history && push!(sNorms, cNorm) + εL = atol + rtol * bNorm + εQ = atol + rtol * cNorm + ξ = zero(T) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %5s\n", "k", "‖rₖ‖", "‖sₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time)) + + # Set up workspace. 
+ βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ + γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖ + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations + ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ + ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ + wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ + + # Stopping criterion. + inconsistent = false + solved_lq = bNorm == 0 + solved_lq_tol = solved_lq_mach = false + solved_cg = solved_cg_tol = solved_cg_mach = false + solved_primal = solved_lq || solved_cg + solved_qr_tol = solved_qr_mach = false + solved_dual = cNorm == 0 + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ. - # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - if iter ≥ 2 - # Compute solution xₖ. - # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) - end + while !((solved_primal && solved_dual) || tired || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = u₁ - @. d̅ = uₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) - end + # Continue the SSY tridiagonalization process. 
+ # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ - # Compute USYMLQ residual norm - # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) - end - history && push!(rNorms, rNorm_lq) - - # Compute USYMCG residual norm - # ‖rₖ‖ = |ρₖ| - if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) - end + mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ - # Update primal stopping criterion - solved_lq_tol = rNorm_lq ≤ εL - solved_lq_mach = rNorm_lq + 1 ≤ 1 - solved_lq = solved_lq_tol || solved_lq_mach - solved_cg_tol = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) - solved_cg_mach = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1) - solved_cg = solved_cg_tol || solved_cg_mach - solved_primal = solved_lq || solved_cg - end + @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • λₖ₋₂ δₖ₋₁ 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - if !solved_dual - # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ₁e₁. 
if iter == 1 - ψbarₖ = γₖ + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ else - # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] - # [sₖ -cₖ] [ 0 ] [ ψbarₖ] - ψₖ₋₁ = cₖ * ψbarₖ₋₁ - ψbarₖ = sₖ * ψbarₖ₋₁ + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ end - # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ. - # w₁ = v₁ / δ̄₁ - if iter == 2 - wₖ₋₁ = wₖ₋₂ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @. wₖ₋₁ = vₖ₋₁ / conj(δₖ₋₁) + if !solved_primal + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = u₁ + @. 
d̅ = uₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + end + + # Compute USYMLQ residual norm + # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Compute USYMCG residual norm + # ‖rₖ‖ = |ρₖ| + if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) + end + + # Update primal stopping criterion + solved_lq_tol = rNorm_lq ≤ εL + solved_lq_mach = rNorm_lq + 1 ≤ 1 + solved_lq = solved_lq_tol || solved_lq_mach + solved_cg_tol = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ εL) + solved_cg_mach = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg + 1 ≤ 1) + solved_cg = solved_cg_tol || solved_cg_mach + solved_primal = solved_lq || solved_cg end - # w₂ = (v₂ - λ̄₁w₁) / δ̄₂ - if iter == 3 - wₖ₋₁ = wₖ₋₃ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + if !solved_dual + # Compute ψₖ₋₁ and ψbarₖ the last coefficients of h̅ₖ = Qₖγ₁e₁. + if iter == 1 + ψbarₖ = γₖ + else + # [cₖ s̄ₖ] [ψbarₖ₋₁] = [ ψₖ₋₁ ] + # [sₖ -cₖ] [ 0 ] [ ψbarₖ] + ψₖ₋₁ = cₖ * ψbarₖ₋₁ + ψbarₖ = sₖ * ψbarₖ₋₁ + end + + # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ. + # w₁ = v₁ / δ̄₁ + if iter == 2 + wₖ₋₁ = wₖ₋₂ + @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + @. wₖ₋₁ = vₖ₋₁ / conj(δₖ₋₁) + end + # w₂ = (v₂ - λ̄₁w₁) / δ̄₂ + if iter == 3 + wₖ₋₁ = wₖ₋₃ + @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ + if iter ≥ 4 + @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃) + wₖ₋₁ = wₖ₋₃ + @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + end + + if iter ≥ 3 + # Swap pointers. 
+ @kswap(wₖ₋₃, wₖ₋₂) + end + + if iter ≥ 2 + # Compute solution tₖ₋₁. + # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ + @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t) + end + + # Update ψbarₖ₋₁ + ψbarₖ₋₁ = ψbarₖ + + # Compute USYMQR residual norm ‖sₖ₋₁‖ = |ψbarₖ|. + sNorm = abs(ψbarₖ) + history && push!(sNorms, sNorm) + + # Compute ‖Asₖ₋₁‖ = |ψbarₖ| * √(|δbarₖ|² + |λbarₖ|²). + AsNorm = abs(ψbarₖ) * √(abs2(δbarₖ) + abs2(cₖ * βₖ₊₁)) + + # Update dual stopping criterion + iter == 1 && (ξ = atol + rtol * AsNorm) + solved_qr_tol = sNorm ≤ εQ + solved_qr_mach = sNorm + 1 ≤ 1 + inconsistent = AsNorm ≤ ξ + solved_dual = solved_qr_tol || solved_qr_mach || inconsistent end - # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ - if iter ≥ 4 - @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃) - wₖ₋₁ = wₖ₋₃ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) - @. wₖ₋₁ = wₖ₋₁ / conj(δₖ₋₁) + + # Compute uₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if βₖ₊₁ ≠ zero(T) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + end + if γₖ₊₁ ≠ zero(T) + @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p end + # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. if iter ≥ 3 - # Swap pointers. - @kswap(wₖ₋₃, wₖ₋₂) + ϵₖ₋₃ = ϵₖ₋₂ end - if iter ≥ 2 - # Compute solution tₖ₋₁. - # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ - @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t) + λₖ₋₂ = λₖ₋₁ end - - # Update ψbarₖ₋₁ - ψbarₖ₋₁ = ψbarₖ - - # Compute USYMQR residual norm ‖sₖ₋₁‖ = |ψbarₖ|. - sNorm = abs(ψbarₖ) - history && push!(sNorms, sNorm) - - # Compute ‖Asₖ₋₁‖ = |ψbarₖ| * √(|δbarₖ|² + |λbarₖ|²). - AsNorm = abs(ψbarₖ) * √(abs2(δbarₖ) + abs2(cₖ * βₖ₊₁)) - - # Update dual stopping criterion - iter == 1 && (ξ = atol + rtol * AsNorm) - solved_qr_tol = sNorm ≤ εQ - solved_qr_mach = sNorm + 1 ≤ 1 - inconsistent = AsNorm ≤ ξ - solved_dual = solved_qr_tol || solved_qr_mach || inconsistent - end - - # Compute uₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if βₖ₊₁ ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - end - if γₖ₊₁ ≠ zero(T) - @. 
uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + δbarₖ₋₁ = δbarₖ + cₖ₋₁ = cₖ + sₖ₋₁ = sₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + + user_requested_exit = callback(solver) :: Bool + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + + kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e %.2fs\n", iter, "✗ ✗ ✗ ✗", sNorm, ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s %.2fs\n", iter, rNorm_lq, "✗ ✗ ✗ ✗", ktimer(start_time)) + kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, rNorm_lq, sNorm, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ. - if iter ≥ 3 - ϵₖ₋₃ = ϵₖ₋₂ + # Compute USYMCG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - if iter ≥ 2 - λₖ₋₂ = λₖ₋₁ - end - δbarₖ₋₁ = δbarₖ - cₖ₋₁ = cₖ - sₖ₋₁ = sₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - - user_requested_exit = callback(solver) :: Bool - tired = iter ≥ itmax - kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm) - kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "") - kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is good enough given atol and rtol") + solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") + !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") + solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") + solved_cg_tol && 
solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") + solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") + solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ") + !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") + solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") + solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") + solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") + solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") + solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") + solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && @kaxpy!(m, one(FC), Δy, t) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved_primal = solved_primal + stats.solved_dual = solved_dual + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - # Compute USYMCG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) - end - - tired && (status = "maximum number of iterations exceeded") - solved_lq_tol && !solved_dual && (status = "Only the primal solution xᴸ is 
good enough given atol and rtol") - solved_cg_tol && !solved_dual && (status = "Only the primal solution xᶜ is good enough given atol and rtol") - !solved_primal && solved_qr_tol && (status = "Only the dual solution t is good enough given atol and rtol") - solved_lq_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᴸ, t) are good enough given atol and rtol") - solved_cg_tol && solved_qr_tol && (status = "Both primal and dual solutions (xᶜ, t) are good enough given atol and rtol") - solved_lq_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᴸ") - solved_cg_mach && !solved_dual && (status = "Only found approximate zero-residual primal solution xᶜ") - !solved_primal && solved_qr_mach && (status = "Only found approximate zero-residual dual solution t") - solved_lq_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᴸ, t)") - solved_cg_mach && solved_qr_mach && (status = "Found approximate zero-residual primal and dual solutions (xᶜ, t)") - solved_lq_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᴸ and a dual solution t good enough given atol and rtol") - solved_cg_mach && solved_qr_tol && (status = "Found approximate zero-residual primal solutions xᶜ and a dual solution t good enough given atol and rtol") - solved_lq_tol && solved_qr_mach && (status = "Found a primal solution xᴸ good enough given atol and rtol and an approximate zero-residual dual solutions t") - solved_cg_tol && solved_qr_mach && (status = "Found a primal solution xᶜ good enough given atol and rtol and an approximate zero-residual dual solutions t") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(n, one(FC), Δx, x) - warm_start && @kaxpy!(m, one(FC), Δy, t) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.status = status - stats.solved_primal = solved_primal - stats.solved_dual = 
solved_dual - return solver end diff --git a/src/trimr.jl b/src/trimr.jl index bc53633c2..ae61b785a 100644 --- a/src/trimr.jl +++ b/src/trimr.jl @@ -13,30 +13,31 @@ export trimr, trimr! """ (x, y, stats) = trimr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - spd::Bool=false, snd::Bool=false, flip::Bool=false, sp::Bool=false, - τ::T=one(T), ν::T=-one(T), itmax::Int=0, - verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + spd::Bool=false, snd::Bool=false, + flip::Bool=false, sp::Bool=false, + τ::T=one(T), ν::T=-one(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -TriMR solves the symmetric linear system + (x, y, stats) = trimr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + +Given a matrix `A` of dimension m × n, TriMR solves the symmetric linear system [ τE A ] [ x ] = [ b ] - [ Aᵀ νF ] [ y ] [ c ], + [ Aᴴ νF ] [ y ] [ c ], -where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0. +of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0. `b` and `c` must both be nonzero. TriMR handles saddle-point systems (`τ = 0` or `ν = 0`) and adjoint systems (`τ = 0` and `ν = 0`) without any risk of breakdown. By default, TriMR solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1. -If `flip = true`, TriMR solves another known variant of SQD systems where τ = -1 and ν = 1. -If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved. -If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved. 
-If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is solved. -`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems. TriMR is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. @@ -50,17 +51,41 @@ It's the Euclidean norm when `M` and `N` are identity operators. TriMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -TriMR can be warm-started from initial guesses `x0` and `y0` with the method +#### Optional arguments - (x, y, stats) = trimr(A, b, c, x0, y0; kwargs...) +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear system; +* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems; +* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems; +* `sp`: if `true`, set `τ = 1` and `ν = 0` for saddle-point systems; +* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -68,18 +93,6 @@ and `false` otherwise. """ function trimr end -function trimr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) 
where FC <: FloatOrComplex - solver = TrimrSolver(A, b) - trimr!(solver, A, b, c, x0, y0; kwargs...) - return (solver.x, solver.y, solver.stats) -end - -function trimr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = TrimrSolver(A, b) - trimr!(solver, A, b, c; kwargs...) - return (solver.x, solver.y, solver.stats) -end - """ solver = trimr!(solver::TrimrSolver, A, b, c; kwargs...) solver = trimr!(solver::TrimrSolver, A, b, c, x0, y0; kwargs...) @@ -90,424 +103,477 @@ See [`TrimrSolver`](@ref) for more details about the `solver`. """ function trimr! end -function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector, y0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0, y0) - trimr!(solver, A, b, c; kwargs...) - return solver -end - -function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, - verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TriMR: system of %d equations in %d variables\n", m+n, m+n) - - # Check flip, sp, spd and snd parameters - spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !") - spd && snd && error("The matrix cannot be symmetric positive definite and symmetric negative definite !") - spd && sp && error("The matrix cannot be symmetric positive definite and a saddle-point !") - snd && flip && error("The matrix cannot be 
symmetric negative definite and symmetric quasi-definite !") - snd && sp && error("The matrix cannot be symmetric negative definite and a saddle-point !") - sp && flip && error("The matrix cannot be symmetric quasi-definite and a saddle-point !") - - # Check M = Iₘ and N = Iₙ - MisI = (M === I) - NisI = (N === I) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Determine τ and ν associated to SQD, SPD or SND systems. - flip && (τ = -one(T) ; ν = one(T)) - spd && (τ = one(T) ; ν = one(T)) - snd && (τ = -one(T) ; ν = -one(T)) - sp && (τ = one(T) ; ν = zero(T)) - - warm_start = solver.warm_start - warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") - warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - allocate_if(!MisI, solver, :vₖ, S, m) - allocate_if(!NisI, solver, :uₖ, S, n) - Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p - Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q - gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ - gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ - vₖ = MisI ? M⁻¹vₖ : solver.vₖ - uₖ = NisI ? N⁻¹uₖ : solver.uₖ - vₖ₊₁ = MisI ? q : M⁻¹vₖ₋₁ - uₖ₊₁ = NisI ? p : N⁻¹uₖ₋₁ - b₀ = warm_start ? q : b - c₀ = warm_start ? p : c - - stats = solver.stats - rNorms = stats.residuals - reset!(stats) - - # Initial solutions x₀ and y₀. - xₖ .= zero(FC) - yₖ .= zero(FC) - - iter = 0 - itmax == 0 && (itmax = m+n) - - # Initialize preconditioned orthogonal tridiagonalization process. 
- M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 - N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 - - # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] - # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ] - if warm_start - mul!(b₀, A, Δy) - (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) - @kaxpby!(m, one(FC), b, -one(FC), b₀) - mul!(c₀, Aᵀ, Δx) - (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) - @kaxpby!(n, one(FC), c, -one(FC), c₀) +def_args_trimr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_trimr = (:(x0::AbstractVector), + :(y0::AbstractVector)) + +def_kwargs_trimr = (:(; M = I ), + :(; N = I ), + :(; ldiv::Bool = false ), + :(; spd::Bool = false ), + :(; snd::Bool = false ), + :(; flip::Bool = false ), + :(; sp::Bool = false ), + :(; τ::T = one(T) ), + :(; ν::T = -one(T) ), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_trimr = mapreduce(extract_parameters, vcat, def_kwargs_trimr) + +args_trimr = (:A, :b, :c) +optargs_trimr = (:x0, :y0) +kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function trimr($(def_args_trimr...), $(def_optargs_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrimrSolver(A, b) + warm_start!(solver, $(optargs_trimr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trimr!(solver, $(args_trimr...); $(kwargs_trimr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # β₁Ev₁ = b ↔ β₁v₁ = Mb - M⁻¹vₖ .= b₀ - MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E - if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(m, one(FC) / βₖ, vₖ) - else - error("b must be nonzero") + function trimr($(def_args_trimr...); 
$(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = TrimrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + trimr!(solver, $(args_trimr...); $(kwargs_trimr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.y, solver.stats) end - # γ₁Fu₁ = c ↔ γ₁u₁ = Nc - N⁻¹uₖ .= c₀ - NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) - γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F - if γₖ ≠ 0 - @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) - NisI || @kscal!(n, one(FC) / γₖ, uₖ) - else - error("c must be nonzero") - end + function trimr!(solver :: TrimrSolver{T,FC,S}, $(def_args_trimr...); $(def_kwargs_trimr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "TriMR: system of %d equations in %d variables\n", m+n, m+n) + + # Check flip, sp, spd and snd parameters + spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !") + spd && snd && error("The matrix cannot be symmetric positive definite and symmetric negative definite !") + spd && sp && error("The matrix cannot be symmetric positive definite and a saddle-point !") + snd && flip && error("The matrix cannot be symmetric negative definite and symmetric quasi-definite !") + snd && sp && error("The matrix cannot be symmetric negative definite and a saddle-point !") + sp && flip && error("The matrix cannot be symmetric quasi-definite and a saddle-point !") + + # Check M = Iₘ and N = Iₙ + MisI = (M === I) + NisI = (N === I) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. 
This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Determine τ and ν associated to SQD, SPD or SND systems. + flip && (τ = -one(T) ; ν = one(T)) + spd && (τ = one(T) ; ν = one(T)) + snd && (τ = -one(T) ; ν = -one(T)) + sp && (τ = one(T) ; ν = zero(T)) + + warm_start = solver.warm_start + warm_start && (τ ≠ 0) && !MisI && error("Warm-start with preconditioners is not supported.") + warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :vₖ, S, m) + allocate_if(!NisI, solver, :uₖ, S, n) + Δy, yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, p = solver.Δy, solver.y, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.p + Δx, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, q = solver.Δx, solver.x, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.q + gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ + gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ + vₖ = MisI ? M⁻¹vₖ : solver.vₖ + uₖ = NisI ? N⁻¹uₖ : solver.uₖ + vₖ₊₁ = MisI ? q : M⁻¹vₖ₋₁ + uₖ₊₁ = NisI ? p : N⁻¹uₖ₋₁ + b₀ = warm_start ? q : b + c₀ = warm_start ? p : c + + stats = solver.stats + rNorms = stats.residuals + reset!(stats) + + # Initial solutions x₀ and y₀. + xₖ .= zero(FC) + yₖ .= zero(FC) + + iter = 0 + itmax == 0 && (itmax = m+n) + + # Initialize preconditioned orthogonal tridiagonalization process. + M⁻¹vₖ₋₁ .= zero(FC) # v₀ = 0 + N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 + + # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] + # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] + if warm_start + mul!(b₀, A, Δy) + (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) + @kaxpby!(m, one(FC), b, -one(FC), b₀) + mul!(c₀, Aᴴ, Δx) + (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) + @kaxpby!(n, one(FC), c, -one(FC), c₀) + end - # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ. 
- gx₂ₖ₋₃ .= zero(FC) - gy₂ₖ₋₃ .= zero(FC) - gx₂ₖ₋₂ .= zero(FC) - gy₂ₖ₋₂ .= zero(FC) - gx₂ₖ₋₁ .= zero(FC) - gy₂ₖ₋₁ .= zero(FC) - gx₂ₖ .= zero(FC) - gy₂ₖ .= zero(FC) - - # Compute ‖r₀‖² = (γ₁)² + (β₁)² - rNorm = sqrt(γₖ^2 + βₖ^2) - history && push!(rNorms, rNorm) - ε = atol + rtol * rNorm - - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) - - # Set up workspace. - old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T) - old_s₁ₖ = old_s₂ₖ = old_s₃ₖ = old_s₄ₖ = zero(FC) - σbar₂ₖ₋₂ = ηbar₂ₖ₋₃ = λbar₂ₖ₋₃ = μ₂ₖ₋₅ = λ₂ₖ₋₄ = μ₂ₖ₋₄ = zero(FC) - πbar₂ₖ₋₁ = βₖ - πbar₂ₖ = γₖ - - # Tolerance for breakdown detection. - btol = eps(T)^(3/4) - - # Stopping criterion. - breakdown = false - solved = rNorm ≤ ε - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - θbarₖ = δbar₂ₖ₋₁ = δbar₂ₖ = σbar₂ₖ₋₁ = σbar₂ₖ = λbar₂ₖ₋₁ = ηbar₂ₖ₋₁ = zero(FC) - - while !(solved || tired || breakdown || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the orthogonal tridiagonalization process. 
- # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ - - if iter ≥ 2 - @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ - @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + # β₁Ev₁ = b ↔ β₁v₁ = Mb + M⁻¹vₖ .= b₀ + MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) + βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E + if βₖ ≠ 0 + @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) + MisI || @kscal!(m, one(FC) / βₖ, vₖ) + else + error("b must be nonzero") end - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + # γ₁Fu₁ = c ↔ γ₁u₁ = Nc + N⁻¹uₖ .= c₀ + NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) + γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F + if γₖ ≠ 0 + @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) + NisI || @kscal!(n, one(FC) / γₖ, uₖ) + else + error("c must be nonzero") + end - @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ - @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ. + gx₂ₖ₋₃ .= zero(FC) + gy₂ₖ₋₃ .= zero(FC) + gx₂ₖ₋₂ .= zero(FC) + gy₂ₖ₋₂ .= zero(FC) + gx₂ₖ₋₁ .= zero(FC) + gy₂ₖ₋₁ .= zero(FC) + gx₂ₖ .= zero(FC) + gy₂ₖ .= zero(FC) + + # Compute ‖r₀‖² = (γ₁)² + (β₁)² + rNorm = sqrt(γₖ^2 + βₖ^2) + history && push!(rNorms, rNorm) + ε = atol + rtol * rNorm - # Compute vₖ₊₁ and uₖ₊₁ - MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ - NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %5s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ, γₖ, ktimer(start_time)) - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E - γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F + # Set up workspace. 
+ old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T) + old_s₁ₖ = old_s₂ₖ = old_s₃ₖ = old_s₄ₖ = zero(FC) + σbar₂ₖ₋₂ = ηbar₂ₖ₋₃ = λbar₂ₖ₋₃ = μ₂ₖ₋₅ = λ₂ₖ₋₄ = μ₂ₖ₋₄ = zero(FC) + πbar₂ₖ₋₁ = βₖ + πbar₂ₖ = γₖ - # βₖ₊₁ ≠ 0 - if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, q) - MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - end + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) - # γₖ₊₁ ≠ 0 - if γₖ₊₁ > btol - @kscal!(n, one(FC) / γₖ₊₁, p) - NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) - end + # Stopping criterion. + breakdown = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false - # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] - # [0 u₁ ••• 0 uₖ] - # - # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ - # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ] - # - # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ - # [ Aᵀ νF ] [ 0 F ] - # - # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖ - # - # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. 
- # [ Oᵀ ] - if iter == 1 - θbarₖ = conj(αₖ) - δbar₂ₖ₋₁ = τ - δbar₂ₖ = ν - σbar₂ₖ₋₁ = αₖ - σbar₂ₖ = βₖ₊₁ - λbar₂ₖ₋₁ = γₖ₊₁ - ηbar₂ₖ₋₁ = zero(FC) - else - # Apply previous reflections - # [ 1 ][ 1 ][ c₂.ₖ₋₁ s₂.ₖ₋₁ ][ 1 ] - # Ζₖ₋₁ = [ c₄.ₖ₋₁ s₄.ₖ₋₁ ][ c₃.ₖ₋₁ s₃.ₖ₋₁ ][ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ][ c₁.ₖ₋₁ s₁.ₖ₋₁ ] - # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ][ 1 ][ 1 ][ 1 ] - # [ 1 ][ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ][ 1 ][ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] - # - # [ δbar₂ₖ₋₃ σbar₂ₖ₋₃ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ δ₂ₖ₋₃ σ₂ₖ₋₃ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # Ζₖ₋₁ * [ θbarₖ₋₁ δbar₂ₖ₋₂ σbar₂ₖ₋₂ 0 0 0 ] = [ 0 δ₂ₖ₋₂ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] - # [ 0 βₖ τ αₖ 0 γₖ₊₁ ] [ 0 0 δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] - # [ γₖ 0 ᾱₖ ν βₖ₊₁ 0 ] [ 0 0 θbarₖ δbar₂ₖ σbar₂ₖ 0 ] - # - # [ 1 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] - # [ c₁.ₖ₋₁ s₁.ₖ₋₁ ] [ σbar₂ₖ₋₂ 0 0 0 ] = [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] - # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] - # [ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] [ ᾱₖ ν βₖ₊₁ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] - σbis₂ₖ₋₂ = old_c₁ₖ * σbar₂ₖ₋₂ + old_s₁ₖ * conj(αₖ) - ηbis₂ₖ₋₂ = old_s₁ₖ * ν - λbis₂ₖ₋₂ = old_s₁ₖ * βₖ₊₁ - θbisₖ = conj(old_s₁ₖ) * σbar₂ₖ₋₂ - old_c₁ₖ * conj(αₖ) - δbis₂ₖ = - old_c₁ₖ * ν - σbis₂ₖ = - old_c₁ₖ * βₖ₊₁ - # [ c₂.ₖ₋₁ s₂.ₖ₋₁ ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # [ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ] [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] = [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] - # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] - # [ 1 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] - η₂ₖ₋₃ = old_c₂ₖ * ηbar₂ₖ₋₃ + old_s₂ₖ * σbis₂ₖ₋₂ - λ₂ₖ₋₃ = old_c₂ₖ * λbar₂ₖ₋₃ + old_s₂ₖ * ηbis₂ₖ₋₂ - μ₂ₖ₋₃ = old_s₂ₖ * λbis₂ₖ₋₂ - σhat₂ₖ₋₂ = conj(old_s₂ₖ) * ηbar₂ₖ₋₃ - old_c₂ₖ * σbis₂ₖ₋₂ - ηhat₂ₖ₋₂ = conj(old_s₂ₖ) * λbar₂ₖ₋₃ - old_c₂ₖ * ηbis₂ₖ₋₂ - λhat₂ₖ₋₂ = - old_c₂ₖ * λbis₂ₖ₋₂ - # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # [ c₃.ₖ₋₁ s₃.ₖ₋₁ ] [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] = [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] - # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] - # [ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] - σtmp₂ₖ₋₂ = old_c₃ₖ * 
σhat₂ₖ₋₂ + old_s₃ₖ * θbisₖ - ηtmp₂ₖ₋₂ = old_c₃ₖ * ηhat₂ₖ₋₂ + old_s₃ₖ * δbis₂ₖ - λtmp₂ₖ₋₂ = old_c₃ₖ * λhat₂ₖ₋₂ + old_s₃ₖ * σbis₂ₖ - θbarₖ = conj(old_s₃ₖ) * σhat₂ₖ₋₂ - old_c₃ₖ * θbisₖ - δbar₂ₖ = conj(old_s₃ₖ) * ηhat₂ₖ₋₂ - old_c₃ₖ * δbis₂ₖ - σbar₂ₖ = conj(old_s₃ₖ) * λhat₂ₖ₋₂ - old_c₃ₖ * σbis₂ₖ - # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] - # [ c₄.ₖ₋₁ s₄.ₖ₋₁ ] [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] = [ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] - # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ] [ τ αₖ 0 γₖ₊₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] - # [ 1 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] - σ₂ₖ₋₂ = old_c₄ₖ * σtmp₂ₖ₋₂ + old_s₄ₖ * τ - η₂ₖ₋₂ = old_c₄ₖ * ηtmp₂ₖ₋₂ + old_s₄ₖ * αₖ - λ₂ₖ₋₂ = old_c₄ₖ * λtmp₂ₖ₋₂ - μ₂ₖ₋₂ = old_s₄ₖ * γₖ₊₁ - δbar₂ₖ₋₁ = conj(old_s₄ₖ) * σtmp₂ₖ₋₂ - old_c₄ₖ * τ - σbar₂ₖ₋₁ = conj(old_s₄ₖ) * ηtmp₂ₖ₋₂ - old_c₄ₖ * αₖ - ηbar₂ₖ₋₁ = conj(old_s₄ₖ) * λtmp₂ₖ₋₂ - λbar₂ₖ₋₁ = - old_c₄ₖ * γₖ₊₁ - end + θbarₖ = δbar₂ₖ₋₁ = δbar₂ₖ = σbar₂ₖ₋₁ = σbar₂ₖ = λbar₂ₖ₋₁ = ηbar₂ₖ₋₁ = zero(FC) - # [ 1 ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] - # [ c₁.ₖ s₁.ₖ ] [ θbarₖ δbar₂ₖ ] = [ θₖ δbar₂ₖ ] - # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] - # [ s̄₁.ₖ -c₁.ₖ ] [ γₖ₊₁ 0 ] [ 0 gₖ ] - (c₁ₖ, s₁ₖ, θₖ) = sym_givens(θbarₖ, γₖ₊₁) - gₖ = conj(s₁ₖ) * δbar₂ₖ - δbar₂ₖ = c₁ₖ * δbar₂ₖ - - # [ c₂.ₖ s₂.ₖ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] - # [ s̄₂.ₖ -c₂.ₖ ] [ θₖ δbar₂ₖ ] = [ 0 δbis₂ₖ ] - # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] - # [ 1 ] [ 0 gₖ ] [ 0 gₖ ] - (c₂ₖ, s₂ₖ, δ₂ₖ₋₁) = sym_givens(δbar₂ₖ₋₁, θₖ) - σ₂ₖ₋₁ = c₂ₖ * σbar₂ₖ₋₁ + s₂ₖ * δbar₂ₖ - δbis₂ₖ = conj(s₂ₖ) * σbar₂ₖ₋₁ - c₂ₖ * δbar₂ₖ - - # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] - # [ c₃.ₖ s₃.ₖ ] [ 0 δbis₂ₖ ] = [ 0 δhat₂ₖ ] - # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] - # [ s̄₃.ₖ -c₃.ₖ ] [ 0 gₖ ] [ 0 0 ] - (c₃ₖ, s₃ₖ, δhat₂ₖ) = sym_givens(δbis₂ₖ, gₖ) - - # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] - # [ c₄.ₖ s₄.ₖ ] [ 0 δhat₂ₖ ] = [ 0 δ₂ₖ ] - # [ s̄₄.ₖ -c₄.ₖ ] [ 0 βₖ₊₁ ] [ 0 0 ] - # [ 1 ] [ 0 0 ] [ 0 0 ] - (c₄ₖ, s₄ₖ, δ₂ₖ) = sym_givens(δhat₂ₖ, βₖ₊₁) - - # Solve Gₖ = Wₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Gₖ)ᵀ = (Wₖ)ᵀ. 
- if iter == 1 - # [ δ₁ 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] - # [ σ₁ δ₂ ] [ gx₂ gy₂ ] [ 0 u₁ ] - @. gx₂ₖ₋₁ = vₖ / δ₂ₖ₋₁ - @. gx₂ₖ = - σ₂ₖ₋₁ / δ₂ₖ * gx₂ₖ₋₁ - @. gy₂ₖ = uₖ / δ₂ₖ - elseif iter == 2 - # [ η₁ σ₂ δ₃ 0 ] [ gx₁ gy₁ ] = [ v₂ 0 ] - # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ] - # [ gx₃ gy₃ ] - # [ gx₄ gy₄ ] - @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) - @kswap(gx₂ₖ₋₂, gx₂ₖ) - @kswap(gy₂ₖ₋₂, gy₂ₖ) - @. gx₂ₖ₋₁ = (vₖ - η₂ₖ₋₃ * gx₂ₖ₋₃ - σ₂ₖ₋₂ * gx₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. gx₂ₖ = ( - λ₂ₖ₋₃ * gx₂ₖ₋₃ - η₂ₖ₋₂ * gx₂ₖ₋₂ - σ₂ₖ₋₁ * gx₂ₖ₋₁) / δ₂ₖ - @. gy₂ₖ₋₁ = ( - η₂ₖ₋₃ * gy₂ₖ₋₃ - σ₂ₖ₋₂ * gy₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. gy₂ₖ = (uₖ - λ₂ₖ₋₃ * gy₂ₖ₋₃ - η₂ₖ₋₂ * gy₂ₖ₋₂ - σ₂ₖ₋₁ * gy₂ₖ₋₁) / δ₂ₖ - else - # μ₂ₖ₋₅ * gx₂ₖ₋₅ + λ₂ₖ₋₄ * gx₂ₖ₋₄ + η₂ₖ₋₃ * gx₂ₖ₋₃ + σ₂ₖ₋₂ * gx₂ₖ₋₂ + δ₂ₖ₋₁ * gx₂ₖ₋₁ = vₖ - # μ₂ₖ₋₄ * gx₂ₖ₋₄ + λ₂ₖ₋₃ * gx₂ₖ₋₃ + η₂ₖ₋₂ * gx₂ₖ₋₂ + σ₂ₖ₋₁ * gx₂ₖ₋₁ + δ₂ₖ * gx₂ₖ = 0 - g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ - @. g₂ₖ₋₁ = (vₖ - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. g₂ₖ = ( - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ - @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) - @kswap(gx₂ₖ₋₂, gx₂ₖ) - # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0 - # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ - g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ - @. g₂ₖ₋₁ = ( - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ - @. g₂ₖ = (uₖ - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ - @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁) - @kswap(gy₂ₖ₋₂, gy₂ₖ) - end + while !(solved || tired || breakdown || user_requested_exit || overtimed) + # Update iteration index. 
+ iter = iter + 1 - # Update p̅ₖ = (Qₖ)ᵀ * (β₁e₁ + γ₁e₂) - πbis₂ₖ = c₁ₖ * πbar₂ₖ - πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ - # - π₂ₖ₋₁ = c₂ₖ * πbar₂ₖ₋₁ + s₂ₖ * πbis₂ₖ - πhat₂ₖ = conj(s₂ₖ) * πbar₂ₖ₋₁ - c₂ₖ * πbis₂ₖ - # - πtmp₂ₖ = c₃ₖ * πhat₂ₖ + s₃ₖ * πbis₂ₖ₊₂ - πbar₂ₖ₊₂ = conj(s₃ₖ) * πhat₂ₖ - c₃ₖ * πbis₂ₖ₊₂ - # - π₂ₖ = c₄ₖ * πtmp₂ₖ - πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ - - # Update xₖ = Gxₖ * pₖ - @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) - @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) - - # Update yₖ = Gyₖ * pₖ - @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) - @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) - - # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|² - rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂)) - history && push!(rNorms, rNorm) + # Continue the orthogonal tridiagonalization process. + # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ + + if iter ≥ 2 + @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ + @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + end + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ + @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + + # Compute vₖ₊₁ and uₖ₊₁ + MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ + NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ + + βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E + γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F - # Update vₖ and uₖ - MisI || (vₖ .= vₖ₊₁) - NisI || (uₖ .= uₖ₊₁) - - # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ - M⁻¹vₖ₋₁ .= M⁻¹vₖ - N⁻¹uₖ₋₁ .= N⁻¹uₖ - - # Update M⁻¹vₖ and N⁻¹uₖ - M⁻¹vₖ .= q - N⁻¹uₖ .= p - - # Update cosines and sines - old_s₁ₖ = s₁ₖ - old_s₂ₖ = s₂ₖ - old_s₃ₖ = s₃ₖ - old_s₄ₖ = s₄ₖ - old_c₁ₖ = c₁ₖ - old_c₂ₖ = c₂ₖ - old_c₃ₖ = c₃ₖ - old_c₄ₖ = c₄ₖ - - # Update workspace - βₖ = βₖ₊₁ - γₖ = γₖ₊₁ - σbar₂ₖ₋₂ = σbar₂ₖ - ηbar₂ₖ₋₃ = ηbar₂ₖ₋₁ - λbar₂ₖ₋₃ = λbar₂ₖ₋₁ - if iter ≥ 2 - μ₂ₖ₋₅ = μ₂ₖ₋₃ - μ₂ₖ₋₄ = μ₂ₖ₋₂ - λ₂ₖ₋₄ = λ₂ₖ₋₂ + # βₖ₊₁ ≠ 
0 + if βₖ₊₁ > btol + @kscal!(m, one(FC) / βₖ₊₁, q) + MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + end + + # γₖ₊₁ ≠ 0 + if γₖ₊₁ > btol + @kscal!(n, one(FC) / γₖ₊₁, p) + NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) + end + + # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] + # [0 u₁ ••• 0 uₖ] + # + # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ + # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ] + # + # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ + # [ Aᴴ νF ] [ 0 F ] + # + # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖ + # + # Update the QR factorization of Sₖ₊₁.ₖ = Qₖ [ Rₖ ]. + # [ Oᵀ ] + if iter == 1 + θbarₖ = conj(αₖ) + δbar₂ₖ₋₁ = τ + δbar₂ₖ = ν + σbar₂ₖ₋₁ = αₖ + σbar₂ₖ = βₖ₊₁ + λbar₂ₖ₋₁ = γₖ₊₁ + ηbar₂ₖ₋₁ = zero(FC) + else + # Apply previous reflections + # [ 1 ][ 1 ][ c₂.ₖ₋₁ s₂.ₖ₋₁ ][ 1 ] + # Ζₖ₋₁ = [ c₄.ₖ₋₁ s₄.ₖ₋₁ ][ c₃.ₖ₋₁ s₃.ₖ₋₁ ][ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ][ c₁.ₖ₋₁ s₁.ₖ₋₁ ] + # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ][ 1 ][ 1 ][ 1 ] + # [ 1 ][ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ][ 1 ][ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] + # + # [ δbar₂ₖ₋₃ σbar₂ₖ₋₃ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ δ₂ₖ₋₃ σ₂ₖ₋₃ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # Ζₖ₋₁ * [ θbarₖ₋₁ δbar₂ₖ₋₂ σbar₂ₖ₋₂ 0 0 0 ] = [ 0 δ₂ₖ₋₂ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] + # [ 0 βₖ τ αₖ 0 γₖ₊₁ ] [ 0 0 δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] + # [ γₖ 0 ᾱₖ ν βₖ₊₁ 0 ] [ 0 0 θbarₖ δbar₂ₖ σbar₂ₖ 0 ] + # + # [ 1 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] + # [ c₁.ₖ₋₁ s₁.ₖ₋₁ ] [ σbar₂ₖ₋₂ 0 0 0 ] = [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] + # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] + # [ s̄₁.ₖ₋₁ -c₁.ₖ₋₁ ] [ ᾱₖ ν βₖ₊₁ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] + σbis₂ₖ₋₂ = old_c₁ₖ * σbar₂ₖ₋₂ + old_s₁ₖ * conj(αₖ) + ηbis₂ₖ₋₂ = old_s₁ₖ * ν + λbis₂ₖ₋₂ = old_s₁ₖ * βₖ₊₁ + θbisₖ = conj(old_s₁ₖ) * σbar₂ₖ₋₂ - old_c₁ₖ * conj(αₖ) + δbis₂ₖ = - old_c₁ₖ * ν + σbis₂ₖ = - old_c₁ₖ * βₖ₊₁ + # [ c₂.ₖ₋₁ s₂.ₖ₋₁ ] [ ηbar₂ₖ₋₃ λbar₂ₖ₋₃ 0 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # [ s̄₂.ₖ₋₁ -c₂.ₖ₋₁ ] [ σbis₂ₖ₋₂ ηbis₂ₖ₋₂ λbis₂ₖ₋₂ 0 ] = [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] + # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] + # [ 1 ] [ θbisₖ δbis₂ₖ 
σbis₂ₖ 0 ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] + η₂ₖ₋₃ = old_c₂ₖ * ηbar₂ₖ₋₃ + old_s₂ₖ * σbis₂ₖ₋₂ + λ₂ₖ₋₃ = old_c₂ₖ * λbar₂ₖ₋₃ + old_s₂ₖ * ηbis₂ₖ₋₂ + μ₂ₖ₋₃ = old_s₂ₖ * λbis₂ₖ₋₂ + σhat₂ₖ₋₂ = conj(old_s₂ₖ) * ηbar₂ₖ₋₃ - old_c₂ₖ * σbis₂ₖ₋₂ + ηhat₂ₖ₋₂ = conj(old_s₂ₖ) * λbar₂ₖ₋₃ - old_c₂ₖ * ηbis₂ₖ₋₂ + λhat₂ₖ₋₂ = - old_c₂ₖ * λbis₂ₖ₋₂ + # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # [ c₃.ₖ₋₁ s₃.ₖ₋₁ ] [ σhat₂ₖ₋₂ ηhat₂ₖ₋₂ λhat₂ₖ₋₂ 0 ] = [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] + # [ 1 ] [ τ αₖ 0 γₖ₊₁ ] [ τ αₖ 0 γₖ₊₁ ] + # [ s̄₃.ₖ₋₁ -c₃.ₖ₋₁ ] [ θbisₖ δbis₂ₖ σbis₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] + σtmp₂ₖ₋₂ = old_c₃ₖ * σhat₂ₖ₋₂ + old_s₃ₖ * θbisₖ + ηtmp₂ₖ₋₂ = old_c₃ₖ * ηhat₂ₖ₋₂ + old_s₃ₖ * δbis₂ₖ + λtmp₂ₖ₋₂ = old_c₃ₖ * λhat₂ₖ₋₂ + old_s₃ₖ * σbis₂ₖ + θbarₖ = conj(old_s₃ₖ) * σhat₂ₖ₋₂ - old_c₃ₖ * θbisₖ + δbar₂ₖ = conj(old_s₃ₖ) * ηhat₂ₖ₋₂ - old_c₃ₖ * δbis₂ₖ + σbar₂ₖ = conj(old_s₃ₖ) * λhat₂ₖ₋₂ - old_c₃ₖ * σbis₂ₖ + # [ 1 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] [ η₂ₖ₋₃ λ₂ₖ₋₃ μ₂ₖ₋₃ 0 ] + # [ c₄.ₖ₋₁ s₄.ₖ₋₁ ] [ σtmp₂ₖ₋₂ ηtmp₂ₖ₋₂ λtmp₂ₖ₋₂ 0 ] = [ σ₂ₖ₋₂ η₂ₖ₋₂ λ₂ₖ₋₂ μ₂ₖ₋₂ ] + # [ s̄₄.ₖ₋₁ -c₄.ₖ₋₁ ] [ τ αₖ 0 γₖ₊₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ηbar₂ₖ₋₁ λbar₂ₖ₋₁ ] + # [ 1 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] [ θbarₖ δbar₂ₖ σbar₂ₖ 0 ] + σ₂ₖ₋₂ = old_c₄ₖ * σtmp₂ₖ₋₂ + old_s₄ₖ * τ + η₂ₖ₋₂ = old_c₄ₖ * ηtmp₂ₖ₋₂ + old_s₄ₖ * αₖ + λ₂ₖ₋₂ = old_c₄ₖ * λtmp₂ₖ₋₂ + μ₂ₖ₋₂ = old_s₄ₖ * γₖ₊₁ + δbar₂ₖ₋₁ = conj(old_s₄ₖ) * σtmp₂ₖ₋₂ - old_c₄ₖ * τ + σbar₂ₖ₋₁ = conj(old_s₄ₖ) * ηtmp₂ₖ₋₂ - old_c₄ₖ * αₖ + ηbar₂ₖ₋₁ = conj(old_s₄ₖ) * λtmp₂ₖ₋₂ + λbar₂ₖ₋₁ = - old_c₄ₖ * γₖ₊₁ + end + + # [ 1 ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] + # [ c₁.ₖ s₁.ₖ ] [ θbarₖ δbar₂ₖ ] = [ θₖ δbar₂ₖ ] + # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] + # [ s̄₁.ₖ -c₁.ₖ ] [ γₖ₊₁ 0 ] [ 0 gₖ ] + (c₁ₖ, s₁ₖ, θₖ) = sym_givens(θbarₖ, γₖ₊₁) + gₖ = conj(s₁ₖ) * δbar₂ₖ + δbar₂ₖ = c₁ₖ * δbar₂ₖ + + # [ c₂.ₖ s₂.ₖ ] [ δbar₂ₖ₋₁ σbar₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] + # [ s̄₂.ₖ -c₂.ₖ ] [ θₖ δbar₂ₖ ] = [ 0 δbis₂ₖ ] + # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] + # [ 1 ] [ 0 gₖ ] [ 0 gₖ ] + (c₂ₖ, s₂ₖ, δ₂ₖ₋₁) = sym_givens(δbar₂ₖ₋₁, θₖ) + σ₂ₖ₋₁ = 
c₂ₖ * σbar₂ₖ₋₁ + s₂ₖ * δbar₂ₖ + δbis₂ₖ = conj(s₂ₖ) * σbar₂ₖ₋₁ - c₂ₖ * δbar₂ₖ + + # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] + # [ c₃.ₖ s₃.ₖ ] [ 0 δbis₂ₖ ] = [ 0 δhat₂ₖ ] + # [ 1 ] [ 0 βₖ₊₁ ] [ 0 βₖ₊₁ ] + # [ s̄₃.ₖ -c₃.ₖ ] [ 0 gₖ ] [ 0 0 ] + (c₃ₖ, s₃ₖ, δhat₂ₖ) = sym_givens(δbis₂ₖ, gₖ) + + # [ 1 ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] [ δ₂ₖ₋₁ σ₂ₖ₋₁ ] + # [ c₄.ₖ s₄.ₖ ] [ 0 δhat₂ₖ ] = [ 0 δ₂ₖ ] + # [ s̄₄.ₖ -c₄.ₖ ] [ 0 βₖ₊₁ ] [ 0 0 ] + # [ 1 ] [ 0 0 ] [ 0 0 ] + (c₄ₖ, s₄ₖ, δ₂ₖ) = sym_givens(δhat₂ₖ, βₖ₊₁) + + # Solve Gₖ = Wₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Gₖ)ᵀ = (Wₖ)ᵀ. + if iter == 1 + # [ δ₁ 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] + # [ σ₁ δ₂ ] [ gx₂ gy₂ ] [ 0 u₁ ] + @. gx₂ₖ₋₁ = vₖ / δ₂ₖ₋₁ + @. gx₂ₖ = - σ₂ₖ₋₁ / δ₂ₖ * gx₂ₖ₋₁ + @. gy₂ₖ = uₖ / δ₂ₖ + elseif iter == 2 + # [ η₁ σ₂ δ₃ 0 ] [ gx₁ gy₁ ] = [ v₂ 0 ] + # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ] + # [ gx₃ gy₃ ] + # [ gx₄ gy₄ ] + @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) + @kswap(gx₂ₖ₋₂, gx₂ₖ) + @kswap(gy₂ₖ₋₂, gy₂ₖ) + @. gx₂ₖ₋₁ = (vₖ - η₂ₖ₋₃ * gx₂ₖ₋₃ - σ₂ₖ₋₂ * gx₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. gx₂ₖ = ( - λ₂ₖ₋₃ * gx₂ₖ₋₃ - η₂ₖ₋₂ * gx₂ₖ₋₂ - σ₂ₖ₋₁ * gx₂ₖ₋₁) / δ₂ₖ + @. gy₂ₖ₋₁ = ( - η₂ₖ₋₃ * gy₂ₖ₋₃ - σ₂ₖ₋₂ * gy₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. gy₂ₖ = (uₖ - λ₂ₖ₋₃ * gy₂ₖ₋₃ - η₂ₖ₋₂ * gy₂ₖ₋₂ - σ₂ₖ₋₁ * gy₂ₖ₋₁) / δ₂ₖ + else + # μ₂ₖ₋₅ * gx₂ₖ₋₅ + λ₂ₖ₋₄ * gx₂ₖ₋₄ + η₂ₖ₋₃ * gx₂ₖ₋₃ + σ₂ₖ₋₂ * gx₂ₖ₋₂ + δ₂ₖ₋₁ * gx₂ₖ₋₁ = vₖ + # μ₂ₖ₋₄ * gx₂ₖ₋₄ + λ₂ₖ₋₃ * gx₂ₖ₋₃ + η₂ₖ₋₂ * gx₂ₖ₋₂ + σ₂ₖ₋₁ * gx₂ₖ₋₁ + δ₂ₖ * gx₂ₖ = 0 + g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ + @. g₂ₖ₋₁ = (vₖ - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. g₂ₖ = ( - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ + @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) + @kswap(gx₂ₖ₋₂, gx₂ₖ) + # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0 + # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ + g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ + @. 
g₂ₖ₋₁ = ( - μ₂ₖ₋₅ * g₂ₖ₋₅ - λ₂ₖ₋₄ * g₂ₖ₋₄ - η₂ₖ₋₃ * g₂ₖ₋₃ - σ₂ₖ₋₂ * g₂ₖ₋₂ ) / δ₂ₖ₋₁ + @. g₂ₖ = (uₖ - μ₂ₖ₋₄ * g₂ₖ₋₄ - λ₂ₖ₋₃ * g₂ₖ₋₃ - η₂ₖ₋₂ * g₂ₖ₋₂ - σ₂ₖ₋₁ * g₂ₖ₋₁) / δ₂ₖ + @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁) + @kswap(gy₂ₖ₋₂, gy₂ₖ) + end + + # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂) + πbis₂ₖ = c₁ₖ * πbar₂ₖ + πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ + # + π₂ₖ₋₁ = c₂ₖ * πbar₂ₖ₋₁ + s₂ₖ * πbis₂ₖ + πhat₂ₖ = conj(s₂ₖ) * πbar₂ₖ₋₁ - c₂ₖ * πbis₂ₖ + # + πtmp₂ₖ = c₃ₖ * πhat₂ₖ + s₃ₖ * πbis₂ₖ₊₂ + πbar₂ₖ₊₂ = conj(s₃ₖ) * πhat₂ₖ - c₃ₖ * πbis₂ₖ₊₂ + # + π₂ₖ = c₄ₖ * πtmp₂ₖ + πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ + + # Update xₖ = Gxₖ * pₖ + @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) + @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) + + # Update yₖ = Gyₖ * pₖ + @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) + @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) + + # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|² + rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂)) + history && push!(rNorms, rNorm) + + # Update vₖ and uₖ + MisI || (vₖ .= vₖ₊₁) + NisI || (uₖ .= uₖ₊₁) + + # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ + M⁻¹vₖ₋₁ .= M⁻¹vₖ + N⁻¹uₖ₋₁ .= N⁻¹uₖ + + # Update M⁻¹vₖ and N⁻¹uₖ + M⁻¹vₖ .= q + N⁻¹uₖ .= p + + # Update cosines and sines + old_s₁ₖ = s₁ₖ + old_s₂ₖ = s₂ₖ + old_s₃ₖ = s₃ₖ + old_s₄ₖ = s₄ₖ + old_c₁ₖ = c₁ₖ + old_c₂ₖ = c₂ₖ + old_c₃ₖ = c₃ₖ + old_c₄ₖ = c₄ₖ + + # Update workspace + βₖ = βₖ₊₁ + γₖ = γₖ₊₁ + σbar₂ₖ₋₂ = σbar₂ₖ + ηbar₂ₖ₋₃ = ηbar₂ₖ₋₁ + λbar₂ₖ₋₃ = λbar₂ₖ₋₁ + if iter ≥ 2 + μ₂ₖ₋₅ = μ₂ₖ₋₃ + μ₂ₖ₋₄ = μ₂ₖ₋₂ + λ₂ₖ₋₄ = λ₂ₖ₋₂ + end + πbar₂ₖ₋₁ = πbar₂ₖ₊₁ + πbar₂ₖ = πbar₂ₖ₊₂ + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. 
+ user_requested_exit = callback(solver) :: Bool + resid_decrease_lim = rNorm ≤ ε + breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %.2fs\n", iter, rNorm, βₖ₊₁, γₖ₊₁, ktimer(start_time)) end - πbar₂ₖ₋₁ = πbar₂ₖ₊₁ - πbar₂ₖ = πbar₂ₖ₊₂ - - # Stopping conditions that do not depend on user input. - # This is to guard against tolerances that are unreasonably small. - resid_decrease_mach = (rNorm + one(T) ≤ one(T)) - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - resid_decrease_lim = rNorm ≤ ε - breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol - solved = resid_decrease_lim || resid_decrease_mach - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + tired && (status = "maximum number of iterations exceeded") + breakdown && (status = "inconsistent linear system") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") + + # Update x and y + warm_start && @kaxpy!(m, one(FC), Δx, xₖ) + warm_start && @kaxpy!(n, one(FC), Δy, yₖ) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = !solved && breakdown + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - - tired && (status = "maximum number of iterations exceeded") - breakdown && (status = "inconsistent linear system") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x and y - warm_start && @kaxpy!(m, one(FC), Δx, xₖ) - warm_start && @kaxpy!(n, one(FC), Δy, 
yₖ) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = !solved && breakdown - stats.status = status - return solver end diff --git a/src/usymlq.jl b/src/usymlq.jl index 71670c80f..b80f0a622 100644 --- a/src/usymlq.jl +++ b/src/usymlq.jl @@ -21,34 +21,54 @@ export usymlq, usymlq! """ (x, stats) = usymlq(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_usymcg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using the USYMLQ method. + (x, stats) = usymlq(A, b, c, x0::AbstractVector; kwargs...) + +USYMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +USYMLQ determines the least-norm solution of the consistent linear system Ax = b of size m × n. USYMLQ is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`. -The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`. +The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`. The error norm ‖x - x*‖ monotonously decreases in USYMLQ. It's considered as a generalization of SYMMLQ. It can also be applied to under-determined and over-determined problems. In all cases, problems must be consistent. -An option gives the possibility of transferring to the USYMCG point, -when it exists. The transfer is based on the residual norm. 
+#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional argument -USYMLQ can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = usymlq(A, b, c, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. +* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -58,18 +78,6 @@ and `false` otherwise. """ function usymlq end -function usymlq(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = UsymlqSolver(A, b) - usymlq!(solver, A, b, c, x0; kwargs...) 
- return (solver.x, solver.stats) -end - -function usymlq(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = UsymlqSolver(A, b) - usymlq!(solver, A, b, c; kwargs...) - return (solver.x, solver.stats) -end - """ solver = usymlq!(solver::UsymlqSolver, A, b, c; kwargs...) solver = usymlq!(solver::UsymlqSolver, A, b, c, x0; kwargs...) @@ -80,243 +88,290 @@ See [`UsymlqSolver`](@ref) for more details about the `solver`. """ function usymlq! end -function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - usymlq!(solver, A, b, c; kwargs...) - return solver -end - -function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("USYMLQ: system of %d equations in %d variables\n", m, n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x - vₖ₋₁, vₖ, q, d̅, stats = solver.vₖ₋₁, solver.vₖ, solver.q, solver.d̅, solver.stats - warm_start = solver.warm_start - rNorms = stats.residuals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_usymlq = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_usymlq = (:(x0::AbstractVector),) + +def_kwargs_usymlq = (:(; transfer_to_usymcg::Bool = true), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_usymlq = mapreduce(extract_parameters, vcat, def_kwargs_usymlq) + +args_usymlq = (:A, :b, :c) +optargs_usymlq = (:x0,) +kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function usymlq($(def_args_usymlq...), $(def_optargs_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymlqSolver(A, b) + warm_start!(solver, $(optargs_usymlq...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymlq!(solver, $(args_usymlq...); $(kwargs_usymlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - bNorm = @knrm2(m, r₀) - history && push!(rNorms, bNorm) - if bNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function usymlq($(def_args_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymlqSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymlq!(solver, $(args_usymlq...); $(kwargs_usymlq...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = m+n) - - ε = atol + rtol * bNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - - βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ - γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ γₖ # u₁ = c / γ₁ - cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ - sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ - ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ - ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ - δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations - - # Stopping criterion. - solved_lq = bNorm ≤ ε - solved_cg = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved_lq || solved_cg || tired || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the SSY tridiagonalization process. 
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ - - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ - - # Update the LQ factorization of Tₖ = L̅ₖQₖ. - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] - # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] - # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ - # [ • • • • • 0 ] [ • • • • • • • ] - # [ • • • • γₖ] [ • • • • • 0 ] - # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] - - if iter == 1 - δbarₖ = αₖ - elseif iter == 2 - # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] - # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - λₖ₋₁ = cₖ * βₖ + sₖ * αₖ - δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ - else - # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] - # [sₖ₋₁ -cₖ₋₁ 0] - # [ 0 0 1] - # - # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] - # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] - # [0 sₖ -cₖ] - (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) - ϵₖ₋₂ = sₖ₋₁ * βₖ - λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ - δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + function usymlq!(solver :: UsymlqSolver{T,FC,S}, $(def_args_usymlq...); $(def_kwargs_usymlq...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "USYMLQ: system of %d equations in %d variables\n", m, n) + + # Check 
type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x + vₖ₋₁, vₖ, q, d̅, stats = solver.vₖ₋₁, solver.vₖ, solver.q, solver.d̅, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ - # [δbar₁] [ζbar₁] = [β₁] - if iter == 1 - ηₖ = βₖ - end - # [δ₁ 0 ] [ ζ₁ ] = [β₁] - # [λ₁ δbar₂] [ζbar₂] [0 ] - if iter == 2 - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -λₖ₋₁ * ζₖ₋₁ - end - # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] - # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] - # [ζbarₖ] - if iter ≥ 3 - ζₖ₋₂ = ζₖ₋₁ - ηₖ₋₁ = ηₖ - ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ - ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + bNorm = @knrm2(m, r₀) + history && push!(rNorms, bNorm) + if bNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ. - # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ - # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - if iter ≥ 2 - # Compute solution xₖ. 
- # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + iter = 0 + itmax == 0 && (itmax = m+n) + + ε = atol + rtol * bNorm + (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) + + βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ + γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ + cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ + sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ + d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ + ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ + ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ + δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations + + # Stopping criterion. + solved_lq = bNorm ≤ ε + solved_cg = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved_lq || solved_cg || tired || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the SSY tridiagonalization process. + # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ + + @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + + # Update the LQ factorization of Tₖ = L̅ₖQₖ. 
+ # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ λ₁ δ₂ • • ] + # [ 0 • • • • • ] [ ϵ₁ λ₂ δ₃ • • ] + # [ • • • • • • • ] = [ 0 • • • • • ] Qₖ + # [ • • • • • 0 ] [ • • • • • • • ] + # [ • • • • γₖ] [ • • • • • 0 ] + # [ 0 • • • 0 βₖ αₖ] [ • • • 0 ϵₖ₋₂ λₖ₋₁ δbarₖ] + + if iter == 1 + δbarₖ = αₖ + elseif iter == 2 + # [δbar₁ γ₂] [c₂ s̄₂] = [δ₁ 0 ] + # [ β₂ α₂] [s₂ -c₂] [λ₁ δbar₂] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + λₖ₋₁ = cₖ * βₖ + sₖ * αₖ + δbarₖ = conj(sₖ) * βₖ - cₖ * αₖ + else + # [0 βₖ αₖ] [cₖ₋₁ s̄ₖ₋₁ 0] = [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] + # [sₖ₋₁ -cₖ₋₁ 0] + # [ 0 0 1] + # + # [ λₖ₋₂ δbarₖ₋₁ γₖ] [1 0 0 ] = [λₖ₋₂ δₖ₋₁ 0 ] + # [sₖ₋₁βₖ -cₖ₋₁βₖ αₖ] [0 cₖ s̄ₖ] [ϵₖ₋₂ λₖ₋₁ δbarₖ] + # [0 sₖ -cₖ] + (cₖ, sₖ, δₖ₋₁) = sym_givens(δbarₖ₋₁, γₖ) + ϵₖ₋₂ = sₖ₋₁ * βₖ + λₖ₋₁ = -cₖ₋₁ * cₖ * βₖ + sₖ * αₖ + δbarₖ = -cₖ₋₁ * conj(sₖ) * βₖ - cₖ * αₖ + end + + # Compute ζₖ₋₁ and ζbarₖ, last components of the solution of L̅ₖz̅ₖ = β₁e₁ + # [δbar₁] [ζbar₁] = [β₁] + if iter == 1 + ηₖ = βₖ + end + # [δ₁ 0 ] [ ζ₁ ] = [β₁] + # [λ₁ δbar₂] [ζbar₂] [0 ] + if iter == 2 + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -λₖ₋₁ * ζₖ₋₁ + end + # [λₖ₋₂ δₖ₋₁ 0 ] [ζₖ₋₂ ] = [0] + # [ϵₖ₋₂ λₖ₋₁ δbarₖ] [ζₖ₋₁ ] [0] + # [ζbarₖ] + if iter ≥ 3 + ζₖ₋₂ = ζₖ₋₁ + ηₖ₋₁ = ηₖ + ζₖ₋₁ = ηₖ₋₁ / δₖ₋₁ + ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ + end + + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ. + # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ + # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + if iter ≥ 2 + # Compute solution xₖ. + # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ + @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + end + + # Compute d̅ₖ. + if iter == 1 + # d̅₁ = u₁ + @. d̅ = uₖ + else + # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ + @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + end + + # Compute uₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if βₖ₊₁ ≠ zero(T) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + end + if γₖ₊₁ ≠ zero(T) + @. 
uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + end + + # Compute USYMLQ residual norm + # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) + if iter == 1 + rNorm_lq = bNorm + else + μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ + ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ + rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) + end + history && push!(rNorms, rNorm_lq) + + # Compute USYMCG residual norm + # ‖rₖ‖ = |ρₖ| + if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) + ζbarₖ = ηₖ / δbarₖ + ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) + rNorm_cg = abs(ρₖ) + end + + # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ and δbarₖ₋₁. + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + δbarₖ₋₁ = δbarₖ + + # Update stopping criterion. + user_requested_exit = callback(solver) :: Bool + solved_lq = rNorm_lq ≤ ε + solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm_lq, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Compute d̅ₖ. - if iter == 1 - # d̅₁ = u₁ - @. d̅ = uₖ - else - # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + # Compute USYMCG point + # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ + if solved_cg + @kaxpy!(n, ζbarₖ, d̅, x) end - # Compute uₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved_lq && (status = "solution xᴸ good enough given atol and rtol") + solved_cg && (status = "solution xᶜ good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - if βₖ₊₁ ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - end - if γₖ₊₁ ≠ zero(T) - @. 
uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p - end - - # Compute USYMLQ residual norm - # ‖rₖ‖ = √(|μₖ|² + |ωₖ|²) - if iter == 1 - rNorm_lq = bNorm - else - μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ - ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - rNorm_lq = sqrt(abs2(μₖ) + abs2(ωₖ)) - end - history && push!(rNorms, rNorm_lq) - - # Compute USYMCG residual norm - # ‖rₖ‖ = |ρₖ| - if transfer_to_usymcg && (abs(δbarₖ) > eps(T)) - ζbarₖ = ηₖ / δbarₖ - ρₖ = βₖ₊₁ * (sₖ * ζₖ₋₁ - cₖ * ζbarₖ) - rNorm_cg = abs(ρₖ) - end - - # Update sₖ₋₁, cₖ₋₁, γₖ, βₖ and δbarₖ₋₁. - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - δbarₖ₋₁ = δbarₖ - - # Update stopping criterion. - user_requested_exit = callback(solver) :: Bool - solved_lq = rNorm_lq ≤ ε - solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) - end - (verbose > 0) && @printf("\n") + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Compute USYMCG point - # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ - if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + # Update stats + stats.niter = iter + stats.solved = solved_lq || solved_cg + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = status + return solver end - - tired && (status = "maximum number of iterations exceeded") - solved_lq && (status = "solution xᴸ good enough given atol and rtol") - solved_cg && (status = "solution xᶜ good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved_lq || solved_cg - stats.inconsistent = false - stats.status = status - return solver end diff --git a/src/usymqr.jl b/src/usymqr.jl index 863390c3f..0aae23335 100644 --- a/src/usymqr.jl +++ b/src/usymqr.jl @@ -21,31 +21,53 @@ export usymqr, usymqr! 
""" (x, stats) = usymqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + timemax::Float64=Inf, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using the USYMQR method. + (x, stats) = usymqr(A, b, c, x0::AbstractVector; kwargs...) + +USYMQR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +USYMQR solves the linear least-squares problem min ‖b - Ax‖² of size m × n. +USYMQR solves Ax = b if it is consistent. USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`. -The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`. +The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`. The residual norm ‖b - Ax‖ monotonously decreases in USYMQR. It's considered as a generalization of MINRES. It can also be applied to under-determined and over-determined problems. USYMQR finds the minimum-norm solution if problems are inconsistent. -USYMQR can be warm-started from an initial guess `x0` with the method +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional argument - (x, stats) = usymqr(A, b, c, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. 
+#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -55,18 +77,6 @@ and `false` otherwise. """ function usymqr end -function usymqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where FC <: FloatOrComplex - solver = UsymqrSolver(A, b) - usymqr!(solver, A, b, c, x0; kwargs...) - return (solver.x, solver.stats) -end - -function usymqr(A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; kwargs...) where FC <: FloatOrComplex - solver = UsymqrSolver(A, b) - usymqr!(solver, A, b, c; kwargs...) - return (solver.x, solver.stats) -end - """ solver = usymqr!(solver::UsymqrSolver, A, b, c; kwargs...) solver = usymqr!(solver::UsymqrSolver, A, b, c, x0; kwargs...) @@ -77,235 +87,282 @@ See [`UsymqrSolver`](@ref) for more details about the `solver`. """ function usymqr! end -function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}, - x0 :: AbstractVector; kwargs...) 
where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - warm_start!(solver, x0) - usymqr!(solver, A, b, c; kwargs...) - return solver -end - -function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - m, n = size(A) - length(b) == m || error("Inconsistent problem size") - length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("USYMQR: system of %d equations in %d variables\n", m, n) - - # Check type consistency - eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") - - # Compute the adjoint of A - Aᵀ = A' - - # Set up workspace. - vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p - wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats - warm_start = solver.warm_start - rNorms, AᵀrNorms = stats.residuals, stats.Aresiduals - reset!(stats) - r₀ = warm_start ? 
q : b - - if warm_start - mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) +def_args_usymqr = (:(A ), + :(b::AbstractVector{FC}), + :(c::AbstractVector{FC})) + +def_optargs_usymqr = (:(x0::AbstractVector),) + +def_kwargs_usymqr = (:(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false), + :(; iostream::IO = kstdout )) + +def_kwargs_usymqr = mapreduce(extract_parameters, vcat, def_kwargs_usymqr) + +args_usymqr = (:A, :b, :c) +optargs_usymqr = (:x0,) +kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +@eval begin + function usymqr($(def_args_usymqr...), $(def_optargs_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymqrSolver(A, b) + warm_start!(solver, $(optargs_usymqr...)) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymqr!(solver, $(args_usymqr...); $(kwargs_usymqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - # Initial solution x₀ and residual norm ‖r₀‖. 
- x .= zero(FC) - rNorm = @knrm2(m, r₀) - history && push!(rNorms, rNorm) - if rNorm == 0 - stats.niter = 0 - stats.solved = true - stats.inconsistent = false - stats.status = "x = 0 is a zero-residual solution" - solver.warm_start = false - return solver + function usymqr($(def_args_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + solver = UsymqrSolver(A, b) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + usymqr!(solver, $(args_usymqr...); $(kwargs_usymqr...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) end - iter = 0 - itmax == 0 && (itmax = m+n) - - ε = atol + rtol * rNorm - κ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗") - - βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ - γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ - vₖ₋₁ .= zero(FC) # v₀ = 0 - uₖ₋₁ .= zero(FC) # u₀ = 0 - vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= c ./ γₖ # u₁ = c / γ₁ - cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ - sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ - wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ - ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ - - # Stopping criterion. - solved = rNorm ≤ ε - inconsistent = false - tired = iter ≥ itmax - status = "unknown" - user_requested_exit = false - - while !(solved || tired || inconsistent || user_requested_exit) - # Update iteration index. - iter = iter + 1 - - # Continue the SSY tridiagonalization process. 
- # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - - mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ - - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ - - # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. - # [ Oᵀ ] - # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] - # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] - # [ 0 • • • • • ] [ • • δ₃ • • • • ] - # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] - # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] - # [ • • • • γₖ ] [ • • • λₖ₋₁] - # [ • • βₖ αₖ ] [ 0 • • • • 0 δₖ ] - # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] - # - # If k = 1, we don't have any previous reflexion. - # If k = 2, we apply the last reflexion. - # If k ≥ 3, we only apply the two previous reflexions. - - # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ - if iter ≥ 3 - # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] - # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] - ϵₖ₋₂ = sₖ₋₂ * γₖ - λbarₖ₋₁ = -cₖ₋₂ * γₖ + function usymqr!(solver :: UsymqrSolver{T,FC,S}, $(def_args_usymqr...); $(def_kwargs_usymqr...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + (m == solver.m && n == solver.n) || error("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m, $n)") + length(b) == m || error("Inconsistent problem size") + length(c) == n || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "USYMQR: system of %d equations in %d variables\n", m, n) + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." 
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") + + # Compute the adjoint of A + Aᴴ = A' + + # Set up workspace. + vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p + wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats + warm_start = solver.warm_start + rNorms, AᴴrNorms = stats.residuals, stats.Aresiduals + reset!(stats) + r₀ = warm_start ? q : b + + if warm_start + mul!(r₀, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), r₀) end - # Apply previous Givens reflections Qₖ₋₁.ₖ - if iter ≥ 2 - iter == 2 && (λbarₖ₋₁ = γₖ) - # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] - # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] - λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ - δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ + # Initial solution x₀ and residual norm ‖r₀‖. + x .= zero(FC) + rNorm = @knrm2(m, r₀) + history && push!(rNorms, rNorm) + if rNorm == 0 + stats.niter = 0 + stats.solved = true + stats.inconsistent = false + stats.timer = ktimer(start_time) + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver end - # Compute and apply current Givens reflection Qₖ.ₖ₊₁ - iter == 1 && (δbarₖ = αₖ) - # [cₖ sₖ] [δbarₖ] = [δₖ] - # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] - (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) - - # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] - # [ 0 ] - # - # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] - # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] - ζₖ = cₖ * ζbarₖ - ζbarₖ₊₁ = conj(sₖ) * ζbarₖ - - # Compute the direction wₖ, the last column of Wₖ = Uₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Uₖ)ᵀ. - # w₁ = u₁ / δ₁ - if iter == 1 - wₖ = wₖ₋₁ - @kaxpy!(n, one(FC), uₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # w₂ = (u₂ - λ₁w₁) / δ₂ - if iter == 2 - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), uₖ, wₖ) - @. wₖ = wₖ / δₖ - end - # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ - if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) - wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), uₖ, wₖ) - @. 
wₖ = wₖ / δₖ + iter = 0 + itmax == 0 && (itmax = m+n) + + ε = atol + rtol * rNorm + κ = zero(T) + (verbose > 0) && @printf(iostream, "%5s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖", "timer") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8s %.2fs\n", iter, rNorm, " ✗ ✗ ✗ ✗", ktimer(start_time)) + + βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ + γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ + vₖ₋₁ .= zero(FC) # v₀ = 0 + uₖ₋₁ .= zero(FC) # u₀ = 0 + vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ + cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ + sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ + wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ + ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ + + # Stopping criterion. + solved = rNorm ≤ ε + inconsistent = false + tired = iter ≥ itmax + status = "unknown" + user_requested_exit = false + overtimed = false + + while !(solved || tired || inconsistent || user_requested_exit || overtimed) + # Update iteration index. + iter = iter + 1 + + # Continue the SSY tridiagonalization process. + # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ + + mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ + + @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + + αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + + @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + + βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + + # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. 
+ # [ Oᵀ ] + # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ λ₁ ϵ₁ 0 • • 0 ] + # [ β₂ α₂ γ₃ • • ] [ 0 δ₂ λ₂ • • • ] + # [ 0 • • • • • ] [ • • δ₃ • • • • ] + # [ • • • • • • • ] = Qₖ [ • • • • • 0 ] + # [ • • • • • 0 ] [ • • • • ϵₖ₋₂] + # [ • • • • γₖ ] [ • • • λₖ₋₁] + # [ • • βₖ αₖ ] [ 0 • • • • 0 δₖ ] + # [ 0 • • • • 0 βₖ₊₁] [ 0 • • • • • 0 ] + # + # If k = 1, we don't have any previous reflexion. + # If k = 2, we apply the last reflexion. + # If k ≥ 3, we only apply the two previous reflexions. + + # Apply previous Givens reflections Qₖ₋₂.ₖ₋₁ + if iter ≥ 3 + # [cₖ₋₂ sₖ₋₂] [0 ] = [ ϵₖ₋₂ ] + # [s̄ₖ₋₂ -cₖ₋₂] [γₖ] [λbarₖ₋₁] + ϵₖ₋₂ = sₖ₋₂ * γₖ + λbarₖ₋₁ = -cₖ₋₂ * γₖ + end + + # Apply previous Givens reflections Qₖ₋₁.ₖ + if iter ≥ 2 + iter == 2 && (λbarₖ₋₁ = γₖ) + # [cₖ₋₁ sₖ₋₁] [λbarₖ₋₁] = [λₖ₋₁ ] + # [s̄ₖ₋₁ -cₖ₋₁] [ αₖ ] [δbarₖ] + λₖ₋₁ = cₖ₋₁ * λbarₖ₋₁ + sₖ₋₁ * αₖ + δbarₖ = conj(sₖ₋₁) * λbarₖ₋₁ - cₖ₋₁ * αₖ + end + + # Compute and apply current Givens reflection Qₖ.ₖ₊₁ + iter == 1 && (δbarₖ = αₖ) + # [cₖ sₖ] [δbarₖ] = [δₖ] + # [s̄ₖ -cₖ] [βₖ₊₁ ] [0 ] + (cₖ, sₖ, δₖ) = sym_givens(δbarₖ, βₖ₊₁) + + # Update z̅ₖ₊₁ = Qₖ.ₖ₊₁ [ z̄ₖ ] + # [ 0 ] + # + # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] + # [s̄ₖ -cₖ] [ 0 ] [ζbarₖ₊₁] + ζₖ = cₖ * ζbarₖ + ζbarₖ₊₁ = conj(sₖ) * ζbarₖ + + # Compute the direction wₖ, the last column of Wₖ = Uₖ(Rₖ)⁻¹ ⟷ (Rₖ)ᵀ(Wₖ)ᵀ = (Uₖ)ᵀ. + # w₁ = u₁ / δ₁ + if iter == 1 + wₖ = wₖ₋₁ + @kaxpy!(n, one(FC), uₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # w₂ = (u₂ - λ₁w₁) / δ₂ + if iter == 2 + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), uₖ, wₖ) + @. wₖ = wₖ / δₖ + end + # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ + if iter ≥ 3 + @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + wₖ = wₖ₋₂ + @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + @kaxpy!(n, one(FC), uₖ, wₖ) + @. wₖ = wₖ / δₖ + end + + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + ζₖ * wₖ + @kaxpy!(n, ζₖ, wₖ, x) + + # Compute ‖rₖ‖ = |ζbarₖ₊₁|. + rNorm = abs(ζbarₖ₊₁) + history && push!(rNorms, rNorm) + + # Compute ‖Aᴴrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²). 
+ AᴴrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁)) + history && push!(AᴴrNorms, AᴴrNorm) + + # Compute uₖ₊₁ and uₖ₊₁. + @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ + @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ + + if βₖ₊₁ ≠ zero(T) + @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + end + if γₖ₊₁ ≠ zero(T) + @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + end + + # Update directions for x. + if iter ≥ 2 + @kswap(wₖ₋₂, wₖ₋₁) + end + + # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ. + if iter ≥ 2 + sₖ₋₂ = sₖ₋₁ + cₖ₋₂ = cₖ₋₁ + end + sₖ₋₁ = sₖ + cₖ₋₁ = cₖ + ζbarₖ = ζbarₖ₊₁ + γₖ = γₖ₊₁ + βₖ = βₖ₊₁ + + # Update stopping criterion. + iter == 1 && (κ = atol + rtol * AᴴrNorm) + user_requested_exit = callback(solver) :: Bool + solved = rNorm ≤ ε + inconsistent = !solved && AᴴrNorm ≤ κ + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %.2fs\n", iter, rNorm, AᴴrNorm, ktimer(start_time)) end + (verbose > 0) && @printf(iostream, "\n") - # Compute solution xₖ. - # xₖ ← xₖ₋₁ + ζₖ * wₖ - @kaxpy!(n, ζₖ, wₖ, x) - - # Compute ‖rₖ‖ = |ζbarₖ₊₁|. - rNorm = abs(ζbarₖ₊₁) - history && push!(rNorms, rNorm) - - # Compute ‖Aᵀrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²). - AᵀrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁)) - history && push!(AᵀrNorms, AᵀrNorm) - - # Compute uₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if βₖ₊₁ ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - end - if γₖ₊₁ ≠ zero(T) - @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p - end + # Termination status + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + user_requested_exit && (status = "user-requested exit") + overtimed && (status = "time limit exceeded") - # Update directions for x. - if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) - end + # Update x + warm_start && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false - # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ. 
- if iter ≥ 2 - sₖ₋₂ = sₖ₋₁ - cₖ₋₂ = cₖ₋₁ - end - sₖ₋₁ = sₖ - cₖ₋₁ = cₖ - ζbarₖ = ζbarₖ₊₁ - γₖ = γₖ₊₁ - βₖ = βₖ₊₁ - - # Update stopping criterion. - iter == 1 && (κ = atol + rtol * AᵀrNorm) - user_requested_exit = callback(solver) :: Bool - solved = rNorm ≤ ε - inconsistent = !solved && AᵀrNorm ≤ κ - tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᵀrNorm) + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.timer = ktimer(start_time) + stats.status = status + return solver end - (verbose > 0) && @printf("\n") - tired && (status = "maximum number of iterations exceeded") - solved && (status = "solution good enough given atol and rtol") - user_requested_exit && (status = "user-requested exit") - - # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) - solver.warm_start = false - - # Update stats - stats.niter = iter - stats.solved = solved - stats.inconsistent = inconsistent - stats.status = status - return solver end diff --git a/test/callback_utils.jl b/test/callback_utils.jl new file mode 100644 index 000000000..f88f01848 --- /dev/null +++ b/test/callback_utils.jl @@ -0,0 +1,152 @@ +mutable struct StorageGetxRestartedGmres{S} + x::S + y::S + p::S +end +StorageGetxRestartedGmres(solver::GmresSolver; N = I) = + StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x)) + +function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, + stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S} + NisI = (N === I) + x2, y2, p2 = stor.x, stor.y, stor.p + n = size(A, 2) + # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. 
+ nr = sum(1:solver.inner_iter) + y = solver.z # yᵢ = zᵢ + y2 .= y + R = solver.R + V = solver.V + x2 .= solver.Δx + for i = solver.inner_iter : -1 : 1 + pos = nr + i - solver.inner_iter # position of rᵢ.ₖ + for j = solver.inner_iter : -1 : i+1 + y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ eps(T)^(3/4) + y2[i] = zero(FC) + inconsistent = true + else + y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + end + end + + # Form xₖ = N⁻¹Vₖyₖ + for i = 1 : solver.inner_iter + Krylov.@kaxpy!(n, y2[i], V[i], x2) + end + if !NisI + p2 .= solver.p + p2 .= x2 + mul!(x2, N, p2) + end + x2 .+= solver.x +end + +mutable struct TestCallbackN2{T, S, M} + A::M + b::S + storage_vec::S + tol::T +end +TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol) + +function (cb_n2::TestCallbackN2)(solver) + mul!(cb_n2.storage_vec, cb_n2.A, solver.x) + cb_n2.storage_vec .-= cb_n2.b + return norm(cb_n2.storage_vec) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2Adjoint{T, S, M} + A::M + b::S + c::S + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol) + +function (cb_n2::TestCallbackN2Adjoint)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) + cb_n2.storage_vec1 .-= cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', solver.y) + cb_n2.storage_vec2 .-= cb_n2.c + return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) +end + +mutable struct TestCallbackN2Shifts{T, S, M} + A::M + b::S + shifts::Vector{T} + tol::T +end +TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol) + +function (cb_n2::TestCallbackN2Shifts)(solver) + r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x) + return all(map(norm, r) .≤ cb_n2.tol) +end + +mutable struct TestCallbackN2LS{T, S, M} + A::M + b::S + λ::T + storage_vec1::S + 
storage_vec2::S + tol::T +end +TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol) + +function (cb_n2::TestCallbackN2LS)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) + cb_n2.storage_vec1 .-= cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1) + cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x + return norm(cb_n2.storage_vec2) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2LN{T, S, M} + A::M + b::S + λ::T + storage_vec::S + tol::T +end +TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol) + +function (cb_n2::TestCallbackN2LN)(solver) + mul!(cb_n2.storage_vec, cb_n2.A, solver.x) + cb_n2.storage_vec .-= cb_n2.b + cb_n2.λ != 0 && (cb_n2.storage_vec .+= cb_n2.λ .* solver.x) + return norm(cb_n2.storage_vec) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2SaddlePts{T, S, M} + A::M + b::S + c::S + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2SaddlePts(A, b, c; tol = 0.1) = + TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol) + +function (cb_n2::TestCallbackN2SaddlePts)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.y) + cb_n2.storage_vec1 .+= solver.x .- cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', solver.x) + cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c + return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) +end + +function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol) + get_x_restarted_gmres!(solver, A, stor, N) + x = stor.x + mul!(storage_vec, A, x) + storage_vec .-= b + return (norm(storage_vec) ≤ tol) +end diff --git a/test/get_div_grad.jl b/test/get_div_grad.jl index 6d6bf012e..ae27e5061 100644 --- a/test/get_div_grad.jl +++ b/test/get_div_grad.jl @@ -1,8 +1,8 @@ # Identity matrix. eye(n::Int; FC=Float64) = sparse(one(FC) * I, n, n) -# Compute the energy norm ‖r‖ₚ = √(rᵀPr) where P is a symmetric and positive definite matrix. 
-metric(r, P) = sqrt(dot(r, P * r)) +# Compute the energy norm ‖r‖ₚ = √(rᴴPr) where P is a symmetric and positive definite matrix. +metric(r, P) = sqrt(real(dot(r, P * r))) # Based on Lars Ruthotto's initial implementation. function get_div_grad(n1 :: Int, n2 :: Int, n3 :: Int) diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl new file mode 100644 index 000000000..9fb6cdffd --- /dev/null +++ b/test/gpu/amd.jl @@ -0,0 +1,111 @@ +using AMDGPU + +include("gpu.jl") + +@testset "AMD -- AMDGPU.jl" begin + + @test AMDGPU.functional() + AMDGPU.allowscalar(false) + + @testset "documentation" begin + A_cpu = rand(ComplexF64, 20, 20) + A_cpu = A_cpu + A_cpu' + b_cpu = rand(ComplexF64, 20) + A_gpu = ROCMatrix(A_cpu) + b_gpu = ROCVector(b_cpu) + x, stats = minres(A_gpu, b_gpu) + end + + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = ROCVector{FC} + M = ROCMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! 
-- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl new file mode 100644 index 000000000..65e123be1 --- /dev/null +++ b/test/gpu/gpu.jl @@ -0,0 +1,52 @@ +using LinearAlgebra, SparseArrays, Test +using Krylov + +include("../test_utils.jl") + +function test_processes(S, M) + m = 250 + n = 500 + k = 20 + FC = eltype(S) + + cpu_A, cpu_b = symmetric_indefinite(n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, T = hermitian_lanczos(gpu_A, gpu_b, k) + + cpu_A, cpu_b = nonsymmetric_definite(n, FC=FC) + cpu_c = -cpu_b + gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c) + V, T, U, Tᴴ = nonhermitian_lanczos(gpu_A, gpu_b, gpu_c, k) + + cpu_A, cpu_b = nonsymmetric_indefinite(n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, H = arnoldi(gpu_A, gpu_b, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, U, L = golub_kahan(gpu_A, gpu_b, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + _, cpu_c = over_consistent(n, m, FC=FC) + gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c) + V, T, U, Tᴴ = saunders_simon_yip(gpu_A, gpu_b, gpu_c, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + cpu_B, cpu_c = over_consistent(n, m, FC=FC) + 
gpu_A, gpu_B, gpu_b, gpu_c = M(cpu_A), M(cpu_B), S(cpu_b), S(cpu_c) + V, H, U, F = montoison_orban(gpu_A, gpu_B, gpu_b, gpu_c, k) +end + +function test_solver(S, M) + n = 10 + memory = 5 + A = M(undef, n, n) + b = S(undef, n) + solver = GmresSolver(n, n, memory, S) + solve!(solver, A, b) # Test that we don't have errors +end + +function test_conversion(S, M) + @test Krylov.vector_to_matrix(S) <: M + @test Krylov.matrix_to_vector(M) <: S +end diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl new file mode 100644 index 000000000..f03176199 --- /dev/null +++ b/test/gpu/intel.jl @@ -0,0 +1,113 @@ +using oneAPI + +include("gpu.jl") + +@testset "Intel -- oneAPI.jl" begin + + @test oneAPI.functional() + oneAPI.allowscalar(false) + + @testset "documentation" begin + T = Float32 + m = 20 + n = 10 + A_cpu = rand(T, m, n) + b_cpu = rand(T, m) + A_gpu = oneMatrix(A_cpu) + b_gpu = oneVector(b_cpu) + x, stats = lsqr(A_gpu, b_gpu) + end + + for FC ∈ (Float32, ComplexF32) + S = oneVector{FC} + M = oneMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! 
-- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl new file mode 100644 index 000000000..2e684e21f --- /dev/null +++ b/test/gpu/metal.jl @@ -0,0 +1,113 @@ +using Metal + +include("gpu.jl") + +@testset "Apple M1 GPUs -- Metal.jl" begin + + # @test Metal.functional() + Metal.allowscalar(false) + + @testset "documentation" begin + T = Float32 + n = 10 + m = 20 + A_cpu = rand(T, n, m) + b_cpu = rand(T, n) + A_gpu = MtlMatrix(A_cpu) + b_gpu = MtlVector(b_cpu) + x, stats = craig(A_gpu, b_gpu) + end + + for FC in (Float32, ComplexF32) + S = MtlVector{FC} + M = MtlMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! 
-- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! -- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl new file mode 100644 index 000000000..8cb44136d --- /dev/null +++ b/test/gpu/nvidia.jl @@ -0,0 +1,215 @@ +using LinearOperators, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER + +include("gpu.jl") + +@testset "Nvidia -- CUDA.jl" begin + + @test CUDA.functional() + CUDA.allowscalar(false) + + @testset "documentation" begin + A_cpu = rand(20, 20) + b_cpu = rand(20) + A_gpu = CuMatrix(A_cpu) + b_gpu = CuVector(b_cpu) + x, stats = bilq(A_gpu, b_gpu) + + A_cpu = sprand(200, 100, 0.3) + b_cpu = rand(200) + A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + x, stats = lsmr(A_gpu, b_gpu) + + @testset "ic0" begin + A_cpu, b_cpu = sparse_laplacian() + @test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == true + + b_gpu = CuVector(b_cpu) + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + symmetric = 
hermitian = true + + A_gpu = CuSparseMatrixCSC(A_cpu) + P = ic02(A_gpu) + function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, UpperTriangular(P)', x) + ldiv!(y, UpperTriangular(P), z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z)) + x, stats = cg(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + @test stats.niter ≤ 19 + + A_gpu = CuSparseMatrixCSR(A_gpu) + P = ic02(A_gpu) + function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, LowerTriangular(P), x) + ldiv!(y, LowerTriangular(P)', z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z)) + x, stats = cg(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + @test stats.niter ≤ 19 + end + + @testset "ilu0" begin + A_cpu = Float64[1 0 0 4; + 0 0 7 8; + 9 0 0 12; + 0 14 0 16] + A_cpu = sparse(A_cpu) + b_cpu = ones(4) + @test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == false + + p = zfd(A_cpu) + p .+= 1 + invp = invperm(p) + @test reduce(&, invp .== p) == false + + b_gpu = CuVector(b_cpu) + n = length(b_gpu) + T = eltype(b_gpu) + z = CUDA.zeros(T, n) + symmetric = hermitian = false + + A_gpu = CuSparseMatrixCSC(A_cpu[:,p]) + P = ilu02(A_gpu) + function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z) + ldiv!(z, LowerTriangular(P), x) + ldiv!(y, UnitUpperTriangular(P), z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z)) + x̄, stats = gmres(A_gpu, b_gpu, M=opM) + x = Vector(x̄)[invp] + @test norm(b_gpu - A_gpu * x̄) ≤ 1e-6 + @test norm(b_cpu - A_cpu * x) ≤ 1e-6 + + A_gpu = CuSparseMatrixCSR(A_cpu[:,p]) + P = ilu02(A_gpu) + function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z) + ldiv!(z, UnitLowerTriangular(P), x) + ldiv!(y, UpperTriangular(P), z) + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z)) + x̄, stats = gmres(A_gpu, b_gpu, M=opM) + x = Vector(x̄)[invp] + @test norm(b_gpu - 
A_gpu * x̄) ≤ 1e-6 + @test norm(b_cpu - A_cpu * x) ≤ 1e-6 + end + end + + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = CuVector{FC} + V = CuSparseVector{FC} + M = CuMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! -- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "processes -- $FC" begin + test_processes(S, M) + end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + + @testset "ktypeof -- $FC" begin + dv = S(rand(FC, 10)) + b = view(dv, 4:8) + @test Krylov.ktypeof(dv) <: S + @test Krylov.ktypeof(b) <: S + + dm = M(rand(FC, 10, 10)) + b = view(dm, :, 3) + @test 
Krylov.ktypeof(b) <: S + + sv = V(sprand(FC, 10, 0.5)) + b = view(sv, 4:8) + @test Krylov.ktypeof(sv) <: S + @test Krylov.ktypeof(b) <: S + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 99ab25fda..5381fd10e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,9 @@ import Krylov.KRYLOV_SOLVERS include("test_utils.jl") include("test_aux.jl") include("test_stats.jl") +include("test_processes.jl") +include("test_fgmres.jl") include("test_gpmr.jl") include("test_fom.jl") include("test_gmres.jl") @@ -42,3 +44,5 @@ include("test_allocations.jl") include("test_mp.jl") include("test_solvers.jl") include("test_warm_start.jl") +include("test_verbose.jl") +include("test_extensions.jl") diff --git a/test/test_allocations.jl b/test/test_allocations.jl index 4c6817499..174d0ae55 100644 --- a/test/test_allocations.jl +++ b/test/test_allocations.jl @@ -1,26 +1,27 @@ @testset "allocations" begin - for FC in (Float64, ComplexF64) + for FC in (Float32, Float64, ComplexF32, ComplexF64) @testset "Data Type: $FC" begin - A = FC.(get_div_grad(16, 16, 16)) # Dimension n x n - n = size(A, 1) - m = div(n, 2) - Au = A[1:m,:] # Dimension m x n - Ao = A[:,1:m] # Dimension n x m - b = Ao * ones(FC, m) # Dimension n - c = Au * ones(FC, n) # Dimension m + A = FC.(get_div_grad(18, 18, 18)) # Dimension m x n + m,n = size(A) + k = div(n, 2) + Au = A[1:k,:] # Dimension k x n + Ao = A[:,1:k] # Dimension m x k + b = Ao * ones(FC, k) # Dimension m + c = Au * ones(FC, n) # Dimension k mem = 200 - shifts = [1.0; 2.0; 3.0; 4.0; 5.0] + T = real(FC) + shifts = T[1; 2; 3; 4; 5] nshifts = 5 - nbits = sizeof(FC) # 8 bits for Float64 and 16 bits for ComplexF64 + nbits_FC = sizeof(FC) # 8 bits for ComplexF32 and 16 bits for ComplexF64 + nbits_T = sizeof(T) # 4 bits for Float32 and 8 bits for Float64 @testset "SYMMLQ" begin # SYMMLQ needs: # 5 n-vectors: x, Mvold, Mv, Mv_next, w̅ - storage_symmlq(n) = 5 * n - storage_symmlq_bytes(n) = nbits * storage_symmlq(n) + 
storage_symmlq_bytes(n) = nbits_FC * 5 * n expected_symmlq_bytes = storage_symmlq_bytes(n) symmlq(A, b) # warmup @@ -36,8 +37,7 @@ @testset "CG" begin # CG needs: # 4 n-vectors: x, r, p, Ap - storage_cg(n) = 4 * n - storage_cg_bytes(n) = nbits * storage_cg(n) + storage_cg_bytes(n) = nbits_FC * 4 * n expected_cg_bytes = storage_cg_bytes(n) cg(A, b) # warmup @@ -53,8 +53,7 @@ @testset "CG-LANCZOS" begin # CG-LANCZOS needs: # 5 n-vectors: x, Mv, Mv_prev, p, Mv_next - storage_cg_lanczos(n) = 5 * n - storage_cg_lanczos_bytes(n) = nbits * storage_cg_lanczos(n) + storage_cg_lanczos_bytes(n) = nbits_FC * 5 * n expected_cg_lanczos_bytes = storage_cg_lanczos_bytes(n) cg_lanczos(A, b) # warmup @@ -73,9 +72,7 @@ # - 2 (n*nshifts)-matrices: x, p # - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms # - 3 nshifts-bitVector: indefinite, converged, not_cv - storage_cg_lanczos_shift(n, nshifts) = (3 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64) - storage_cg_lanczos_shift_bytes(n, nshifts) = nbits * storage_cg_lanczos_shift(n, nshifts) - + storage_cg_lanczos_shift_bytes(n, nshifts) = nbits_FC * ((3 * n) + (2 * n * nshifts)) + nbits_T * (5 * nshifts) + (3 * nshifts) expected_cg_lanczos_shift_bytes = storage_cg_lanczos_shift_bytes(n, nshifts) cg_lanczos_shift(A, b, shifts) # warmup actual_cg_lanczos_shift_bytes = @allocated cg_lanczos_shift(A, b, shifts) @@ -90,8 +87,7 @@ @testset "CR" begin # CR needs: # 5 n-vectors: x, r, p, q, Ar - storage_cr(n) = 5 * n - storage_cr_bytes(n) = nbits * storage_cr(n) + storage_cr_bytes(n) = nbits_FC * 5 * n expected_cr_bytes = storage_cr_bytes(n) cr(A, b) # warmup @@ -107,8 +103,7 @@ @testset "MINRES" begin # MINRES needs: # 6 n-vectors: x, r1, r2, w1, w2, y - storage_minres(n) = 6 * n - storage_minres_bytes(n) = nbits * storage_minres(n) + storage_minres_bytes(n) = nbits_FC * 6 * n expected_minres_bytes = storage_minres_bytes(n) minres(A, b) # warmup @@ -124,8 +119,7 @@ @testset "MINRES-QLP" begin # MINRES-QLP needs: # - 6 n-vectors: wₖ₋₁, 
wₖ, vₖ₋₁, vₖ, x, p - storage_minres_qlp(n) = 6 * n - storage_minres_qlp_bytes(n) = nbits * storage_minres_qlp(n) + storage_minres_qlp_bytes(n) = nbits_FC * 6 * n expected_minres_qlp_bytes = storage_minres_qlp_bytes(n) minres_qlp(A, b) # warmup @@ -141,11 +135,11 @@ @testset "DIOM" begin # DIOM needs: # - 2 n-vectors: x, t - # - 2 (n*mem)-matrices: P, V - # - 1 mem-vector: L - # - 1 (mem+2)-vector: H - storage_diom(mem, n) = (2 * n) + (2 * n * mem) + (mem) + (mem + 2) - storage_diom_bytes(mem, n) = nbits * storage_diom(mem, n) + # - 1 (n*mem)-matrix: V + # - 1 n*(mem-1)-matrix: P + # - 1 (mem-1)-vector: L + # - 1 mem-vector: H + storage_diom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (n * (mem-1)) + (mem-1) + (mem)) expected_diom_bytes = storage_diom_bytes(mem, n) diom(A, b, memory=mem) # warmup @@ -164,8 +158,7 @@ # - 1 (n*mem)-matrix: V # - 2 mem-vectors: l, z # - 1 (mem*(mem+1)/2)-vector: U - storage_fom(mem, n) = (2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2) - storage_fom_bytes(mem, n) = nbits * storage_fom(mem, n) + storage_fom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) expected_fom_bytes = storage_fom_bytes(mem, n) fom(A, b, memory=mem) # warmup @@ -183,9 +176,8 @@ # - 2 n-vectors: x, t # - 2 (n*mem)-matrices: P, V # - 2 mem-vectors: c, s - # - 1 (mem+2)-vector: H - storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 2) - storage_dqgmres_bytes(mem, n) = nbits * storage_dqgmres(mem, n) + # - 1 (mem+1)-vector: H + storage_dqgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + mem + (mem + 1)) + nbits_T * mem expected_dqgmres_bytes = storage_dqgmres_bytes(mem, n) dqgmres(A, b, memory=mem) # warmup @@ -204,8 +196,7 @@ # - 1 n*(mem)-matrix: V # - 3 mem-vectors: c, s, z # - 1 (mem*(mem+1)/2)-vector: R - storage_gmres(mem, n) = (2 * n) + (n * mem) + (3 * mem) + (mem * (mem+1) / 2) - storage_gmres_bytes(mem, n) = nbits * storage_gmres(mem, n) + storage_gmres_bytes(mem, n) = nbits_FC * ((2 
* n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem expected_gmres_bytes = storage_gmres_bytes(mem, n) gmres(A, b, memory=mem) # warmup @@ -218,11 +209,29 @@ @test inplace_gmres_bytes == 0 end + @testset "FGMRES" begin + # FGMRES needs: + # - 2 n-vectors: x, w + # - 2 n*(mem)-matrix: V, Z + # - 3 mem-vectors: c, s, z + # - 1 (mem*(mem+1)/2)-vector: R + storage_fgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem + + expected_fgmres_bytes = storage_fgmres_bytes(mem, n) + fgmres(A, b, memory=mem) # warmup + actual_fgmres_bytes = @allocated fgmres(A, b, memory=mem) + @test expected_fgmres_bytes ≤ actual_fgmres_bytes ≤ 1.02 * expected_fgmres_bytes + + solver = FgmresSolver(A, b, mem) + fgmres!(solver, A, b) # warmup + inplace_fgmres_bytes = @allocated fgmres!(solver, A, b) + @test inplace_fgmres_bytes == 0 + end + @testset "CGS" begin # CGS needs: # 6 n-vectors: x, r, u, p, q, ts - storage_cgs(n) = 6 * n - storage_cgs_bytes(n) = nbits * storage_cgs(n) + storage_cgs_bytes(n) = nbits_FC * 6 * n expected_cgs_bytes = storage_cgs_bytes(n) cgs(A, b) # warmup @@ -238,8 +247,7 @@ @testset "BICGSTAB" begin # BICGSTAB needs: # 6 n-vectors: x, r, p, v, s, qd - storage_bicgstab(n) = 6 * n - storage_bicgstab_bytes(n) = nbits * storage_bicgstab(n) + storage_bicgstab_bytes(n) = nbits_FC * 6 * n expected_bicgstab_bytes = storage_bicgstab_bytes(n) bicgstab(A, b) # warmup @@ -254,12 +262,11 @@ @testset "CGNE" begin # CGNE needs: - # - 3 n-vectors: x, p, Aᵀz + # - 3 n-vectors: x, p, Aᴴz # - 2 m-vectors: r, q - storage_cgne(n, m) = 3 * n + 2 * m - storage_cgne_bytes(n, m) = nbits * storage_cgne(n, m) + storage_cgne_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_cgne_bytes = storage_cgne_bytes(n, m) + expected_cgne_bytes = storage_cgne_bytes(k, n) (x, stats) = cgne(Au, c) # warmup actual_cgne_bytes = @allocated cgne(Au, c) @test expected_cgne_bytes ≤ actual_cgne_bytes ≤ 1.02 * expected_cgne_bytes @@ -272,12 
+279,11 @@ @testset "CRMR" begin # CRMR needs: - # - 3 n-vectors: x, p, Aᵀr + # - 3 n-vectors: x, p, Aᴴr # - 2 m-vectors: r, q - storage_crmr(n, m) = 3 * n + 2 * m - storage_crmr_bytes(n, m) = nbits * storage_crmr(n, m) + storage_crmr_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_crmr_bytes = storage_crmr_bytes(n, m) + expected_crmr_bytes = storage_crmr_bytes(k, n) (x, stats) = crmr(Au, c) # warmup actual_crmr_bytes = @allocated crmr(Au, c) @test expected_crmr_bytes ≤ actual_crmr_bytes ≤ 1.02 * expected_crmr_bytes @@ -290,12 +296,11 @@ @testset "LNLQ" begin # LNLQ needs: - # - 3 n-vectors: x, v, Aᵀu + # - 3 n-vectors: x, v, Aᴴu # - 4 m-vectors: y, w̄, u, Av - storage_lnlq(n, m) = 3 * n + 4 * m - storage_lnlq_bytes(n, m) = nbits * storage_lnlq(n, m) + storage_lnlq_bytes(m, n) = nbits_FC * (3 * n + 4 * m) - expected_lnlq_bytes = storage_lnlq_bytes(n, m) + expected_lnlq_bytes = storage_lnlq_bytes(k, n) lnlq(Au, c) # warmup actual_lnlq_bytes = @allocated lnlq(Au, c) @test expected_lnlq_bytes ≤ actual_lnlq_bytes ≤ 1.02 * expected_lnlq_bytes @@ -308,12 +313,11 @@ @testset "CRAIG" begin # CRAIG needs: - # - 3 n-vectors: x, v, Aᵀu + # - 3 n-vectors: x, v, Aᴴu # - 4 m-vectors: y, w, u, Av - storage_craig(n, m) = 3 * n + 4 * m - storage_craig_bytes(n, m) = nbits * storage_craig(n, m) + storage_craig_bytes(m, n) = nbits_FC * (3 * n + 4 * m) - expected_craig_bytes = storage_craig_bytes(n, m) + expected_craig_bytes = storage_craig_bytes(k, n) craig(Au, c) # warmup actual_craig_bytes = @allocated craig(Au, c) @test expected_craig_bytes ≤ actual_craig_bytes ≤ 1.02 * expected_craig_bytes @@ -326,12 +330,11 @@ @testset "CRAIGMR" begin # CRAIGMR needs: - # - 4 n-vectors: x, v, Aᵀu, d + # - 4 n-vectors: x, v, Aᴴu, d # - 5 m-vectors: y, u, w, wbar, Av - storage_craigmr(n, m) = 4 * n + 5 * m - storage_craigmr_bytes(n, m) = nbits * storage_craigmr(n, m) + storage_craigmr_bytes(m, n) = nbits_FC * (4 * n + 5 * m) - expected_craigmr_bytes = storage_craigmr_bytes(n, m) + 
expected_craigmr_bytes = storage_craigmr_bytes(k, n) craigmr(Au, c) # warmup actual_craigmr_bytes = @allocated craigmr(Au, c) @test expected_craigmr_bytes ≤ actual_craigmr_bytes ≤ 1.02 * expected_craigmr_bytes @@ -344,12 +347,11 @@ @testset "CGLS" begin # CGLS needs: - # - 3 m-vectors: x, p, s - # - 2 n-vectors: r, q - storage_cgls(n, m) = 3 * m + 2 * n - storage_cgls_bytes(n, m) = nbits * storage_cgls(n, m) + # - 3 n-vectors: x, p, s + # - 2 m-vectors: r, q + storage_cgls_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_cgls_bytes = storage_cgls_bytes(n, m) + expected_cgls_bytes = storage_cgls_bytes(m, k) (x, stats) = cgls(Ao, b) # warmup actual_cgls_bytes = @allocated cgls(Ao, b) @test expected_cgls_bytes ≤ actual_cgls_bytes ≤ 1.02 * expected_cgls_bytes @@ -362,12 +364,11 @@ @testset "LSLQ" begin # LSLQ needs: - # - 4 m-vectors: x_lq, v, Aᵀu, w̄ (= x_cg) - # - 2 n-vectors: u, Av - storage_lslq(n, m) = 4 * m + 2 * n - storage_lslq_bytes(n, m) = nbits * storage_lslq(n, m) + # - 4 n-vectors: x_lq, v, Aᴴu, w̄ (= x_cg) + # - 2 m-vectors: u, Av + storage_lslq_bytes(m, n) = nbits_FC * (4 * n + 2 * m) - expected_lslq_bytes = storage_lslq_bytes(n, m) + expected_lslq_bytes = storage_lslq_bytes(m, k) (x, stats) = lslq(Ao, b) # warmup actual_lslq_bytes = @allocated lslq(Ao, b) @test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.02 * expected_lslq_bytes @@ -380,12 +381,11 @@ @testset "CRLS" begin # CRLS needs: - # - 4 m-vectors: x, p, Ar, q - # - 3 n-vectors: r, Ap, s - storage_crls(n, m) = 4 * m + 3 * n - storage_crls_bytes(n, m) = nbits * storage_crls(n, m) + # - 4 n-vectors: x, p, Ar, q + # - 3 m-vectors: r, Ap, s + storage_crls_bytes(m, n) = nbits_FC * (4 * n + 3 * m) - expected_crls_bytes = storage_crls_bytes(n, m) + expected_crls_bytes = storage_crls_bytes(m, k) (x, stats) = crls(Ao, b) # warmup actual_crls_bytes = @allocated crls(Ao, b) @test expected_crls_bytes ≤ actual_crls_bytes ≤ 1.02 * expected_crls_bytes @@ -398,12 +398,11 @@ @testset "LSQR" begin # LSQR needs: 
- # - 4 m-vectors: x, v, w, Aᵀu - # - 2 n-vectors: u, Av - storage_lsqr(n, m) = 4 * m + 2 * n - storage_lsqr_bytes(n, m) = nbits * storage_lsqr(n, m) + # - 4 n-vectors: x, v, w, Aᴴu + # - 2 m-vectors: u, Av + storage_lsqr_bytes(m, n) = nbits_FC * (4 * n + 2 * m) - expected_lsqr_bytes = storage_lsqr_bytes(n, m) + expected_lsqr_bytes = storage_lsqr_bytes(m, k) (x, stats) = lsqr(Ao, b) # warmup actual_lsqr_bytes = @allocated lsqr(Ao, b) @test expected_lsqr_bytes ≤ actual_lsqr_bytes ≤ 1.02 * expected_lsqr_bytes @@ -416,12 +415,11 @@ @testset "LSMR" begin # LSMR needs: - # - 5 m-vectors: x, v, h, hbar, Aᵀu - # - 2 n-vectors: u, Av - storage_lsmr(n, m) = 5 * m + 2 * n - storage_lsmr_bytes(n, m) = nbits * storage_lsmr(n, m) + # - 5 n-vectors: x, v, h, hbar, Aᴴu + # - 2 m-vectors: u, Av + storage_lsmr_bytes(m, n) = nbits_FC * (5 * n + 2 * m) - expected_lsmr_bytes = storage_lsmr_bytes(n, m) + expected_lsmr_bytes = storage_lsmr_bytes(m, k) (x, stats) = lsmr(Ao, b) # warmup actual_lsmr_bytes = @allocated lsmr(Ao, b) @test expected_lsmr_bytes ≤ actual_lsmr_bytes ≤ 1.02 * expected_lsmr_bytes @@ -435,8 +433,7 @@ @testset "BiLQ" begin # BILQ needs: # - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q - storage_bilq(n) = 8 * n - storage_bilq_bytes(n) = nbits * storage_bilq(n) + storage_bilq_bytes(n) = nbits_FC * 8 * n expected_bilq_bytes = storage_bilq_bytes(n) bilq(A, b) # warmup @@ -452,8 +449,7 @@ @testset "QMR" begin # QMR needs: # - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q - storage_qmr(n) = 9 * n - storage_qmr_bytes(n) = nbits * storage_qmr(n) + storage_qmr_bytes(n) = nbits_FC * 9 * n expected_qmr_bytes = storage_qmr_bytes(n) qmr(A, b) # warmup @@ -469,8 +465,7 @@ @testset "BiLQR" begin # BILQR needs: # - 11 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ, p, q - storage_bilqr(n) = 11 * n - storage_bilqr_bytes(n) = nbits * storage_bilqr(n) + storage_bilqr_bytes(n) = nbits_FC * 11 * n expected_bilqr_bytes = storage_bilqr_bytes(n) bilqr(A, b, b) # warmup @@ -487,10 
+482,9 @@ # USYMLQ needs: # - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p # - 3 m-vectors: vₖ₋₁, vₖ, q - storage_usymlq(n, m) = 5 * n + 3 * m - storage_usymlq_bytes(n, m) = nbits * storage_usymlq(n, m) + storage_usymlq_bytes(m, n) = nbits_FC * (5 * n + 3 * m) - expected_usymlq_bytes = storage_usymlq_bytes(n, m) + expected_usymlq_bytes = storage_usymlq_bytes(k, n) usymlq(Au, c, b) # warmup actual_usymlq_bytes = @allocated usymlq(Au, c, b) @test expected_usymlq_bytes ≤ actual_usymlq_bytes ≤ 1.02 * expected_usymlq_bytes @@ -503,12 +497,11 @@ @testset "USYMQR" begin # USYMQR needs: - # - 6 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p - # - 3 n-vectors: uₖ₋₁, uₖ, q - storage_usymqr(n, m) = 6 * m + 3 * n - storage_usymqr_bytes(n, m) = nbits * storage_usymqr(n, m) + # - 6 n-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p + # - 3 m-vectors: uₖ₋₁, uₖ, q + storage_usymqr_bytes(m, n) = nbits_FC * (6 * n + 3 * m) - expected_usymqr_bytes = storage_usymqr_bytes(n, m) + expected_usymqr_bytes = storage_usymqr_bytes(m, k) (x, stats) = usymqr(Ao, b, c) # warmup actual_usymqr_bytes = @allocated usymqr(Ao, b, c) @test expected_usymqr_bytes ≤ actual_usymqr_bytes ≤ 1.02 * expected_usymqr_bytes @@ -523,8 +516,7 @@ # TRILQR needs: # - 6 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ, q # - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p - storage_trilqr(n, m) = 6 * m + 5 * n - storage_trilqr_bytes(n, m) = nbits * storage_trilqr(n, m) + storage_trilqr_bytes(m, n) = nbits_FC * (6 * m + 5 * n) expected_trilqr_bytes = storage_trilqr_bytes(n, n) trilqr(A, b, b) # warmup @@ -541,10 +533,9 @@ # TriCG needs: # - 6 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ, p # - 6 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ, q - storage_tricg(n, m) = 6 * n + 6 * m - storage_tricg_bytes(n, m) = nbits * storage_tricg(n, m) + storage_tricg_bytes(m, n) = nbits_FC * (6 * n + 6 * m) - expected_tricg_bytes = storage_tricg_bytes(n, m) + expected_tricg_bytes = storage_tricg_bytes(k, n) tricg(Au, c, b) # warmup actual_tricg_bytes = @allocated tricg(Au, c, b) @test expected_tricg_bytes ≤ 
actual_tricg_bytes ≤ 1.02 * expected_tricg_bytes @@ -559,10 +550,9 @@ # TriMR needs: # - 8 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, p # - 8 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, q - storage_trimr(n, m) = 8 * n + 8 * m - storage_trimr_bytes(n, m) = nbits * storage_trimr(n, m) + storage_trimr_bytes(m, n) = nbits_FC * (8 * n + 8 * m) - expected_trimr_bytes = storage_trimr_bytes(n, m) + expected_trimr_bytes = storage_trimr_bytes(k, n) trimr(Au, c, b) # warmup actual_trimr_bytes = @allocated trimr(Au, c, b) @test expected_trimr_bytes ≤ actual_trimr_bytes ≤ 1.02 * expected_trimr_bytes @@ -575,17 +565,16 @@ @testset "GPMR" begin # GPMR needs: - # - 2 n-vectors: x, q - # - 2 m-vectors: y, p - # - 1 (n*mem)-matrix: V - # - 1 (m*mem)-matrix: U + # - 2 m-vectors: x, q + # - 2 n-vectors: y, p + # - 1 (m*mem)-matrix: V + # - 1 (n*mem)-matrix: U # - 1 (2*mem)-vector: zt # - 2 (4*mem)-vectors: gc, gs # - 1 (mem*(2mem+1))-vector: R - storage_gpmr(mem, n, m) = (mem + 2) * (n + m) + mem * (2 * mem + 11) - storage_gpmr_bytes(mem, n, m) = nbits * storage_gpmr(mem, n, m) + storage_gpmr_bytes(mem, m, n) = nbits_FC * ((mem + 2) * (n + m) + mem * (2 * mem + 7)) + nbits_T * 4 * mem - expected_gpmr_bytes = storage_gpmr_bytes(mem, n, m) + expected_gpmr_bytes = storage_gpmr_bytes(mem, m, k) gpmr(Ao, Au, b, c, memory=mem, itmax=mem) # warmup actual_gpmr_bytes = @allocated gpmr(Ao, Au, b, c, memory=mem, itmax=mem) @test expected_gpmr_bytes ≤ actual_gpmr_bytes ≤ 1.02 * expected_gpmr_bytes diff --git a/test/test_aux.jl b/test/test_aux.jl index 11bdb7c2d..6c43142c0 100644 --- a/test/test_aux.jl +++ b/test/test_aux.jl @@ -1,119 +1,203 @@ @testset "aux" begin - # test Givens reflector corner cases - (c, s, ρ) = Krylov.sym_givens(0.0, 0.0) - @test (c == 1.0) && (s == 0.0) && (ρ == 0.0) - - a = 3.14 - (c, s, ρ) = Krylov.sym_givens(a, 0.0) - @test (c == 1.0) && (s == 0.0) && (ρ == a) - (c, s, ρ) = Krylov.sym_givens(-a, 0.0) - @test (c == -1.0) && (s == 0.0) && (ρ == a) - 
- b = 3.14 - (c, s, ρ) = Krylov.sym_givens(0.0, b) - @test (c == 0.0) && (s == 1.0) && (ρ == b) - (c, s, ρ) = Krylov.sym_givens(0.0, -b) - @test (c == 0.0) && (s == -1.0) && (ρ == b) - - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0)) - - a = Complex(1.0, 1.0) - (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a) - (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a) - - b = Complex(1.0, 1.0) - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b) - @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b) - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b) - @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b) - - # test roots of a quadratic - roots = Krylov.roots_quadratic(0.0, 0.0, 0.0) - @test length(roots) == 1 - @test roots[1] == 0.0 - - roots = Krylov.roots_quadratic(0.0, 0.0, 1.0) - @test length(roots) == 0 - - roots = Krylov.roots_quadratic(0.0, 3.14, -1.0) - @test length(roots) == 1 - @test roots[1] == 1.0 / 3.14 - - roots = Krylov.roots_quadratic(1.0, 0.0, 1.0) - @test length(roots) == 0 - - roots = Krylov.roots_quadratic(1.0, 0.0, 0.0) - @test length(roots) == 2 - @test roots[1] == 0.0 - @test roots[2] == 0.0 - - roots = Krylov.roots_quadratic(1.0, 3.0, 2.0) - @test length(roots) == 2 - @test roots[1] ≈ -2.0 - @test roots[2] ≈ -1.0 - - roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0) - @test length(roots) == 0 - - # ill-conditioned quadratic - roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) - @test length(roots) == 2 - @test roots[1] == 1.0e+13 - @test roots[2] == 0.0 - - # iterative refinement is crucial! 
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) - @test length(roots) == 2 - @test roots[1] == 1.0e+13 - @test roots[2] == -1.0e-05 - - # not ill-conditioned quadratic - roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) - @test length(roots) == 2 - @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) - @test isapprox(roots[2], -1.0, rtol=1.0e-6) - - roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) - @test length(roots) == 2 - @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) - @test isapprox(roots[2], -1.0, rtol=1.0e-6) - - # test trust-region boundary - x = ones(5) - d = ones(5); d[1:2:5] .= -1 - @test_throws ErrorException Krylov.to_boundary(x, d, -1.0) - @test_throws ErrorException Krylov.to_boundary(x, d, 0.5) - @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0) - @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178 - @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782 - @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782 - @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178 - - # test kzeros and kones - @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10) - @test Krylov.kones(Vector{Float64}, 10) == ones(10) - - # test ktypeof - a = rand(Float32, 10) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float32} - @test Krylov.ktypeof(b) == Vector{Float32} - - a = rand(Float64, 10) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float64} - @test Krylov.ktypeof(b) == Vector{Float64} - - a = sprand(Float32, 10, 0.5) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float32} - @test Krylov.ktypeof(b) == Vector{Float32} - - a = sprand(Float64, 10, 0.5) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float64} - @test Krylov.ktypeof(b) == Vector{Float64} + + @testset "sym_givens" begin + # test Givens reflector corner cases + (c, s, ρ) = Krylov.sym_givens(0.0, 0.0) + @test (c == 1.0) && (s == 0.0) && (ρ == 0.0) + + a = 3.14 + (c, 
s, ρ) = Krylov.sym_givens(a, 0.0) + @test (c == 1.0) && (s == 0.0) && (ρ == a) + (c, s, ρ) = Krylov.sym_givens(-a, 0.0) + @test (c == -1.0) && (s == 0.0) && (ρ == a) + + b = 3.14 + (c, s, ρ) = Krylov.sym_givens(0.0, b) + @test (c == 0.0) && (s == 1.0) && (ρ == b) + (c, s, ρ) = Krylov.sym_givens(0.0, -b) + @test (c == 0.0) && (s == -1.0) && (ρ == b) + + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0)) + + a = Complex(1.0, 1.0) + (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a) + (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a) + + b = Complex(1.0, 1.0) + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b) + @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b) + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b) + @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b) + end + + @testset "roots_quadratic" begin + # test roots of a quadratic + roots = Krylov.roots_quadratic(0.0, 0.0, 0.0) + @test roots[1] == 0.0 + @test roots[2] == 0.0 + + @test_throws ErrorException Krylov.roots_quadratic(0.0, 0.0, 1.0) + + roots = Krylov.roots_quadratic(0.0, 3.14, -1.0) + @test roots[1] == 1.0 / 3.14 + @test roots[2] == 1.0 / 3.14 + + @test_throws ErrorException Krylov.roots_quadratic(1.0, 0.0, 1.0) + + roots = Krylov.roots_quadratic(1.0, 0.0, 0.0) + @test roots[1] == 0.0 + @test roots[2] == 0.0 + + roots = Krylov.roots_quadratic(1.0, 3.0, 2.0) + @test roots[1] ≈ -2.0 + @test roots[2] ≈ -1.0 + + @test_throws ErrorException Krylov.roots_quadratic(1.0e+8, 1.0, 1.0) + + # ill-conditioned quadratic + roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) + @test roots[1] == 1.0e+13 + @test roots[2] == 0.0 + + # iterative refinement is crucial! 
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) + @test roots[1] == 1.0e+13 + @test roots[2] == -1.0e-05 + + # not ill-conditioned quadratic + roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) + @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) + @test isapprox(roots[2], -1.0, rtol=1.0e-6) + + roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) + @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) + @test isapprox(roots[2], -1.0, rtol=1.0e-6) + + allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) + @test allocations == 0 + end + + @testset "to_boundary" begin + # test trust-region boundary + n = 5 + x = ones(n) + d = ones(n); d[1:2:n] .= -1 + @test_throws ErrorException Krylov.to_boundary(n, x, d, -1.0) + @test_throws ErrorException Krylov.to_boundary(n, x, d, 0.5) + @test_throws ErrorException Krylov.to_boundary(n, x, zeros(n), 1.0) + @test maximum(Krylov.to_boundary(n, x, d, 5.0)) ≈ 2.209975124224178 + @test minimum(Krylov.to_boundary(n, x, d, 5.0)) ≈ -1.8099751242241782 + @test maximum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ 1.8099751242241782 + @test minimum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ -2.209975124224178 + end + + @testset "kzeros" begin + # test kzeros + @test Krylov.kzeros(Vector{Float64}, 10) == 
zeros(Float64, 10) + @test Krylov.kzeros(Vector{ComplexF32}, 10) == zeros(ComplexF32, 10) + end + + @testset "kones" begin + # test kones + @test Krylov.kones(Vector{Float64}, 10) == ones(Float64, 10) + @test Krylov.kones(Vector{ComplexF32}, 10) == ones(ComplexF32, 10) + end + + @testset "ktypeof" begin + # test ktypeof + for FC in (Float32, Float64, ComplexF32, ComplexF64) + dv = rand(FC, 10) + b = view(dv, 4:8) + @test Krylov.ktypeof(dv) == Vector{FC} + @test Krylov.ktypeof(b) == Vector{FC} + + dm = rand(FC, 10, 10) + b = view(dm, :, 3) + @test Krylov.ktypeof(b) == Vector{FC} + + sv = sprand(FC, 10, 0.5) + b = view(sv, 4:8) + @test Krylov.ktypeof(sv) == Vector{FC} + @test Krylov.ktypeof(b) == Vector{FC} + end + end + + @testset "vector_to_matrix" begin + # test vector_to_matrix + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = Vector{FC} + M = Krylov.vector_to_matrix(S) + @test M == Matrix{FC} + end + end + + @testset "matrix_to_vector" begin + # test matrix_to_vector + for FC in (Float32, Float64, ComplexF32, ComplexF64) + M = Matrix{FC} + S = Krylov.matrix_to_vector(M) + @test S == Vector{FC} + end + end + + @testset "macros" begin + # test macros + for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64) + n = 10 + x = rand(FC, n) + y = rand(FC, n) + a = rand(FC) + b = rand(FC) + c = rand(FC) + s = rand(FC) + + T = real(FC) + a2 = rand(T) + b2 = rand(T) + + Krylov.@kdot(n, x, y) + + Krylov.@kdotr(n, x, y) + + Krylov.@knrm2(n, x) + + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + + Krylov.@kcopy!(n, x, y) + + Krylov.@kswap(x, y) + + Krylov.@kref!(n, x, y, c, s) + end + end end diff --git a/test/test_bicgstab.jl b/test/test_bicgstab.jl index ce4e6dcd4..6817acf3d 100644 --- a/test/test_bicgstab.jl +++ b/test/test_bicgstab.jl @@ -82,10 +82,10 @@ @test(resid ≤ bicgstab_tol) 
@test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = bicgstab(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function solver = BicgstabSolver(A, b) diff --git a/test/test_bilq.jl b/test/test_bilq.jl index 900d1f6e5..40b9872db 100644 --- a/test/test_bilq.jl +++ b/test/test_bilq.jl @@ -66,10 +66,10 @@ @test(resid ≤ bilq_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = bilq(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function diff --git a/test/test_bilqr.jl b/test/test_bilqr.jl index 6dab06ec7..fd46aade4 100644 --- a/test/test_bilqr.jl +++ b/test/test_bilqr.jl @@ -46,10 +46,10 @@ @test(resid_dual ≤ bilqr_tol) @test(stats.solved_dual) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, t, stats) = bilqr(A, b, c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function A, b, c = adjoint_pde(FC=FC) diff --git a/test/test_cgne.jl b/test/test_cgne.jl index 64cbc0ea7..c1a3e798b 100644 --- a/test/test_cgne.jl +++ b/test/test_cgne.jl @@ -1,6 +1,6 @@ -function test_cgne(A, b; λ=0.0, M=I) +function test_cgne(A, b; λ=0.0, N=I, history=false) (nrow, ncol) = size(A) - (x, stats) = cgne(A, b, λ=λ, M=M) + (x, stats) = cgne(A, b, λ=λ, N=N, history=history) r = b - A * x if λ > 0 s = r / sqrt(λ) @@ -69,8 +69,8 @@ end @test stats.status == "x = 0 is a zero-residual solution" # Test with Jacobi (or diagonal) preconditioner - A, b, M = square_preconditioned(FC=FC) - (x, stats, resid) = test_cgne(A, b, M=M) + A, b, N = square_preconditioned(FC=FC) + (x, stats, resid) = test_cgne(A, b, N=N) @test(resid ≤ cgne_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -81,8 +81,8 @@ end A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0; 2.0 2.0 2.0 2.0 2.0 2.0 
2.0 2.0 2.0 2.0] b = [1.0; 0.0] - M = Diagonal(1 ./ (A * A')) - (x, stats, resid) = test_cgne(A, b, M=M) + N = Diagonal(1 ./ (A * A')) + (x, stats, resid) = test_cgne(A, b, N=N) @test(resid ≤ cgne_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -92,7 +92,7 @@ end for transpose ∈ (false, true) A, b, c, D = small_sp(transpose, FC=FC) D⁻¹ = inv(D) - (x, stats) = cgne(A, b, M=D⁻¹, λ=1.0) + (x, stats) = cgne(A, b, N=D⁻¹, λ=1.0) end # test callback function diff --git a/test/test_cgs.jl b/test/test_cgs.jl index 5c505bb70..832cd76c3 100644 --- a/test/test_cgs.jl +++ b/test/test_cgs.jl @@ -74,10 +74,10 @@ @test(resid ≤ cgs_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = cgs(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function A, b = sparse_laplacian(FC=FC) diff --git a/test/test_crmr.jl b/test/test_crmr.jl index 6354f329f..d0f902df6 100644 --- a/test/test_crmr.jl +++ b/test/test_crmr.jl @@ -1,6 +1,6 @@ -function test_crmr(A, b; λ=0.0, M=I, history=false) +function test_crmr(A, b; λ=0.0, N=I, history=false) (nrow, ncol) = size(A) - (x, stats) = crmr(A, b, λ=λ, M=M, history=history) + (x, stats) = crmr(A, b, λ=λ, N=N, history=history) r = b - A * x if λ > 0 s = r / sqrt(λ) @@ -76,8 +76,8 @@ end A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0; 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0] b = [1.0; 0.0] - M = Diagonal(1 ./ (A * A')) - (x, stats, resid) = test_crmr(A, b, M=M) + N = Diagonal(1 ./ (A * A')) + (x, stats, resid) = test_crmr(A, b, N=N) @test(resid ≤ crmr_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -87,7 +87,7 @@ end for transpose ∈ (false, true) A, b, c, D = small_sp(transpose, FC=FC) D⁻¹ = inv(D) - (x, stats) = crmr(A, b, M=D⁻¹, λ=1.0) + (x, stats) = crmr(A, b, N=D⁻¹, λ=1.0) end # test callback function diff --git a/test/test_diom.jl b/test/test_diom.jl index 
4f1a8ecea..62a38b198 100644 --- a/test/test_diom.jl +++ b/test/test_diom.jl @@ -60,7 +60,7 @@ # Poisson equation in polar coordinates. A, b = polar_poisson(FC=FC) - (x, stats) = diom(A, b, memory=200) + (x, stats) = diom(A, b, memory=150) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ diom_tol) diff --git a/test/test_extensions.jl b/test/test_extensions.jl new file mode 100644 index 000000000..81bbe12ca --- /dev/null +++ b/test/test_extensions.jl @@ -0,0 +1,56 @@ +using ComponentArrays +using FillArrays +using StaticArrays + +@testset "extensions" begin + @testset "ComponentArrays" begin + n = 5 + for T in (Float32, Float64) + A = rand(T, n, n) + + b = ComponentVector(; b1=rand(T, n - 1), b2=rand(T)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + end + end + + @testset "FillArrays" begin + n = 5 + for T in (Float32, Float64) + A = rand(T, n, n) + + b = Ones(T, n) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + + b = Zeros(T, n) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + end + end + + @testset "StaticArrays" begin + n = 5 + for T in (Float32, Float64) + A = rand(T, n, n) + + b = SVector{n}(rand(T, n)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + + b = MVector{n}(rand(T, n)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + + b = SizedVector{n}(rand(T, n)) + @test Krylov.ktypeof(b) == Vector{T} + x, stats = gmres(A, b) + @test stats.solved + end + end +end diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl new file mode 100644 index 000000000..9bb73d3e4 --- /dev/null +++ b/test/test_fgmres.jl @@ -0,0 +1,154 @@ +import LinearAlgebra.mul! 
+ +mutable struct FlexiblePreconditioner{T,S} + D::Diagonal{T, S} + ω::T +end + +function mul!(y::Vector, P::FlexiblePreconditioner, x::Vector) + P.ω = -P.ω + mul!(y, P.D, x) + y .*= P.ω +end + +@testset "fgmres" begin + fgmres_tol = 1.0e-6 + + for FC in (Float64, ComplexF64) + @testset "Data Type: $FC" begin + + # Symmetric and positive definite system. + A, b = symmetric_definite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Symmetric indefinite variant. + A, b = symmetric_indefinite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Nonsymmetric and positive definite systems. + A, b = nonsymmetric_definite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Nonsymmetric indefinite variant. + A, b = nonsymmetric_indefinite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Symmetric indefinite variant, almost singular. + A, b = almost_singular(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ 100 * fgmres_tol) + @test(stats.solved) + + # Singular system. + A, b = square_inconsistent(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + Aresid = norm(A' * r) / norm(A' * b) + @test(Aresid ≤ fgmres_tol) + @test(stats.inconsistent) + + # Test b == 0 + A, b = zero_rhs(FC=FC) + (x, stats) = fgmres(A, b) + @test norm(x) == 0 + @test stats.status == "x = 0 is a zero-residual solution" + + # Poisson equation in polar coordinates. 
+ A, b = polar_poisson(FC=FC) + (x, stats) = fgmres(A, b, reorthogonalization=true) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Left preconditioning + A, b, M = square_preconditioned(FC=FC) + (x, stats) = fgmres(A, b, M=M) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Right preconditioning + A, b, N = square_preconditioned(FC=FC) + (x, stats) = fgmres(A, b, N=N) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Split preconditioning + A, b, M, N = two_preconditioners(FC=FC) + (x, stats) = fgmres(A, b, M=M, N=N) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Restart + for restart ∈ (false, true) + memory = 10 + + A, b = sparse_laplacian(FC=FC) + (x, stats) = fgmres(A, b, restart=restart, memory=memory) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + M = Diagonal(1 ./ diag(A)) + (x, stats) = fgmres(A, b, M=M, restart=restart, memory=memory) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + N = Diagonal(1 ./ diag(A)) + (x, stats) = fgmres(A, b, N=N, restart=restart, memory=memory) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + N = Diagonal(1 ./ sqrt.(diag(A))) + N = Diagonal(1 ./ sqrt.(diag(A))) + (x, stats) = fgmres(A, b, M=M, N=N, restart=restart, memory=memory) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + end + + A, b = polar_poisson(FC=FC) + J = inv(Diagonal(A)) # Jacobi preconditioner + N = FlexiblePreconditioner(J, 1.0) + (x, stats) = fgmres(A, b, N=N) + r = b - A * x + resid = norm(r) / norm(b) + 
@test(resid ≤ fgmres_tol) + @test(stats.solved) + end + end +end diff --git a/test/test_fom.jl b/test/test_fom.jl index 9469b6b9c..0500d139f 100644 --- a/test/test_fom.jl +++ b/test/test_fom.jl @@ -126,13 +126,6 @@ end # test callback function - solver = FomSolver(A, b) - tol = 1.0e-1 - cb_n2 = TestCallbackN2(A, b, tol = tol) - fom!(solver, A, b, restart = true, callback = cb_n2) - @test solver.stats.status == "user-requested exit" - @test cb_n2(solver) - @test_throws TypeError fom(A, b, restart = true, callback = solver -> "string", history = true) end end diff --git a/test/test_lnlq.jl b/test/test_lnlq.jl index 888119db8..b308609fa 100644 --- a/test/test_lnlq.jl +++ b/test/test_lnlq.jl @@ -1,5 +1,5 @@ function test_lnlq(A, b,transfer_to_craig) - (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0) + (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0) r = b - A * x resid = norm(r) / norm(b) return (x, y, stats, resid) @@ -61,8 +61,8 @@ end # Test regularization A, b, λ = regularization(FC=FC) - (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0) - (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, etolx=1e-10, etoly=1e-10, λ=λ) + (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0) + (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, utolx=1e-10, utoly=1e-10, λ=λ) for (x, y) in ((x, y), (xₛ, yₛ)) s = λ * y r = b - (A * x + λ * s) diff --git a/test/test_minres_qlp.jl b/test/test_minres_qlp.jl index 6e983e49a..0b4d2046d 100644 --- a/test/test_minres_qlp.jl +++ b/test/test_minres_qlp.jl @@ -80,7 +80,7 @@ solver = MinresQlpSolver(A, b) tol = 1.0 cb_n2 = TestCallbackN2(A, b, tol = tol) - minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, ctol = 0.0, callback = cb_n2) + minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, Artol = 0.0, callback = cb_n2) @test 
solver.stats.status == "user-requested exit" @test cb_n2(solver) diff --git a/test/test_mp.jl b/test/test_mp.jl index b7aa43d38..96300bea6 100644 --- a/test/test_mp.jl +++ b/test/test_mp.jl @@ -3,55 +3,57 @@ for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, - :cg_lanczos_shift) - for T in (Float16, Float32, Float64, BigFloat) - for FC in (T, Complex{T}) - A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1)) - B = spdiagm(-1 => -ones(FC,n-1), 0 => 5*ones(FC,n), 1 => -ones(FC,n-1)) - b = ones(FC, n) - c = -ones(FC, n) - shifts = [-one(T), one(T)] - if fn in (:usymlq, :usymqr) - x, _ = @eval $fn($A, $b, $c) - elseif fn in (:trilqr, :bilqr) - x, t, _ = @eval $fn($A, $b, $c) - elseif fn in (:tricg, :trimr) - x, y, _ = @eval $fn($A, $b, $c) - elseif fn == :gpmr - x, y, _ = @eval $fn($A, $B, $b, $c) - elseif fn in (:lnlq, :craig, :craigmr) - x, y, _ = @eval $fn($A, $b) - elseif fn == :cg_lanczos_shift - x, _ = @eval $fn($A, $b, $shifts) - else - x, _ = @eval $fn($A, $b) - end - atol = √eps(T) - rtol = √eps(T) - Κ = (T == Float16 ? 
10 : 1) - if fn in (:tricg, :trimr) - @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) - @test norm(A' * x - y - c) ≤ Κ * (atol + norm([b; c]) * rtol) - @test eltype(y) == FC - elseif fn == :gpmr - @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) - @test norm(B * x + y - c) ≤ Κ * (atol + norm([b; c]) * rtol) - @test eltype(y) == FC - elseif fn == :cg_lanczos_shift - @test norm((A - I) * x[1] - b) ≤ Κ * (atol + norm(b) * rtol) - @test norm((A + I) * x[2] - b) ≤ Κ * (atol + norm(b) * rtol) - @test eltype(x) == Vector{FC} - else - @test norm(A * x - b) ≤ Κ * (atol + norm(b) * rtol) - @test eltype(x) == FC - end - if fn in (:trilqr, :bilqr) - @test norm(A' * t - c) ≤ Κ * (atol + norm(c) * rtol) - @test eltype(t) == FC - end - if fn in (:lnlq, :craig, :craigmr) - @test norm(A * A' * y - b) ≤ Κ * (atol + norm(b) * rtol) - @test eltype(y) == FC + :fgmres, :cg_lanczos_shift) + @testset "$fn" begin + for T in (Float16, Float32, Float64, BigFloat) + for FC in (T, Complex{T}) + A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1)) + B = spdiagm(-1 => -ones(FC,n-1), 0 => 5*ones(FC,n), 1 => -ones(FC,n-1)) + b = ones(FC, n) + c = -ones(FC, n) + shifts = [-one(T), one(T)] + if fn in (:usymlq, :usymqr) + x, _ = @eval $fn($A, $b, $c) + elseif fn in (:trilqr, :bilqr) + x, t, _ = @eval $fn($A, $b, $c) + elseif fn in (:tricg, :trimr) + x, y, _ = @eval $fn($A, $b, $c) + elseif fn == :gpmr + x, y, _ = @eval $fn($A, $B, $b, $c) + elseif fn in (:lnlq, :craig, :craigmr) + x, y, _ = @eval $fn($A, $b) + elseif fn == :cg_lanczos_shift + x, _ = @eval $fn($A, $b, $shifts) + else + x, _ = @eval $fn($A, $b) + end + atol = √eps(T) + rtol = √eps(T) + Κ = (T == Float16 ? 
10 : 1) + if fn in (:tricg, :trimr) + @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) + @test norm(A' * x - y - c) ≤ Κ * (atol + norm([b; c]) * rtol) + @test eltype(y) == FC + elseif fn == :gpmr + @test norm(x + A * y - b) ≤ Κ * (atol + norm([b; c]) * rtol) + @test norm(B * x + y - c) ≤ Κ * (atol + norm([b; c]) * rtol) + @test eltype(y) == FC + elseif fn == :cg_lanczos_shift + @test norm((A - I) * x[1] - b) ≤ Κ * (atol + norm(b) * rtol) + @test norm((A + I) * x[2] - b) ≤ Κ * (atol + norm(b) * rtol) + @test eltype(x) == Vector{FC} + else + @test norm(A * x - b) ≤ Κ * (atol + norm(b) * rtol) + @test eltype(x) == FC + end + if fn in (:trilqr, :bilqr) + @test norm(A' * t - c) ≤ Κ * (atol + norm(c) * rtol) + @test eltype(t) == FC + end + if fn in (:lnlq, :craig, :craigmr) + @test norm(A * A' * y - b) ≤ Κ * (atol + norm(b) * rtol) + @test eltype(y) == FC + end end end end diff --git a/test/test_processes.jl b/test/test_processes.jl new file mode 100644 index 000000000..eb3ad19af --- /dev/null +++ b/test/test_processes.jl @@ -0,0 +1,146 @@ +""" + P = permutation_paige(k) + +Return the sparse (2k) × (2k) matrix + + [e₁ • eₖ ] + [ e₁ • eₖ] +""" +function permutation_paige(k) + P = spzeros(Float64, 2k, 2k) + for i = 1:k + P[i,2i-1] = 1.0 + P[i+k,2i] = 1.0 + end + return P +end + +@testset "processes" begin + m = 250 + n = 500 + k = 20 + + for FC in (Float64, ComplexF64) + R = real(FC) + nbits_FC = sizeof(FC) + nbits_R = sizeof(R) + nbits_I = sizeof(Int) + + @testset "Data Type: $FC" begin + + @testset "Hermitian Lanczos" begin + A, b = symmetric_indefinite(n, FC=FC) + V, T = hermitian_lanczos(A, b, k) + + @test A * V[:,1:k] ≈ V * T + + storage_hermitian_lanczos_bytes(n, k) = 4k * nbits_I + (3k-1) * nbits_R + n*(k+1) * nbits_FC + + expected_hermitian_lanczos_bytes = storage_hermitian_lanczos_bytes(n, k) + actual_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, b, k) + @test expected_hermitian_lanczos_bytes ≤ actual_hermitian_lanczos_bytes ≤ 1.02 * 
expected_hermitian_lanczos_bytes + end + + @testset "Non-Hermitian Lanczos" begin + A, b = nonsymmetric_definite(n, FC=FC) + c = -b + V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) + + @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]' + @test A * V[:,1:k] ≈ V * T + @test A' * U[:,1:k] ≈ U * Tᴴ + + storage_nonhermitian_lanczos_bytes(n, k) = 4k * nbits_I + (6k-2) * nbits_FC + 2*n*(k+1) * nbits_FC + + expected_nonhermitian_lanczos_bytes = storage_nonhermitian_lanczos_bytes(n, k) + actual_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, b, c, k) + @test expected_nonhermitian_lanczos_bytes ≤ actual_nonhermitian_lanczos_bytes ≤ 1.02 * expected_nonhermitian_lanczos_bytes + end + + @testset "Arnoldi" begin + A, b = nonsymmetric_indefinite(n, FC=FC) + V, H = arnoldi(A, b, k) + + @test A * V[:,1:k] ≈ V * H + + function storage_arnoldi_bytes(n, k) + return k*(k+1) * nbits_FC + n*(k+1) * nbits_FC + end + + expected_arnoldi_bytes = storage_arnoldi_bytes(n, k) + actual_arnoldi_bytes = @allocated arnoldi(A, b, k) + @test expected_arnoldi_bytes ≤ actual_arnoldi_bytes ≤ 1.02 * expected_arnoldi_bytes + end + + @testset "Golub-Kahan" begin + A, b = under_consistent(m, n, FC=FC) + V, U, L = golub_kahan(A, b, k) + B = L[1:k+1,1:k] + + @test A * V[:,1:k] ≈ U * B + @test A' * U ≈ V * L' + @test A' * A * V[:,1:k] ≈ V * L' * B + @test A * A' * U[:,1:k] ≈ U * B * L[1:k,1:k]' + + storage_golub_kahan_bytes(m, n, k) = 3*(k+1) * nbits_I + (2k+1) * nbits_R + (n+m)*(k+1) * nbits_FC + + expected_golub_kahan_bytes = storage_golub_kahan_bytes(m, n, k) + actual_golub_kahan_bytes = @allocated golub_kahan(A, b, k) + @test expected_golub_kahan_bytes ≤ actual_golub_kahan_bytes ≤ 1.02 * expected_golub_kahan_bytes + end + + @testset "Saunders-Simon-Yip" begin + A, b = under_consistent(m, n, FC=FC) + _, c = over_consistent(n, m, FC=FC) + V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k) + + @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]' + @test A * U[:,1:k] ≈ V * T + @test A' * V[:,1:k] ≈ U * Tᴴ + @test A' * A * U[:,1:k-1] ≈ U * 
Tᴴ * T[1:k,1:k-1] + @test A * A' * V[:,1:k-1] ≈ V * T * Tᴴ[1:k,1:k-1] + + K = [zeros(FC,m,m) A; A' zeros(FC,n,n)] + Pₖ = permutation_paige(k) + Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ + Pₖ₊₁ = permutation_paige(k+1) + Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁ + G = Pₖ₊₁' * [zeros(FC,k+1,k) T; Tᴴ zeros(FC,k+1,k)] * Pₖ + @test K * Wₖ ≈ Wₖ₊₁ * G + + storage_saunders_simon_yip_bytes(m, n, k) = 4k * nbits_I + (6k-2) * nbits_FC + (n+m)*(k+1) * nbits_FC + + expected_saunders_simon_yip_bytes = storage_saunders_simon_yip_bytes(m, n, k) + actual_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, b, c, k) + @test expected_saunders_simon_yip_bytes ≤ actual_saunders_simon_yip_bytes ≤ 1.02 * expected_saunders_simon_yip_bytes + end + + @testset "Montoison-Orban" begin + A, b = under_consistent(m, n, FC=FC) + B, c = over_consistent(n, m, FC=FC) + V, H, U, F = montoison_orban(A, B, b, c, k) + + @test A * U[:,1:k] ≈ V * H + @test B * V[:,1:k] ≈ U * F + @test B * A * U[:,1:k-1] ≈ U * F * H[1:k,1:k-1] + @test A * B * V[:,1:k-1] ≈ V * H * F[1:k,1:k-1] + + K = [zeros(FC,m,m) A; B zeros(FC,n,n)] + Pₖ = permutation_paige(k) + Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ + Pₖ₊₁ = permutation_paige(k+1) + Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁ + G = Pₖ₊₁' * [zeros(FC,k+1,k) H; F zeros(FC,k+1,k)] * Pₖ + @test K * Wₖ ≈ Wₖ₊₁ * G + + function storage_montoison_orban_bytes(m, n, k) + return 2*k*(k+1) * nbits_FC + (n+m)*(k+1) * nbits_FC + end + + expected_montoison_orban_bytes = storage_montoison_orban_bytes(m, n, k) + actual_montoison_orban_bytes = @allocated montoison_orban(A, B, b, c, k) + @test expected_montoison_orban_bytes ≤ actual_montoison_orban_bytes ≤ 1.02 * expected_montoison_orban_bytes + end + end + end +end diff --git a/test/test_qmr.jl b/test/test_qmr.jl index 184b9877d..4a6b8c1c9 100644 --- a/test/test_qmr.jl +++ b/test/test_qmr.jl @@ -58,10 +58,10 @@ @test(resid ≤ qmr_tol) @test(stats.solved) - # Test bᵀc == 0 + # 
Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = qmr(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function solver = QmrSolver(A, b) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index 468fa5a05..71885029f 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -11,1139 +11,183 @@ function test_solvers(FC) nshifts = 5 T = real(FC) S = Vector{FC} + solvers = Dict{Symbol, KrylovSolver}() @eval begin - cg_solver = $(KRYLOV_SOLVERS[:cg])($n, $n, $S) - symmlq_solver = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S) - minres_solver = $(KRYLOV_SOLVERS[:minres])($n, $n, $S) - cg_lanczos_solver = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S) - diom_solver = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S) - fom_solver = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S) - dqgmres_solver = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S) - gmres_solver = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S) - cr_solver = $(KRYLOV_SOLVERS[:cr])($n, $n, $S) - crmr_solver = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S) - cgs_solver = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S) - bicgstab_solver = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S) - craigmr_solver = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S) - cgne_solver = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S) - lnlq_solver = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S) - craig_solver = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) - lslq_solver = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S) - cgls_solver = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) - lsqr_solver = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) - crls_solver = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) - lsmr_solver = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) - usymqr_solver = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S) - trilqr_solver = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S) - bilq_solver = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S) - bilqr_solver = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S) - minres_qlp_solver = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S) - qmr_solver = $(KRYLOV_SOLVERS[:qmr])($n, $n, 
$S) - usymlq_solver = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S) - tricg_solver = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S) - trimr_solver = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S) - gpmr_solver = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S) - cg_lanczos_shift_solver = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $m, $nshifts, $S) + $solvers[:cg] = $(KRYLOV_SOLVERS[:cg])($n, $n, $S) + $solvers[:symmlq] = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S) + $solvers[:minres] = $(KRYLOV_SOLVERS[:minres])($n, $n, $S) + $solvers[:cg_lanczos] = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S) + $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S) + $solvers[:diom] = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S) + $solvers[:fom] = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S) + $solvers[:dqgmres] = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S) + $solvers[:gmres] = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S) + $solvers[:fgmres] = $(KRYLOV_SOLVERS[:fgmres])($n, $n, $mem, $S) + $solvers[:cr] = $(KRYLOV_SOLVERS[:cr])($n, $n, $S) + $solvers[:crmr] = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S) + $solvers[:cgs] = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S) + $solvers[:bicgstab] = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S) + $solvers[:craigmr] = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S) + $solvers[:cgne] = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S) + $solvers[:lnlq] = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S) + $solvers[:craig] = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) + $solvers[:lslq] = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S) + $solvers[:cgls] = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) + $solvers[:lsqr] = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) + $solvers[:crls] = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) + $solvers[:lsmr] = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) + $solvers[:usymqr] = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S) + $solvers[:trilqr] = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S) + $solvers[:bilq] = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S) + $solvers[:bilqr] = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S) + $solvers[:minres_qlp] = 
$(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S) + $solvers[:qmr] = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S) + $solvers[:usymlq] = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S) + $solvers[:tricg] = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S) + $solvers[:trimr] = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S) + $solvers[:gpmr] = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S) + $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S) end - for i = 1 : 3 - A = i * A - Au = i * Au - Ao = i * Ao - b = 5 * b - c = 3 * c - - solver = solve!(cg_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(symmlq_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(minres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cg_lanczos_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cg_lanczos_shift_solver, A, b, shifts) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = 
solve!(diom_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(fom_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(dqgmres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(gmres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cr_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(crmr_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cgs_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == 2 * niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test 
issolved(solver) - - solver = solve!(bicgstab_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == 2 * niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(craigmr_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(cgne_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lnlq_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(craig_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(lslq_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cgls_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) 
== niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lsqr_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(crls_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lsmr_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(usymqr_solver, Ao, b, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(trilqr_solver, A, b, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved_primal(solver) - @test issolved_dual(solver) - @test issolved(solver) - - solver = solve!(bilq_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test 
issolved(solver) - - solver = solve!(bilqr_solver, A, b, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved_primal(solver) - @test issolved_dual(solver) - @test issolved(solver) - - solver = solve!(minres_qlp_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(qmr_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(usymlq_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(tricg_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(trimr_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = 
solve!(gpmr_solver, Ao, Au, b, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test Bprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) + @testset "Check compatibility between KrylovSolvers and the dimension of the linear problems" begin + A2 = FC.(get_div_grad(2, 2, 2)) + n2 = size(A2, 1) + m2 = div(n2, 2) + Au2 = A2[1:m2,:] + Ao2 = A2[:,1:m2] + b2 = Ao2 * ones(FC, m2) + c2 = Au2 * ones(FC, n2) + shifts2 = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0] + for (method, solver) in solvers + if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr) + @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2) + end + method == :cg_lanczos_shift && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, shifts2) + method == :cg_lanczos_shift && @test_throws ErrorException("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $(length(shifts2))") solve!(solver, A, b, shifts2) + method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2) + method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2) + method ∈ (:bilqr, :trilqr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, b2) + method == :gpmr && @test_throws 
ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, Au2, b2, c2) + method ∈ (:tricg, :trimr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2) + method == :usymlq && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2, b2) + method == :usymqr && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2) + end end - io = IOBuffer() - show(io, cg_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CgSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Ap│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, symmlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │SymmlqSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Mvold│ Vector{$FC}│ 64│ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ w̅│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ clist│ Vector{$T}│ 5│ - │ zlist│ Vector{$T}│ 5│ - │ sprod│ Vector{$T}│ 5│ - │ warm_start│ Bool│ 0│ - 
└────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, minres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │MinresSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r1│ Vector{$FC}│ 64│ - │ r2│ Vector{$FC}│ 64│ - │ w1│ Vector{$FC}│ 64│ - │ w2│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cg_lanczos_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────────┬───────────────┬─────────────────┐ - │CgLanczosSolver│Precision: $FC │Architecture: CPU│ - ├───────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_prev│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cg_lanczos_shift_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────────────┬───────────────────┬─────────────────┐ - │CgLanczosShiftSolver│ Precision: $FC │Architecture: CPU│ - 
├────────────────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────────────┼───────────────────┼─────────────────┤ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_prev│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ x│Vector{Vector{$FC}}│ 5 x 64│ - │ p│Vector{Vector{$FC}}│ 5 x 64│ - │ σ│ Vector{$T}│ 5│ - │ δhat│ Vector{$T}│ 5│ - │ ω│ Vector{$T}│ 5│ - │ γ│ Vector{$T}│ 5│ - │ rNorms│ Vector{$T}│ 5│ - │ converged│ BitVector│ 5│ - │ not_cv│ BitVector│ 5│ - └────────────────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, diom_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │DiomSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ t│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │ w│ Vector{$FC}│ 0│ - │ P│Vector{Vector{$FC}}│ 10 x 64│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ L│ Vector{$FC}│ 10│ - │ H│ Vector{$FC}│ 12│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, fom_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │ FomSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ w│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - │ 
V│Vector{Vector{$FC}}│ 10 x 64│ - │ l│ Vector{$FC}│ 10│ - │ z│ Vector{$FC}│ 10│ - │ U│ Vector{$FC}│ 55│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, dqgmres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌─────────────┬───────────────────┬─────────────────┐ - │DqgmresSolver│ Precision: $FC │Architecture: CPU│ - ├─────────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├─────────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ t│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │ w│ Vector{$FC}│ 0│ - │ P│Vector{Vector{$FC}}│ 10 x 64│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ c│ Vector{$T}│ 10│ - │ s│ Vector{$FC}│ 10│ - │ H│ Vector{$FC}│ 12│ - │ warm_start│ Bool│ 0│ - └─────────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, gmres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────────┬─────────────────┐ - │GmresSolver│ Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ w│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ c│ Vector{$T}│ 10│ - │ s│ Vector{$FC}│ 10│ - │ z│ Vector{$FC}│ 10│ - │ R│ Vector{$FC}│ 55│ - │ warm_start│ Bool│ 0│ - │ inner_iter│ Int64│ 0│ - └───────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" 
=> "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Ar│ Vector{$FC}│ 64│ - │ Mq│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, crmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CrmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Aᵀr│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ Mq│ Vector{$FC}│ 0│ - │ s│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgs_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CgsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │Attribute │ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ ts│ Vector{$FC}│ 64│ - │ yz│ Vector{$FC}│ 0│ - │ vw│ Vector{$FC}│ 0│ - 
│warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bicgstab_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────────┬───────────────┬─────────────────┐ - │BicgstabSolver│Precision: $FC │Architecture: CPU│ - ├──────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 64│ - │ s│ Vector{$FC}│ 64│ - │ qd│ Vector{$FC}│ 64│ - │ yz│ Vector{$FC}│ 0│ - │ t│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └──────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, craigmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌─────────────┬───────────────┬─────────────────┐ - │CraigmrSolver│Precision: $FC │Architecture: CPU│ - ├─────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├─────────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ d│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ wbar│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - └─────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgne_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - 
┌──────────┬───────────────┬─────────────────┐ - │CgneSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Aᵀz│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ s│ Vector{$FC}│ 0│ - │ z│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lnlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LnlqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ w̄│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, craig_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │CraigSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ w2│ Vector{$FC}│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - 
@test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lslq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LslqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ w̄│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgls_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CglsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ s│ Vector{$FC}│ 32│ - │ r│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Mr│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lsqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LsqrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - 
│ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, crls_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CrlsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ Ar│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ r│ Vector{$FC}│ 64│ - │ Ap│ Vector{$FC}│ 64│ - │ s│ Vector{$FC}│ 64│ - │ Ms│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lsmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LsmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ h│ Vector{$FC}│ 32│ - │ hbar│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, usymqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - 
│UsymqrSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 32│ - │ wₖ₋₂│ Vector{$FC}│ 32│ - │ wₖ₋₁│ Vector{$FC}│ 32│ - │ uₖ₋₁│ Vector{$FC}│ 32│ - │ uₖ│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, trilqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │TrilqrSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Δy│ Vector{$FC}│ 0│ - │ y│ Vector{$FC}│ 64│ - │ wₖ₋₃│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bilq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │BilqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ 
Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bilqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │BilqrSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Δy│ Vector{$FC}│ 0│ - │ y│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ wₖ₋₃│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, minres_qlp_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────────┬───────────────┬─────────────────┐ - │MinresQlpSolver│Precision: $FC │Architecture: CPU│ - ├───────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ wₖ₋₁│ Vector{$FC}│ 64│ - │ wₖ│ Vector{$FC}│ 64│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 64│ - │ M⁻¹vₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], 
init=expected) - - io = IOBuffer() - show(io, qmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ QmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ wₖ₋₁│ Vector{$FC}│ 64│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, usymlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │UsymlqSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 32│ - │ vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, tricg_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │TricgSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ y│ Vector{$FC}│ 64│ - │ N⁻¹uₖ₋₁│ Vector{$FC}│ 
64│ - │ N⁻¹uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₁│ Vector{$FC}│ 64│ - │ gy₂ₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 32│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│ - │ M⁻¹vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₁│ Vector{$FC}│ 32│ - │ gx₂ₖ│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ uₖ│ Vector{$FC}│ 0│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, trimr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │TrimrSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ y│ Vector{$FC}│ 64│ - │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│ - │ N⁻¹uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₃│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₂│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₁│ Vector{$FC}│ 64│ - │ gy₂ₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 32│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│ - │ M⁻¹vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₃│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₂│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₁│ Vector{$FC}│ 32│ - │ gx₂ₖ│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ uₖ│ Vector{$FC}│ 0│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) + @testset "Test the keyword argument timemax" begin + timemax = 0.0 + for (method, solver) in solvers + method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr) && solve!(solver, A, b, timemax=timemax) + method == :cg_lanczos_shift 
&& solve!(solver, A, b, shifts, timemax=timemax) + method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && solve!(solver, Au, c, timemax=timemax) + method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && solve!(solver, Ao, b, timemax=timemax) + method ∈ (:bilqr, :trilqr) && solve!(solver, A, b, b, timemax=timemax) + method == :gpmr && solve!(solver, Ao, Au, b, c, timemax=timemax) + method ∈ (:tricg, :trimr) && solve!(solver, Au, c, b, timemax=timemax) + method == :usymlq && solve!(solver, Au, c, b, timemax=timemax) + method == :usymqr && solve!(solver, Ao, b, c, timemax=timemax) + @test solver.stats.status == "time limit exceeded" + end + end - io = IOBuffer() - show(io, gpmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │GpmrSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ wA│ Vector{$FC}│ 0│ - │ wB│ Vector{$FC}│ 0│ - │ dA│ Vector{$FC}│ 64│ - │ dB│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 0│ - │ p│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ U│Vector{Vector{$FC}}│ 10 x 32│ - │ gs│ Vector{$FC}│ 40│ - │ gc│ Vector{$T}│ 40│ - │ zt│ Vector{$FC}│ 20│ - │ R│ Vector{$FC}│ 210│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) + for (method, solver) in solvers + @testset "$(method)" begin + for i = 1 : 3 + A = i * A + Au = i * Au + Ao = i * Ao + b = 5 * b + c = 3 * c + + if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, + :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr, :cg_lanczos_shift) + method == :cg_lanczos_shift ? 
solve!(solver, A, b, shifts) : solve!(solver, A, b) + niter = niterations(solver) + @test Aprod(solver) == (method ∈ (:cgs, :bicgstab) ? 2 * niter : niter) + @test Atprod(solver) == (method ∈ (:bilq, :qmr) ? niter : 0) + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + if method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) + solve!(solver, Au, c) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver, 1) === solver.x + @test nsolution(solver) == (method ∈ (:cgne, :crmr) ? 1 : 2) + (nsolution == 2) && (@test solution(solver, 2) == solver.y) + end + + if method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) + solve!(solver, Ao, b) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + if method ∈ (:bilqr, :trilqr) + solve!(solver, A, b, b) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver, 1) === solver.x + @test solution(solver, 2) === solver.y + @test nsolution(solver) == 2 + @test issolved_primal(solver) + @test issolved_dual(solver) + end + + if method ∈ (:tricg, :trimr, :gpmr) + method == :gpmr ? solve!(solver, Ao, Au, b, c) : solve!(solver, Au, c, b) + niter = niterations(solver) + @test Aprod(solver) == niter + method != :gpmr && (@test Atprod(solver) == niter) + method == :gpmr && (@test Bprod(solver) == niter) + @test solution(solver, 1) === solver.x + @test solution(solver, 2) === solver.y + @test nsolution(solver) == 2 + end + + if method ∈ (:usymlq, :usymqr) + method == :usymlq ? 
solve!(solver, Au, c, b) : solve!(solver, Ao, b, c) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + @test niter > 0 + @test statistics(solver) === solver.stats + @test issolved(solver) + end + + io = IOBuffer() + show(io, solver, show_stats=false) + showed = String(take!(io)) + + # Test that the lines have the same length + str = split(showed, '\n', keepempty=false) + len_row = length(str[1]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_row, &, str) + + # Test that the columns have the same length + str2 = split(showed, ['│','┌','┬','┐','├','┼','┤','└','┴','┴','┘','\n'], keepempty=false) + len_col1 = length(str2[1]) + len_col2 = length(str2[2]) + len_col3 = length(str2[3]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col1, &, str2[1:3:end-2]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col2, &, str2[2:3:end-1]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col3, &, str2[3:3:end]) + + # Code coverage + show(io, solver, show_stats=true) + end + end end @testset "solvers" begin diff --git a/test/test_stats.jl b/test/test_stats.jl index 4289a78a3..f4c212d50 100644 --- a/test/test_stats.jl +++ b/test/test_stats.jl @@ -1,28 +1,30 @@ @testset "stats" begin - stats = Krylov.SimpleStats(0, true, true, Float64[1.0], Float64[2.0], Float64[], "t") + stats = Krylov.SimpleStats(0, true, true, Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Simple stats + expected = """SimpleStats niter: 0 solved: true inconsistent: true residuals: [ 1.0e+00 ] Aresiduals: [ 2.0e+00 ] κ₂(A): [] - status: t""" + timer: 1.23s + status: unknown""" @test 
strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), "t") + stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), 0.1234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Lsmr stats + expected = """LsmrStats niter: 0 solved: true inconsistent: true @@ -33,53 +35,59 @@ κ₂(A): 5.0 ‖A‖F: 6.0 xNorm: 7.0 - status: t""" + timer: 123.40ms + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, "t") + stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Lanczos stats + expected = """LanczosStats niter: 0 solved: true residuals: [ 3.0e+00 ] indefinite: true ‖A‖F: NaN κ₂(A): NaN - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, "t") + stats = 
Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, 0.00056789, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LanczosShift stats + expected = """LanczosShiftStats niter: 0 solved: true residuals: [[0.9, 0.5], [0.6, 0.4, 0.1]] indefinite: Bool[0, 1] ‖A‖F: NaN κ₂(A): NaN - status: t""" - @test (VERSION < v"1.5") || strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) + timer: 567.89μs + status: unknown""" + @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, "t") + stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Symmlq stats + expected = """SymmlqStats niter: 0 solved: true residuals: [ 4.0e+00 ] @@ -88,53 +96,59 @@ errors (cg): [ 7.0e+00 ✗✗✗✗ ] ‖A‖F: NaN κ₂(A): NaN - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], "t") + stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = 
"""Adjoint stats + expected = """AdjointStats niter: 0 solved primal: true solved dual: true residuals primal: [ 8.0e+00 ] residuals dual: [ 9.0e+00 ] - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], "t") + stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LNLQ stats + expected = """LNLQStats niter: 0 solved: true residuals: [ 1.0e+01 ] error with bnd: false error bnd x: [ 1.1e+01 ] error bnd y: [ 1.2e+01 ] - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 - stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], "t") + stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], 1.234, "unknown") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LSLQ stats + expected = """LSLQStats niter: 0 solved: true inconsistent: false @@ -144,9 +158,11 @@ error with bnd: false error bound LQ: [ 1.6e+01 ] error bound CG: [ 1.7e+01 ] - status: t""" + timer: 1.23s + status: unknown""" @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION 
< v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 end diff --git a/test/test_trilqr.jl b/test/test_trilqr.jl index 7d7927372..baf8a597e 100644 --- a/test/test_trilqr.jl +++ b/test/test_trilqr.jl @@ -74,7 +74,7 @@ @test(resid_dual ≤ trilqr_tol) @test(stats.solved_dual) - # Test consistent Ax = b and inconsistent Aᵀt = c. + # Test consistent Ax = b and inconsistent Aᴴt = c. A, b, c = rectangular_adjoint(FC=FC) (x, t, stats) = trilqr(A, b, c) diff --git a/test/test_utils.jl b/test/test_utils.jl index ed72056b6..f1c3ca44e 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -1,50 +1,51 @@ include("get_div_grad.jl") include("gen_lsq.jl") include("check_min_norm.jl") +include("callback_utils.jl") # Symmetric and positive definite systems. function symmetric_definite(n :: Int=10; FC=Float64) - α = FC <: Complex ? im : 1 + α = FC <: Complex ? FC(im) : one(FC) A = spdiagm(-1 => α * ones(FC, n-1), 0 => 4 * ones(FC, n), 1 => conj(α) * ones(FC, n-1)) - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Symmetric and indefinite systems. function symmetric_indefinite(n :: Int=10; FC=Float64) - α = FC <: Complex ? im : 1 + α = FC <: Complex ? FC(im) : one(FC) A = spdiagm(-1 => α * ones(FC, n-1), 0 => ones(FC, n), 1 => conj(α) * ones(FC, n-1)) - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Nonsymmetric and positive definite systems. function nonsymmetric_definite(n :: Int=10; FC=Float64) if FC <: Complex - A = [i == j ? n * one(FC) : im * one(FC) for i=1:n, j=1:n] + A = [i == j ? n * one(FC) : FC(im) * one(FC) for i=1:n, j=1:n] else A = [i == j ? n * one(FC) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] end - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Nonsymmetric and indefinite systems. function nonsymmetric_indefinite(n :: Int=10; FC=Float64) if FC <: Complex - A = [i == j ? n * (-one(FC))^(i*j) : im * one(FC) for i=1:n, j=1:n] + A = [i == j ? 
n * (-one(FC))^(i*j) : FC(im) * one(FC) for i=1:n, j=1:n] else A = [i == j ? n * (-one(FC))^(i*j) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] end - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Underdetermined and consistent systems. function under_consistent(n :: Int=10, m :: Int=25; FC=Float64) n < m || error("Square or overdetermined system!") - α = FC <: Complex ? im : 1 - A = [i/j - α * j/i for i=1:n, j=1:m] + α = FC <: Complex ? FC(im) : one(FC) + A = FC[i/j - α * j/i for i=1:n, j=1:m] b = A * ones(FC, m) return A, b end @@ -52,7 +53,7 @@ end # Underdetermined and inconsistent systems. function under_inconsistent(n :: Int=10, m :: Int=25; FC=Float64) n < m || error("Square or overdetermined system!") - α = FC <: Complex ? 1 + im : 1 + α = FC <: Complex ? FC(1 + im) : one(FC) A = α * ones(FC, n, m) b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n] return A, b @@ -84,8 +85,8 @@ end # Overdetermined and consistent systems. function over_consistent(n :: Int=25, m :: Int=10; FC=Float64) n > m || error("Underdetermined or square system!") - α = FC <: Complex ? im : 1 - A = [i/j - α * j/i for i=1:n, j=1:m] + α = FC <: Complex ? FC(im) : one(FC) + A = FC[i/j - α * j/i for i=1:n, j=1:m] b = A * ones(FC, m) return A, b end @@ -93,7 +94,7 @@ end # Overdetermined and inconsistent systems. function over_inconsistent(n :: Int=25, m :: Int=10; FC=Float64) n > m || error("Underdetermined or square system!") - α = FC <: Complex ? 1 + im : 1 + α = FC <: Complex ? FC(1 + im) : one(FC) A = α * ones(FC, n, m) b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n] return A, b @@ -162,23 +163,23 @@ end function underdetermined_adjoint(n :: Int=100, m :: Int=200; FC=Float64) n < m || error("Square or overdetermined system!") A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m] - b = A * [1:m;] - c = A' * [-n:-1;] + b = A * FC[1:m;] + c = A' * FC[-n:-1;] return A, b, c end # Square consistent adjoint systems. 
function square_adjoint(n :: Int=100; FC=Float64) A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] - b = A * [1:n;] - c = A' * [-n:-1;] + b = A * FC[1:n;] + c = A' * FC[-n:-1;] return A, b, c end -# Adjoint systems with Ax = b underdetermined consistent and Aᵀt = c overdetermined insconsistent. +# Adjoint systems with Ax = b underdetermined consistent and Aᴴt = c overdetermined insconsistent. function rectangular_adjoint(n :: Int=10, m :: Int=25; FC=Float64) - Aᵀ, c = over_inconsistent(m, n; FC=FC) - A = adjoint(Aᵀ) + Aᴴ, c = over_inconsistent(m, n; FC=FC) + A = adjoint(Aᴴ) b = A * ones(FC, m) return A, b, c end @@ -187,8 +188,8 @@ end function overdetermined_adjoint(n :: Int=200, m :: Int=100; FC=Float64) n > m || error("Underdetermined or square system!") A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m] - b = A * [1:m;] - c = A' * [-n:-1;] + b = A * FC[1:m;] + c = A' * FC[-n:-1;] return A, b, c end @@ -251,7 +252,7 @@ end # Square and preconditioned problems. 
function square_preconditioned(n :: Int=10; FC=Float64) A = ones(FC, n, n) + (n-1) * eye(n) - b = FC(10.0) * [1:n;] + b = 10 * FC[1:n;] M⁻¹ = FC(1/n) * eye(n) return A, b, M⁻¹ end @@ -363,110 +364,3 @@ function check_reset(stats :: KS) where KS <: Krylov.KrylovStats end end end - -# Test callback -mutable struct TestCallbackN2{T, S, M} - A::M - b::S - storage_vec::S - tol::T -end -TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol) - -function (cb_n2::TestCallbackN2)(solver) - mul!(cb_n2.storage_vec, cb_n2.A, solver.x) - cb_n2.storage_vec .-= cb_n2.b - return norm(cb_n2.storage_vec) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2Adjoint{T, S, M} - A::M - b::S - c::S - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol) - -function (cb_n2::TestCallbackN2Adjoint)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) - cb_n2.storage_vec1 .-= cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', solver.y) - cb_n2.storage_vec2 .-= cb_n2.c - return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) -end - -mutable struct TestCallbackN2Shifts{T, S, M} - A::M - b::S - shifts::Vector{T} - tol::T -end -TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol) - -function (cb_n2::TestCallbackN2Shifts)(solver) - r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x) - return all(map(norm, r) .≤ cb_n2.tol) -end - -mutable struct TestCallbackN2LS{T, S, M} - A::M - b::S - λ::T - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol) - -function (cb_n2::TestCallbackN2LS)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) - cb_n2.storage_vec1 .-= cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1) - cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x - return norm(cb_n2.storage_vec2) ≤ cb_n2.tol -end - 
-mutable struct TestCallbackN2LN{T, S, M} - A::M - b::S - λ::T - storage_vec::S - tol::T -end -TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol) - -function (cb_n2::TestCallbackN2LN)(solver) - mul!(cb_n2.storage_vec, cb_n2.A, solver.x) - cb_n2.storage_vec .-= cb_n2.b - cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s) - return norm(cb_n2.storage_vec) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2SaddlePts{T, S, M} - A::M - b::S - c::S - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2SaddlePts(A, b, c; tol = 0.1) = - TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol) - -function (cb_n2::TestCallbackN2SaddlePts)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.y) - cb_n2.storage_vec1 .+= solver.x .- cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', solver.x) - cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c - return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) -end - -function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol) - get_x_restarted_gmres!(solver, A, stor, N) - x = stor.x - mul!(storage_vec, A, x) - storage_vec .-= b - return (norm(storage_vec) ≤ tol) -end diff --git a/test/test_verbose.jl b/test/test_verbose.jl new file mode 100644 index 000000000..ebc42c8f7 --- /dev/null +++ b/test/test_verbose.jl @@ -0,0 +1,60 @@ +function test_verbose(FC) + A = FC.(get_div_grad(4, 4, 4)) # Dimension m x n + m,n = size(A) + k = div(n, 2) + Au = A[1:k,:] # Dimension k x n + Ao = A[:,1:k] # Dimension m x k + b = Ao * ones(FC, k) # Dimension m + c = Au * ones(FC, n) # Dimension k + mem = 10 + + T = real(FC) + shifts = T[1; 2; 3; 4; 5] + nshifts = 5 + + for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, + :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, + :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, + :fgmres, :cg_lanczos_shift) 
+ + @testset "$fn" begin + io = IOBuffer() + if fn in (:trilqr, :bilqr) + @eval $fn($A, $b, $b, verbose=1, iostream=$io) + elseif fn in (:tricg, :trimr) + @eval $fn($Au, $c, $b, verbose=1, iostream=$io) + elseif fn in (:lnlq, :craig, :craigmr, :cgne, :crmr) + @eval $fn($Au, $c, verbose=1, iostream=$io) + elseif fn in (:lslq, :lsqr, :lsmr, :cgls, :crls) + @eval $fn($Ao, $b, verbose=1, iostream=$io) + elseif fn == :usymlq + @eval $fn($Au, $c, $b, verbose=1, iostream=$io) + elseif fn == :usymqr + @eval $fn($Ao, $b, $c, verbose=1, iostream=$io) + elseif fn == :gpmr + @eval $fn($Ao, $Au, $b, $c, verbose=1, iostream=$io) + elseif fn == :cg_lanczos_shift + @eval $fn($A, $b, $shifts, verbose=1, iostream=$io) + else + @eval $fn($A, $b, verbose=1, iostream=$io) + end + + showed = String(take!(io)) + str = split(showed, '\n', keepempty=false) + nrows = length(str) + first_row = fn in (:bilqr, :trilqr) ? 3 : 2 + last_row = fn == :cg ? nrows-1 : nrows + str = str[first_row:last_row] + len_header = length(str[1]) + @test mapreduce(x -> length(x) == len_header, &, str) + end + end +end + +@testset "verbose" begin + for FC in (Float64, ComplexF64) + @testset "Data Type: $FC" begin + test_verbose(FC) + end + end +end diff --git a/test/test_warm_start.jl b/test/test_warm_start.jl index 66a1cbea7..c788ed7e8 100644 --- a/test/test_warm_start.jl +++ b/test/test_warm_start.jl @@ -8,41 +8,126 @@ function test_warm_start(FC) nshifts = 5 tol = 1.0e-6 + x, y, stats = bilqr(A, b, c, x0, y0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * y + resid = norm(s) / norm(c) + @test(resid ≤ tol) + + solver = BilqrSolver(A, b) + solve!(solver, A, b, c, x0, y0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * solver.y + resid = norm(s) / norm(c) + @test(resid ≤ tol) + + x, y, stats = trilqr(A, b, c, x0, y0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * y + resid = norm(s) / norm(c) + @test(resid ≤ 
tol) + + solver = TrilqrSolver(A, b) + solve!(solver, A, b, c, x0, y0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + s = c - A' * solver.y + resid = norm(s) / norm(c) + @test(resid ≤ tol) + x, y, stats = tricg(A, b, b, x0, y0) r = [b - x - A * y; b - A' * x + y] resid = norm(r) / norm([b; b]) @test(resid ≤ tol) + solver = TricgSolver(A, b) + solve!(solver, A, b, b, x0, y0) + r = [b - solver.x - A * solver.y; b - A' * solver.x + solver.y] + resid = norm(r) / norm([b; b]) + @test(resid ≤ tol) + x, y, stats = trimr(A, b, b, x0, y0) r = [b - x - A * y; b - A' * x + y] resid = norm(r) / norm([b; b]) @test(resid ≤ tol) + solver = TrimrSolver(A, b) + solve!(solver, A, b, b, x0, y0) + r = [b - solver.x - A * solver.y; b - A' * solver.x + solver.y] + resid = norm(r) / norm([b; b]) + @test(resid ≤ tol) + x, y, stats = gpmr(A, A', b, b, x0, y0) r = [b - x - A * y; b - A' * x - y] resid = norm(r) / norm([b; b]) @test(resid ≤ tol) + solver = GpmrSolver(A, b) + solve!(solver, A, A', b, b, x0, y0) + r = [b - solver.x - A * solver.y; b - A' * solver.x - solver.y] + resid = norm(r) / norm([b; b]) + @test(resid ≤ tol) + x, stats = minres_qlp(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = MinresQlpSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = symmlq(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = SymmlqSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = cg(A, b, x0) r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = CgSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) x, stats = cr(A, b, x0) r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = CrSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) 
@test(resid ≤ tol) x, stats = cg_lanczos(A, b, x0) r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = CgLanczosSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) x, stats = minres(A, b, x0) @@ -50,70 +135,131 @@ function test_warm_start(FC) resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = MinresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = diom(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = DiomSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = dqgmres(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = DqgmresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = fom(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - + + solver = FomSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = gmres(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = GmresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + x, stats = fgmres(A, b, x0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + + solver = FgmresSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = bicgstab(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = BicgstabSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) + @test(resid ≤ tol) + x, stats = cgs(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) + solver = CgsSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x + resid = norm(r) / 
norm(b) + @test(resid ≤ tol) + x, stats = bilq(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, stats = qmr(A, b, x0) - r = b - A * x + solver = BilqSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, stats = usymlq(A, b, c, x0) + x, stats = qmr(A, b, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, stats = usymqr(A, b, c, x0) - r = b - A * x + solver = QmrSolver(A, b) + solve!(solver, A, b, x0) + r = b - A * solver.x resid = norm(r) / norm(b) @test(resid ≤ tol) - x, y, stats = bilqr(A, b, c, x0, y0) + x, stats = usymlq(A, b, c, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - s = c - A' * y - resid = norm(s) / norm(c) + + solver = UsymlqSolver(A, b) + solve!(solver, A, b, c, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) - x, y, stats = trilqr(A, b, c, x0, y0) + x, stats = usymqr(A, b, c, x0) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ tol) - s = c - A' * y - resid = norm(s) / norm(c) + + solver = UsymqrSolver(A, b) + solve!(solver, A, b, c, x0) + r = b - A * solver.x + resid = norm(r) / norm(b) @test(resid ≤ tol) end