Fix the documentation for CUDA.jl 4.1
amontoison committed Mar 24, 2023
1 parent aaf83e5 commit fcbf189
Showing 3 changed files with 44 additions and 27 deletions.
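For orientation, the functional change the documentation is being adapted to is visible throughout the diff below: with CUDA.jl 4.1, the CUSPARSE/CUSOLVER helpers `ic02`, `ilu02` and `zfd` are called without the index-base character `'O'`. A minimal sketch of the old and new call forms (the small matrix is made up for illustration; the call forms themselves are taken from the diff):

```julia
using SparseArrays
using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER

if CUDA.functional()
  # Hypothetical small sparse matrix, only to illustrate the call forms.
  A_cpu = sparse(Float64[4 1 0; 1 4 1; 0 1 4])
  A_gpu = CuSparseMatrixCSR(A_cpu)

  # CUDA.jl ≤ 4.0 (the forms removed by this commit): index base passed explicitly.
  # P = ic02(A_gpu, 'O')
  # P = ilu02(A_gpu, 'O')
  # p = zfd(A_cpu, 'O')

  # CUDA.jl 4.1 (the forms added by this commit): no index-base argument.
  P = ic02(A_gpu)
  P = ilu02(A_gpu)
  p = zfd(A_cpu)
end
```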
2 changes: 2 additions & 0 deletions .buildkite/pipeline.yml
@@ -12,6 +12,8 @@ steps:
Pkg.add("CUDA")
Pkg.add("LinearOperators")
Pkg.instantiate()
using CUDA
CUDA.set_runtime_version!(v"11.8")
include("test/gpu/nvidia.jl")'
timeout_in_minutes: 30

20 changes: 13 additions & 7 deletions docs/src/gpu.md
@@ -65,7 +65,7 @@ if CUDA.functional()
b_gpu = CuVector(b_cpu)

# IC(0) decomposition LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ic02(A_gpu, 'O')
P = ic02(A_gpu)

# Additional vector required for solving triangular systems
n = length(b_gpu)
@@ -101,18 +101,17 @@ using SparseArrays, Krylov, LinearOperators
using CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER

if CUDA.functional()
# Optional -- Compute a permutation vector p such that A[p,:] has no zero diagonal
p = zfd(A_cpu, 'O')
# Optional -- Compute a permutation vector p such that A[:,p] has no zero diagonal
p = zfd(A_cpu)
p .+= 1
A_cpu = A_cpu[p,:]
b_cpu = b_cpu[p]
A_cpu = A_cpu[:,p]

# Transfer the linear system from the CPU to the GPU
A_gpu = CuSparseMatrixCSR(A_cpu) # A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)

# ILU(0) decomposition LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ilu02(A_gpu, 'O')
P = ilu02(A_gpu)

# Additional vector required for solving triangular systems
n = length(b_gpu)
@@ -137,10 +136,17 @@ if CUDA.functional()
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z))

# Solve a non-Hermitian system with an ILU(0) preconditioner on GPU
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
x̄, stats = bicgstab(A_gpu, b_gpu, M=opM)

# Recover the solution of Ax = b with the solution of A[:,p]x̄ = b
invp = invperm(p)
x = x̄[invp]
end
```
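The recovery step added at the end of the example deserves a one-line justification: if `x̄` solves the column-permuted system `A[:,p] x̄ = b`, then `x̄ = x[p]`, so `x = x̄[invperm(p)]` solves `A x = b`. A tiny CPU-only check (the matrix, right-hand side and permutation below are made up for illustration):

```julia
using SparseArrays, LinearAlgebra

A = sparse(Float64[0 2 0; 0 0 3; 4 0 0])   # zero diagonal
b = [2.0, 6.0, 12.0]
p = [2, 3, 1]                  # A[:,p] has no zero diagonal
x̄ = A[:,p] \ b                 # solve the column-permuted system
x = x̄[invperm(p)]              # recover the solution of A x = b
@assert A * x ≈ b
```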

!!! note
    The `CUSPARSE` library shipped with CUDA toolkit `v"12.x"` has some bugs. We recommend using an older CUDA toolkit via `CUDA.set_runtime_version!(v"11.8")`.
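For reference, the pin recommended in the note is the same one added to the Buildkite pipeline above, and it only takes effect after Julia is restarted. A minimal sketch:

```julia
using CUDA

# Pin the CUDA runtime used by CUDA.jl to the 11.8 toolkit.
# The new setting is picked up on the next Julia session.
CUDA.set_runtime_version!(v"11.8")

# In a fresh session, the active toolkit can be checked with:
# CUDA.runtime_version()
```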

## AMD GPUs

All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs.
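As a minimal sketch of using a Krylov.jl solver with AMDGPU.jl arrays (the dense system below is made up for illustration, and `ROCMatrix`/`ROCVector` from AMDGPU.jl are assumed to be the relevant array types):

```julia
using LinearAlgebra, Krylov
using AMDGPU

if AMDGPU.functional()
  # Hypothetical dense symmetric positive definite system.
  n = 100
  A_cpu = rand(n, n)
  A_cpu = A_cpu * A_cpu' + n * I
  b_cpu = rand(n)

  # Transfer the linear system from the CPU to the GPU.
  A_gpu = ROCMatrix(A_cpu)
  b_gpu = ROCVector(b_cpu)

  # Solve the system on the GPU with the conjugate gradient method.
  x, stats = cg(A_gpu, b_gpu)
end
```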
49 changes: 29 additions & 20 deletions test/gpu/nvidia.jl
@@ -22,6 +22,7 @@ include("gpu.jl")

@testset "ic0" begin
A_cpu, b_cpu = sparse_laplacian()
@test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == true

b_gpu = CuVector(b_cpu)
n = length(b_gpu)
@@ -30,7 +31,7 @@ include("gpu.jl")
symmetric = hermitian = true

A_gpu = CuSparseMatrixCSC(A_cpu)
P = ic02(A_gpu, 'O')
P = ic02(A_gpu)
function ldiv_ic0!(P::CuSparseMatrixCSC, x, y, z)
ldiv!(z, UpperTriangular(P)', x)
ldiv!(y, UpperTriangular(P), z)
@@ -39,10 +40,10 @@ include("gpu.jl")
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
@test stats.niter ≤ 38
@test stats.niter ≤ 19

A_gpu = CuSparseMatrixCSR(A_cpu)
P = ic02(A_gpu, 'O')
A_gpu = CuSparseMatrixCSR(A_gpu)
P = ic02(A_gpu)
function ldiv_ic0!(P::CuSparseMatrixCSR, x, y, z)
ldiv!(z, LowerTriangular(P), x)
ldiv!(y, LowerTriangular(P)', z)
@@ -51,46 +52,54 @@ include("gpu.jl")
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(P, x, y, z))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
@test stats.niter ≤ 38
@test stats.niter ≤ 19
end

@testset "ilu0" begin
A_cpu, b_cpu = polar_poisson()

p = zfd(A_cpu, 'O')
A_cpu = Float64[1 0 0 4;
0 0 7 8;
9 0 0 12;
0 14 0 16]
A_cpu = sparse(A_cpu)
b_cpu = ones(4)
@test mapreduce(Aᵢᵢ -> Aᵢᵢ != 0, &, diag(A_cpu)) == false

p = zfd(A_cpu)
p .+= 1
A_cpu = A_cpu[p,:]
b_cpu = b_cpu[p]
invp = invperm(p)
@test reduce(&, invp .== p) == false

b_gpu = CuVector(b_cpu)
n = length(b_gpu)
T = eltype(b_gpu)
z = similar(CuVector{T}, n)
symmetric = hermitian = false

A_gpu = CuSparseMatrixCSC(A_cpu)
P = ilu02(A_gpu, 'O')
A_gpu = CuSparseMatrixCSC(A_cpu[:,p])
P = ilu02(A_gpu)
function ldiv_ilu0!(P::CuSparseMatrixCSC, x, y, z)
ldiv!(z, LowerTriangular(P), x)
ldiv!(y, UnitUpperTriangular(P), z)
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
@test stats.niter ≤ 1659
x̄, stats = gmres(A_gpu, b_gpu, M=opM)
x = Vector(x̄)[invp]
@test norm(b_gpu - A_gpu * x̄) ≤ 1e-6
@test norm(b_cpu - A_cpu * x) ≤ 1e-6

A_gpu = CuSparseMatrixCSR(A_cpu)
P = ilu02(A_gpu, 'O')
A_gpu = CuSparseMatrixCSR(A_cpu[:,p])
P = ilu02(A_gpu)
function ldiv_ilu0!(P::CuSparseMatrixCSR, x, y, z)
ldiv!(z, UnitLowerTriangular(P), x)
ldiv!(y, UpperTriangular(P), z)
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(P, x, y, z))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
@test stats.niter ≤ 1659
x̄, stats = gmres(A_gpu, b_gpu, M=opM)
x = Vector(x̄)[invp]
@test norm(b_gpu - A_gpu * x̄) ≤ 1e-6
@test norm(b_cpu - A_cpu * x) ≤ 1e-6
end
end

