[documentation] Test the code of GPU backends with buildkite
amontoison committed Sep 13, 2022
1 parent b19c74b commit 3c67dfb
Showing 5 changed files with 125 additions and 7 deletions.
1 change: 1 addition & 0 deletions .buildkite/pipeline.yml
@@ -10,6 +10,7 @@ steps:
julia --color=yes --project -e '
using Pkg
Pkg.add("CUDA")
Pkg.add("LinearOperators")
Pkg.instantiate()
include("test/gpu/nvidia.jl")'
timeout_in_minutes: 30
10 changes: 10 additions & 0 deletions test/gpu/amd.jl
@@ -6,6 +6,16 @@ using Krylov, AMDGPU
@test AMDGPU.functional()
AMDGPU.allowscalar(false)

@testset "documentation" begin
A_cpu = rand(ComplexF64, 20, 20)
A_cpu = A_cpu + A_cpu'  # make A_cpu hermitian so that minres applies
b_cpu = rand(ComplexF64, 20)
A_gpu = ROCMatrix(A_cpu)
b_gpu = ROCVector(b_cpu)
x, stats = minres(A_gpu, b_gpu)
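# A residual check in the spirit of the Nvidia tests below could follow the
# solve; a sketch, with an assumed tolerance (not part of this commit):
r_gpu = b_gpu - A_gpu * x
@test norm(r_gpu) ≤ 1e-4 * norm(b_gpu)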
end

for FC in (Float32, Float64, ComplexF32, ComplexF64)
S = ROCVector{FC}
T = real(FC)
16 changes: 13 additions & 3 deletions test/gpu/intel.jl
@@ -1,18 +1,28 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, oneAPI

# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
import Krylov.kdot
function kdot(n :: Integer, x :: oneVector{T}, dx :: Integer, y :: oneVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
z = similar(x)
z .= conj.(x) .* y  # elementwise conj(x) * y stays on the device
reduce(+, z)        # single-array reduce avoids the two-array mapreduce missing before the PR above
end
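# A quick sanity check for the workaround (a sketch, not part of the commit):
# the device-side kdot should agree with a CPU dot product.
let x = oneVector(rand(Float32, 10)), y = oneVector(rand(Float32, 10))
@test kdot(10, x, 1, y, 1) ≈ dot(Array(x), Array(y))
end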

@testset "Intel -- oneAPI.jl" begin

@test oneAPI.functional()
oneAPI.allowscalar(false)

@testset "documentation" begin
T = Float32
m = 20
n = 10
A_cpu = rand(T, m, n)
b_cpu = rand(T, m)
A_gpu = oneMatrix(A_cpu)
b_gpu = oneVector(b_cpu)
x, stats = lsqr(A_gpu, b_gpu)
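# Sketch with an assumed tolerance (not part of the commit): at the
# least-squares solution the residual b - Ax is orthogonal to range(A).
r_gpu = b_gpu - A_gpu * x
@test norm(A_gpu' * r_gpu) ≤ 1e-4 * norm(A_gpu' * b_gpu)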
end

for FC in (Float32, ComplexF32)
S = oneVector{FC}
T = real(FC)
16 changes: 13 additions & 3 deletions test/gpu/metal.jl
@@ -5,18 +5,28 @@ using Krylov, Metal
const MtlVector{T} = MtlArray{T,1}
const MtlMatrix{T} = MtlArray{T,2}

# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
import Krylov.kdot
function kdot(n :: Integer, x :: MtlVector{T}, dx :: Integer, y :: MtlVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
z = similar(x)
z .= conj.(x) .* y  # elementwise conj(x) * y stays on the device
reduce(+, z)        # single-array reduce avoids the two-array mapreduce missing before the PR above
end

@testset "Apple M1 GPUs -- Metal.jl" begin

# @test Metal.functional()
Metal.allowscalar(false)

@testset "documentation" begin
T = Float32
m = 20
n = 10
A_cpu = rand(T, n, m)
b_cpu = rand(T, n)
A_gpu = MtlMatrix(A_cpu)
b_gpu = MtlVector(b_cpu)
x, stats = craig(A_gpu, b_gpu)
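# Sketch with an assumed tolerance (not part of the commit): CRAIG solves
# consistent systems, and a random 10×20 A is almost surely full rank,
# so Ax should reproduce b.
r_gpu = b_gpu - A_gpu * x
@test norm(r_gpu) ≤ 1e-4 * norm(b_gpu)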
end

for FC in (Float32, ComplexF32)
S = MtlVector{FC}
T = real(FC)
89 changes: 88 additions & 1 deletion test/gpu/nvidia.jl
@@ -1,11 +1,98 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, CUDA, CUDA.CUSPARSE
using LinearOperators, Krylov, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER

include("../test_utils.jl")

@testset "Nvidia -- CUDA.jl" begin

@test CUDA.functional()
CUDA.allowscalar(false)

@testset "documentation" begin
A_cpu = rand(20, 20)
b_cpu = rand(20)
A_gpu = CuMatrix(A_cpu)
b_gpu = CuVector(b_cpu)
x, stats = bilq(A_gpu, b_gpu)
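# Sketch with an assumed tolerance, mirroring the checks in the testsets below:
@test norm(b_gpu - A_gpu * x) ≤ 1e-4 * norm(b_gpu)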

A_cpu = sprand(200, 100, 0.3)
b_cpu = rand(200)
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
x, stats = lsmr(A_gpu, b_gpu)
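# Sketch with an assumed tolerance (not part of the commit): LSMR drives the
# normal-equations residual A'(b - Ax) toward zero.
r_gpu = b_gpu - A_gpu * x
@test norm(A_gpu' * r_gpu) ≤ 1e-4 * norm(A_gpu' * b_gpu)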

@testset "ic0" begin
A_cpu, b_cpu = sparse_laplacian()

b_gpu = CuVector(b_cpu)
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = true

A_gpu = CuSparseMatrixCSC(A_cpu)
P = ic02(A_gpu, 'O')
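# The factor from ic02 on a CSC matrix is referenced as an upper triangle
# below: M⁻¹ = (UᵀU)⁻¹ is applied with a transposed solve then a direct solve.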
function ldiv_ic0!(y, P, x)
copyto!(y, x)
sv2!('T', 'U', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6

A_gpu = CuSparseMatrixCSR(A_cpu)
P = ic02(A_gpu, 'O')
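# With CSR storage the factor is referenced as a lower triangle instead,
# so M⁻¹ = (LLᵀ)⁻¹ below.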
function ldiv_ic0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('T', 'L', 'N', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
end

@testset "ilu0" begin
A_cpu, b_cpu = polar_poisson()

p = zfd(A_cpu, 'O')
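# zfd computes a row permutation that makes the diagonal zero-free, which
# ILU(0) requires; the permutation is 0-based, hence the shift below.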
p .+= 1
A_cpu = A_cpu[p,:]
b_cpu = b_cpu[p]

b_gpu = CuVector(b_cpu)
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = false

A_gpu = CuSparseMatrixCSC(A_cpu)
P = ilu02(A_gpu, 'O')
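# ilu02 factors A in place as M = LU; below, the solve with L is non-unit
# diagonal and the solve with U uses a unit diagonal ('U' as third argument).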
function ldiv_ilu0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'U', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6

A_gpu = CuSparseMatrixCSR(A_cpu)
P = ilu02(A_gpu, 'O')
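# With CSR storage the unit diagonal sits on L and the non-unit one on U,
# so the options of the two solves are swapped.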
function ldiv_ilu0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'U', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
end
end

for FC in (Float32, Float64, ComplexF32, ComplexF64)
S = CuVector{FC}
T = real(FC)