[documentation] Test the code of GPU backends with buildkite
amontoison committed Sep 13, 2022
1 parent b19c74b commit 3c67dfb
Showing 5 changed files with 125 additions and 7 deletions.
1 change: 1 addition & 0 deletions .buildkite/pipeline.yml
@@ -10,6 +10,7 @@ steps:
julia --color=yes --project -e '
using Pkg
Pkg.add("CUDA")
Pkg.add("LinearOperators")
Pkg.instantiate()
include("test/gpu/nvidia.jl")'
timeout_in_minutes: 30
10 changes: 10 additions & 0 deletions test/gpu/amd.jl
@@ -6,6 +6,16 @@ using Krylov, AMDGPU
@test AMDGPU.functional()
AMDGPU.allowscalar(false)

@testset "documentation" begin
A_cpu = rand(ComplexF64, 20, 20)
A_cpu = A_cpu + A_cpu'  # make A_cpu hermitian so that minres applies
b_cpu = rand(ComplexF64, 20)
A_gpu = ROCMatrix(A_cpu)
b_gpu = ROCVector(b_cpu)
x, stats = minres(A_gpu, b_gpu)
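# A residual check in the spirit of the Nvidia tests below could follow the
# solve; a sketch, with an assumed tolerance (not part of this commit):
r_gpu = b_gpu - A_gpu * x
@test norm(r_gpu) ≤ 1e-4 * norm(b_gpu)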
end

for FC in (Float32, Float64, ComplexF32, ComplexF64)
S = ROCVector{FC}
T = real(FC)
16 changes: 13 additions & 3 deletions test/gpu/intel.jl
@@ -1,18 +1,28 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, oneAPI

# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
import Krylov.kdot
function kdot(n :: Integer, x :: oneVector{T}, dx :: Integer, y :: oneVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
z = similar(x)
z .= conj.(x) .* y  # elementwise conj(x) * y stays on the device
reduce(+, z)        # single-array reduce avoids the two-array mapreduce missing before the PR above
end
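# A quick sanity check for the workaround (a sketch, not part of the commit):
# the device-side kdot should agree with a CPU dot product.
let x = oneVector(rand(Float32, 10)), y = oneVector(rand(Float32, 10))
@test kdot(10, x, 1, y, 1) ≈ dot(Array(x), Array(y))
end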

@testset "Intel -- oneAPI.jl" begin

@test oneAPI.functional()
oneAPI.allowscalar(false)

@testset "documentation" begin
T = Float32
m = 20
n = 10
A_cpu = rand(T, m, n)
b_cpu = rand(T, m)
A_gpu = oneMatrix(A_cpu)
b_gpu = oneVector(b_cpu)
x, stats = lsqr(A_gpu, b_gpu)
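# Sketch with an assumed tolerance (not part of the commit): at the
# least-squares solution the residual b - Ax is orthogonal to range(A).
r_gpu = b_gpu - A_gpu * x
@test norm(A_gpu' * r_gpu) ≤ 1e-4 * norm(A_gpu' * b_gpu)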
end

for FC in (Float32, ComplexF32)
S = oneVector{FC}
T = real(FC)
16 changes: 13 additions & 3 deletions test/gpu/metal.jl
@@ -5,18 +5,28 @@ using Krylov, Metal
const MtlVector{T} = MtlArray{T,1}
const MtlMatrix{T} = MtlArray{T,2}

# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
import Krylov.kdot
function kdot(n :: Integer, x :: MtlVector{T}, dx :: Integer, y :: MtlVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
z = similar(x)
z .= conj.(x) .* y  # elementwise conj(x) * y stays on the device
reduce(+, z)        # single-array reduce avoids the two-array mapreduce missing before the PR above
end

@testset "Apple M1 GPUs -- Metal.jl" begin

# @test Metal.functional()
Metal.allowscalar(false)

@testset "documentation" begin
T = Float32
m = 20
n = 10
A_cpu = rand(T, n, m)
b_cpu = rand(T, n)
A_gpu = MtlMatrix(A_cpu)
b_gpu = MtlVector(b_cpu)
x, stats = craig(A_gpu, b_gpu)
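# Sketch with an assumed tolerance (not part of the commit): CRAIG solves
# consistent systems, and a random 10×20 A is almost surely full rank,
# so Ax should reproduce b.
r_gpu = b_gpu - A_gpu * x
@test norm(r_gpu) ≤ 1e-4 * norm(b_gpu)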
end

for FC in (Float32, ComplexF32)
S = MtlVector{FC}
T = real(FC)
89 changes: 88 additions & 1 deletion test/gpu/nvidia.jl
@@ -1,11 +1,98 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, CUDA, CUDA.CUSPARSE
using LinearOperators, Krylov, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER

include("../test_utils.jl")

@testset "Nvidia -- CUDA.jl" begin

@test CUDA.functional()
CUDA.allowscalar(false)

@testset "documentation" begin
A_cpu = rand(20, 20)
b_cpu = rand(20)
A_gpu = CuMatrix(A_cpu)
b_gpu = CuVector(b_cpu)
x, stats = bilq(A_gpu, b_gpu)
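# Sketch with an assumed tolerance, mirroring the checks in the testsets below:
@test norm(b_gpu - A_gpu * x) ≤ 1e-4 * norm(b_gpu)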

A_cpu = sprand(200, 100, 0.3)
b_cpu = rand(200)
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
x, stats = lsmr(A_gpu, b_gpu)
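# Sketch with an assumed tolerance (not part of the commit): LSMR drives the
# normal-equations residual A'(b - Ax) toward zero.
r_gpu = b_gpu - A_gpu * x
@test norm(A_gpu' * r_gpu) ≤ 1e-4 * norm(A_gpu' * b_gpu)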

@testset "ic0" begin
A_cpu, b_cpu = sparse_laplacian()

b_gpu = CuVector(b_cpu)
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = true

A_gpu = CuSparseMatrixCSC(A_cpu)
P = ic02(A_gpu, 'O')
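# The factor from ic02 on a CSC matrix is referenced as an upper triangle
# below: M⁻¹ = (UᵀU)⁻¹ is applied with a transposed solve then a direct solve.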
function ldiv_ic0!(y, P, x)
copyto!(y, x)
sv2!('T', 'U', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6

A_gpu = CuSparseMatrixCSR(A_cpu)
P = ic02(A_gpu, 'O')
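# With CSR storage the factor is referenced as a lower triangle instead,
# so M⁻¹ = (LLᵀ)⁻¹ below.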
function ldiv_ic0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('T', 'L', 'N', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
end

@testset "ilu0" begin
A_cpu, b_cpu = polar_poisson()

p = zfd(A_cpu, 'O')
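# zfd computes a row permutation that makes the diagonal zero-free, which
# ILU(0) requires; the permutation is 0-based, hence the shift below.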
p .+= 1
A_cpu = A_cpu[p,:]
b_cpu = b_cpu[p]

b_gpu = CuVector(b_cpu)
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = false

A_gpu = CuSparseMatrixCSC(A_cpu)
P = ilu02(A_gpu, 'O')
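# ilu02 factors A in place as M = LU; below, the solve with L is non-unit
# diagonal and the solve with U uses a unit diagonal ('U' as third argument).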
function ldiv_ilu0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'U', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6

A_gpu = CuSparseMatrixCSR(A_cpu)
P = ilu02(A_gpu, 'O')
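# With CSR storage the unit diagonal sits on L and the non-unit one on U,
# so the options of the two solves are swapped.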
function ldiv_ilu0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'U', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O')
return y
end
opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
end
end

for FC in (Float32, Float64, ComplexF32, ComplexF64)
S = CuVector{FC}
T = real(FC)