Skip to content

Commit

Permalink
Use GPUArrays allocations cache (#717)
Browse files Browse the repository at this point in the history
  • Loading branch information
pxl-th authored Jan 9, 2025
1 parent c8f8afd commit 0fece1f
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 32 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Atomix = "0.1, 1"
CEnum = "0.4, 0.5"
ChainRulesCore = "1"
ExprTools = "0.1"
GPUArrays = "11.1"
GPUArrays = "11.2"
GPUCompiler = "0.27, 1.0"
KernelAbstractions = "0.9.2"
LLD_jll = "15, 16, 17"
Expand Down
2 changes: 0 additions & 2 deletions src/AMDGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,6 @@ include("ROCKernels.jl")
import .ROCKernels: ROCBackend
export ROCBackend

# include("cache_allocator.jl")

function __init__()
# Used to shut down hostcalls if any are running.
atexit(() -> begin Runtime.RT_EXITING[] = true end)
Expand Down
26 changes: 6 additions & 20 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,17 @@ mutable struct ROCArray{T, N, B} <: AbstractGPUArray{T, N}
dims::Dims{N}
offset::Int # Offset is in number of elements (not bytes).

function ROCArray{T, N, B}(
::UndefInitializer, dims::Dims{N},
) where {T, N, B <: Mem.AbstractAMDBuffer}
function ROCArray{T, N, B}(::UndefInitializer, dims::Dims{N}) where {T, N, B <: Mem.AbstractAMDBuffer}
@assert isbitstype(T) "ROCArray only supports bits types"
function _alloc_f()
sz::Int64 = prod(dims) * sizeof(T)
sz::Int64 = prod(dims) * sizeof(T)
return GPUArrays.cached_alloc((ROCArray, AMDGPU.device(), T, B, sz)) do
@debug "Allocate `T=$T`, `dims=$dims`: $(Base.format_bytes(sz))"
data = DataRef(pool_free, pool_alloc(B, sz))
finalizer(unsafe_free!, new{T, N, B}(data, dims, 0))
end
return _alloc_f()

# name = GPUArrays.CacheAllocatorName[]
# # Do not use caching allocator if it is not set or
# # the buffer is not a device memory.
# return if !(B <: Mem.HIPBuffer) || name == :none
# _alloc_f()
# else
# GPUArrays.alloc!(_alloc_f, ROCBackend(), name, T, dims)::ROCArray{T, N, B}
# end
return finalizer(unsafe_free!, new{T, N, B}(data, dims, 0))
end::ROCArray{T, N, B}
end

function ROCArray{T, N}(
buf::DataRef{Managed{B}}, dims::Dims{N}; offset::Integer = 0,
) where {T, N, B <: Mem.AbstractAMDBuffer}
function ROCArray{T, N}(buf::DataRef{Managed{B}}, dims::Dims{N}; offset::Integer = 0) where {T, N, B <: Mem.AbstractAMDBuffer}
@assert isbitstype(T) "ROCArray only supports bits types"
xs = new{T, N, B}(buf, dims, offset)
return finalizer(unsafe_free!, xs)
Expand Down
5 changes: 0 additions & 5 deletions src/cache_allocator.jl

This file was deleted.

8 changes: 4 additions & 4 deletions src/exception_handler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ struct ExceptionHolder
n_str_buffers = 100

exception_flag = Mem.HostBuffer(sizeof(Int32), HIP.hipHostAllocDefault)
gate, buffers_counter, str_buffers_counter = (#GPUArrays.@no_cache_scope begin
gate, buffers_counter, str_buffers_counter = GPUArrays.@uncached begin
ROCArray(UInt64[0]), ROCArray(Int32[0]), ROCArray(Int32[0])
)
end

errprintf_buffers = [
Mem.HostBuffer(buf_len, HIP.hipHostAllocDefault)
Expand All @@ -51,9 +51,9 @@ struct ExceptionHolder
Mem.HostBuffer(str_len, HIP.hipHostAllocDefault)
for _ in 1:n_str_buffers]

errprintf_buffers_dev, str_buffers_dev = (#GPUArrays.@no_cache_scope begin
errprintf_buffers_dev, str_buffers_dev = GPUArrays.@uncached begin
ROCArray(Mem.device_ptr.(errprintf_buffers)), ROCArray(Mem.device_ptr.(str_buffers))
)
end

new(
exception_flag, gate, buffers_counter, str_buffers_counter,
Expand Down
3 changes: 3 additions & 0 deletions test/gpuarrays_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,6 @@ end
@testitem "gpuarrays - uniformscaling" setup=[TSGPUArrays] begin
gpuarrays_test("uniformscaling")
end
@testitem "gpuarrays - alloc cache" setup=[TSGPUArrays] begin
gpuarrays_test("alloc cache")
end

0 comments on commit 0fece1f

Please sign in to comment.