Skip to content

Commit

Permalink
More simplifications.
Browse files: browse the repository at this point in the history
  • Loading branch information
maleadt committed Jan 23, 2025
1 parent 5568373 commit 72f295a
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 41 deletions.
5 changes: 1 addition & 4 deletions lib/cl/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,7 @@ function call(
set_args!(k, args...)
flag = cl.memory_backend() == cl.SVMBackend() ? CL_KERNEL_EXEC_INFO_SVM_PTRS : CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL
if !isempty(pointers)
clSetKernelExecInfo(
k, flag,
sizeof(pointers), pointers
)
clSetKernelExecInfo(k, flag, sizeof(pointers), pointers)
end
enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on)
end
Expand Down
15 changes: 7 additions & 8 deletions lib/cl/memory/svm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ struct SharedVirtualMemory <: AbstractMemory
context::Context
end

function svm_alloc(
ctx::Context, bytesize::Integer;
function svm_alloc(bytesize::Integer;
alignment::Integer = 0, access::Symbol = :rw, fine_grained = false
)
flags = if access == :rw
Expand All @@ -22,15 +21,15 @@ function svm_alloc(
flags |= CL_MEM_SVM_FINE_GRAIN_BUFFER
end

ptr = clSVMAlloc(ctx, flags, bytesize, alignment)
ptr = clSVMAlloc(context(), flags, bytesize, alignment)
@assert ptr != C_NULL

# JuliaGPU/OpenCL.jl#252: uninitialized SVM memory doesn't work on Intel
if platform().name == "Intel(R) OpenCL Graphics"
enqueue_svm_fill(ptr, UInt8(0), bytesize)
end

return SharedVirtualMemory(ptr, bytesize, ctx)
return SharedVirtualMemory(ptr, bytesize, context())
end

function svm_free(buf::SharedVirtualMemory)
Expand Down Expand Up @@ -114,14 +113,14 @@ end
function enqueue_svm_fill(ptr::Union{Ptr, CLPtr}, pattern::T, N::Integer;
wait_for::Vector{Event}=Event[]) where {T}
nbytes = N * sizeof(T)
nbytes_pattern = sizeof(T)
@assert nbytes_pattern > 0
nbytes == 0 && return
pattern_size = sizeof(T)
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueSVMMemFill(queue(), ptr, [pattern],
nbytes_pattern, nbytes,
clEnqueueSVMMemFill(queue(), ptr, Ref(pattern),
pattern_size, nbytes,
n_evts, evt_ids, ret_evt)
@return_event ret_evt[]
end
Expand Down
29 changes: 13 additions & 16 deletions lib/cl/memory/usm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ struct UnifiedDeviceMemory <: UnifiedMemory
device::Device
end

function device_alloc(
ctx::Context, dev::Device, bytesize::Integer;
function device_alloc(bytesize::Integer;
alignment::Integer = 0, write_combined::Bool = false
)
flags = 0
Expand All @@ -37,12 +36,12 @@ function device_alloc(

error_code = Ref{Cint}()
props = cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0]
ptr = clDeviceMemAllocINTEL(ctx, dev, props, bytesize, alignment, error_code)
ptr = clDeviceMemAllocINTEL(context(), device(), props, bytesize, alignment, error_code)
if error_code[] != CL_SUCCESS
throw(CLError(error_code[]))
end

return UnifiedDeviceMemory(ptr, bytesize, ctx, dev)
return UnifiedDeviceMemory(ptr, bytesize, context(), device())
end

Base.pointer(buf::UnifiedDeviceMemory) = buf.ptr
Expand Down Expand Up @@ -71,8 +70,7 @@ struct UnifiedHostMemory <: UnifiedMemory
context::Context
end

function host_alloc(
ctx::Context, bytesize::Integer;
function host_alloc(bytesize::Integer;
alignment::Integer = 0, write_combined::Bool = false
)
flags = 0
Expand All @@ -82,12 +80,12 @@ function host_alloc(

error_code = Ref{Cint}()
props = cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0]
ptr = clHostMemAllocINTEL(ctx, props, bytesize, alignment, error_code)
ptr = clHostMemAllocINTEL(context(), props, bytesize, alignment, error_code)
if error_code[] != CL_SUCCESS
throw(CLError(error_code[]))
end

return UnifiedHostMemory(ptr, bytesize, ctx)
return UnifiedHostMemory(ptr, bytesize, context())
end

Base.pointer(buf::UnifiedHostMemory) = buf.ptr
Expand Down Expand Up @@ -116,8 +114,7 @@ struct UnifiedSharedMemory <: UnifiedMemory
device::Union{Nothing, Device}
end

function shared_alloc(
ctx::Context, dev::Device, bytesize::Integer;
function shared_alloc(bytesize::Integer;
alignment::Integer = 0, write_combined = false, placement = nothing
)
flags = 0
Expand All @@ -136,12 +133,12 @@ function shared_alloc(

error_code = Ref{Cint}()
props = cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0]
ptr = clSharedMemAllocINTEL(ctx, dev, props, bytesize, alignment, error_code)
ptr = clSharedMemAllocINTEL(context(), device(), props, bytesize, alignment, error_code)
if error_code[] != CL_SUCCESS
throw(CLError(error_code[]))
end

return UnifiedSharedMemory(ptr, bytesize, ctx, dev)
return UnifiedSharedMemory(ptr, bytesize, context(), device())
end

Base.pointer(buf::UnifiedSharedMemory) = buf.ptr
Expand Down Expand Up @@ -190,14 +187,14 @@ end
function enqueue_usm_fill(ptr::Union{Ptr, CLPtr}, pattern::T, N::Integer;
wait_for::Vector{Event}=Event[]) where {T}
nbytes = N * sizeof(T)
nbytes_pattern = sizeof(T)
@assert nbytes_pattern > 0
nbytes == 0 && return
pattern_size = sizeof(T)
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueMemFillINTEL(queue(), ptr, [pattern],
nbytes_pattern, nbytes,
clEnqueueMemFillINTEL(queue(), ptr, Ref(pattern),
pattern_size, nbytes,
n_evts, evt_ids, ret_evt)
@return_event ret_evt[]
end
Expand Down
1 change: 0 additions & 1 deletion src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,6 @@ function unsafe_fill!(
ptr::Union{Ptr{T}, CLPtr{T}},
pattern::T, N::Integer
) where {T}
N * sizeof(T) == 0 && return
if cl.memory_backend() == cl.USMBackend()
cl.enqueue_usm_fill(ptr, pattern, N)
elseif cl.memory_backend() == cl.SVMBackend()
Expand Down
14 changes: 7 additions & 7 deletions src/memory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,18 @@ end
## public interface

function alloc(bytes::Int; alignment::Int = 0)
if cl.device_state(dev).usm
return cl.alloc(cl.UnifiedDeviceMemory, cl.context(), cl.device(), bytes; alignment)
if cl.memory_backend() == cl.USMBackend()
return alloc(cl.UnifiedDeviceMemory, bytes; alignment)
else
return cl.alloc(cl.SharedVirtualMemory, cl.context(), cl.device(), bytes; alignment)
return alloc(cl.SharedVirtualMemory, bytes; alignment)
end
end

function alloc(::Type{cl.UnifiedDeviceMemory}, bytes::Int; alignment::Int = 0)
if bytes == 0
return Managed(cl.UnifiedDeviceMemory(cl.CL_NULL, bytes, cl.context(), cl.device()))
end
mem = cl.device_alloc(cl.context(), cl.device(), bytes; alignment)
mem = cl.device_alloc(bytes; alignment)
return Managed(mem)
end

Expand All @@ -135,23 +135,23 @@ function alloc(::Type{cl.UnifiedSharedMemory}, bytes::Int; alignment::Int = 0)
return Managed(cl.UnifiedSharedMemory(cl.CL_NULL, bytes, cl.context(), cl.device()))
end
# TODO: support cross-device shared memory (by setting `dev=nothing`)
mem = cl.shared_alloc(cl.context(), cl.device(), bytes; alignment)
mem = cl.shared_alloc(bytes; alignment)
return Managed(mem)
end

function alloc(::Type{cl.UnifiedHostMemory}, bytes::Int; alignment::Int = 0)
if bytes == 0
return Managed(cl.UnifiedHostMemory(cl.CL_NULL, bytes, cl.context()))
end
mem = cl.host_alloc(cl.context(), bytes; alignment)
mem = cl.host_alloc(bytes; alignment)
return Managed(mem)
end

function alloc(::Type{cl.SharedVirtualMemory}, bytes::Int; alignment::Int = 0)
if bytes == 0
return Managed(cl.SharedVirtualMemory(cl.CL_NULL, bytes, cl.context()))
end
mem = cl.svm_alloc(cl.context(), bytes; alignment)
mem = cl.svm_alloc(bytes; alignment)
return Managed(mem)
end

Expand Down
10 changes: 5 additions & 5 deletions test/buffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ end

@testset "SVM Buffer" begin
# simple buffer
let buf = cl.svm_alloc(cl.context(), sizeof(Int))
let buf = cl.svm_alloc(sizeof(Int))
@test sizeof(buf) == sizeof(Int)
end

# memory copy
let buf = cl.svm_alloc(cl.context(), sizeof(Int))
let buf = cl.svm_alloc(sizeof(Int))
ptr = pointer(buf)

src = [42]
Expand All @@ -98,7 +98,7 @@ end

# memory map

let buf = cl.svm_alloc(cl.context(), sizeof(Int))
let buf = cl.svm_alloc(sizeof(Int))
ptr = pointer(buf)

src = [42]
Expand All @@ -117,10 +117,10 @@ end
end

# fill
let buf = cl.svm_alloc(cl.context(), 3 * sizeof(Int))
let buf = cl.svm_alloc(3 * sizeof(Int))
ptr = pointer(buf)

cl.enqueue_svm_fill(ptr, [42], 3)
cl.enqueue_svm_fill(ptr, 42, 3)

dst = Vector{Int}(undef, 3)
cl.enqueue_svm_copy(pointer(dst), ptr, sizeof(dst); blocking = true)
Expand Down

0 comments on commit 72f295a

Please sign in to comment.