Skip to content

Commit

Permalink
fix formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed Feb 6, 2025
1 parent 7edafde commit 8098378
Show file tree
Hide file tree
Showing 14 changed files with 399 additions and 397 deletions.
18 changes: 7 additions & 11 deletions src/nditeration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,14 @@ needs to perform dynamic bounds-checking.
"""
@inline function partition(ndrange, __workgroupsize)
@assert length(__workgroupsize) <= length(ndrange)
if length(__workgroupsize) < length(ndrange)
# pad workgroupsize with ones
workgroupsize = ntuple(Val(length(ndrange))) do I
Base.@_inline_meta
if I > length(__workgroupsize)
return 1
else
return __workgroupsize[I]
end
# pad workgroupsize with ones
workgroupsize = ntuple(Val(length(ndrange))) do I
Base.@_inline_meta
if I > length(__workgroupsize) || __workgroupsize[I] == 0
return 1
else
return __workgroupsize[I]
end
else
workgroupsize = __workgroupsize
end
let workgroupsize = workgroupsize
dynamic = Ref(false)
Expand Down
14 changes: 7 additions & 7 deletions src/pocl/backend.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,26 @@ KA.allocate(::POCLBackend, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, d

function KA.zeros(backend::POCLBackend, ::Type{T}, dims::Tuple) where {T}
arr = KA.allocate(backend, T, dims)
kernel = init_kernel(backend)
kernel = KA.init_kernel(backend)
kernel(arr, zero, T, ndrange = length(arr))
return arr
end
function KA.ones(backend::POCLBackend, ::Type{T}, dims::Tuple) where {T}
arr = KA.allocate(backend, T, dims)
kernel = init_kernel(backend)
kernel = KA.init_kernel(backend)
kernel(arr, one, T; ndrange = length(arr))
return arr
end

function KA.copyto!(backend::POCLBackend, A, B)
if get_backend(A) == get_backend(B) && get_backend(A) isa POCLBackend
if KA.get_backend(A) == KA.get_backend(B) && KA.get_backend(A) isa POCLBackend
if length(A) != length(B)
error("Arrays must match in length")
end
if Base.mightalias(A, B)
error("Arrays may not alias")
end
kernel = copy_kernel(backend)
kernel = KA.copy_kernel(backend)
kernel(A, B, ndrange = length(A))
return A
else
Expand Down Expand Up @@ -131,9 +131,9 @@ function (obj::KA.Kernel{POCLBackend})(args...; ndrange = nothing, workgroupsize
# Launch kernel
global_size = groups * items
local_size = items
kernel(ctx, args...; global_size, local_size)

cl.finish(cl.queue()) # TODO, would waiting on an event be cheaper?
event = kernel(ctx, args...; global_size, local_size)
wait(event)
cl.clReleaseEvent(event)
return nothing
end

Expand Down
20 changes: 11 additions & 9 deletions src/pocl/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@

struct OpenCLCompilerParams <: AbstractCompilerParams end
const OpenCLCompilerConfig = CompilerConfig{SPIRVCompilerTarget, OpenCLCompilerParams}
const OpenCLCompilerJob = CompilerJob{SPIRVCompilerTarget,OpenCLCompilerParams}
const OpenCLCompilerJob = CompilerJob{SPIRVCompilerTarget, OpenCLCompilerParams}

GPUCompiler.runtime_module(::CompilerJob{<:Any,OpenCLCompilerParams}) = POCL
GPUCompiler.runtime_module(::CompilerJob{<:Any, OpenCLCompilerParams}) = POCL

GPUCompiler.method_table(::OpenCLCompilerJob) = method_table

# filter out OpenCL built-ins
# TODO: eagerly lower these using the translator API
GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
invoke(GPUCompiler.isintrinsic,
Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)},
job, fn) ||
invoke(
GPUCompiler.isintrinsic,
Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)},
job, fn
) ||
in(fn, opencl_builtins)


Expand Down Expand Up @@ -42,14 +44,14 @@ function compiler_config(dev::cl.Device; kwargs...)
end
return config
end
@noinline function _compiler_config(dev; kernel=true, name=nothing, always_inline=false, kwargs...)
@noinline function _compiler_config(dev; kernel = true, name = nothing, always_inline = false, kwargs...)
supports_fp16 = "cl_khr_fp16" in dev.extensions
supports_fp64 = "cl_khr_fp64" in dev.extensions

# create GPUCompiler objects
target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, kwargs...)
params = OpenCLCompilerParams()
CompilerConfig(target, params; kernel, name, always_inline)
return CompilerConfig(target, params; kernel, name, always_inline)
end

# compile to executable machine code
Expand All @@ -59,7 +61,7 @@ function compile(@nospecialize(job::CompilerJob))
GPUCompiler.compile(:obj, job)
end

(;obj, entry=LLVM.name(meta.entry))
return (; obj, entry = LLVM.name(meta.entry))
end

# link into an executable kernel
Expand All @@ -70,5 +72,5 @@ function link(@nospecialize(job::CompilerJob), compiled)
error("Your device does not support SPIR-V, which is currently required for native execution.")
end
cl.build!(prog)
cl.Kernel(prog, compiled.entry)
return cl.Kernel(prog, compiled.entry)
end
66 changes: 36 additions & 30 deletions src/pocl/compiler/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const LAUNCH_KWARGS = [:global_size, :local_size, :queue]

macro opencl(ex...)
call = ex[end]
kwargs = map(ex[1:end-1]) do kwarg
kwargs = map(ex[1:(end - 1)]) do kwarg
if kwarg isa Symbol
:($kwarg = $kwarg)
elseif Meta.isexpr(kwarg, :(=))
Expand All @@ -31,14 +31,14 @@ macro opencl(ex...)
macro_kwargs, compiler_kwargs, call_kwargs, other_kwargs =
split_kwargs(kwargs, MACRO_KWARGS, COMPILER_KWARGS, LAUNCH_KWARGS)
if !isempty(other_kwargs)
key,val = first(other_kwargs).args
key, val = first(other_kwargs).args
throw(ArgumentError("Unsupported keyword argument '$key'"))
end

# handle keyword arguments that influence the macro's behavior
launch = true
for kwarg in macro_kwargs
key,val = kwarg.args
key, val = kwarg.args
if key == :launch
isa(val, Bool) || throw(ArgumentError("`launch` keyword argument to @opencl should be a constant value"))
launch = val::Bool
Expand All @@ -56,7 +56,8 @@ macro opencl(ex...)

# convert the arguments, call the compiler and launch the kernel
# while keeping the original arguments alive
push!(code.args,
push!(
code.args,
quote
$f_var = $f
GC.@preserve $(vars...) $f_var begin
Expand All @@ -69,13 +70,16 @@ macro opencl(ex...)
end
$kernel
end
end)
end
)

return esc(quote
let
$code
return esc(
quote
let
$code
end
end
end)
)
end


Expand All @@ -101,21 +105,23 @@ end
# Base.RefValue isn't GPU compatible, so provide a compatible alternative
# TODO: port improvements from CUDA.jl
struct CLRefValue{T} <: Ref{T}
x::T
x::T
end
Base.getindex(r::CLRefValue) = r.x
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue) = CLRefValue(adapt(to, r[]))

# broadcast sometimes passes a ref(type), resulting in a GPU-incompatible DataType box.
# avoid that by using a special kind of ref that knows about the boxed type.
struct CLRefType{T} <: Ref{DataType} end
Base.getindex(r::CLRefType{T}) where T = T
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue{<:Union{DataType,Type}}) =
Base.getindex(r::CLRefType{T}) where {T} = T
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue{<:Union{DataType, Type}}) =
CLRefType{r[]}()

# case where type is the function being broadcasted
Adapt.adapt_structure(to::KernelAdaptor,
bc::Broadcast.Broadcasted{Style, <:Any, Type{T}}) where {Style, T} =
Adapt.adapt_structure(
to::KernelAdaptor,
bc::Broadcast.Broadcasted{Style, <:Any, Type{T}}
) where {Style, T} =
Broadcast.Broadcasted{Style}((x...) -> T(x...), adapt(to, bc.args), bc.axes)

"""
Expand All @@ -131,29 +137,30 @@ register methods for the `OpenCL.KernelAdaptor` type.
The `pointers` argument is used to collect pointers to indirect SVM buffers, which need to
be registered with OpenCL before invoking the kernel.
"""
function clconvert(arg, pointers::Vector{Ptr{Cvoid}}=Ptr{Cvoid}[])
adapt(KernelAdaptor(pointers), arg)
function clconvert(arg, pointers::Vector{Ptr{Cvoid}} = Ptr{Cvoid}[])
return adapt(KernelAdaptor(pointers), arg)
end



## abstract kernel functionality

abstract type AbstractKernel{F,TT} end
abstract type AbstractKernel{F, TT} end

@inline @generated function (kernel::AbstractKernel{F,TT})(args...;
call_kwargs...) where {F,TT}
@inline @generated function (kernel::AbstractKernel{F, TT})(
args...;
call_kwargs...
) where {F, TT}
sig = Tuple{F, TT.parameters...} # Base.signature_type with a function type
args = (:(kernel.f), (:( clconvert(args[$i], svm_pointers) ) for i in 1:length(args))...)
args = (:(kernel.f), (:(clconvert(args[$i], svm_pointers)) for i in 1:length(args))...)

# filter out ghost arguments that shouldn't be passed
predicate = dt -> GPUCompiler.isghosttype(dt) || Core.Compiler.isconstType(dt)
to_pass = map(!predicate, sig.parameters)
call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]]
call_args = Union{Expr,Symbol}[x[1] for x in zip(args, to_pass) if x[2]]
call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]]
call_args = Union{Expr, Symbol}[x[1] for x in zip(args, to_pass) if x[2]]

# replace non-isbits arguments (they should be unused, or compilation would have failed)
for (i,dt) in enumerate(call_t)
for (i, dt) in enumerate(call_t)
if !isbitstype(dt)
call_t[i] = Ptr{Any}
call_args[i] = :C_NULL
Expand All @@ -163,17 +170,16 @@ abstract type AbstractKernel{F,TT} end
# finalize types
call_tt = Base.to_tuple_type(call_t)

quote
return quote
svm_pointers = Ptr{Cvoid}[]
$cl.clcall(kernel.fun, $call_tt, $(call_args...); svm_pointers, call_kwargs...)
end
end



## host-side kernels

struct HostKernel{F,TT} <: AbstractKernel{F,TT}
struct HostKernel{F, TT} <: AbstractKernel{F, TT}
f::F
fun::cl.Kernel
end
Expand All @@ -183,7 +189,7 @@ end

const clfunction_lock = ReentrantLock()

function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
function clfunction(f::F, tt::TT = Tuple{}; kwargs...) where {F, TT}
ctx = context()
dev = device()

Expand All @@ -200,10 +206,10 @@ function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
kernel = get(_kernel_instances, h, nothing)
if kernel === nothing
# create the kernel state object
kernel = HostKernel{F,tt}(f, fun)
kernel = HostKernel{F, tt}(f, fun)
_kernel_instances[h] = kernel
end
return kernel::HostKernel{F,tt}
return kernel::HostKernel{F, tt}
end
end

Expand Down
13 changes: 7 additions & 6 deletions src/pocl/compiler/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,28 @@ for method in (:code_typed, :code_warntype, :code_llvm, :code_native)
args = method == :code_typed ? (:job,) : (:io, :job)

@eval begin
function $method(io::IO, @nospecialize(func), @nospecialize(types);
kernel::Bool=false, kwargs...)
function $method(
io::IO, @nospecialize(func), @nospecialize(types);
kernel::Bool = false, kwargs...
)
compiler_kwargs, kwargs = split_kwargs_runtime(kwargs, COMPILER_KWARGS)
source = methodinstance(typeof(func), Base.to_tuple_type(types))
config = compiler_config(device(); kernel, compiler_kwargs...)
job = CompilerJob(source, config)
GPUCompiler.$method($(args...); kwargs...)
return GPUCompiler.$method($(args...); kwargs...)
end
$method(@nospecialize(func), @nospecialize(types); kwargs...) =
$method(stdout, func, types; kwargs...)
end
end



#
# @device_code_* functions
#

export @device_code_lowered, @device_code_typed, @device_code_warntype, @device_code_llvm,
@device_code_native, @device_code
@device_code_native, @device_code

# forward to GPUCompiler
@eval $(Symbol("@device_code_lowered")) = $(getfield(GPUCompiler, Symbol("@device_code_lowered")))
Expand All @@ -70,5 +71,5 @@ function return_type(@nospecialize(func), @nospecialize(tt))
job = CompilerJob(source, config)
interp = GPUCompiler.get_interpreter(job)
sig = Base.signature_type(func, tt)
Core.Compiler.return_type(interp, sig)
return Core.Compiler.return_type(interp, sig)
end
Loading

0 comments on commit 8098378

Please sign in to comment.