-
Notifications
You must be signed in to change notification settings - Fork 70
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
512 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
module POCLKernels | ||
|
||
using ..POCL | ||
using ..POCL: @device_override, SPIRVIntrinsics, cl | ||
using ..POCL: device | ||
|
||
import KernelAbstractions as KA | ||
|
||
import StaticArrays | ||
|
||
import Adapt | ||
|
||
|
||
## Back-end Definition | ||
|
||
export POCLBackend | ||
|
||
struct POCLBackend <: KA.GPU | ||
end | ||
|
||
# KA.allocate(::POCLBackend, ::Type{T}, dims::Tuple) where T = CLArray{T}(undef, dims) | ||
# KA.zeros(::POCLBackend, ::Type{T}, dims::Tuple) where T = OpenCL.zeros(T, dims) | ||
# KA.ones(::POCLBackend, ::Type{T}, dims::Tuple) where T = OpenCL.ones(T, dims) | ||
|
||
# KA.get_backend(::CLArray) = POCLBackend() | ||
# KA.synchronize(::POCLBackend) = cl.finish(cl.queue()) | ||
# KA.supports_float64(::POCLBackend) = false # XXX: this is platform/device dependent | ||
|
||
# Adapt.adapt_storage(::POCLBackend, a::Array) = Adapt.adapt(CLArray, a) | ||
# Adapt.adapt_storage(::POCLBackend, a::CLArray) = a | ||
# Adapt.adapt_storage(::KA.CPU, a::CLArray) = convert(Array, a) | ||
|
||
|
||
## Memory Operations | ||
|
||
# function KA.copyto!(::POCLBackend, A, B) | ||
# copyto!(A, B) | ||
# # TODO: Address device to host copies in jl being synchronizing | ||
# end | ||
|
||
|
||
## Kernel Launch | ||
|
||
function KA.mkcontext(kernel::KA.Kernel{POCLBackend}, _ndrange, iterspace) | ||
KA.CompilerMetadata{KA.ndrange(kernel), KA.DynamicCheck}(_ndrange, iterspace) | ||
end | ||
function KA.mkcontext(kernel::KA.Kernel{POCLBackend}, I, _ndrange, iterspace, | ||
::Dynamic) where Dynamic | ||
KA.CompilerMetadata{KA.ndrange(kernel), Dynamic}(I, _ndrange, iterspace) | ||
end | ||
|
||
function KA.launch_config(kernel::KA.Kernel{POCLBackend}, ndrange, workgroupsize) | ||
if ndrange isa Integer | ||
ndrange = (ndrange,) | ||
end | ||
if workgroupsize isa Integer | ||
workgroupsize = (workgroupsize, ) | ||
end | ||
|
||
# partition checked that the ndrange's agreed | ||
if KA.ndrange(kernel) <: KA.StaticSize | ||
ndrange = nothing | ||
end | ||
|
||
iterspace, dynamic = if KA.workgroupsize(kernel) <: KA.DynamicSize && | ||
workgroupsize === nothing | ||
# use ndrange as preliminary workgroupsize for autotuning | ||
KA.partition(kernel, ndrange, ndrange) | ||
else | ||
KA.partition(kernel, ndrange, workgroupsize) | ||
end | ||
|
||
return ndrange, workgroupsize, iterspace, dynamic | ||
end | ||
|
||
function threads_to_workgroupsize(threads, ndrange) | ||
total = 1 | ||
return map(ndrange) do n | ||
x = min(div(threads, total), n) | ||
total *= x | ||
return x | ||
end | ||
end | ||
|
||
function (obj::KA.Kernel{POCLBackend})(args...; ndrange=nothing, workgroupsize=nothing) | ||
ndrange, workgroupsize, iterspace, dynamic = | ||
KA.launch_config(obj, ndrange, workgroupsize) | ||
|
||
# this might not be the final context, since we may tune the workgroupsize | ||
ctx = KA.mkcontext(obj, ndrange, iterspace) | ||
kernel = @opencl launch=false obj.f(ctx, args...) | ||
|
||
# figure out the optimal workgroupsize automatically | ||
if KA.workgroupsize(obj) <: KA.DynamicSize && workgroupsize === nothing | ||
wg_info = cl.work_group_info(kernel.fun, device()) | ||
wg_size_nd = threads_to_workgroupsize(wg_info.size, ndrange) | ||
iterspace, dynamic = KA.partition(obj, ndrange, wg_size_nd) | ||
ctx = KA.mkcontext(obj, ndrange, iterspace) | ||
end | ||
|
||
groups = length(KA.blocks(iterspace)) | ||
items = length(KA.workitems(iterspace)) | ||
|
||
if groups == 0 | ||
return nothing | ||
end | ||
|
||
# Launch kernel | ||
global_size = groups * items | ||
local_size = items | ||
kernel(ctx, args...; global_size, local_size) | ||
|
||
return nothing | ||
end | ||
|
||
|
||
## Indexing Functions | ||
|
||
@device_override @inline function KA.__index_Local_Linear(ctx) | ||
return get_local_id(1) | ||
end | ||
|
||
@device_override @inline function KA.__index_Group_Linear(ctx) | ||
return get_group_id(1) | ||
end | ||
|
||
@device_override @inline function KA.__index_Global_Linear(ctx) | ||
return get_global_id(1) | ||
end | ||
|
||
@device_override @inline function KA.__index_Local_Cartesian(ctx) | ||
@inbounds KA.workitems(KA.__iterspace(ctx))[get_local_id(1)] | ||
end | ||
|
||
@device_override @inline function KA.__index_Group_Cartesian(ctx) | ||
@inbounds KA.blocks(KA.__iterspace(ctx))[get_group_id(1)] | ||
end | ||
|
||
@device_override @inline function KA.__index_Global_Cartesian(ctx) | ||
return @inbounds KA.expand(KA.__iterspace(ctx), get_group_id(1), get_local_id(1)) | ||
end | ||
|
||
@device_override @inline function KA.__validindex(ctx) | ||
if KA.__dynamic_checkbounds(ctx) | ||
I = @inbounds KA.expand(KA.__iterspace(ctx), get_group_id(1), get_local_id(1)) | ||
return I in KA.__ndrange(ctx) | ||
else | ||
return true | ||
end | ||
end | ||
|
||
|
||
## Shared and Scratch Memory | ||
|
||
@device_override @inline function KA.SharedMemory(::Type{T}, ::Val{Dims}, ::Val{Id}) where {T, Dims, Id} | ||
ptr = SPIRVIntrinsics.emit_localmemory(T, Val(prod(Dims))) | ||
CLDeviceArray(Dims, ptr) | ||
end | ||
|
||
@device_override @inline function KA.Scratchpad(ctx, ::Type{T}, ::Val{Dims}) where {T, Dims} | ||
StaticArrays.MArray{KA.__size(Dims), T}(undef) | ||
end | ||
|
||
|
||
## Synchronization and Printing | ||
|
||
@device_override @inline function KA.__synchronize() | ||
barrier() | ||
end | ||
|
||
@device_override @inline function KA.__print(args...) | ||
SPIRVIntrinsics._print(args...) | ||
end | ||
|
||
|
||
## Other | ||
|
||
KA.argconvert(::KA.Kernel{POCLBackend}, arg) = clconvert(arg) | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.