Skip to content

Commit

Permalink
yank CPU backend
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed Feb 3, 2025
1 parent a89c07c commit 429ef74
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 155 deletions.
42 changes: 7 additions & 35 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -492,35 +492,6 @@ Abstract type for all GPU based KernelAbstractions backends.
"""
abstract type GPU <: Backend end

"""
CPU(; static=false)
Instantiate a CPU (multi-threaded) backend.
## Options:
- `static`: Uses a static thread assignment, this can be beneficial for NUMA aware code.
Defaults to false.
!!! note
`CPU` will be aliased to `POCLBackend()` on KernelAbstractions v1.0
"""
struct CPU <: Backend
static::Bool
CPU(; static::Bool = false) = new(static)
end

"""
isgpu(::Backend)::Bool
Returns true for all [`GPU`](@ref) backends.
!!! note
`isgpu` will be removed in KernelAbstractions v1.0
"""
isgpu(::GPU) = true
isgpu(::CPU) = false


"""
get_backend(A::AbstractArray)::Backend
Expand All @@ -538,7 +509,6 @@ get_backend(A::AbstractArray) = get_backend(parent(A))
# Define:
# adapt_storage(::Backend, a::Array) = adapt(BackendArray, a)
# adapt_storage(::Backend, a::BackendArray) = a
function Adapt.adapt_storage(::CPU, a::Array)
    # An `Array` is handed back unchanged for the `CPU` backend; no conversion happens.
    return a
end

"""
allocate(::Backend, Type, dims...)::AbstractArray
Expand Down Expand Up @@ -758,7 +728,7 @@ Partition a kernel for the given ndrange and workgroupsize.
return iterspace, dynamic
end

function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
"""
    construct(backend, ::S, ::NDRange, xpu_name)

Build the `Kernel` object for a [`GPU`](@ref) backend. The backend type, the two `_Size`
types `S` and `NDRange`, and the type of `xpu_name` become the type parameters of the
returned `Kernel`; `backend` and `xpu_name` are passed to its constructor.
"""
function construct(
        backend::Backend, ::S, ::NDRange, xpu_name::XPUName,
    ) where {Backend <: GPU, S <: _Size, NDRange <: _Size, XPUName}
    KernelType = Kernel{Backend, S, NDRange, XPUName}
    return KernelType(backend, xpu_name)
end

Expand All @@ -775,6 +745,10 @@ include("compiler.jl")
# Generic functions declared here without any methods.
# NOTE(review): methods are presumably supplied by backend implementations — confirm.
function __workitems_iterspace end
function __validindex end

# for reflection
# Declared (method-less) ahead of `include("macros.jl")`.
function mkcontext end
function launch_config end

include("macros.jl")

###
Expand Down Expand Up @@ -844,14 +818,12 @@ end
end

# CPU backend

include("cpu.jl")

# Future-CPU backend
# Include the POCL submodule, bring it into scope, and export `POCLBackend`.
include("pocl/pocl.jl")
using .POCL
export POCLBackend

# `CPU` is an alias for `POCLBackend`.
const CPU = POCLBackend

# precompile
PrecompileTools.@compile_workload begin
@eval begin
Expand Down
124 changes: 4 additions & 120 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,6 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
constargs[i] = false
end

# create two functions
# 1. GPU function
# 2. CPU function with work-group loops inserted
#
# Without the deepcopy we might accidentally modify expr shared between CPU and GPU
cpu_name = Symbol(:cpu_, name)
if generate_cpu
def_cpu = deepcopy(def)
def_cpu[:name] = cpu_name
transform_cpu!(def_cpu, constargs, force_inbounds)
cpu_function = combinedef(def_cpu)
end

def_gpu = deepcopy(def)
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
transform_gpu!(def_gpu, constargs, force_inbounds)
Expand All @@ -56,24 +43,12 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
$name(dev, size) = $name(dev, $StaticSize(size), $DynamicSize())
$name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range))
function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S <: $_Size, NDRange <: $_Size}
if $isgpu(dev)
return $construct(dev, sz, range, $gpu_name)
else
if $generate_cpu
return $construct(dev, sz, range, $cpu_name)
else
error("This kernel is unavailable for backend CPU")
end
end
return $construct(dev, sz, range, $gpu_name)
end
end
end

if generate_cpu
return Expr(:block, esc(cpu_function), esc(gpu_function), esc(constructors))
else
return Expr(:block, esc(gpu_function), esc(constructors))
end
return Expr(:block, esc(gpu_function), esc(constructors))
end

# The easy case, transform the function for GPU execution
Expand All @@ -94,42 +69,7 @@ function transform_gpu!(def, constargs, force_inbounds)
if force_inbounds
push!(new_stmts, Expr(:inbounds, true))
end
append!(new_stmts, split(emit_gpu, body.args))
if force_inbounds
push!(new_stmts, Expr(:inbounds, :pop))
end
push!(new_stmts, Expr(:popaliasscope))
push!(new_stmts, :(return nothing))
def[:body] = Expr(
:let,
Expr(:block, let_constargs...),
Expr(:block, new_stmts...),
)
return
end

# The hard case, transform the function for CPU execution
# - mark constant arguments by applying `constify`.
# - insert aliasscope markers
# - insert implied loop bodies
# - handle indices
# - hoist workgroup definitions
# - hoist uniform variables
function transform_cpu!(def, constargs, force_inbounds)
let_constargs = Expr[]
for (i, arg) in enumerate(def[:args])
if constargs[i]
push!(let_constargs, :($arg = $constify($arg)))
end
end
pushfirst!(def[:args], :__ctx__)
new_stmts = Expr[]
body = MacroTools.flatten(def[:body])
push!(new_stmts, Expr(:aliasscope))
if force_inbounds
push!(new_stmts, Expr(:inbounds, true))
end
append!(new_stmts, split(emit_cpu, body.args))
append!(new_stmts, split(body.args))
if force_inbounds
push!(new_stmts, Expr(:inbounds, :pop))
end
Expand Down Expand Up @@ -169,7 +109,6 @@ end

# TODO proper handling of LineInfo
function split(
emit,
stmts,
indicies = Any[], private = Set{Symbol}(),
)
Expand Down Expand Up @@ -249,62 +188,7 @@ function split(
return new_stmts
end

# Build the CPU form of one `loop` section of a kernel body: the section's allocations,
# tuple-backed storage for its private allocations, and an explicit `for` loop over
# `__workitems_iterspace(__ctx__)` wrapped around the section's statements.
function emit_cpu(loop)
    workitem = gensym(:I)
    for index_stmt in loop.indicies
        # every index statement has the form `i = @index(...)`; append the loop
        # variable to the argument list of its right-hand side
        @assert index_stmt.head === :(=)
        push!(index_stmt.args[2].args, workitem)
    end

    emitted = Any[]
    append!(emitted, loop.allocations)

    # each private allocation `lhs = rhs` is turned into
    # `lhs = ntuple(_ -> rhs, length(__workitems_iterspace(__ctx__)))`
    nitems = gensym(:N)
    push!(emitted, :($nitems = length($__workitems_iterspace(__ctx__))))
    for stmt in loop.private_allocations
        @capture(stmt, lhs_ = rhs_) || error("@private $stmt not an assignment")
        push!(emitted, :($lhs = ntuple(_ -> $rhs, $nitems)))
    end

    # a section holding nothing but line-number nodes (or nothing at all) gets no loop
    if any(s -> !(s isa LineNumberNode), loop.stmts)
        # redirect every use of a private variable to the slot selected by
        # `__index_Local_Linear`; assigning to a private variable is an error
        body = postwalk(Expr(:block, loop.stmts...)) do node
            if @capture(node, lhs_ = rhs_)
                lhs in loop.private && error("Can't assign to variables marked private")
            elseif @capture(node, A_[i__])
                A in loop.private &&
                    return :($A[$__index_Local_Linear(__ctx__, $(workitem))][$(i...)])
            elseif node isa Symbol
                node in loop.private &&
                    return :($node[$__index_Local_Linear(__ctx__, $(workitem))])
            end
            return node
        end
        workitem_loop = quote
            for $workitem in $__workitems_iterspace(__ctx__)
                $__validindex(__ctx__, $workitem) || continue
                $(loop.indicies...)
                $(unblock(body))
            end
        end
        push!(emitted, workitem_loop)
    end

    return unblock(Expr(:block, emitted...))
end

function emit_gpu(loop)
function emit(loop)
stmts = Any[]
append!(stmts, loop.allocations)
for stmt in loop.private_allocations
Expand Down

0 comments on commit 429ef74

Please sign in to comment.