Skip to content

Commit

Permalink
API changes
Browse files — browse the repository at this point in the history
  • Loading branch information
pxl-th committed Feb 3, 2025
1 parent 0043ca5 commit 680d21d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/ROCKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ end

# Reduction.

# Method on the backend type: answers `true` for `ROCBackend`.
KA.supports_warp_reduction(::ROCBackend) = true
# Argument-free variant registered through `@device_override`; always returns `true`.
# NOTE(review): the hunk header reports 1 addition & 1 deletion, so presumably
# only one of these two definitions exists in the committed file — confirm.
@device_override @inline KA.__supports_warp_reduction() = true

@device_override @inline function KA.__shfl_down(val, offset)
AMDGPU.Device.shfl_down(val, offset)
Expand Down
18 changes: 18 additions & 0 deletions t.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using AMDGPU
using KernelAbstractions

# Group-reduction smoke kernel.
#
# Each work-item picks one input value — `x[gid]`, or `neutral` when its global
# index lies past the end of `x` — and hands it to `@groupreduce` together with
# `op` and `neutral`. The work-item whose global index is 1 stores the value
# returned by `@groupreduce` into `y[1]`.
@kernel cpu=false function groupreduce_1!(y, x, op, neutral)
    gid = @index(Global)

    # Out-of-range work-items still take part in the reduction below; they
    # simply contribute `neutral` instead of reading from `x`.
    contribution = if gid > length(x)
        neutral
    else
        x[gid]
    end

    # Invoked unconditionally, i.e. by every work-item, exactly as before.
    reduced = @groupreduce(op, contribution, neutral)

    # Only the first global work-item writes the result back.
    if gid == 1
        y[1] = reduced
    end
end

# Smoke test driver: launches `groupreduce_1!` on the ROC backend over a
# 256-element `Float32` array of ones, reducing with `+` and a neutral element
# of `0f0`, then prints the one-element output array `y`.
function main()
    # The same value is used for the input length, the workgroup size passed
    # when instantiating the kernel, and the launch `ndrange`.
    n = 256

    x = ROCArray(ones(Float32, n))
    y = ROCArray(zeros(Float32, 1))

    kernel! = groupreduce_1!(ROCBackend(), n)
    kernel!(y, x, +, 0f0; ndrange=n)

    @show y
    return
end
# Run the smoke test as soon as this script is loaded.
main()

0 comments on commit 680d21d

Please sign in to comment.