Skip to content

Commit

Permalink
Optimize expand to avoid one unnecessary sdiv
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed Oct 18, 2024
1 parent 419481c commit eaa3d61
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions src/nditeration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,20 @@ Base.length(range::NDRange) = length(blocks(range))
CartesianIndex(nI)
end

Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
# Map a linear workgroup index `groupidx` and a linear workitem index `idx`
# to the global index produced by the Cartesian-index method of `expand`.
#
# Each linear index is converted against its own grid (`blocks(ndrange)` and
# `workitems(ndrange)`) before the two are combined. This costs one
# linear-to-Cartesian conversion (an sdiv) per grid.
#
# A single linear lookup into the combined grid, e.g.
#     CartesianIndices(size(B) .* size(W))[(groupidx - 1) * length(W) + idx]
# is NOT equivalent: column-major order over the combined grid interleaves
# workitems of different groups as soon as N >= 2. With 2x2 blocks of 2x2
# workitems, groupidx = 1 and idx = 3 must yield (1, 2), but the combined
# lookup yields (3, 1). Using the number of blocks as the multiplier is wrong
# even for N == 1 (4 blocks of 8 workitems: groupidx = 2, idx = 1 must give 9,
# not 5).
#
# `@propagate_inbounds` forwards the caller's `@inbounds` context to the two
# indexing operations below.
Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::Integer, idx::Integer) where {N}
    return expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
end

Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::Integer) where {N}
Expand Down

0 comments on commit eaa3d61

Please sign in to comment.