diff --git a/.gitignore b/.gitignore index 42f07ea..2753f7e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ -*.jl.cov -*.jl.mem +.DS_Store +/Manifest.toml +/test/Manifest.toml diff --git a/.travis.yml b/.travis.yml index 1ce59d4..d63dcea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,18 @@ +## Documentation: http://docs.travis-ci.com/user/languages/julia/ language: julia os: + - freebsd - linux - osx + - windows julia: - - 0.4 - - 0.5 + - 1.3 + - 1.4 - nightly notifications: email: false -script: - - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("StructsOfArrays"); Pkg.test("StructsOfArrays"; coverage=true)' -after_success: - - julia -e 'cd(Pkg.dir("StructsOfArrays")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' +git: + depth: 99999999 +jobs: + allow_failures: + - julia: nightly diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9b12cdf --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015: Simon Kornblith. +Copyright (c) 2018-2019: Valentin Churavy, and other contributors +Copyright (c) 2016 Simon Danisch +Copyright (c) 2018 JuliaGPU developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index a3451f8..0000000 --- a/LICENSE.md +++ /dev/null @@ -1,22 +0,0 @@ -The StructsOfArrays.jl package is licensed under the MIT "Expat" License: - -> Copyright (c) 2015: Simon Kornblith. -> -> Permission is hereby granted, free of charge, to any person obtaining -> a copy of this software and associated documentation files (the -> "Software"), to deal in the Software without restriction, including -> without limitation the rights to use, copy, modify, merge, publish, -> distribute, sublicense, and/or sell copies of the Software, and to -> permit persons to whom the Software is furnished to do so, subject to -> the following conditions: -> -> The above copyright notice and this permission notice shall be -> included in all copies or substantial portions of the Software. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..fa87bf1 --- /dev/null +++ b/Project.toml @@ -0,0 +1,20 @@ +name = "StructsOfArrays" +uuid = "ad108753-388a-4a68-a626-efa2fa9e4278" +authors = ["Simon Kornblith ", "Valentin Churavy ", "Other Contributors"] +version = "0.0.3" + +[deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +CUDAapi = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3" +CUDAnative = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17" +CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[compat] +Adapt = "~1" +CUDAapi = "~4" +CUDAnative = "~3" +CuArrays = "~2" +GPUArrays = "~3" +julia = "≥ 1.3" diff --git a/README.md b/README.md index 1f2d721..fd1360c 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,5 @@ # StructsOfArrays -[![Build Status](https://travis-ci.org/simonster/StructsOfArrays.jl.svg?branch=master)](https://travis-ci.org/simonster/StructsOfArrays.jl) -[![codecov.io](http://codecov.io/github/simonster/StructsOfArrays.jl/coverage.svg?branch=master)](http://codecov.io/github/simonster/StructsOfArrays.jl?branch=master) - A traditional Julia array of immutable objects is an array of structures. Fields of a given object are stored adjacent in memory. However, this often inhibits SIMD optimizations. StructsOfArrays implements the classic structure of arrays @@ -13,66 +10,55 @@ contains padding. It is especially useful for arrays of complex numbers. ## Usage -You can construct a StructOfArrays directly: +You can construct a `StructOfArrays` directly with: ```julia using StructsOfArrays -A = StructOfArrays(Complex128, 10, 10) +A = StructOfArrays(ComplexF64, Array, 10, 10) ``` -or by converting an AbstractArray: +or by converting an `AbstractArray`: ```julia -A = convert(StructOfArrays, complex(randn(10), randn(10))) +A = convert(StructOfArrays, rand(ComplexF64, 10, 10)) ``` -Beyond that, there's not much to say. Assignment and indexing works as with -other AbstractArray types. Indexing a `StructOfArrays{T}` yields an object of -type `T`, and you can assign objects of type `T` to a given index. The "magic" -is in the optimizations that the alternative memory layout allows LLVM to -perform. - -While you can create a StructOfArrays of non-`isbits` immutables, this is -probably slower than an ordinary array, since a new object must be heap -allocated every time the StructOfArrays is indexed. In practice, StructsOfArrays -works best with `isbits` immutables such as `Complex{T}`. - -## Benchmark +A `StructOfArrays` can have different storage arrays. You can construct a +`CuArray`-based `StructOfArrays` directly with: ```julia -using StructsOfArrays -regular = complex(randn(1000000), randn(1000000)) -soa = convert(StructOfArrays, regular) +using CuArrays +A = StructOfArrays(ComplexF64, CuArray, 10, 10) +``` -function f(x, a) - s = zero(eltype(x)) - @simd for i in 1:length(x) - @inbounds s += x[i] * a - end - s -end +or by converting an existing `StructOfArrays` using `replace_storage`: -using Benchmarks -@benchmark f(regular, 0.5+0.5im) -@benchmark f(soa, 0.5+0.5im) +```julia +A = replace_storage(CuArray, convert(StructOfArrays, rand(ComplexF64, 10, 10))) ``` -The time for `f(regular, 0.5+0.5im)` is: +This array can be used either in a kernel: -``` -Average elapsed time: 1.244 ms - 95% CI for average: [1.183 ms, 1.305 ms] -Minimum elapsed time: 1.177 ms +```julia +using CUDAnative +function kernel!(A) + i = (blockIdx().x-1)*blockDim().x + threadIdx().x + if i <= length(A) + A[i] += A[i] + end + return nothing +end +threads = 256 +blocks = cld(length(A), threads) +@cuda threads=threads blocks=blocks kernel!(A) ``` -and for `f(soa, 0.5+0.5im)`: +or via broadcasting: -``` -Average elapsed time: 832.198 μs - 95% CI for average: [726.349 μs, 938.048 μs] -Minimum elapsed time: 713.730 μs +```julia +A .+= A ``` -In this case, StructsOfArrays are about 1.5x faster than ordinary arrays. -Inspection of generated code demonstrates that `f(soa, a)` uses SIMD -instructions, while `f(regular, a)` does not. +Assignment and indexing works as with other `AbstractArray` types. Indexing a +`StructOfArrays{T}` yields an object of type `T`, and you can assign objects of +type `T` to a given index. diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index 33ed633..0000000 --- a/REQUIRE +++ /dev/null @@ -1,2 +0,0 @@ -julia 0.4 -Compat 0.19 diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 979983a..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,37 +0,0 @@ -environment: - matrix: - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" - -branches: - only: - - master - - /release-.*/ - -notifications: - - provider: Email - on_build_success: false - on_build_failure: false - on_build_status_changed: false - -install: - - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" -# Download most recent Julia Windows binary - - ps: (new-object net.webclient).DownloadFile( - $env:JULIA_URL, - "C:\projects\julia-binary.exe") -# Run installer silently, output to C:\projects\julia - - C:\projects\julia-binary.exe /S /D=C:\projects\julia - -build_script: -# Need to convert from shallow to complete for Pkg.clone to work - - IF EXIST .git\shallow (git fetch --unshallow) - - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"StructsOfArrays\"); Pkg.build(\"StructsOfArrays\")" - -test_script: - - C:\projects\julia\bin\julia --check-bounds=yes -e "Pkg.test(\"StructsOfArrays\")" diff --git a/src/StructsOfArrays.jl b/src/StructsOfArrays.jl index eb4226a..9b564c1 100644 --- a/src/StructsOfArrays.jl +++ b/src/StructsOfArrays.jl @@ -1,16 +1,56 @@ module StructsOfArrays -using Compat - export StructOfArrays +export replace_storage + +using Base.Broadcast +import Base.Broadcast: BroadcastStyle, Broadcasted, AbstractArrayStyle + +using Adapt +using LinearAlgebra + +""" + StructOfArrays{T, N, AT, U} <: AbstractArray{T,N} + +Transparent Struct of Arrays transformation. +Given an array like `Array{ComplexF64, 2}`, we want to represent it as: + +``` +struct ComplexArray{N} + re::Array{Float64} + im::Array{Float64} +end -immutable StructOfArrays{T,N,U<:Tuple} <: AbstractArray{T,N} +getindex(A::ComplexArray, i) = ComplexF64(A.re[i], A.im[i]) +``` + +Since the memory-access patterns of `ComplexArray` are much more SIMD friendly than +`Array{ComplexF64}`. This version of `StructOfArrays` also has a notion of underlying +storage and works both on CPU arrays and CuArrays. In most cases single field access of +the struct should be fast as well since Julia and LLVM both do DCE. +""" +struct StructOfArrays{T,N, AT<:AbstractArray{T, N},U<:Tuple} <: AbstractArray{T,N} arrays::U end +# Storage types of StructOfArrays need to implement this +_type_with_eltype(::Type{<:Array}, T, N) = Array{T, N} +_type(::Type{<:Array}) = Array + +function replace_storage(f, x::StructOfArrays{T, N}) where {T, N} + arrays = map(f, x.arrays) + TT = typeof(arrays) + AT = _type_with_eltype(eltype(arrays), T, N) + StructOfArrays{T, N, AT, TT}(arrays) +end + +Adapt.adapt_structure(to, x::StructOfArrays{T, N}) where {T, N} = replace_storage(y -> adapt(to, y), x) + function gather_eltypes(T, visited = Set{Type}()) - (!isleaftype(T) || T.mutable) && throw(ArgumentError("can only create an StructOfArrays of leaf type immutables")) - isempty(T.types) && throw(ArgumentError("cannot create an StructOfArrays of an empty or bitstype")) + (!isconcretetype(T) || T.mutable) && throw(ArgumentError("can only create an StructOfArrays of leaf type immutables")) + if isempty(T.types) + return Type[T] + end types = Type[] push!(visited, T) for S in T.types @@ -25,59 +65,68 @@ function gather_eltypes(T, visited = Set{Type}()) types end -@generated function StructOfArrays{T}(::Type{T}, dims::Integer...) - N = length(dims) - types = gather_eltypes(T) - arrtuple = Tuple{[Array{S,N} for S in types]...} - :(StructOfArrays{T,$N,$arrtuple}(($([:(Array{$(S)}(dims)) for S in types]...),))) -end -StructOfArrays(T::Type, dims::Tuple{Vararg{Integer}}) = StructOfArrays(T, dims...) - -@compat Base.IndexStyle{T<:StructOfArrays}(::Type{T}) = IndexLinear() +@generated function StructOfArrays(::Type{T}, ::Type{ArrayT}, dims::Integer...) where {T, ArrayT<:AbstractArray} + N = length(dims) + pArrayT = _type_with_eltype(ArrayT, T, N) + types = gather_eltypes(T) + arrtypes = map(t->_type_with_eltype(ArrayT, t, N), types) + arrtuple = Tuple{arrtypes...} -@generated function Base.similar{T}(A::StructOfArrays, ::Type{T}, dims::Dims) - if isbits(T) && length(T.types) > 1 - :(StructOfArrays(T, dims)) - else - :(Array{T}(dims)) - end + :(StructOfArrays{T,$N,$(pArrayT),$arrtuple}(($([:($(arrtypes[i])(undef,dims)) for i = 1:length(types)]...),))) end +StructOfArrays(T::Type, AT::Type, dims::Tuple{Vararg{Integer}}) = StructOfArrays(T, AT, dims...) -Base.convert{T,S,N}(::Type{StructOfArrays{T,N}}, A::AbstractArray{S,N}) = - copy!(StructOfArrays(T, size(A)), A) -Base.convert{T,S,N}(::Type{StructOfArrays{T}}, A::AbstractArray{S,N}) = - convert(StructOfArrays{T,N}, A) -Base.convert{T,N}(::Type{StructOfArrays}, A::AbstractArray{T,N}) = - convert(StructOfArrays{T,N}, A) +Base.size(A::StructOfArrays) = size(@inbounds(A.arrays[1])) +Base.size(A::StructOfArrays, i) = size(@inbounds(A.arrays[1]), i) + +Base.show(io::IO, a::StructOfArrays{T,N,A}) where {T,N,A} = print(io, "$(length(a))-element SoA{$T,$N,$A}") -Base.size(A::StructOfArrays) = size(A.arrays[1]) -Base.size(A::StructOfArrays, d) = size(A.arrays[1], d) +Base.print_array(::IO, ::StructOfArrays) = nothing -function generate_getindex(T, arraynum) +function generate_getindex(T, getindex, arraynum) members = Expr[] for S in T.types - sizeof(S) == 0 && push!(members, :($(S()))) - if isempty(S.types) - push!(members, :(A.arrays[$arraynum][i...])) + if sizeof(S) == 0 + if S <: Tuple + exprs2 = Expr[] + for S2 in S.parameters + push!(exprs2, :($(S2)())) + end + push!(members, Expr(:tuple, exprs2)) + else + push!(members, :($(S)())) + end + elseif isempty(S.types) + push!(members, :($(getindex)(A.arrays[$arraynum], i...))) arraynum += 1 else - member, arraynum = generate_getindex(S, arraynum) + member, arraynum = generate_getindex(S, getindex, arraynum) push!(members, member) end end Expr(:new, T, members...), arraynum end -@generated function Base.getindex{T}(A::StructOfArrays{T}, i::Integer...) - strct, _ = generate_getindex(T, 1) - Expr(:block, Expr(:meta, :inline), strct) +@generated function Base.getindex(A::StructOfArrays{T}, i::Integer...) where T + exprs = Any[Expr(:meta, :inline), Expr(:meta, :propagate_inbounds)] + if isempty(T.types) + push!(exprs, :(return A.arrays[1][i...])) + else + strct, _ = generate_getindex(T, Base.getindex, 1) + push!(exprs, strct) + end + quote + $(exprs...) + end end function generate_setindex(T, x, arraynum) s = gensym() exprs = Expr[:($s = $x)] for (el,S) in enumerate(T.types) - sizeof(S) == 0 && push!(members, :($(S()))) + if sizeof(S) == 0 + continue + end if isempty(S.types) push!(exprs, :(A.arrays[$arraynum][i...] = getfield($s, $el))) arraynum += 1 @@ -89,13 +138,73 @@ function generate_setindex(T, x, arraynum) exprs, arraynum end -@generated function Base.setindex!{T}(A::StructOfArrays{T}, x, i::Integer...) - exprs = Expr(:block, generate_setindex(T, :x, 1)[1]...) +@generated function Base.setindex!(A::StructOfArrays{T}, x, i::Integer...) where {T} + exprs = Any[Expr(:meta, :inline), Expr(:meta, :propagate_inbounds)] + push!(exprs, :(v = convert(T, x))) + if isempty(T.types) + push!(exprs, :(A.arrays[1][i...] = v)) + else + append!(exprs, generate_setindex(T, :v, 1)[1]) + end + push!(exprs, :(return x)) quote - $(Expr(:meta, :inline)) - v = convert(T, x) - $exprs - x + $(exprs...) end end -end # module + +Base.IndexStyle(::Type{<:StructOfArrays{T,N,A}}) where {T,N,A<:AbstractArray} = Base.IndexStyle(A) +BroadcastStyle(::Type{<:StructOfArrays{T,N,A}}) where {T,N,A<:AbstractArray} = Broadcast.ArrayStyle{StructOfArrays{T,N,A}}() + +function Base.similar(A::StructOfArrays{T1,N,AT}, ::Type{T}, dims::Dims) where {T1,N,AT,T} + StructOfArrays(T, AT, dims) +end + +function Base.similar(bc::Broadcasted{Broadcast.ArrayStyle{StructOfArrays{T1,N,AT}}}, ::Type{T}) where {T1,N,AT,T} + StructOfArrays(T, AT, Base.to_shape(axes(bc))) +end + +function Base.convert(::Type{<:StructOfArrays{T,N,AT}}, A::StructOfArrays{T,N}) where {T,N,AT<:AbstractArray{T,N}} + if AT <: StructOfArrays + error("Can't embed a SoA array in a SoA array") + end + arrays = map(a->convert(_type(AT), a), A.arrays) + tt = typeof(arrays) + StructOfArrays{T, N, AT, tt}(arrays) +end + +function Base.convert(::Type{<:StructOfArrays{T,N,AT}}, A::StructOfArrays{S,N,BT}) where {T,N,AT,S,BT} + BT != AT && AT<:CuArray && error("Can't convert from $BT to $AT with different eltypes") + copyto!(StructOfArrays(T, _type_with_eltype(AT, T, N), size(A)), A) +end + +function Base.convert(::Type{<:StructOfArrays{T,N}}, A::AbstractArray) where {T,N} + @assert !(A isa StructOfArrays) + copyto!(StructOfArrays(T, _type_with_eltype(typeof(A), T, N), size(A)), A) +end + +Base.convert(::Type{<:StructOfArrays{T}}, A::AbstractArray{S,N}) where {T,S,N} = + convert(StructOfArrays{T,N}, A) + +Base.convert(::Type{<:StructOfArrays}, A::AbstractArray{T,N}) where {T,N} = + convert(StructOfArrays{T,N}, A) + +function Base.one(x::StructOfArrays{T}) where {T} + # FIXME: probably not the best way, ie. don't use Base._one + convert(StructOfArrays, Base._one(one(T), x)) +end + +function Base.Array{T,N}(src::StructOfArrays{T,N}) where {T,N} + A = convert(StructOfArrays{T,N,Array{T,N}}, src) + dst = Array{T,N}(uninitialized, size(src)) + copyto!(dst, A) + return dst +end +Array(src::StructOfArrays{T,N}) where {T,N} = Array{T,N}(src) + +include("storage/gpuarrays.jl") +using CUDAapi +if has_cuda_gpu() + include("storage/cuda.jl") +end + +end diff --git a/src/storage/cuda.jl b/src/storage/cuda.jl new file mode 100644 index 0000000..50443f7 --- /dev/null +++ b/src/storage/cuda.jl @@ -0,0 +1,38 @@ +import CuArrays +import CuArrays: CuArray +_type_with_eltype(::Type{<:CuArray}, T, N) = CuArray{T, N, Nothing} +_type(::Type{<:CuArray}) = CuArray + +import CUDAnative +import CUDAnative: CuDeviceArray +_type_with_eltype(::Type{<:CuDeviceArray}, T, N) = CuDeviceArray{T, N} +_type(::Type{<:CuDeviceArray}) = CuDeviceArray + +const AbstractStructOfCuArrays = StructOfArrays{T,N,<:CuArray} where {T, N} + +@eval const DestStructOfCuArrays = + Union{AbstractStructOfCuArrays, + $((:($W where {AT <: AbstractStructOfCuArrays}) for (W, _) in Adapt.wrappers)...), + Base.RefValue{<:AbstractStructOfCuArrays} } + +function broadcast_kernel!(dest, bc′) + i = (CUDAnative.blockIdx().x-1)*CUDAnative.blockDim().x + CUDAnative.threadIdx().x + if i <= length(dest) + let I = CartesianIndex(CartesianIndices(dest)[i]) + @inbounds dest[I] = bc′[I] + end + end + return nothing +end + +@inline function Base.copyto!(dest::DestStructOfCuArrays, bc::Broadcasted{Nothing}) + axes(dest) == axes(bc) || Broadcast.throwdm(axes(dest), axes(bc)) + bc′ = Broadcast.preprocess(dest, bc) + + threads = 256 + blocks = cld(length(dest), threads) + + CUDAnative.@cuda threads=threads blocks=blocks broadcast_kernel!(dest, bc′) + + return dest +end diff --git a/src/storage/gpuarrays.jl b/src/storage/gpuarrays.jl new file mode 100644 index 0000000..bb6748e --- /dev/null +++ b/src/storage/gpuarrays.jl @@ -0,0 +1,53 @@ +using GPUArrays + +const AbstractStructOfGPUArrays = StructOfArrays{T,N,<:AbstractGPUArray} where {T, N} +const AbstractStructOfGPUArraysStyle{N} = Broadcast.ArrayStyle{StructOfArrays{T,N,<:AbstractGPUArray}} where T + +# Wrapper types otherwise forget that they are StructOfArrays +for (W, ctor) in Adapt.wrappers + @eval begin + BroadcastStyle(::Type{<:$W}) where {AT<:StructOfArrays{T,N,A} where {T,N,A}} = BroadcastStyle(AT) + backend(::Type{<:$W}) where {AT<:StructOfArrays{T,N,A} where {T,N,A}} = backend(AT) + end +end + +# This Union is a hack. Ideally Base would have a +# Transpose <: WrappedArray <: AbstractArray +# and we could define our methods in terms of +# Union{AbstractStructOfGPUArrays, WrappedArray{<:Any, <:AbstractStructOfGPUArrays}} +@eval const DestStructOfGPUArrays = + Union{AbstractStructOfGPUArrays, + $((:($W where {AT <: AbstractStructOfGPUArrays}) for (W, _) in Adapt.wrappers)...), + Base.RefValue{<:AbstractStructOfGPUArrays} } + +# Ref is special: it's not a real wrapper, so not part of Adapt, +# but it is commonly used to bypass broadcasting of an argument +# so we need to preserve its dimensionless properties. +BroadcastStyle(::Type{Base.RefValue{AT}}) where {AT<:AbstractStructOfGPUArrays} = typeof(BroadcastStyle(AT))(Val(0)) +backend(::Type{Base.RefValue{AT}}) where {AT<:AbstractStructOfGPUArrays} = backend(AT) +# but make sure we don't dispatch to the optimized copy method that directly indexes +function Broadcast.copy(bc::Broadcasted{<:Broadcast.ArrayStyle{StructOfArrays{T,0,<:AbstractGPUArray}}}) where {T} + ElType = Broadcast.combine_eltypes(bc.f, bc.args) + isbitstype(ElType) || error("Cannot broadcast function returning non-isbits $ElType.") + dest = copyto!(similar(bc, ElType), bc) + return @allowscalar dest[CartesianIndex()] # 0D broadcast needs to unwrap results +end + +# Base defines this method as a performance optimization, but we don't know how to do +# `fill!` in general for all `DestStructOfGPUArrays` so we just go straight to the fallback +@inline Base.copyto!(dest::DestStructOfGPUArrays, bc::Broadcasted{<:AbstractArrayStyle{0}}) = + copyto!(dest, convert(Broadcasted{Nothing}, bc)) + +## map +allequal(x) = true +allequal(x, y, z...) = x == y && allequal(y, z...) + +function Base.map!(f, y::DestStructOfGPUArrays, xs::AbstractArray...) + @assert allequal(size.((y, xs...))...) + return y .= f.(xs...) +end + +function Base.map(f, y::DestStructOfGPUArrays, xs::AbstractArray...) + @assert allequal(size.((y, xs...))...) + return f.(y, xs...) +end diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..c97f942 --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,9 @@ +[deps] +CUDAapi = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3" +CUDAnative = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17" +CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +StructsOfArrays = "ad108753-388a-4a68-a626-efa2fa9e4278" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/REQUIRE b/test/REQUIRE deleted file mode 100644 index fbb6170..0000000 --- a/test/REQUIRE +++ /dev/null @@ -1 +0,0 @@ -Compat 0.9.1 diff --git a/test/cuda.jl b/test/cuda.jl new file mode 100644 index 0000000..7304735 --- /dev/null +++ b/test/cuda.jl @@ -0,0 +1,59 @@ +using CuArrays +using CuArrays: @allowscalar +using CUDAnative +using LinearAlgebra +CuArrays.allowscalar(false) + +@testset "basic" begin + type = SArray{Tuple{3},Float64,1,3} + N = 1000 + data = rand(MersenneTwister(0), type, N) + + a = CuArray(data) + b = StructOfArrays(type, CuArray, N) + c = similar(a) + d = replace_storage(CuArray, convert(StructOfArrays, data)) + + @test eltype(d) == type + @test eltype(eltype(d.arrays)) == Float64 + @test @allowscalar a[3] === d[3] + + function kernel!(dest, src) + i = (blockIdx().x-1)*blockDim().x + threadIdx().x + if i <= length(dest) + dest[i] = src[i] + end + return nothing + end + + threads = 1024 + blocks = cld(length(a), threads) + + @cuda threads=threads blocks=blocks kernel!(b, a) + @test @allowscalar a[3] === b[3] + + @cuda threads=threads blocks=blocks kernel!(c, b) + @test @allowscalar c[3] === b[3] +end + +@testset "broadcast" begin + type = SArray{Tuple{3},Float64,1,3} + N = 1000 + data = rand(MersenneTwister(0), type, N) + + d = replace_storage(CuArray, convert(StructOfArrays, data)) + e = d .+ d + @test @allowscalar e[3] == d[3] .+ d[3] + n = norm.(d) + @test @allowscalar n[4] == norm(d[4]) + + f = convert(StructOfArrays, rand(ComplexF64, 1, 3)) + g = convert(StructOfArrays, rand(ComplexF64, 3, 1)) + + cf = replace_storage(CuArray, f) + cg = replace_storage(CuArray, g) + + o = f .+ g + co = cf .+ cg + @test @allowscalar o[2, 1] == co[2, 1] +end diff --git a/test/runtests.jl b/test/runtests.jl index 98b30bc..2f4566d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,44 +1,82 @@ using StructsOfArrays -using Base.Test -using Compat +using Test +using Random +using StaticArrays +using CUDAapi -regular = @compat complex.(randn(10000), randn(10000)) -soa = convert(StructOfArrays, regular) -@test regular == soa -@test sum(regular) ≈ sum(soa) +@testset "StructsOfArrays.jl" begin + @testset "constructor" begin + regular = rand(MersenneTwister(0), ComplexF64, 10000) + soa = convert(StructOfArrays, regular) + @test regular == soa + @test sum(regular) ≈ sum(soa) -soa64 = convert(StructOfArrays{Complex64}, regular) -@test convert(Array{Complex64}, regular) == soa64 + soa64 = convert(StructOfArrays{ComplexF64}, regular) + @test convert(Array{ComplexF64}, regular) == soa64 -sim = similar(soa) -@test typeof(sim) == typeof(soa) -@test size(sim) == size(soa) + sim = similar(soa) + @test typeof(sim) == typeof(soa) + @test size(sim) == size(soa) -regular = @compat complex.(randn(10, 5), randn(10, 5)) -soa = convert(StructOfArrays, regular) -for i = 1:10, j = 1:5 - @test regular[i, j] == soa[i, j] -end -@test size(soa, 1) == 10 -@test size(soa, 2) == 5 + regular = randn(MersenneTwister(0), ComplexF64, 10, 5) + soa = convert(StructOfArrays, regular) + for i = 1:10, j = 1:5 + @test regular[i, j] == soa[i, j] + end + @test size(soa, 1) == 10 + @test size(soa, 2) == 5 + end -immutable OneField - x::Int -end + @testset "similar" begin + struct OneField + x::Int + end -small = StructOfArrays(Complex64, 2) -@test typeof(similar(small, Complex)) === Vector{Complex} -@test typeof(similar(small, Int)) === Vector{Int} -@test typeof(similar(small, SubString)) === Vector{SubString} -@test typeof(similar(small, OneField)) === Vector{OneField} -@test typeof(similar(small, Complex128)) <: StructOfArrays - -# Recursive structs -immutable TwoField - one::OneField - two::OneField -end + small = StructOfArrays(ComplexF64, Array, 2) + @test typeof(small) <: AbstractArray{Complex{T}} where T + @test typeof(similar(small, ComplexF64)) <: AbstractArray{Complex{Float64}} + @test typeof(similar(small, Int)) <: AbstractArray{Int} + @test typeof(similar(small, OneField)) <: AbstractArray{OneField} + @test typeof(similar(small, ComplexF64)) <: StructOfArrays + end + + @testset "broadcast" begin + regular = rand(MersenneTwister(0), ComplexF64, 100) + soa = convert(StructOfArrays, regular) + @test regular .+ regular == soa .+ soa + @test typeof(soa .+ soa) === typeof(soa) + @test typeof(regular .+ soa) === typeof(soa) + @test sin.(soa)[2] == sin(regular[2]) + end + + @testset "recursive structs" begin + struct OneField + x::Int + end -small = StructOfArrays(TwoField, 2, 2) -small[1,1] = TwoField(OneField(1), OneField(2)) -@test small[1,1] == TwoField(OneField(1), OneField(2)) + struct TwoField + one::OneField + two::OneField + end + + small = StructOfArrays(TwoField, Array, 2, 2) + small[1,1] = TwoField(OneField(1), OneField(2)) + @test small[1,1] == TwoField(OneField(1), OneField(2)) + end + + @testset "StaticArrays" begin + type = SArray{Tuple{3},Float64,1,3} + regular = rand(MersenneTwister(0), type, 10000) + soa = convert(StructOfArrays, regular) + @test regular == soa + @test sum(regular) ≈ sum(soa) + @test eltype(regular) === eltype(soa) + @test regular[3] === soa[3] + end + + if has_cuda_gpu() + @testset "CUDA" begin + include("cuda.jl") + end + end +end