Skip to content

Commit

Permalink
Get updates (#139)
Browse files Browse the repository at this point in the history
* deprecate metadata

* add methods for subset of keys

---------

Co-authored-by: Kevin Bonham <kevin@bonham.ch>
  • Loading branch information
kescobo and Kevin Bonham authored Feb 1, 2023
1 parent f2bb337 commit f69c566
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 41 deletions.
8 changes: 4 additions & 4 deletions docs/src/profiles.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,13 @@ julia> samplenames(comm)
```

Finally, you can pull out the metadata of all samples
or a subset using [`metadata`](@ref).
or a subset using [`get`](@ref).
The returned value is a vector of `NamedTuple`s,
which is compliant with the [`Tables.jl`](https://github.com/JuliaData/Tables.jl) interface,
so it's easy to load into other formats (like [`DataFrames.jl`](https://github.com/JuliaData/DataFrames.jl) for example):

```jldoctest profiles
julia> metadata(comm)
julia> get(comm)
3-element Vector{NamedTuple{(:sample,), Tuple{String}}}:
(sample = "s1",)
(sample = "s2",)
Expand Down Expand Up @@ -276,7 +276,7 @@ with the value `missing` in any samples that do not have that field set.


```jldoctest profiles
julia> metadata(comm)
julia> get(comm)
3-element Vector{NamedTuple{(:sample, :subject)}}:
(sample = "s1", subject = "kevin")
(sample = "s2", subject = "anika")
Expand All @@ -301,7 +301,7 @@ julia> md2 = [(name="s1", other="Hello, World!"), (name="s2", other="Goodbye!")]
julia> insert!(comm, md2; namecol=:name)
julia> metadata(comm)
julia> get(comm)
3-element Vector{NamedTuple{(:sample, :subject, :foo, :other)}}:
(sample = "s1", subject = "kevin", foo = "bar", other = "Hello, World!")
(sample = "s2", subject = "anika", foo = missing, other = "Goodbye!")
Expand Down
6 changes: 2 additions & 4 deletions src/Microbiome.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ export MicrobiomeSample,
Taxon,
GeneFunction,
Metabolite,
metadata,
set!,
unset!,
insert!,
Expand Down Expand Up @@ -43,8 +42,7 @@ export CommunityProfile,
profiletype,
featuretotals,
sampletotals,
commjoin,
metadata
commjoin

# Abundances
export present,
Expand Down Expand Up @@ -76,7 +74,7 @@ using MultivariateStats
using ReTest

import Dictionaries: set!, unset!, insert!, delete!
import Base: ==
import Base: ==, get

include("ecobase.jl")
include("samples.jl")
Expand Down
59 changes: 39 additions & 20 deletions src/profiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -366,24 +366,6 @@ end

## Metadata

"""
metadata(commp::CommunityProfile)
Returns iterator of `NamedTuple` per sample, where keys are `:sample`
and each metadata key found in `commp`.
Samples without given metadata are filled with `missing`.
Returned values can be passed to any Tables.rowtable - compliant type,
eg `DataFrame`.
"""
function metadata(commp::CommunityProfile)
ss = samples(commp)
cols = unique(reduce(vcat, collect.(keys.(metadata.(samples(commp))))))
return Tables.rowtable(merge((; sample=name(s)),
NamedTuple(c => get(s, c, missing) for c in cols)
) for s in ss)
end


"""
set!(commp::CommunityProfile, sample::AbstractString, prop::Symbol, val)
Expand Down Expand Up @@ -535,7 +517,7 @@ end
Return an iterator over all keys of the metadata attached to `sample` in a CommunityProfile `commp`.
`collect(keys(commp, sample))` returns an array of keys.
"""
Base.keys(commp::CommunityProfile, sample::AbstractString) = keys(metadata(samples(commp, sample)))
function Base.keys(commp::CommunityProfile, sample::AbstractString)
    # Fetch the metadata container of the named sample, then expose its keys.
    md = get(samples(commp, sample))
    return keys(md)
end

"""
haskey(commp::CommunityProfile, sample::AbstractString, key::Symbol)
Expand All @@ -550,7 +532,7 @@ Base.haskey(commp::CommunityProfile, sample::AbstractString, key::Symbol) = in(k
Return the value of the metadata in a `sample` stored for the given `key`, or the given `default` value if no mapping for the key is present.
"""
Base.get(commp::CommunityProfile, sample::AbstractString, key::Symbol, default=missing) = get(metadata(samples(commp, sample)), key, default)
function Base.get(
    commp::CommunityProfile, sample::AbstractString, key::Symbol, default=missing
)
    # Look `key` up in the metadata of the named sample; `default` is returned
    # by the underlying `get` when the key has no mapping.
    md = get(samples(commp, sample))
    return get(md, key, default)
end

"""
get(commp::CommunityProfile, key::Symbol, default)
Expand All @@ -560,6 +542,43 @@ Return the value of the metadata in a `sample` stored for the given `key`, or th
function Base.get(commp::CommunityProfile, key::Symbol, default=missing)
    # One entry per sample name, each looked up through the per-sample `get` method.
    snames = samplenames(commp)
    return [get(commp, sname, key, default) for sname in snames]
end


"""
    get(commp::CommunityProfile, cols::AbstractVector{<:Symbol}, default)

Return the metadata of every sample in `commp`, restricted to the keys in `cols`.

The result is built with `Tables.rowtable` and holds one `NamedTuple` per sample,
with the key `:sample` (the sample's name) followed by one entry per element of `cols`.
Samples that have no value for a requested key are filled with `default`.
The returned value can be passed to any Tables.jl-compatible sink, eg `DataFrame`.
"""
function Base.get(commp::CommunityProfile, cols::AbstractVector{<:Symbol}, default)
    return Tables.rowtable(
        merge((; sample=name(s)), get(s, cols, default)) for s in samples(commp)
    )
end

function Base.get(commp::CommunityProfile, cols::AbstractVector{<:Symbol})
    # Two-argument convenience form: absent keys are filled with `missing`.
    return get(commp, cols, missing)
end

"""
    get(commp::CommunityProfile)

Return the metadata of every sample in `commp` as one `NamedTuple` per sample.

The keys are `:sample` plus every metadata key found on any sample of `commp`,
in order of first appearance.
Samples that lack a given key are filled with `missing`.
The returned value can be passed to any Tables.jl-compatible sink, eg `DataFrame`.
"""
function Base.get(commp::CommunityProfile)
    # `init` keeps the reduction defined when `commp` has no samples; without it
    # `vcat` would be reduced over an empty collection, which throws.
    allkeys = mapreduce(s -> collect(keys(get(s))), vcat, samples(commp); init=Symbol[])
    return get(commp, unique(allkeys))
end

# Keep the old name `metadata` usable as a deprecated binding that refers to `get`.
Base.@deprecate_binding metadata get

"""
filter(f, comm::CommunityProfile)
Expand Down
16 changes: 12 additions & 4 deletions src/samples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ Get the `name` field from an `AbstractSample` or `AbstractFeature`.
name(as::AbstractSample) = as.name

"""
metadata(t::AbstractSample)
get(t::AbstractSample)
Get the `metadata` field from an `AbstractSample`.
Note that this is not a copy, so modifications to the returned value
will update the parent `AbstractSample` as well.
"""
metadata(as::AbstractSample) = as.metadata
# Written as `Base.get`, like the other `get` methods in this file, so that the
# method extends `Base.get` without depending on an `import Base: get` elsewhere.
Base.get(as::AbstractSample) = as.metadata

Base.:(==)(as1::AbstractSample, as2::AbstractSample) = name(as1) == name(as2)

Expand Down Expand Up @@ -123,7 +123,7 @@ end
Return an iterator over all keys of the metadata attached to sample `as`.
`collect(keys(as))` returns an array of keys.
"""
Base.keys(as::AbstractSample) = keys(metadata(as))
function Base.keys(as::AbstractSample)
    # The keys of a sample are the keys of its `metadata` container.
    return keys(as.metadata)
end

"""
haskey(as::AbstractSample, key::Symbol)
Expand All @@ -138,8 +138,16 @@ Base.haskey(as::AbstractSample, key::Symbol) = in(key, keys(as))
Return the value of the metadata in the sample `as` stored for the given `key`, or the given `default` value if no mapping for the key is present.
"""
Base.get(as::AbstractSample, key::Symbol, default) = get(metadata(as), key, default)
function Base.get(as::AbstractSample, key::Symbol, default)
    # Forward the lookup to the sample's `metadata` container.
    return get(as.metadata, key, default)
end

"""
    get(as::AbstractSample, keys::AbstractVector{<:Symbol}, default)
    get(as::AbstractSample, keys::AbstractVector{<:Symbol})

Return a `NamedTuple` with one entry per element of `keys`, in the same order.
Each value is the metadata stored in the sample `as` for that key,
or `default` if no mapping for the key is present.
The two-argument form uses `missing` as the default.
"""
function Base.get(as::AbstractSample, keys::AbstractVector{<:Symbol}, default)
    return NamedTuple(key => get(as.metadata, key, default) for key in keys)
end

function Base.get(as::AbstractSample, keys::AbstractVector{<:Symbol})
    # Same lookup as the three-argument method, with `missing` for absent keys.
    return get(as, keys, missing)
end

function set!(as::AbstractSample, d::Union{NamedTuple, Dictionary{Symbol, <:Any}})
for (key, value) in pairs(d)
Expand Down
20 changes: 12 additions & 8 deletions test/MicrobiomeTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import Microbiome.MultivariateStats: MDS
@testset "MicriobiomeSamples and metadata" begin
ms = MicrobiomeSample("sample")
@test name(ms) == "sample"
@test isempty(metadata(ms))
@test metadata(ms) isa Dictionary
@test isempty(get(ms))
@test get(ms) isa Dictionary
@test_throws Dictionaries.IndexError ms.thing = "metadata"
@test_throws Dictionaries.IndexError ms[:thing] = "metadata"

Expand Down Expand Up @@ -232,7 +232,7 @@ end

@testset "Single sample" begin
c4 = CommunityProfile(sparse([1 1; 2 2; 3 3]), [Taxon(string(i)) for i in 1:3], [s1, s2])
md1, md2 = metadata(c4)
md1, md2 = get(c4)

@test_throws Dictionaries.IndexError insert!(c4, "sample1", :something, 3.0)
@test_throws Dictionaries.IndexError delete!(c4, "sample1", :something_else)
Expand All @@ -244,7 +244,7 @@ end
@test insert!(c4, "sample1", :something, 3.0) isa MicrobiomeSample
@test get(c4, "sample1", :something, 42) == 3.0
set!(c4, "sample1", :something, 1.0)
@test first(metadata(c4))[:something] == 1.0
@test first(get(c4))[:something] == 1.0
@test haskey(c4, "sample1", :something)
@test unset!(c4, "sample1", :something) isa MicrobiomeSample
@test !haskey(c4, "sample1", :something)
Expand All @@ -255,12 +255,16 @@ end

@testset "Whole community" begin
c5 = CommunityProfile(sparse([1 1; 2 2; 3 3]), [Taxon(string(i)) for i in 1:3], [s1, s2])
md1, md2 = metadata(c5)

@test all(row-> row[:age] == 37, [md1, md2])
@test all(row-> row[:name] == "kevin", [md1, md2])
md1, md2 = get(c5)
md1_2, md2_2 = get(c5, [:name, :age, :test])
@test all(row-> row[:age] == 37, [md1, md2, md1_2, md2_2])
@test all(row-> row[:name] == "kevin", [md1, md2, md1_2, md2_2])
@test md1[:something] == 1.0
@test ismissing(md2[:something])
@test !haskey(md1_2, :something)
@test !haskey(md2_2, :something)
@test ismissing(md1_2[:test])
@test ismissing(md2_2[:test])
@test md2[:something_else] == 2.0
@test ismissing(md1[:something_else])

Expand Down
2 changes: 1 addition & 1 deletion tutorials/juliacon2022.jl
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ And you can get all of the metadata out as a vector of `NamedTuple`s
"""

# ╔═╡ 71be47ae-1682-4068-a4d7-e36291682efa
DataFrame(metadata(comm))
DataFrame(get(comm))

# ╔═╡ 8d64359d-3ab4-4da0-af92-afc4d25389bf
md"""
Expand Down

0 comments on commit f69c566

Please sign in to comment.