Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add method to display all DHW scenarios for the currently set RCP/SSP #914

Merged
merged 8 commits into from
Jan 30, 2025
1 change: 1 addition & 0 deletions src/analysis/analysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,6 @@ include("intervention.jl")
include("clustering.jl")
include("rule_extraction.jl")
include("scenario.jl")
include("feature_set.jl")

end
117 changes: 117 additions & 0 deletions src/analysis/feature_set.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
_filter_constants(scens::DataFrame)::DataFrame

Filter out features/factors that do not vary.
"""
function _filter_constants(scens::DataFrame)::DataFrame
varying_cols = []
for (i, col) in enumerate(eachcol(scens))
Copy link
Collaborator

@Rosejoycrocker Rosejoycrocker Jan 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the loop here is more efficient than something like findall(dropdims(sum(diff(Matrix(scens);dims=1),dims=1),dims=1).==0.0) ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adjusted to use a comprehension instead of mutating an existing vector.

I had to get something done quickly so what was there before was the easiest way without using too much brain power.

This should address your concern re efficiency while balancing for code readability.

if !all(col .== col[1])
push!(varying_cols, i)
end
end

return scens[:, varying_cols]
end

"""
_seeded(rs::ResultSet)::Tuple{Vector}

Extract total and average deployment, currently in terms of proportional increase to cover
relative to the locations' carrying capacity.

# Arguments
- `rs` : ResultSet holding scenario outcomes

# Returns
DataFrames of mean and total deployment for each coral group
"""
function _seeded(rs::ResultSet)::Tuple{DataFrame,DataFrame}
deployed_corals = rs.seed_log.coral_id

mean_deployment = [
mean(
sum(rs.seed_log[:, c_id, :, :], dims=:timesteps),
dims=:locations
).data[:]
for c_id in deployed_corals
]

total_deployment = [
sum(
sum(rs.seed_log[:, c_id, :, :], dims=:timesteps),
dims=:locations
).data[:]
for c_id in deployed_corals
]

col_names = ["deployed_coral_$(i)" for i in string.(collect(deployed_corals))]
μ = DataFrame(hcat(mean_deployment...), "mean_" .* col_names)
T = DataFrame(hcat(total_deployment...), "total_" .* col_names)

return μ, T
end

"""
feature_set(rs::ResultSet)::DataFrame

Extract a feature set from results for analysis purposes.
"""
function feature_set(rs::ResultSet)::DataFrame
scens = copy(rs.inputs)

rcp_ids = collect(keys(rs.dhw_stats))
rcp_id = first(rcp_ids)
if length(rcp_ids) > 1
@warn "Multiple RCPs found, assigning stats for first id: $(rcp_id)"
end

# Add DHW statistics
dhw_stat = mean(rs.dhw_stats[rcp_id], dims=:locations)
dhw_means = dhw_stat[stat=At("mean")].data[:]
dhw_stdevs = dhw_stat[stat=At("std")].data[:]

insertcols!(scens, 2, :dhw_mean => -99.0, :dhw_stdev => -99.0)
for (i, r) in enumerate(eachrow(scens))
scens[i, :dhw_mean] = dhw_means[Int64(r.dhw_scenario)]
scens[i, :dhw_stdev] = dhw_stdevs[Int64(r.dhw_scenario)]
end

@assert all(scens.dhw_mean .> -99.0) "Unknown DHW scenario found, check rows: $(findall(scens.dhw_mean .== -99.0))"
@assert all(scens.dhw_stdev .> -99.0) "Unknown DHW scenario found, check rows: $(findall(scens.dhw_mean .== -99.0))"

# Add indicators of deployments
seed_stats = ADRIA.decision.deployment_summary_stats(rs.ranks, :seed)
fog_stats = ADRIA.decision.deployment_summary_stats(rs.ranks, :fog)

# Only attach mean of deployment effort
insertcols!(
scens,
:n_loc_seed_mean=>seed_stats[stats=At(:mean)].data[:],
:n_loc_fog_mean=>fog_stats[stats=At(:mean)].data[:]
)

# Replace `depth_offset` with maximum depth
scens.depth_max = scens.depth_min .+ scens.depth_offset
scens = scens[:, Not(:depth_offset)]

total_seeded, mean_seeded = _seeded(rs)
DataFrames.hcat!(scens, mean_seeded)
DataFrames.hcat!(scens, total_seeded)
scens.total_deployed_coral = sum.(eachrow(total_seeded))

# Remove `dhw_scenario` as Scenario IDs are not very informative for analyses
scens = scens[:, Not(:dhw_scenario)]

# Remove correlated features
# Remove desired seed deployment targets
# N_seed_TA, etc, indicate maximum deployment effort, not actual simulated deployment
scens = scens[:, Not([:N_seed_TA, :N_seed_CA, :N_seed_SM])]

# Set missing values to 0
for col in eachcol(scens)
replace!(col, missing=>0.0)
end

return _filter_constants(scens)
end