From cf1065006f7d6e5ed32fd70b0bcc5dff19860056 Mon Sep 17 00:00:00 2001 From: Shawn Laffan Date: Thu, 8 Oct 2015 08:14:51 +1100 Subject: [PATCH] Initial work on the spatially structured randomisations. The current version allocates to all groups within the neighbour sets around a seed location, filling each in turn. Once those are filled it finds another seed location and repeats. Currently does not fully allocate all elements on the first pass, and ends up using the swap algorithm to finish the work. This could be a broader issue, though. Updates issue #76 Signed-off-by: Shawn Laffan --- lib/Biodiverse/Randomise.pm | 146 ++++++++++++++++++++++++++++++++---- lib/Biodiverse/Spatial.pm | 25 ++++++ t/28-Randomisation.t | 84 +++++++++++++++++++++ 3 files changed, 241 insertions(+), 14 deletions(-) diff --git a/lib/Biodiverse/Randomise.pm b/lib/Biodiverse/Randomise.pm index 251ff2f86..00073e1b1 100644 --- a/lib/Biodiverse/Randomise.pm +++ b/lib/Biodiverse/Randomise.pm @@ -1006,6 +1006,49 @@ sub rand_csr_by_group { return $new_bd; } + +sub get_spatial_output_for_label_allocation { + my ($self, %args) = @_; + + my $sp_conditions = $args{spatial_conditions_for_label_allocation}; + + return if !$sp_conditions; + + my $bd = $args{basedata_ref} || $self->get_param ('BASEDATA_REF'); + + # Check the sp conditions + # If we get only whitespace and comments then default to selecting all groups + my $sp_check_text = $sp_conditions->[0]; + $sp_check_text //= ''; + if (blessed ($sp_check_text)) { + $sp_check_text = $sp_check_text->get_conditions_unparsed; + } + $sp_check_text =~ s/[\s\r\n]//g; # clear any whitespace + $sp_check_text =~ s/^\s*#.*$//g; # and any comments + + return if !length $sp_check_text; # all we had was whitespace and comments + + my $sp = $self->get_param('SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION'); + + return $sp if $sp; + + $sp = $bd->add_spatial_output(name => 'spatial_output_for_label_allocation'); + + # we only want the neighbour sets + $sp->run_analysis ( + spatial_conditions => $sp_conditions, + #definition_query => $def_query, # do we want a def query for this? Prob not. + calculations => [], + override_valid_analysis_check => 1, + calc_only_elements_to_calc => 1, # really need to rename this undocumented arg + ); + + $bd->delete_output (output => $sp); + $self->set_param(SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION => $sp); + + return $sp; +} + sub get_metadata_rand_structured { my $self = shift; @@ -1074,13 +1117,15 @@ sub rand_structured { my $bd = $args{basedata_ref} || $self->get_param ('BASEDATA_REF'); + my $sp_for_label_allocation = $self->get_spatial_output_for_label_allocation (%args); + my $progress_bar = Biodiverse::Progress->new(); my $rand = $args{rand_object}; # can't store to all output formats and then recreate delete $args{rand_object}; # need to get these from the ARGS param if available - should also croak if negative - my $multiplier = $args{richness_multiplier} || 1; + my $multiplier = $args{richness_multiplier} // 1; my $addition = $args{richness_addition} || 0; my $name = $self->get_param ('NAME'); @@ -1124,7 +1169,7 @@ END_PROGRESS_TEXT # make sure shuffle does not work on the original data my $rand_label_order = $rand->shuffle ([@sorted_labels]); - printf "[RANDOMISE] Richness Shuffling %s labels from %s groups\n", + printf "[RANDOMISE] Spatially structured shuffling %s labels from %s groups\n", scalar @sorted_labels, scalar @sorted_groups; # generate a hash with the target richness values @@ -1233,21 +1278,91 @@ END_PROGRESS_TEXT = $cloned_bd->get_groups_with_label_as_hash (label => $label); my $tmp_rand_order = $rand->shuffle ([sort keys %tmp]); - my (%new_bd_additions, %cloned_bd_deletions); + my ( + %new_bd_additions, + %cloned_bd_deletions, + @sp_alloc_nbr_lists, + $last_group_assigned, + %assigned, + ); - BY_GROUP: + BY_GROUP: foreach my $from_group (@$tmp_rand_order) { my $count = $tmp{$from_group}; + my $to_group; + +# should we always assign to the seed location? +# What if the central group is not part of the nbr set? +# Issue is that the algorithm might never land on a valid target +# group given the selection process is only unfilled groups without the label + + if (!$sp_for_label_allocation || !defined $last_group_assigned) { + # select a group at random to assign to + my $j = int ($rand->rand (scalar @target_groups)); + $to_group = $target_groups[$j]; + + # make sure we don't select this group again + # for this label this time round + splice (@target_groups, $j, 1); + + if ($sp_for_label_allocation) { + # we need a copy + # should cache and clone these to avoid re-sorting the same data + @sp_alloc_nbr_lists + = $sp_for_label_allocation->get_calculated_nbr_lists_for_element ( + element => $to_group, + sort_lists => 1, # could later add a proximity sort + ); + foreach my $list (@sp_alloc_nbr_lists) { + # don't reconsider $to_group, and drop out of this loop if we find it + last if defined $self->delete_from_sorted_list_aa ($to_group, $list); + } + } + } + else { + my $target_nbrs = $sp_alloc_nbr_lists[0]; + + FIND_TARGET_NBR: + while (scalar @sp_alloc_nbr_lists) { + if ($target_nbrs && !scalar @$target_nbrs) { + if (scalar @sp_alloc_nbr_lists) { + shift @sp_alloc_nbr_lists; # start work on the next neighbour set + if (!scalar @sp_alloc_nbr_lists) { + $last_group_assigned = undef; + next BY_GROUP; # no nbrs left + } + $target_nbrs = $sp_alloc_nbr_lists[0]; + next FIND_TARGET_NBR if !scalar @$target_nbrs; + } + else { + $last_group_assigned = undef; + next BY_GROUP; + } + } + my $j = int ($rand->rand (scalar @$target_nbrs)); + $to_group = $target_nbrs->[$j]; + splice (@$target_nbrs, $j, 1); + next FIND_TARGET_NBR + if $assigned{$to_group} || exists $filled_groups{$to_group}; + last FIND_TARGET_NBR if exists $target_groups_hash{$to_group}; + } + + if (!defined $to_group) { + $last_group_assigned = undef; + next BY_GROUP; + } + + # make sure we don't select this group again + # for this label this time round + $self->delete_from_sorted_list_aa ($to_group, \@target_groups); + } - # select a group at random to assign to - my $j = int ($rand->rand (scalar @target_groups)); - my $to_group = $target_groups[$j]; - # make sure we don't select this group again - # for this label this time round - splice (@target_groups, $j, 1); + $last_group_assigned = $to_group; # drop out criterion, occurs when $richness_multiplier < 1 - last BY_GROUP if not defined $to_group; + last BY_GROUP if not defined $to_group; + + $assigned{$to_group}++; warn "SELECTING GROUP THAT IS ALREADY FULL $to_group," . "$filled_groups{$to_group}, $target_richness{$to_group}, " @@ -2198,12 +2313,15 @@ sub delete_from_sorted_list { } # array args version to reduce sub and args hash cleanup overheads +# using $_ to squeeze a bit more performance out of the code, since it is a hot path sub delete_from_sorted_list_aa { - my ($self, $item, $list) = @_; + #my ($self, $item, $list) = @_; - my $idx = binsearch { $a cmp $b } $item, @$list; + #my $idx = binsearch { $a cmp $b } $item, @$list; + my $idx = binsearch { $a cmp $b } $_[1], @{$_[2]}; if (defined $idx) { - splice @$list, $idx, 1; + #splice @$list, $idx, 1; + splice @{$_[2]}, $idx, 1; } # skip the explicit return as a minor speedup for pre-5.20 systems diff --git a/lib/Biodiverse/Spatial.pm b/lib/Biodiverse/Spatial.pm index 211174bcd..904336427 100644 --- a/lib/Biodiverse/Spatial.pm +++ b/lib/Biodiverse/Spatial.pm @@ -667,7 +667,32 @@ sub sp_calc { return 1; } +# assumes they have already been calculated +sub get_calculated_nbr_lists_for_element { + my $self = shift; + my %args = @_; + + my $element = $args{element}; + my $use_nbrs_from = $args{use_nbrs_from}; + my $spatial_conditions_arr = $self->get_spatial_conditions; + my $sort_lists = $args{sort_lists}; + + my @nbr_list; + foreach my $i (0 .. $#$spatial_conditions_arr) { + my $nbr_list_name = '_NBR_SET' . ($i+1); + my $nbr_list = $self->get_list_ref ( + element => $element, + list => $nbr_list_name, + autovivify => 0, + ); + my $copy = $sort_lists ? [sort @$nbr_list] : [@$nbr_list]; + push @nbr_list, $copy; + } + + return wantarray ? @nbr_list : \@nbr_list; +} +# should probably be calculate_nbrs_for_element sub get_nbrs_for_element { my $self = shift; my %args = @_; diff --git a/t/28-Randomisation.t b/t/28-Randomisation.t index 8cd272788..45823224d 100644 --- a/t/28-Randomisation.t +++ b/t/28-Randomisation.t @@ -24,6 +24,8 @@ use Test::Exception; use Biodiverse::TestHelpers qw /:cluster :element_properties :tree/; use Biodiverse::Cluster; +use Math::Random::MT::Auto; + my $default_prng_seed = 2345; use Devel::Symdump; @@ -113,6 +115,88 @@ sub test_rand_structured_richness_same { } +# Basic spatial structure approach +# We find a neighbourhood and fill it up, then find another and fill it up, etc +sub test_rand_spatially_structured { + my $c = 1; + my $c3 = $c * 1; + my $c6 = $c * 2; + my $c9 = $c * 3; + my $bd_size = 25; + + my $prng_seed = 2345; + + my $prng = Math::Random::MT::Auto->new; + + my $bd = Biodiverse::BaseData->new ( + NAME => 'test_rand_spatially_structured', + CELL_SIZES => [$c, $c], + ); + + foreach my $i (0 .. $bd_size) { + foreach my $j (0 .. $bd_size) { + my $group = "$i:$j"; + $bd->add_element (group => $group); + foreach my $label (qw /a b c/) { + if ($prng->rand < (1/3)) { + $bd->add_element (group => $group, label => $label); + } + } + } + } + + $bd->build_spatial_index(resolutions => [$c, $c]); + + my $sp = $bd->add_spatial_output (name => 'sp'); + + $sp->run_analysis ( + spatial_conditions => ['sp_self_only()'], + calculations => [qw /calc_richness/], + ); + + my $rand_name = 'rand_spatially_structured'; + + my $rand = $bd->add_randomisation_output (name => $rand_name); + my $rand_bd_array = $rand->run_analysis ( + function => 'rand_structured', + iterations => 1, # reset to 3 later + seed => $prng_seed, + richness_addition => 30, # make sure we can put our three labels anywhere + richness_multiplier => 1, + spatial_conditions_for_label_allocation => [ + "sp_circle(radius => $c3)", + "sp_circle(radius => $c6)", + "sp_circle(radius => $c9)", + ], + return_rand_bd_array => 1, + ); + + $rand_bd_array->[0]->get_groups_ref->export ( + format => 'GeoTIFF', + file => 'barry', + list => 'SUBELEMENTS', + ); + + subtest 'range scores match' => sub { + foreach my $rand_bd (@$rand_bd_array) { + foreach my $label (sort $rand_bd->get_labels) { + is ($rand_bd->get_range (element => $label), + $bd->get_range (element => $label), + "range for $label matches", + ); + } + } + }; + + { + local $TODO = 'Not implemented yet'; + ok (0, "Spatially structured allocation of labels"); + } + + return; +} + + sub test_rand_structured_richness_multiplier_and_addition { my $c = 100000; my $bd = get_basedata_object_from_site_data(CELL_SIZES => [$c, $c]);