Skip to content

Commit

Permalink
Check selection against column shape
Browse files Browse the repository at this point in the history
  • Loading branch information
sjperkins committed Feb 6, 2024
1 parent 83dbbf0 commit 73d0a8a
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 12 deletions.
26 changes: 26 additions & 0 deletions cpp/arcae/base_column_map.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,35 @@
#include "arcae/base_column_map.h"

#include <arrow/status.h>

#include <casacore/casa/Arrays/IPosition.h>

namespace arcae {

std::ptrdiff_t SelectDim(std::size_t dim, std::size_t sdims, std::size_t ndims) {
return std::ptrdiff_t(dim) + std::ptrdiff_t(sdims) - std::ptrdiff_t(ndims);
}

// Validate the selection against the shape
// The shape should include the row dimension
arrow::Status CheckSelectionAgainstShape(
const casacore::IPosition & shape,
const ColumnSelection & selection) {

for(std::size_t dim=0; dim < shape.size(); ++dim) {
auto sdim = SelectDim(dim, selection.size(), shape.size());
if(sdim >= 0 && selection[sdim].size() > 0) {
for(auto i: selection[sdim]) {
if(i >= shape[dim]) {
return arrow::Status::Invalid("Selection index ", i,
" exceeds dimension ", dim,
" of shape ", shape);
}
}
}
}
return arrow::Status::OK();
}


} // namespace arcae
7 changes: 5 additions & 2 deletions cpp/arcae/base_column_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ namespace arcae {

enum MapOrder {C_ORDER=0, F_ORDER};

using RowIds = absl::Span<const casacore::rownr_t>;
using ColumnSelection = std::vector<RowIds>;

// Return a selection dimension given
//
// 1. FORTRAN ordered dim
Expand All @@ -27,9 +30,9 @@ enum MapOrder {C_ORDER=0, F_ORDER};
//
// A return of < 0 indicates a non-existent selection
std::ptrdiff_t SelectDim(std::size_t dim, std::size_t sdims, std::size_t ndims);
arrow::Status CheckSelectionAgainstShape(const casacore::IPosition & shape,
const ColumnSelection & selection);

using RowIds = absl::Span<const casacore::rownr_t>;
using ColumnSelection = std::vector<RowIds>;

// Describes a mapping between disk and memory
struct IdMap {
Expand Down
6 changes: 5 additions & 1 deletion cpp/arcae/column_read_map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace arcae {
namespace {

// Clip supplied shape based on the column selection
// The shape should not include the row dimension
arrow::Result<casacore::IPosition> ClipShape(
const casacore::IPosition & shape,
const ColumnSelection & selection) {
Expand Down Expand Up @@ -149,7 +150,7 @@ MakeOffsets(const decltype(VariableShapeData::row_shapes_) & row_shapes) {
// Factory method for creating Variably Shape Data from column
arrow::Result<std::unique_ptr<VariableShapeData>>
VariableShapeData::Make(const casacore::TableColumn & column,
const ColumnSelection & selection) {
const ColumnSelection & selection) {
assert(!column.columnDesc().isFixedShape());
auto row_shapes = decltype(VariableShapeData::row_shapes_){};
bool fixed_shape = true;
Expand Down Expand Up @@ -218,6 +219,9 @@ ShapeProvider::Make(const casacore::TableColumn & column,
const ColumnSelection & selection) {

if(column.columnDesc().isFixedShape()) {
auto shape = column.columnDesc().shape();
shape.append(casacore::IPosition({ssize_t(column.nrow())}));
ARROW_RETURN_NOT_OK(CheckSelectionAgainstShape(shape, selection));
return ShapeProvider{std::cref(column), selection};
}

Expand Down
15 changes: 12 additions & 3 deletions cpp/arcae/column_write_map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ arrow::Result<ArrowShapeProvider>
ArrowShapeProvider::Make(const casacore::TableColumn & column,
const ColumnSelection & selection,
const std::shared_ptr<arrow::Array> & data) {

ARROW_ASSIGN_OR_RAISE(auto properties, GetDataProperties(column, selection, data));
auto &[shape, ndim, dtype, is_complex] = properties;
return ArrowShapeProvider{std::cref(column),
Expand Down Expand Up @@ -404,17 +403,27 @@ ColumnWriteMap::Make(
const std::shared_ptr<arrow::Array> & data,
MapOrder order) {

// Convert to FORTRAN ordering, which the casacore internals use
// Convert to FORTRAN ordering, used by casacore internals
if(order == MapOrder::C_ORDER) {
std::reverse(std::begin(selection), std::end(selection));
}

ARROW_ASSIGN_OR_RAISE(auto shape_prov, ArrowShapeProvider::Make(column, selection, data));
auto maps = MapFactory(shape_prov, selection);
if(shape_prov.IsColumnVarying()) {

if(shape_prov.IsColumnFixed()) {
// If the column has a fixed shape, check up front that
// the selection indices that we're writing to exist
auto colshape = column.columnDesc().shape();
colshape.append(casacore::IPosition({ssize_t(column.nrow())}));
ARROW_RETURN_NOT_OK(CheckSelectionAgainstShape(colshape, selection));
} else {
// Otherwise we may be able to set the shape in the case
// of variably shaped columns
auto array_base = casacore::ArrayColumnBase(column);
ARROW_RETURN_NOT_OK(SetVariableRowShapes(array_base, selection, data, shape_prov));
}

ARROW_ASSIGN_OR_RAISE(auto ranges, RangeFactory(shape_prov, maps));

if(ranges.size() == 0) {
Expand Down
12 changes: 6 additions & 6 deletions cpp/tests/range_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ using IPos = casacore::IPosition;

using namespace std::string_literals;

static constexpr std::size_t knrow = 10;
static constexpr std::size_t knchan = 4;
static constexpr std::size_t kncorr = 2;
static constexpr std::size_t knrow = 20;
static constexpr std::size_t knchan = 20;
static constexpr std::size_t kncorr = 20;

template <typename T> ScalarColumn<T>
GetScalarColumn(const MS & ms, MSColumns column) {
Expand Down Expand Up @@ -111,8 +111,8 @@ TEST_F(RangeTest, CheckMapsAndRangesSingleton) {
EXPECT_EQ(map.DimMap(1).size(), 0);
EXPECT_THAT(map.DimMap(2), ::testing::ElementsAre(IdMap{0, 0}));

EXPECT_THAT(map.DimRanges(0), ::testing::ElementsAre(Range{0, 2, Range::FREE}));
EXPECT_THAT(map.DimRanges(1), ::testing::ElementsAre(Range{0, 4, Range::FREE}));
EXPECT_THAT(map.DimRanges(0), ::testing::ElementsAre(Range{0, 20, Range::FREE}));
EXPECT_THAT(map.DimRanges(1), ::testing::ElementsAre(Range{0, 20, Range::FREE}));
EXPECT_THAT(map.DimRanges(2), ::testing::ElementsAre(Range{0, 1, Range::MAP}));
}

Expand Down Expand Up @@ -165,7 +165,7 @@ TEST_F(RangeTest, TestSimplicity) {
}

{
ASSERT_OK_AND_ASSIGN(auto map, ColumnReadMap::Make(data, {{1, 2, 3, 4}, {5, 6}, {6, 7}}));
ASSERT_OK_AND_ASSIGN(auto map, ColumnReadMap::Make(data, {{1, 2, 3, 4}, {0, 1}, {0, 1}}));
ASSERT_TRUE(map.IsSimple());
}

Expand Down

0 comments on commit 73d0a8a

Please sign in to comment.