diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index e3e6dccb81a10..285e2a597389d 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -72,7 +72,7 @@ using arrow::internal::Iota; // Help reduce verbosity using ParquetReader = parquet::ParquetFileReader; -using parquet::RecordReader; +using parquet::internal::RecordReader; namespace bit_util = arrow::bit_util; diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index e54b416c56cfb..e5aef5a45b5f3 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -86,9 +86,9 @@ using ::arrow::internal::SafeLeftShift; using ::arrow::util::Float16; using ::arrow::util::SafeLoadAs; -using parquet::BinaryRecordReader; -using parquet::DictionaryRecordReader; -using parquet::RecordReader; +using parquet::internal::BinaryRecordReader; +using parquet::internal::DictionaryRecordReader; +using parquet::internal::RecordReader; using parquet::schema::GroupNode; using parquet::schema::Node; using parquet::schema::PrimitiveNode; diff --git a/cpp/src/parquet/arrow/reader_internal.h b/cpp/src/parquet/arrow/reader_internal.h index 0642ded143e66..cf9dbb86577b5 100644 --- a/cpp/src/parquet/arrow/reader_internal.h +++ b/cpp/src/parquet/arrow/reader_internal.h @@ -98,7 +98,7 @@ class FileColumnIterator { using FileColumnIteratorFactory = std::function; -Status TransferColumnData(::parquet::RecordReader* reader, +Status TransferColumnData(::parquet::internal::RecordReader* reader, const std::shared_ptr<::arrow::Field>& value_field, const ColumnDescriptor* descr, ::arrow::MemoryPool* pool, std::shared_ptr<::arrow::ChunkedArray>* out); diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index 98b038568f45f..b2be1b3c5354d 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -39,7 +39,7 @@ namespace parquet { -using RecordReader; +using internal::RecordReader; namespace arrow { diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 80faa65aa9c81..5aca128ab7975 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -302,6 +302,8 @@ class TypedColumnReader : public ColumnReader { int32_t* dict_len) = 0; }; +namespace internal { + /// \brief Stateful column reader that delimits semantic records for both flat /// and nested columns /// @@ -484,6 +486,8 @@ class DictionaryRecordReader : virtual public RecordReader { virtual std::shared_ptr<::arrow::ChunkedArray> GetResult() = 0; }; +} // namespace internal + using BoolReader = TypedColumnReader; using Int32Reader = TypedColumnReader; using Int64Reader = TypedColumnReader; diff --git a/cpp/src/parquet/column_reader_benchmark.cc b/cpp/src/parquet/column_reader_benchmark.cc index 168c87f08148f..93ab2dfa8c1ac 100644 --- a/cpp/src/parquet/column_reader_benchmark.cc +++ b/cpp/src/parquet/column_reader_benchmark.cc @@ -26,8 +26,8 @@ namespace parquet { using benchmark::DoNotOptimize; -using parquet::RecordReader; using parquet::Repetition; +using parquet::internal::RecordReader; using parquet::test::MakePages; using schema::NodePtr; @@ -72,9 +72,9 @@ class BenchmarkHelper { internal::LevelInfo level_info; level_info.def_level = descr_->max_definition_level(); level_info.rep_level = descr_->max_repetition_level(); - record_reader_ = - RecordReader::Make(descr_.get(), level_info, ::arrow::default_memory_pool(), - /*read_dictionary=*/false, read_dense_for_nullable); + record_reader_ = internal::RecordReader::Make( + descr_.get(), level_info, ::arrow::default_memory_pool(), + /*read_dictionary=*/false, read_dense_for_nullable); record_reader_->SetPageReader(std::move(pager)); return record_reader_.get(); } diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 053517095ea05..9096f195687fb 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -753,9 +753,9 @@ class RecordReaderPrimitiveTypeTest NodePtr root = GroupNode::Make("root", Repetition::REQUIRED, {column}); schema_descriptor_.Init(root); descr_ = schema_descriptor_.Column(0); - record_reader_ = RecordReader::Make(descr_, ComputeLevelInfo(descr_), - ::arrow::default_memory_pool(), - /*read_dictionary=*/false, GetParam()); + record_reader_ = internal::RecordReader::Make(descr_, ComputeLevelInfo(descr_), + ::arrow::default_memory_pool(), + /*read_dictionary=*/false, GetParam()); } void CheckReadValues(std::vector expected_values, @@ -796,7 +796,7 @@ class RecordReaderPrimitiveTypeTest protected: SchemaDescriptor schema_descriptor_; - std::shared_ptr record_reader_; + std::shared_ptr record_reader_; const ColumnDescriptor* descr_; }; @@ -1492,9 +1492,9 @@ class FLBARecordReaderTest : public ::testing::TestWithParam { MakePages(descr_.get(), num_pages, levels_per_page, def_levels_, rep_levels_, values_, buffer_, pages_, Encoding::PLAIN); auto pager = std::make_unique(pages_); - record_reader_ = - RecordReader::Make(descr_.get(), level_info, ::arrow::default_memory_pool(), - /*read_dictionary=*/false, read_dense_for_nullable()); + record_reader_ = internal::RecordReader::Make( + descr_.get(), level_info, ::arrow::default_memory_pool(), + /*read_dictionary=*/false, read_dense_for_nullable()); record_reader_->SetPageReader(std::move(pager)); } @@ -1554,7 +1554,7 @@ class FLBARecordReaderTest : public ::testing::TestWithParam { } protected: - std::shared_ptr record_reader_; + std::shared_ptr record_reader_; private: int levels_per_page_; @@ -1586,9 +1586,9 @@ class ByteArrayRecordReaderTest : public ::testing::TestWithParam { auto pager = std::make_unique(pages_); - record_reader_ = - RecordReader::Make(descr_.get(), level_info, ::arrow::default_memory_pool(), - /*read_dictionary=*/false, read_dense_for_nullable()); + record_reader_ = internal::RecordReader::Make( + descr_.get(), level_info, ::arrow::default_memory_pool(), + /*read_dictionary=*/false, read_dense_for_nullable()); record_reader_->SetPageReader(std::move(pager)); } @@ -1648,7 +1648,7 @@ class ByteArrayRecordReaderTest : public ::testing::TestWithParam { } protected: - std::shared_ptr record_reader_; + std::shared_ptr record_reader_; private: int levels_per_page_; @@ -1777,7 +1777,8 @@ TEST_P(RecordReaderStressTest, StressTest) { pager.reset(new test::MockPageReader(pages)); // Set up the RecordReader. - std::shared_ptr record_reader = RecordReader::Make(&descr, level_info); + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); record_reader->SetPageReader(std::move(pager)); // Figure out how many total records. diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 2a3853102ee73..3e9eeea6c6f67 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -111,7 +111,8 @@ std::shared_ptr RowGroupReader::Column(int i) { const_cast(contents_->properties())->memory_pool()); } -std::shared_ptr RowGroupReader::RecordReader(int i, bool read_dictionary) { +std::shared_ptr RowGroupReader::RecordReader( + int i, bool read_dictionary) { if (i >= metadata()->num_columns()) { std::stringstream ss; ss << "Trying to read column index " << i << " but row group metadata has only " @@ -124,7 +125,7 @@ std::shared_ptr RowGroupReader::RecordReader(int i, bool read_dict internal::LevelInfo level_info = internal::LevelInfo::ComputeLevelInfo(descr); - auto reader = RecordReader::Make( + auto reader = internal::RecordReader::Make( descr, level_info, contents_->properties()->memory_pool(), read_dictionary, contents_->properties()->read_dense_for_nullable()); reader->SetPageReader(std::move(page_reader)); @@ -144,7 +145,7 @@ std::shared_ptr RowGroupReader::ColumnWithExposeEncoding( return reader; } -std::shared_ptr RowGroupReader::RecordReaderWithExposeEncoding( +std::shared_ptr RowGroupReader::RecordReaderWithExposeEncoding( int i, ExposedEncoding encoding_to_expose) { return RecordReader( i, diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h index 1283aa6a1a173..b59b59f95c2d8 100644 --- a/cpp/src/parquet/file_reader.h +++ b/cpp/src/parquet/file_reader.h @@ -37,7 +37,9 @@ class BloomFilterReader; class PageReader; class RowGroupMetaData; +namespace internal { class RecordReader; +} class PARQUET_EXPORT RowGroupReader { public: @@ -62,7 +64,8 @@ class PARQUET_EXPORT RowGroupReader { // EXPERIMENTAL: Construct a RecordReader for the indicated column of the row group. // Ownership is shared with the RowGroupReader. - std::shared_ptr RecordReader(int i, bool read_dictionary = false); + std::shared_ptr RecordReader(int i, + bool read_dictionary = false); // Construct a ColumnReader, trying to enable exposed encoding. // @@ -87,7 +90,7 @@ class PARQUET_EXPORT RowGroupReader { // reader will read decoded data without exposing the dictionary. // // \note API EXPERIMENTAL - std::shared_ptr RecordReaderWithExposeEncoding( + std::shared_ptr RecordReaderWithExposeEncoding( int i, ExposedEncoding encoding_to_expose); std::unique_ptr GetColumnPageReader(int i); diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index f947a7df010b0..fb77ba6cbc178 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -598,7 +598,7 @@ TEST(TestFileReader, RecordReaderReadDenseForNullable) { std::unique_ptr file_reader = ParquetFileReader::OpenFile( alltypes_plain(), /* memory_map = */ false, reader_props); std::shared_ptr group = file_reader->RowGroup(0); - std::shared_ptr col_record_reader = group->RecordReader(0); + std::shared_ptr col_record_reader = group->RecordReader(0); ASSERT_EQ(reader_props.read_dense_for_nullable(), col_record_reader->read_dense_for_nullable()); } @@ -611,7 +611,7 @@ TEST(TestFileReader, GetRecordReader) { alltypes_plain(), /* memory_map = */ false, reader_props); std::shared_ptr group = file_reader->RowGroup(0); - std::shared_ptr col_record_reader_ = group->RecordReader(0); + std::shared_ptr col_record_reader_ = group->RecordReader(0); ASSERT_TRUE(col_record_reader_->HasMoreData()); auto records_read = col_record_reader_->ReadRecords(4);