From ddd5f1665a33c2ad251d3f2193a238acbc2ef7a9 Mon Sep 17 00:00:00 2001 From: mwish Date: Fri, 26 Jul 2024 11:27:43 +0800 Subject: [PATCH] GH-43427: Parquet Deprecate ColumnChunk::file_offset field --- cpp/src/parquet/metadata.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 7bab9104619ce..df5de41991f20 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -1536,10 +1536,11 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { const std::shared_ptr& encryptor) { if (dictionary_page_offset > 0) { column_chunk_->meta_data.__set_dictionary_page_offset(dictionary_page_offset); - column_chunk_->__set_file_offset(dictionary_page_offset + compressed_size); - } else { - column_chunk_->__set_file_offset(data_page_offset + compressed_size); } + // https://github.com/apache/parquet-format/pull/440 + // The `file_offset` field is deprecated and should be set to 0 for writer + // if the column chunk has not been written outside the footer. + column_chunk_->__set_file_offset(0); column_chunk_->__isset.meta_data = true; column_chunk_->meta_data.__set_num_values(num_values); if (index_page_offset >= 0) {