From f86d7da19152c783d416747fa49c7265b51ef7b8 Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Fri, 8 Sep 2023 20:35:00 +0100
Subject: [PATCH] Improve test

---
 python/pyarrow/tests/test_dataset.py | 63 ++++++++++++++++------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 0b58de5f543db..e0988f2752033 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -5292,33 +5292,42 @@ def test_write_dataset_preserve_field_metadata(tempdir):
 
 
 def test_write_dataset_write_page_index(tempdir):
-    for write_page_index in [True, False]:
-        schema = pa.schema([
-            pa.field("x", pa.int64()),
-            pa.field("y", pa.int64())])
-
-        arrays = [[1, 2, 3], [None, 5, None]]
-        table = pa.Table.from_arrays(arrays, schema=schema)
-
-        file_format = ds.ParquetFileFormat()
-        base_dir = tempdir / f"write_page_index_{write_page_index}"
-        ds.write_dataset(
-            table,
-            base_dir,
-            format="parquet",
-            file_options=file_format.make_write_options(
-                write_page_index=write_page_index,
-            ),
-            existing_data_behavior='overwrite_or_ignore',
-        )
-        ds1 = ds.dataset(base_dir, format="parquet")
-
-        for file in ds1.files:
-            # Can retrieve sorting columns from metadata
-            metadata = pq.read_metadata(file)
-            cc = metadata.row_group(0).column(0)
-            assert cc.has_offset_index is write_page_index
-            assert cc.has_column_index is write_page_index
+    for write_statistics in [True, False]:
+        for write_page_index in [True, False]:
+            schema = pa.schema([
+                pa.field("x", pa.int64()),
+                pa.field("y", pa.int64())])
+
+            arrays = [[1, 2, 3], [None, 5, None]]
+            table = pa.Table.from_arrays(arrays, schema=schema)
+
+            file_format = ds.ParquetFileFormat()
+            # One directory per option combination, so an iteration never
+            # reads back files written by a previous one.
+            base_dir = tempdir / (
+                f"write_page_index_{write_page_index}"
+                f"_write_statistics_{write_statistics}"
+            )
+            ds.write_dataset(
+                table,
+                base_dir,
+                format="parquet",
+                file_options=file_format.make_write_options(
+                    write_statistics=write_statistics,
+                    write_page_index=write_page_index,
+                ),
+                existing_data_behavior='overwrite_or_ignore',
+            )
+            ds1 = ds.dataset(base_dir, format="parquet")
+
+            # The offset index only depends on write_page_index; the
+            # column index is expected only if statistics are written too.
+            expect_column_index = write_page_index and write_statistics
+            for file in ds1.files:
+                metadata = pq.read_metadata(file)
+                cc = metadata.row_group(0).column(0)
+                assert cc.has_offset_index is write_page_index
+                assert cc.has_column_index is expect_column_index
 
 
 @pytest.mark.parametrize('dstype', [