From a79ba486766bd47c26879231cd4292ca79c926fa Mon Sep 17 00:00:00 2001 From: Jonathan Albrecht Date: Thu, 6 Feb 2025 17:55:21 +0100 Subject: [PATCH] [SPARK-51042][SQL] Read and write the month and days fields of intervals with one call in Unsafe* classes ### What changes were proposed in this pull request? Write the month and days fields of intervals with one call to Platform.put/getLong() instead of two calls to Platform.put/getInt(). In commit ac07cea234f4fb687442aafa8b6d411695110a4e there was a performance improvement to reading a writing CalendarIntervals in UnsafeRow. This makes writing intervals consistent with UnsafeRow and has better performance compared to the original code. This also fixes big endian platforms where the old (two calls to getput) and new methods of reading and writing CalendarIntervals do not order the bytes in the same way. Currently CalendarInterval related tests in Catalyst and SQL are failing on big endian platforms. There is no effect on little endian platforms (byte order is not affected) except for performance improvement. ### Why are the changes needed? * Improves performance reading and writing CalendarIntervals in Unsafe* classes * Fixes big endian platforms where CalendarIntervals are not read or written correctly in Unsafe* classes ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing unit tests on big and little endian platforms ### Was this patch authored or co-authored using generative AI tooling? No Closes #49737 from jonathan-albrecht-ibm/master-endian-interval. Authored-by: Jonathan Albrecht Signed-off-by: Max Gekk --- .../spark/sql/catalyst/expressions/UnsafeArrayData.java | 5 +++-- .../spark/sql/catalyst/expressions/codegen/UnsafeWriter.java | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index e612166fb2596..be3e5a7d50434 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -227,8 +227,9 @@ public CalendarInterval getInterval(int ordinal) { if (isNullAt(ordinal)) return null; final long offsetAndSize = getLong(ordinal); final int offset = (int) (offsetAndSize >> 32); - final int months = Platform.getInt(baseObject, baseOffset + offset); - final int days = Platform.getInt(baseObject, baseOffset + offset + 4); + final long monthAndDays = Platform.getLong(baseObject, baseOffset + offset); + final int months = (int) (0xFFFFFFFFL & monthAndDays); + final int days = (int) ((0xFFFFFFFF00000000L & monthAndDays) >> 32); final long microseconds = Platform.getLong(baseObject, baseOffset + offset + 8); return new CalendarInterval(months, days, microseconds); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java index d651e5ab5b3e5..8e6d08bdadb8b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java @@ -140,8 +140,9 @@ public void write(int ordinal, CalendarInterval input) { BitSetMethods.set(getBuffer(), startingOffset, ordinal); } else { // Write the months, days and microseconds fields of interval to the variable length portion. - Platform.putInt(getBuffer(), cursor(), input.months); - Platform.putInt(getBuffer(), cursor() + 4, input.days); + long longVal = + ((long) input.months & 0xFFFFFFFFL) | (((long) input.days << 32) & 0xFFFFFFFF00000000L); + Platform.putLong(getBuffer(), cursor(), longVal); Platform.putLong(getBuffer(), cursor() + 8, input.microseconds); } // we need to reserve the space so that we can update it later.