Skip to content

Commit

Permalink
DX-100503: new arrow type and vector TimestampWithPrecision
Browse files Browse the repository at this point in the history
  • Loading branch information
xxlaykxx committed Feb 13, 2025
1 parent b02259e commit c3c97ea
Show file tree
Hide file tree
Showing 23 changed files with 665 additions and 25 deletions.
15 changes: 11 additions & 4 deletions format/Schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ enum MetadataVersion:short {
/// forward compatibility guarantees).
/// 2. A means of negotiating between a client and server
/// what features a stream is allowed to use. The enums
/// values here are intented to represent higher level
/// features, additional details maybe negotiated
/// values here are intended to represent higher level
/// features, additional details may be negotiated
/// with key-value pairs specific to the protocol.
///
/// Enums added to this list should be assigned power-of-two values
Expand Down Expand Up @@ -395,6 +395,12 @@ table Timestamp {
timezone: string;
}

table TimestampWithPrecision {
/// Total number of decimal digits
precision: int;
timezone: string;
}

enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO}
// A "calendar" interval which models types that don't necessarily
// have a precise duration without the context of a base timestamp (e.g.
Expand All @@ -421,7 +427,7 @@ table Interval {
// An absolute length of time unrelated to any calendar artifacts.
//
// For the purposes of Arrow Implementations, adding this value to a Timestamp
// ("t1") naively (i.e. simply summing the two number) is acceptable even
// ("t1") naively (i.e. simply summing the two numbers) is acceptable even
// though in some cases the resulting Timestamp (t2) would not account for
// leap-seconds during the elapsed time between "t1" and "t2". Similarly,
// representing the difference between two Unix timestamp is acceptable, but
Expand Down Expand Up @@ -465,6 +471,7 @@ union Type {
BinaryView,
Utf8View,
ListView,
TimestampWithPrecision
LargeListView,
}

Expand Down Expand Up @@ -510,7 +517,7 @@ table DictionaryEncoding {
/// nested type.

table Field {
/// Name is not required, in i.e. a List
/// Name is not required (e.g., in a List)
name: string;

/// Whether or not this field can contain nulls. Should be true in general.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.arrow.vector.TimeNanoVector;
import org.apache.arrow.vector.TimeSecVector;
import org.apache.arrow.vector.TimeStampVector;
import org.apache.arrow.vector.TimeStampWithPrecisionVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
Expand Down Expand Up @@ -294,4 +295,14 @@ public ColumnBinder visit(ArrowType.ListView type) {
public ColumnBinder visit(ArrowType.LargeListView type) {
throw new UnsupportedOperationException("No column binder implemented for type " + type);
}

@Override
public ColumnBinder visit(ArrowType.TimestampWithPrecision type) {
Calendar calendar = null;
final String timezone = type.getTimezone();
if (timezone != null && !timezone.isEmpty()) {
calendar = Calendar.getInstance(TimeZone.getTimeZone(ZoneId.of(timezone)));
}
return new TimestampWithPrecisionBinder((TimeStampWithPrecisionVector) vector, calendar);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.adapter.jdbc.binder;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.sql.Types;
import java.util.Calendar;
import org.apache.arrow.vector.TimeStampVector;
import org.apache.arrow.vector.TimeStampWithPrecisionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;

/** A column binder for timestamps. */
public class TimestampWithPrecisionBinder extends BaseColumnBinder<TimeStampVector> {
private final Calendar calendar;
private final long unitsPerSecond;
private final long nanosPerUnit;

/** Create a binder for a timestamp vector using the default JDBC type code. */
public TimestampWithPrecisionBinder(TimeStampWithPrecisionVector vector, Calendar calendar) {
this(
vector,
calendar,
isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP);
}

/**
* Create a binder for a timestamp vector.
*
* @param vector The vector to pull values from.
* @param calendar Optionally, the calendar to pass to JDBC.
* @param jdbcType The JDBC type code to use for null values.
*/
public TimestampWithPrecisionBinder(
TimeStampWithPrecisionVector vector, Calendar calendar, int jdbcType) {
super(vector, jdbcType);
this.calendar = calendar;

final ArrowType.TimestampWithPrecision type =
(ArrowType.TimestampWithPrecision) vector.getField().getType();
switch (type.getPrecision()) {
case 0:
this.unitsPerSecond = 1;
this.nanosPerUnit = 1_000_000_000;
break;
case 1:
this.unitsPerSecond = 10;
this.nanosPerUnit = 1_000_000_00;
break;
case 2:
this.unitsPerSecond = 100;
this.nanosPerUnit = 1_000_000_0;
break;
case 3:
this.unitsPerSecond = 1_000;
this.nanosPerUnit = 1_000_000;
break;
case 4:
this.unitsPerSecond = 10_000;
this.nanosPerUnit = 1_000_00;
break;
case 5:
this.unitsPerSecond = 100_000;
this.nanosPerUnit = 1_000_0;
break;
case 6:
this.unitsPerSecond = 1_000_000;
this.nanosPerUnit = 1_000;
break;
case 7:
this.unitsPerSecond = 10_000_000;
this.nanosPerUnit = 1_00;
break;
case 8:
this.unitsPerSecond = 100_000_000;
this.nanosPerUnit = 1_0;
break;
case 9:
this.unitsPerSecond = 1_000_000_000;
this.nanosPerUnit = 1;
break;
default:
throw new IllegalArgumentException("Invalid time unit in " + type);
}
}

@Override
public void bind(PreparedStatement statement, int parameterIndex, int rowIndex)
throws SQLException {
// TODO: option to throw on truncation (vendor Guava IntMath#multiply) or overflow
final long rawValue =
vector.getDataBuffer().getLong((long) rowIndex * TimeStampWithPrecisionVector.TYPE_WIDTH);
final long seconds = rawValue / unitsPerSecond;
final int nanos = (int) ((rawValue - (seconds * unitsPerSecond)) * nanosPerUnit);
final Timestamp value = new Timestamp(seconds * 1_000);
value.setNanos(nanos);
if (calendar != null) {
// Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC
// value with a
// zone offset, so we don't need to do any conversion.
statement.setTimestamp(parameterIndex, value, calendar);
} else {
// Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is
// technically wrong,
// but there is no 'correct' interpretation here. The application should provide a calendar.
statement.setTimestamp(parameterIndex, value);
}
}

private static boolean isZoned(ArrowType type) {
final String timezone = ((ArrowType.TimestampWithPrecision) type).getTimezone();
return timezone != null && !timezone.isEmpty();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.driver.jdbc.converter.impl;

import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.TimeStampWithPrecisionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.calcite.avatica.AvaticaParameter;
import org.apache.calcite.avatica.remote.TypedValue;

/** AvaticaParameterConverter for Timestamp Arrow types. */
public class TimestampPrecisionAvaticaParameterConverter extends BaseAvaticaParameterConverter {

public TimestampPrecisionAvaticaParameterConverter(ArrowType.TimestampWithPrecision type) {}

@Override
public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) {
long value = (long) typedValue.toLocal();
if (vector instanceof TimeStampWithPrecisionVector) {
((TimeStampWithPrecisionVector) vector).setSafe(index, value);
return true;
}
return false;
}

@Override
public AvaticaParameter createParameter(Field field) {
return createParameter(field, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.arrow.driver.jdbc.converter.impl.StructAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.TimeAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.TimestampPrecisionAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter;
import org.apache.arrow.memory.BufferAllocator;
Expand Down Expand Up @@ -287,5 +288,11 @@ public Boolean visit(ArrowType.RunEndEncoded type) {
throw new UnsupportedOperationException(
"No Avatica parameter binder implemented for type " + type);
}

@Override
public Boolean visit(ArrowType.TimestampWithPrecision type) {
return new TimestampPrecisionAvaticaParameterConverter(type)
.bindParameter(vector, typedValue, index);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.arrow.driver.jdbc.converter.impl.StructAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.TimeAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.TimestampPrecisionAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.Utf8ViewAvaticaParameterConverter;
Expand Down Expand Up @@ -290,5 +291,10 @@ public AvaticaParameter visit(ArrowType.RunEndEncoded type) {
throw new UnsupportedOperationException(
"No Avatica parameter binder implemented for type " + type);
}

@Override
public AvaticaParameter visit(ArrowType.TimestampWithPrecision type) {
return new TimestampPrecisionAvaticaParameterConverter(type).createParameter(field);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.flatbuf;

import com.google.flatbuffers.BaseVector;
import com.google.flatbuffers.Constants;
import com.google.flatbuffers.FlatBufferBuilder;
import com.google.flatbuffers.Table;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public final class TimestampWithPrecision extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); }
public static TimestampWithPrecision getRootAsTimestampWithPrecision(ByteBuffer _bb) {
return getRootAsTimestampWithPrecision(_bb, new TimestampWithPrecision());
}
public static TimestampWithPrecision getRootAsTimestampWithPrecision(ByteBuffer _bb, TimestampWithPrecision obj) {
_bb.order(ByteOrder.LITTLE_ENDIAN);
return obj.__init(_bb.getInt(_bb.position()) + _bb.position(), _bb);
}

public TimestampWithPrecision __init(int _i, ByteBuffer _bb) {
this.bb_pos = _i;
this.bb = _bb;
return this;
}
public TimestampWithPrecision __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }

public int precision() {
int o = this.__offset(4);
return o != 0 ? this.bb.getInt(o + this.bb_pos) : 0;
}
public String timezone() {
int o = this.__offset(6);
return o != 0 ? this.__string(o + this.bb_pos) : null;
}
public ByteBuffer timezoneAsByteBuffer() {
return this.__vector_as_bytebuffer(6, 1);
}
public ByteBuffer timezoneInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }

public static int createTimestampWithPrecision(FlatBufferBuilder builder, int precision, int timezone) {
builder.startTable(2);
TimestampWithPrecision.addTimezone(builder, timezone);
TimestampWithPrecision.addPrecision(builder, precision);
return endTimestampWithPrecision(builder);
}
public static void startTimestampWithPrecision(FlatBufferBuilder builder) {
builder.startTable(2);
}
public static void addPrecision(FlatBufferBuilder builder, int precision) {
builder.addInt(0, precision, 0);
}
public static void addTimezone(FlatBufferBuilder builder, int timezoneOffset) {
builder.addOffset(1, timezoneOffset, 0);
}
public static int endTimestampWithPrecision(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
}

public static final class Vector extends BaseVector {
public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }

public TimestampWithPrecision get(int j) { return get( new TimestampWithPrecision(), j); }
public TimestampWithPrecision get(TimestampWithPrecision obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
}
}
3 changes: 2 additions & 1 deletion java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ private Type() { }
public static final byte Utf8View = 24;
public static final byte ListView = 25;
public static final byte LargeListView = 26;
public static final byte TimestampWithPrecision = 27;

public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView", };
public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView", "TimestampWithPrecision",};

public static String name(int e) { return names[e]; }
}
Expand Down
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@
fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "timezone", type: String}]
complex: false
},
{
name: "TimestampWithPrecision",
fields: [{name: "precision", type: int}, {name: "timezone", type: String}],
complex: false
},
{
name: "Interval",
fields: [{name: "unit", type: short, valueType: IntervalUnit}],
Expand Down
Loading

0 comments on commit c3c97ea

Please sign in to comment.