Skip to content

Commit

Permalink
[CALCITE-2040] Create adapter for Apache Arrow
Browse files Browse the repository at this point in the history
Co-authored-by: Alessandro Solimando <alessandro.solimando@gmail.com>
Co-authored-by: Jonathan Swenson <jonathan@exploreomni.com>
Co-authored-by: Julian Hyde <jhyde@apache.org>
Co-authored-by: Karshit Shah <shahkarshit@yahoo.co.in>
Co-authored-by: Michael Mior <mmior@cs.rit.edu>
  • Loading branch information
6 people committed Mar 11, 2024
1 parent ad08ce5 commit d4e8830
Show file tree
Hide file tree
Showing 34 changed files with 2,848 additions and 5 deletions.
17 changes: 15 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ jobs:
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
arguments: --scan --no-parallel --no-daemon build javadoc
# Arrow build is excluded because it is not supported on Windows
# See https://arrow.apache.org/docs/java/install.html#system-compatibility
arguments: --scan --no-parallel --no-daemon build javadoc --exclude-task :arrow:build
- name: 'sqlline and sqllsh'
shell: cmd
run: |
Expand Down Expand Up @@ -103,7 +105,9 @@ jobs:
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
arguments: --scan --no-parallel --no-daemon build
# Arrow build is excluded because it is not supported on Windows
# See https://arrow.apache.org/docs/java/install.html#system-compatibility
arguments: --scan --no-parallel --no-daemon build --exclude-task :arrow:build
- name: 'sqlline and sqllsh'
shell: cmd
run: |
Expand Down Expand Up @@ -215,6 +219,9 @@ jobs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.S3_BUILD_CACHE_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.S3_BUILD_CACHE_SECRET_KEY }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
# The option `--add-opens=java.base/java.nio=ALL-UNNAMED` is required on JDK 17+
# to avoid an error. See https://arrow.apache.org/docs/java/install.html#java-compatibility
_JAVA_OPTIONS: ${{ env._JAVA_OPTIONS }} --add-opens=java.base/java.nio=ALL-UNNAMED
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
Expand All @@ -241,6 +248,9 @@ jobs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.S3_BUILD_CACHE_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.S3_BUILD_CACHE_SECRET_KEY }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
# The option `--add-opens=java.base/java.nio=ALL-UNNAMED` is required on JDK 17+
# to avoid an error. See https://arrow.apache.org/docs/java/install.html#java-compatibility
_JAVA_OPTIONS: ${{ env._JAVA_OPTIONS }} --add-opens=java.base/java.nio=ALL-UNNAMED
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
Expand Down Expand Up @@ -310,6 +320,9 @@ jobs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.S3_BUILD_CACHE_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.S3_BUILD_CACHE_SECRET_KEY }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
# The option `--add-opens=java.base/java.nio=ALL-UNNAMED` is required on JDK 17+
# to avoid an error. See https://arrow.apache.org/docs/java/install.html#java-compatibility
_JAVA_OPTIONS: ${{ env._JAVA_OPTIONS }} --add-opens=java.base/java.nio=ALL-UNNAMED
with:
job-id: jdk19
remote-build-cache-proxy-enabled: false
Expand Down
4 changes: 3 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ node('ubuntu') {
}
stage('Code Quality') {
timeout(time: 1, unit: 'HOURS') {
withEnv(["Path+JDK=$JAVA_JDK_17/bin","JAVA_HOME=$JAVA_JDK_17"]) {
// The option `--add-opens=java.base/java.nio=ALL-UNNAMED` is required on JDK 17+
// to avoid an error. See https://arrow.apache.org/docs/java/install.html#java-compatibility
withEnv(["Path+JDK=$JAVA_JDK_17/bin","JAVA_HOME=$JAVA_JDK_17","_JAVA_OPTIONS=--add-opens=java.base/java.nio=ALL-UNNAMED"]) {
withCredentials([string(credentialsId: 'SONARCLOUD_TOKEN', variable: 'SONAR_TOKEN')]) {
if ( env.BRANCH_NAME.startsWith("PR-") ) {
sh './gradlew --no-parallel --no-daemon jacocoAggregateTestReport sonar -PenableJacoco -Dsonar.pullrequest.branch=${CHANGE_BRANCH} -Dsonar.pullrequest.base=${CHANGE_TARGET} -Dsonar.pullrequest.key=${CHANGE_ID} -Dsonar.login=${SONAR_TOKEN}'
Expand Down
32 changes: 32 additions & 0 deletions arrow/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Dependencies of the Arrow adapter module.
// NOTE(review): no versions are declared here; presumably they are supplied by a
// shared BOM/platform elsewhere in the build - confirm.
dependencies {
// Calcite core is part of this module's public API, hence `api` rather than `implementation`.
api(project(":core"))

implementation("com.google.guava:guava")
// Arrow runtime libraries: Netty-backed memory, vectors, and the Gandiva evaluator.
implementation("org.apache.arrow:arrow-memory-netty")
implementation("org.apache.arrow:arrow-vector")
implementation("org.apache.arrow.gandiva:arrow-gandiva")
// Immutables: the processor runs at compile time; the annotations are compile-only.
annotationProcessor("org.immutables:value")
compileOnly("org.immutables:value-annotations")

// Test-only dependencies.
testImplementation("org.apache.arrow:arrow-jdbc")
testImplementation("net.hydromatic:scott-data-hsqldb")
testImplementation("org.apache.commons:commons-lang3")
// NOTE(review): ":core" is already declared via `api` above, so this entry looks
// redundant - confirm before removing.
testImplementation(project(":core"))
testImplementation(project(":testkit"))
}
18 changes: 18 additions & 0 deletions arrow/gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
description=Arrow adapter for Calcite
artifact.name=Calcite Arrow
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Util;

import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.VectorUnloader;
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Enumerator that reads from a collection of Arrow value-vectors.
 *
 * <p>Holds the vectors of the batch currently loaded in the
 * {@link ArrowFileReader}, restricted to the requested {@code fields}, and
 * exposes the row at {@link #currRowIndex} through {@link #current()}.
 * Subclasses decide how a batch is evaluated by implementing
 * {@link #evaluateOperator(ArrowRecordBatch)}.
 */
abstract class AbstractArrowEnumerator implements Enumerator<Object> {
  protected final ArrowFileReader arrowFileReader;
  /** Ordinals of the vectors, within the reader's schema root, to expose. */
  protected final List<Integer> fields;
  /** Vectors of the current batch, one per entry of {@link #fields}. */
  protected final List<ValueVector> valueVectors;
  /** Index of the current row within the current batch; starts at -1. */
  protected int currRowIndex;
  /** Row count of the schema root, recorded by {@link #loadNextArrowBatch()}. */
  protected int rowCount;

  AbstractArrowEnumerator(ArrowFileReader arrowFileReader, ImmutableIntList fields) {
    this.arrowFileReader = arrowFileReader;
    this.fields = fields;
    this.valueVectors = new ArrayList<>(fields.size());
    this.currRowIndex = -1;
  }

  /**
   * Evaluates this enumerator's operator against a record batch.
   *
   * <p>The batch is owned by {@link #loadNextArrowBatch()} and is closed as
   * soon as this method returns, so implementations must not retain it.
   *
   * @param arrowRecordBatch record batch unloaded from the reader's current
   *     schema root
   */
  abstract void evaluateOperator(ArrowRecordBatch arrowRecordBatch);

  /**
   * Refreshes {@link #valueVectors} and {@link #rowCount} from the reader's
   * schema root, then hands the unloaded record batch to
   * {@link #evaluateOperator(ArrowRecordBatch)}.
   *
   * <p>An {@link IOException} raised while obtaining the schema root is
   * rethrown unchecked.
   */
  protected void loadNextArrowBatch() {
    try {
      final VectorSchemaRoot vsr = arrowFileReader.getVectorSchemaRoot();
      // Start from an empty list so that repeated calls never accumulate
      // vectors from earlier batches; current() sizes its result from this list.
      valueVectors.clear();
      for (int i : fields) {
        valueVectors.add(vsr.getVector(i));
      }
      rowCount = vsr.getRowCount();
      final VectorUnloader vectorUnloader = new VectorUnloader(vsr);
      // The record batch holds its own references to the underlying buffers;
      // close it once the operator has been evaluated so they are released.
      try (ArrowRecordBatch arrowRecordBatch = vectorUnloader.getRecordBatch()) {
        evaluateOperator(arrowRecordBatch);
      }
    } catch (IOException e) {
      throw Util.toUnchecked(e);
    }
  }

  /**
   * Returns the current row.
   *
   * @return the single value when exactly one field is requested, otherwise
   *     an {@code Object[]} with one element per loaded vector
   */
  @Override public Object current() {
    if (fields.size() == 1) {
      return this.valueVectors.get(0).getObject(currRowIndex);
    }
    Object[] current = new Object[valueVectors.size()];
    for (int i = 0; i < valueVectors.size(); i++) {
      ValueVector vector = this.valueVectors.get(i);
      current[i] = vector.getObject(currRowIndex);
    }
    return current;
  }

  /** Not supported; always throws {@link UnsupportedOperationException}. */
  @Override public void reset() {
    throw new UnsupportedOperationException();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.calcite.linq4j.AbstractEnumerable;
import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Util;

import org.apache.arrow.gandiva.evaluator.Filter;
import org.apache.arrow.gandiva.evaluator.Projector;
import org.apache.arrow.vector.ipc.ArrowFileReader;

import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Enumerable over the contents of an Arrow file reader.
 *
 * <p>Each call to {@link #enumerator()} creates a projecting enumerator when
 * a projector is present, otherwise a filtering enumerator when a filter is
 * present. At least one of the two must be supplied.
 */
class ArrowEnumerable extends AbstractEnumerable<Object> {
  private final ArrowFileReader arrowFileReader;
  private final ImmutableIntList fields;
  private final @Nullable Projector projector;
  private final @Nullable Filter filter;

  ArrowEnumerable(ArrowFileReader arrowFileReader, ImmutableIntList fields,
      @Nullable Projector projector, @Nullable Filter filter) {
    this.arrowFileReader = arrowFileReader;
    this.fields = fields;
    this.projector = projector;
    this.filter = filter;
  }

  /**
   * Creates the enumerator; the projector takes precedence over the filter.
   *
   * @throws IllegalArgumentException if neither a projector nor a filter
   *     was supplied
   */
  @Override public Enumerator<Object> enumerator() {
    try {
      final Enumerator<Object> result;
      if (projector != null) {
        result = new ArrowProjectEnumerator(arrowFileReader, fields, projector);
      } else if (filter != null) {
        result = new ArrowFilterEnumerator(arrowFileReader, fields, filter);
      } else {
        throw new IllegalArgumentException(
            "The arrow enumerator must have either a filter or a projection");
      }
      return result;
    } catch (Exception e) {
      throw Util.toUnchecked(e);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.linq4j.tree.Primitive;
import org.apache.calcite.rel.type.RelDataType;

import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;

import java.math.BigDecimal;
import java.util.Date;
import java.util.List;

import static java.util.Objects.requireNonNull;

/**
 * Arrow field type.
 *
 * <p>Each constant carries the Java class used to derive the corresponding
 * Calcite type; see {@link #toType(JavaTypeFactory)}. Use
 * {@link #of(ArrowType)} to map an Arrow type to a constant.
 */
enum ArrowFieldType {
  INT(Primitive.INT),
  BOOLEAN(Primitive.BOOLEAN),
  STRING(String.class),
  FLOAT(Primitive.FLOAT),
  DOUBLE(Primitive.DOUBLE),
  DATE(Date.class),
  LIST(List.class),
  DECIMAL(BigDecimal.class),
  LONG(Primitive.LONG),
  BYTE(Primitive.BYTE),
  SHORT(Primitive.SHORT);

  private final Class<?> clazz;

  /** Creates a field type backed by the box class of a primitive. */
  ArrowFieldType(Primitive primitive) {
    this(requireNonNull(primitive.boxClass, "boxClass"));
  }

  /** Creates a field type backed by the given Java class. */
  ArrowFieldType(Class<?> clazz) {
    this.clazz = clazz;
  }

  /**
   * Converts this field type to a nullable SQL type.
   *
   * <p>The SQL type name is taken from the Java type that the factory
   * creates for this constant's class.
   */
  public RelDataType toType(JavaTypeFactory typeFactory) {
    return typeFactory.createTypeWithNullability(
        typeFactory.createSqlType(
            typeFactory.createJavaType(clazz).getSqlTypeName()),
        true);
  }

  /**
   * Returns the field type that corresponds to an Arrow type.
   *
   * @throws IllegalArgumentException if the Arrow type, integer bit width or
   *     floating-point precision is not supported
   */
  public static ArrowFieldType of(ArrowType arrowType) {
    switch (arrowType.getTypeID()) {
    case Int:
      return ofInt((ArrowType.Int) arrowType);
    case FloatingPoint:
      return ofFloatingPoint((ArrowType.FloatingPoint) arrowType);
    case Bool:
      return BOOLEAN;
    case Utf8:
      return STRING;
    case Date:
      return DATE;
    case Decimal:
      return DECIMAL;
    default:
      throw new IllegalArgumentException("Unsupported type: " + arrowType);
    }
  }

  /** Maps an Arrow integer type to a field type according to its bit width. */
  private static ArrowFieldType ofInt(ArrowType.Int intType) {
    final int bitWidth = intType.getBitWidth();
    switch (bitWidth) {
    case 8:
      return BYTE;
    case 16:
      return SHORT;
    case 32:
      return INT;
    case 64:
      return LONG;
    default:
      throw new IllegalArgumentException("Unsupported Int bit width: " + bitWidth);
    }
  }

  /** Maps an Arrow floating-point type to a field type according to its precision. */
  private static ArrowFieldType ofFloatingPoint(ArrowType.FloatingPoint floatType) {
    final FloatingPointPrecision precision = floatType.getPrecision();
    switch (precision) {
    case SINGLE:
      return FLOAT;
    case DOUBLE:
      return DOUBLE;
    default:
      throw new IllegalArgumentException("Unsupported Floating point precision: " + precision);
    }
  }
}
Loading

0 comments on commit d4e8830

Please sign in to comment.