Skip to content

Commit

Permalink
Partially have Async API working
Browse files Browse the repository at this point in the history
Can submit an async query and the result is written to the result index.

The external table still needs to be created in Spark before submitting the query.

Signed-off-by: Norman Jordan <norman.jordan@improving.com>
  • Loading branch information
normanj-bitquill committed Jan 10, 2025
1 parent d5052a6 commit ca30131
Show file tree
Hide file tree
Showing 18 changed files with 691 additions and 30 deletions.
5 changes: 3 additions & 2 deletions docker/integ-test/.env
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
SPARK_CONNECT_PORT=15002
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
PPL_JAR=./ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=./flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
SQL_APP_JAR=./spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar
OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
Expand Down
75 changes: 52 additions & 23 deletions docker/integ-test/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
services:
metastore:
build: ./metastore
container_name: metastore
ports:
- "${THRIFT_PORT:-9083}:9083"
volumes:
- type: bind
source: ./metastore/hive-site.xml
target: /opt/apache-hive-2.3.9-bin/conf/hive-site.xml
- type: bind
source: ./metastore/hive-log4j2.properties
target: /opt/apache-hive-2.3.9-bin/conf/hive-log4j2.properties
networks:
- opensearch-net

spark:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
container_name: spark
Expand All @@ -20,22 +35,22 @@ services:
- OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
volumes:
- type: bind
source: ./spark-master-entrypoint.sh
source: ./spark/spark-master-entrypoint.sh
target: /opt/bitnami/scripts/spark/master-entrypoint.sh
- type: bind
source: ./spark-defaults.conf
source: ./spark/spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
source: ./spark/log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
- type: bind
source: ./s3.credentials
source: ./spark/s3.credentials
target: /opt/bitnami/spark/s3.credentials
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/"]
Expand All @@ -47,6 +62,8 @@ services:
networks:
- opensearch-net
depends_on:
metastore:
condition: service_started
opensearch:
condition: service_healthy
opensearch-dashboards:
Expand All @@ -67,35 +84,50 @@ services:
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-defaults.conf
source: ./spark/spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
source: ./spark/log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
networks:
- opensearch-net
depends_on:
metastore:
condition: service_started
spark:
condition: service_healthy

spark-submit:
build:
context: ../../
dockerfile: docker/integ-test/spark-submit/Dockerfile
args:
FLINT_JAR: ${FLINT_JAR}
PPL_JAR: ${PPL_JAR}
SQL_APP_JAR: ${SQL_APP_JAR}
depends_on:
metastore:
condition: service_completed_successfully

opensearch:
image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest}
build: ./opensearch
container_name: opensearch
environment:
- cluster.name=opensearch-cluster
- node.name=opensearch
- discovery.seed_hosts=opensearch
- cluster.initial_cluster_manager_nodes=opensearch
- discovery.type=single-node
- bootstrap.memory_lock=true
- plugins.security.system_indices.enabled=false
- plugins.security.system_indices.permission.enabled=false
- plugins.security.ssl.http.enabled=false
- plugins.query.datasources.encryption.masterkey=9a515c99d4313f140a6607053502f4d6
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} -DEMR_SERVERLESS_CLIENT_FACTORY_CLASS=org.opensearch.sql.spark.client.DockerEMRServerlessClientFactory -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} -DEMR_SERVERLESS_CLIENT_FACTORY_CLASS=org.opensearch.sql.spark.client.DockerEMRServerlessClientFactory
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
ulimits:
memlock:
Expand All @@ -105,20 +137,18 @@ services:
soft: 65536
hard: 65536
volumes:
- opensearch-data:/usr/share/opensearch/data
- type: volume
source: opensearch-data
target: /usr/share/opensearch/data
- type: bind
source: ./aws-java-sdk-emrserverless-1.12.651.jar
target: /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-1.12.651.jar
- type: bind
source: ../../spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar
target: /spark-sql-application.jar
source: /var/run/docker.sock
target: /var/run/docker.sock
ports:
- ${OPENSEARCH_PORT:-9200}:9200
- 9600:9600
- 5005:5005
expose:
- "${OPENSEARCH_PORT:-9200}"
- "5005"
- "9300"
healthcheck:
test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"]
interval: 1m
Expand Down Expand Up @@ -177,6 +207,5 @@ services:
volumes:
opensearch-data:
minio-data:

networks:
opensearch-net:
23 changes: 23 additions & 0 deletions docker/integ-test/metastore/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Hive metastore image for the integration-test Docker environment.
# Runs the Hive 2.3.9 standalone metastore service (Thrift, port 9083)
# on top of a JDK 21 base, with Hadoop 3.3.4 client jars and the S3A
# connector added to Hive's classpath.
FROM openjdk:21-jdk-bookworm

WORKDIR /opt

ENV HADOOP_HOME=/opt/hadoop-3.3.4
ENV HIVE_HOME=/opt/apache-hive-2.3.9-bin

#RUN apt-get update
# Download and unpack the Hive and Hadoop distributions under /opt.
RUN curl -L https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz | tar zxf -
RUN curl -L https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz | tar zxf -
# Put the Hadoop client jars and the S3A connector (hadoop-aws + AWS SDK
# bundle) on Hive's classpath so the metastore can reach s3a:// stores.
RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-api-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-runtime-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-1.12.262.jar $HIVE_HOME/lib/

# Create an unprivileged 'hive' user (uid/gid 1000) that owns the install.
RUN groupadd -f -r hive --gid=1000
RUN useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive
RUN chown hive:hive -R ${HIVE_HOME}

WORKDIR $HIVE_HOME
EXPOSE 9083
ENTRYPOINT ["/opt/apache-hive-2.3.9-bin/bin/hive", "--service", "metastore"]
# USER after ENTRYPOINT is fine: both are image metadata, so the container
# still starts the metastore process as 'hive', not root.
USER hive
62 changes: 62 additions & 0 deletions docker/integ-test/metastore/hive-log4j2.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Log4j2 configuration for the metastore container: everything goes to the
# console (stderr) at INFO so `docker logs` captures the full output.
status = INFO
name = HiveLog4j2
packages = org.apache.hadoop.hive.ql.log

# list of properties
property.hive.log.level = INFO
property.hive.root.logger = console
property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
property.hive.log.file = hive.log
property.hive.perflogger.log.level = INFO

# list of all appenders
appenders = console

# console appender
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n

# list of all loggers
# Noisy third-party loggers (ZooKeeper, DataNucleus/JPOX ORM) are raised to
# WARN/ERROR so the console output stays readable.
loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, PerfLogger

logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
logger.NIOServerCnxn.level = WARN

logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
logger.ClientCnxnSocketNIO.level = WARN

logger.DataNucleus.name = DataNucleus
logger.DataNucleus.level = ERROR

logger.Datastore.name = Datastore
logger.Datastore.level = ERROR

logger.JPOX.name = JPOX
logger.JPOX.level = ERROR

logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger
logger.PerfLogger.level = ${sys:hive.perflogger.log.level}

# root logger
rootLogger.level = ${sys:hive.log.level}
rootLogger.appenderRefs = root
rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
53 changes: 53 additions & 0 deletions docker/integ-test/metastore/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Hive metastore configuration for the integ-test Docker environment:
     embedded Derby for the metastore DB, local /tmp warehouse, and S3A
     pointed at the MinIO container. -->
<configuration>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>file:///tmp</value>
    <description></description>
  </property>
  <property>
    <!-- NOTE(review): fs.default.name is the deprecated spelling of
         fs.defaultFS; Hadoop still honors it, but consider renaming. -->
    <name>fs.default.name</name>
    <value>file:///tmp</value>
  </property>
  <!-- Fix: ConnectionURL and ConnectionDriverName were interleaved inside a
       single malformed nested <property> element, which is not well-formed
       XML and fails to parse. Split into two proper properties. -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>org.apache.derby.jdbc.EmbeddedDriver</value>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateTables</name>
    <value>true</value>
  </property>
  <property>
    <name>fs.s3a.impl</name>
    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
  </property>
  <property>
    <name>fs.s3a.path.style.access</name>
    <value>true</value>
  </property>
  <!-- Test-only MinIO credentials; do not reuse outside this compose env. -->
  <property>
    <name>fs.s3a.access.key</name>
    <value>Vt7jnvi5BICr1rkfsheT</value>
  </property>
  <property>
    <name>fs.s3a.secret.key</name>
    <value>5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO</value>
  </property>
  <property>
    <!-- NOTE(review): hostname 'minio-S3' must match the compose service
         name exactly (case-sensitive network alias) - verify. -->
    <name>fs.s3a.endpoint</name>
    <value>http://minio-S3:9000</value>
  </property>
  <property>
    <name>fs.s3a.connection.ssl.enabled</name>
    <value>false</value>
  </property>
</configuration>
41 changes: 41 additions & 0 deletions docker/integ-test/opensearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Custom OpenSearch image for the integ-test environment. It rebuilds the
# AWS EMR Serverless SDK jar shipped with the opensearch-sql plugin so that
# EMR Serverless job submissions are redirected to a local Docker-backed
# client (DockerEMRServerlessClient), and installs the docker CLI so the
# container can drive sibling containers via the mounted docker socket.
FROM opensearchproject/opensearch:latest

USER root

RUN mkdir /tmp/alter-emr-jar
WORKDIR /tmp/alter-emr-jar

ENV AWS_VERSION=1.12.651

# Fetch the stock EMR Serverless SDK jars used for compilation.
RUN curl -O -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-emrserverless/${AWS_VERSION}/aws-java-sdk-emrserverless-${AWS_VERSION}.jar
RUN curl -O -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/${AWS_VERSION}/aws-java-sdk-core-${AWS_VERSION}.jar

# Compile the replacement client builder and Docker-based client against
# the stock jars, using the JDK bundled with OpenSearch.
COPY emr-src /tmp/alter-emr-jar/emr-src
WORKDIR /tmp/alter-emr-jar/emr-src
RUN /usr/share/opensearch/jdk/bin/javac -cp ../aws-java-sdk-emrserverless-${AWS_VERSION}.jar:../aws-java-sdk-core-${AWS_VERSION}.jar com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.java org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java
# Unpack the original jar, overlay the freshly compiled classes, and
# repack it in place over the plugin's bundled jar.
# NOTE(review): the wildcard output path relies on exactly one matching jar
# already existing in the plugin directory - verify.
RUN mkdir /tmp/alter-emr-jar/extracted
WORKDIR /tmp/alter-emr-jar/extracted
RUN /usr/share/opensearch/jdk/bin/jar -xf ../aws-java-sdk-emrserverless-${AWS_VERSION}.jar
RUN cp ../emr-src/com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.class com/amazonaws/services/emrserverless/
RUN mkdir -p org/opensearch/spark/emrserverless
RUN cp ../emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.class org/opensearch/spark/emrserverless/
RUN /usr/share/opensearch/jdk/bin/jar -cfM /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar META-INF/MANIFEST.MF *
RUN chown opensearch:opensearch /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar
RUN rm -rf /tmp/alter-emr-jar

# docker CLI (used with the host's mounted /var/run/docker.sock) and
# util-linux for the entrypoint helpers.
RUN yum install -y docker util-linux

COPY opensearch-docker-it-entrypoint.sh /usr/share/opensearch/opensearch-docker-it-entrypoint.sh
COPY docker-command-runner.sh /usr/share/opensearch/docker-command-runner.sh
COPY opensearch_security.policy /usr/share/opensearch/config/opensearch-performance-analyzer/opensearch_security.policy
COPY log4j2.properties /usr/share/opensearch/config/log4j2.properties

RUN chown opensearch:opensearch /usr/share/opensearch/config/opensearch-performance-analyzer/opensearch_security.policy
RUN chown opensearch:opensearch /usr/share/opensearch/config/log4j2.properties

WORKDIR /usr/share/opensearch
ENTRYPOINT ["./opensearch-docker-it-entrypoint.sh"]
CMD ["opensearch"]

EXPOSE 9200
EXPOSE 9300
29 changes: 29 additions & 0 deletions docker/integ-test/opensearch/docker-command-runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Polls /tmp/docker for "*cmd" files written by the OpenSearch process, runs
# each file's contents as arguments to the docker CLI, and writes the
# command's stdout, stderr and exit code to sibling files (foo.cmd ->
# foo.stdout / foo.stderr / foo.exitCode) so the writer can collect results.
# The cmd file is removed once processed.

function process_files {
  # Glob instead of parsing `ls` output so filenames with whitespace work.
  for cmd_file in *cmd; do
    # With no matches the glob stays literal; skip the nonexistent name.
    [ -e "$cmd_file" ] || continue

    # Anchored suffix replacement. The original sed for the exit-code file
    # used an unanchored pattern (s/cmd/exitCode/), which would corrupt any
    # filename containing "cmd" before the suffix.
    stdout_filename="${cmd_file%cmd}stdout"
    stderr_filename="${cmd_file%cmd}stderr"
    exit_code_filename="${cmd_file%cmd}exitCode"

    # Word-splitting of the file contents is intentional: each cmd file
    # holds whitespace-separated docker CLI arguments.
    /usr/bin/docker $(cat "$cmd_file") > "$stdout_filename" 2> "$stderr_filename"
    echo "$?" > "$exit_code_filename"

    rm -f -- "$cmd_file"
  done
}

if [ ! -d '/tmp/docker' ]; then
  mkdir /tmp/docker
  chown opensearch:opensearch /tmp/docker
fi

# Bail out rather than polling the wrong directory if the cd fails.
cd /tmp/docker || exit 1
while true; do
  process_files
  sleep 1
done

Loading

0 comments on commit ca30131

Please sign in to comment.