Skip to content

Commit

Permalink
Reworked image use for Spark nodes
Browse files Browse the repository at this point in the history
* Hive container will now store data on a volume
* Spark containers now use a custom-built image, so they no longer need a custom entrypoint script or to start as the root user

Signed-off-by: Norman Jordan <norman.jordan@improving.com>
  • Loading branch information
normanj-bitquill committed Jan 13, 2025
1 parent 60eb7bc commit c43791f
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 46 deletions.
38 changes: 14 additions & 24 deletions docker/integ-test/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,47 +11,38 @@ services:
- type: bind
source: ./metastore/hive-log4j2.properties
target: /opt/apache-hive-2.3.9-bin/conf/hive-log4j2.properties
- type: volume
source: metastore-data
target: /data
networks:
- opensearch-net

spark:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
build:
context: ./spark
dockerfile: Dockerfile
args:
SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
container_name: spark
ports:
- "${MASTER_UI_PORT:-8080}:8080"
- "${MASTER_PORT:-7077}:7077"
- "${UI_PORT:-4040}:4040"
- "${SPARK_CONNECT_PORT}:15002"
entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh
user: root
environment:
- SPARK_MODE=master
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
- SPARK_PUBLIC_DNS=localhost
- AWS_ENDPOINT_URL_S3=http://minio-S3
- OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
volumes:
- type: bind
source: ./spark/spark-master-entrypoint.sh
target: /opt/bitnami/scripts/spark/master-entrypoint.sh
- type: bind
source: ./spark/spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./spark/log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
- type: bind
source: ./spark/s3.credentials
target: /opt/bitnami/spark/s3.credentials
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/"]
interval: 1m
Expand All @@ -72,7 +63,11 @@ services:
condition: service_completed_successfully

spark-worker:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
build:
context: ./spark
dockerfile: Dockerfile
args:
SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
container_name: spark-worker
environment:
- SPARK_MODE=worker
Expand All @@ -85,12 +80,6 @@ services:
- SPARK_SSL_ENABLED=no
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark/spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./spark/log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
Expand Down Expand Up @@ -225,6 +214,7 @@ services:
- opensearch-net

volumes:
metastore-data:
opensearch-data:
minio-data:
networks:
Expand Down
3 changes: 3 additions & 0 deletions docker/integ-test/metastore/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ RUN groupadd -f -r hive --gid=1000
RUN useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive
RUN chown hive:hive -R ${HIVE_HOME}

RUN mkdir /data
RUN chown hive:hive /data

WORKDIR $HIVE_HOME
EXPOSE 9083
ENTRYPOINT ["/opt/apache-hive-2.3.9-bin/bin/hive", "--service", "metastore"]
Expand Down
2 changes: 1 addition & 1 deletion docker/integ-test/metastore/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
<value>jdbc:derby:;databaseName=/data/metastore_db;create=true</value>
</property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
Expand Down
13 changes: 13 additions & 0 deletions docker/integ-test/spark/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

# Spark image for the integration-test containers: the Bitnami Spark base
# image plus curl and the Spark configuration files from this directory.

# SPARK_VERSION selects the base image tag; it can be overridden with
# --build-arg (or the compose `build.args` entry).
ARG SPARK_VERSION=3.5.3
FROM bitnami/spark:${SPARK_VERSION}

# Installing packages requires root.
USER root
# Refresh the package index and install in the SAME layer so a cached
# "update" layer is never combined with a newer install step (stale index).
# apt-get is used instead of apt because apt's CLI is not intended for
# scripts. The package lists are removed to keep the layer small.
RUN apt-get update \
    && apt-get install -y curl \
    && rm -rf /var/lib/apt/lists/*

# Switch back to UID 1001 so the image does not run as root.
# NOTE(review): presumably 1001 is the base image's default non-root user — confirm.
USER 1001
# Bake the Spark configuration into the image.
COPY ./spark-defaults.conf /opt/bitnami/spark/conf/spark-defaults.conf
COPY ./log4j2.properties /opt/bitnami/spark/conf/log4j2.properties
2 changes: 0 additions & 2 deletions docker/integ-test/spark/s3.credentials

This file was deleted.

19 changes: 0 additions & 19 deletions docker/integ-test/spark/spark-master-entrypoint.sh

This file was deleted.

0 comments on commit c43791f

Please sign in to comment.