From d5052a6fb4a9979a2f7f8671ac68ba16df9113e9 Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Tue, 31 Dec 2024 10:41:09 -0800 Subject: [PATCH] WIP: Docker integ test with async API Signed-off-by: Norman Jordan --- docker/integ-test/docker-compose.yml | 45 ++++++++++++- docker/integ-test/s3.credentials | 2 + docker/integ-test/spark-defaults.conf | 7 ++ docker/integ-test/spark-master-entrypoint.sh | 70 ++++++++++++++++---- 4 files changed, 108 insertions(+), 16 deletions(-) create mode 100644 docker/integ-test/s3.credentials diff --git a/docker/integ-test/docker-compose.yml b/docker/integ-test/docker-compose.yml index c5ee53d7d..907de2205 100644 --- a/docker/integ-test/docker-compose.yml +++ b/docker/integ-test/docker-compose.yml @@ -8,6 +8,7 @@ services: - "${UI_PORT:-4040}:4040" - "${SPARK_CONNECT_PORT}:15002" entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh + user: root environment: - SPARK_MODE=master - SPARK_RPC_AUTHENTICATION_ENABLED=no @@ -15,6 +16,8 @@ services: - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no - SPARK_SSL_ENABLED=no - SPARK_PUBLIC_DNS=localhost + - AWS_ENDPOINT_URL_S3=http://minio-S3 + - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD} volumes: - type: bind source: ./spark-master-entrypoint.sh @@ -31,6 +34,9 @@ services: - type: bind source: $FLINT_JAR target: /opt/bitnami/spark/jars/flint-spark-integration.jar + - type: bind + source: ./s3.credentials + target: /opt/bitnami/spark/s3.credentials healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/"] interval: 1m @@ -40,6 +46,11 @@ services: start_interval: 5s networks: - opensearch-net + depends_on: + opensearch: + condition: service_healthy + opensearch-dashboards: + condition: service_healthy spark-worker: image: bitnami/spark:${SPARK_VERSION:-3.5.3} @@ -70,7 +81,8 @@ services: networks: - opensearch-net depends_on: - - spark + spark: + condition: service_healthy opensearch: image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest} @@ -82,7 +94,8 @@ 
services: - cluster.initial_cluster_manager_nodes=opensearch - bootstrap.memory_lock=true - plugins.security.ssl.http.enabled=false - - OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} + - plugins.query.datasources.encryption.masterkey=9a515c99d4313f140a6607053502f4d6 + - OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} -DEMR_SERVERLESS_CLIENT_FACTORY_CLASS=org.opensearch.sql.spark.client.DockerEMRServerlessClientFactory -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD} ulimits: memlock: @@ -93,11 +106,19 @@ services: hard: 65536 volumes: - opensearch-data:/usr/share/opensearch/data + - type: bind + source: ./aws-java-sdk-emrserverless-1.12.651.jar + target: /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-1.12.651.jar + - type: bind + source: ../../spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar + target: /spark-sql-application.jar ports: - ${OPENSEARCH_PORT:-9200}:9200 - 9600:9600 + - 5005:5005 expose: - "${OPENSEARCH_PORT:-9200}" + - "5005" healthcheck: test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"] interval: 1m @@ -107,6 +128,9 @@ services: start_interval: 5s networks: - opensearch-net + depends_on: + minio: + condition: service_healthy opensearch-dashboards: image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION} @@ -119,8 +143,16 @@ services: OPENSEARCH_HOSTS: '["http://opensearch:9200"]' networks: - opensearch-net + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5601/"] + interval: 1m + timeout: 5s + retries: 3 + start_period: 30s + start_interval: 5s depends_on: - - opensearch + opensearch: + condition: service_healthy minio: image: minio/minio @@ -132,6 +164,13 @@ services: - "9001:9001" volumes: - minio-data:/data + healthcheck: + test: ["CMD", 
"curl", "-q", "-f", "http://localhost:9000/minio/health/live"] + interval: 1m + timeout: 5s + retries: 3 + start_period: 30s + start_interval: 5s networks: - opensearch-net diff --git a/docker/integ-test/s3.credentials b/docker/integ-test/s3.credentials new file mode 100644 index 000000000..2683eeb60 --- /dev/null +++ b/docker/integ-test/s3.credentials @@ -0,0 +1,2 @@ +ACCESS_KEY=Vt7jnvi5BICr1rkfsheT +SECRET_KEY=5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO \ No newline at end of file diff --git a/docker/integ-test/spark-defaults.conf b/docker/integ-test/spark-defaults.conf index 19b9e4ec1..2a092f55f 100644 --- a/docker/integ-test/spark-defaults.conf +++ b/docker/integ-test/spark-defaults.conf @@ -33,3 +33,10 @@ spark.datasource.flint.scheme http spark.datasource.flint.auth basic spark.datasource.flint.auth.username admin spark.datasource.flint.auth.password C0rrecthorsebatterystaple. +spark.sql.warehouse.dir s3a://integ-test/ +spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem +spark.hadoop.fs.s3a.path.style.access true +spark.hadoop.fs.s3a.access.key Vt7jnvi5BICr1rkfsheT +spark.hadoop.fs.s3a.secret.key 5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO +spark.hadoop.fs.s3a.endpoint minio-S3:9000 +spark.hadoop.fs.s3a.connection.ssl.enabled false \ No newline at end of file diff --git a/docker/integ-test/spark-master-entrypoint.sh b/docker/integ-test/spark-master-entrypoint.sh index a21c20643..664f55263 100755 --- a/docker/integ-test/spark-master-entrypoint.sh +++ b/docker/integ-test/spark-master-entrypoint.sh @@ -1,17 +1,61 @@ #!/bin/bash -function start_spark_connect() { - sc_version=$(ls -1 /opt/bitnami/spark/jars/spark-core_*.jar | sed -e 's/^.*\/spark-core_//' -e 's/\.jar$//' -e 's/-/:/') +# Add passwd and shadow entries so that su works +grep -q '^spark:' /etc/passwd +if [ "$?" -ne "0" ]; then + echo 'spark:x:1001:0:spark:/opt/bitnami/spark:/bin/bash' >> /etc/passwd +fi +grep -q '^spark:' /etc/shadow +if [ "$?" 
-ne "0" ]; then + echo 'spark:*:17885:0:99999:7:::' >> /etc/shadow +fi - attempt=1 - while [ -e "/tmp/spark_master_running" -a "$attempt" -le 10 ]; do - sleep 1 - /opt/bitnami/spark/sbin/start-connect-server.sh --master spark://spark:7077 --packages org.apache.spark:spark-connect_${sc_version} - attempt=$(($attempt+1)) - done -} +apt update +apt install -y curl -touch /tmp/spark_master_running -start_spark_connect & -/opt/bitnami/scripts/spark/entrypoint.sh /opt/bitnami/scripts/spark/run.sh -rm /tmp/spark_master_running +S3_ACCESS_KEY=`grep '^ACCESS_KEY=' /opt/bitnami/spark/s3.credentials | sed -e 's/^.*=//'` +S3_SECRET_KEY=`grep '^SECRET_KEY=' /opt/bitnami/spark/s3.credentials | sed -e 's/^.*=//'` + +# Login to Minio +curl -q \ + -c /tmp/minio-cookies.txt \ + -H 'Content-Type: application/json' \ + -d '{"accessKey": "minioadmin", "secretKey": "minioadmin"}' \ + http://minio-S3:9001/api/v1/login +# Delete the test bucket +curl -b /tmp/minio-cookies.txt \ + -X DELETE \ + http://minio-S3:9001/api/v1/buckets/test +# Create the integ-test bucket +curl -q \ + -b /tmp/minio-cookies.txt \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \ + http://minio-S3:9001/api/v1/buckets +# Create the access key +curl -q \ + -b /tmp/minio-cookies.txt \ + -X POST \ + -H 'Content-Type: application/json' \ + -d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \ + http://minio-S3:9001/api/v1/service-account-credentials + +# Login to OpenSearch Dashboards +curl -c /tmp/opensearch-cookies.txt \ + -X POST \ + -H 'Content-Type: application/json' \ + -H 'Osd-Version: 2.18.0' \ + -H 'Osd-Xsrf: fetch' \ + -d "{\"username\": \"admin\", \"password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}" \ + 'http://opensearch-dashboards:5601/auth/login?dataSourceId=' 
+# Create the S3/Glue datasource
+curl -b /tmp/opensearch-cookies.txt \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -H 'Osd-Version: 2.18.0' \
+  -H 'Osd-Xsrf: fetch' \
+  -d "{\"name\": \"mys3\", \"allowedRoles\": [], \"connector\": \"s3glue\", \"properties\": {\"glue.auth.type\": \"iam_role\", \"glue.auth.role_arn\": \"arn:aws:iam::123456789012:role/S3Access\", \"glue.indexstore.opensearch.uri\": \"http://opensearch:9200\", \"glue.indexstore.opensearch.auth\": \"basicauth\", \"glue.indexstore.opensearch.auth.username\": \"admin\", \"glue.indexstore.opensearch.auth.password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}}" \
+  http://opensearch-dashboards:5601/api/directquery/dataconnections
+
+su spark -c '/opt/bitnami/scripts/spark/entrypoint.sh /opt/bitnami/scripts/spark/run.sh'