[DOP-22141] Add logic for handling SFTP transfers
Ilyas Gasanov committed Jan 28, 2025
1 parent 85618f1 commit b2f9e5d
Showing 19 changed files with 1,221 additions and 24 deletions.
7 changes: 7 additions & 0 deletions .env.docker
@@ -108,6 +108,13 @@ TEST_HDFS_HOST=test-hive
TEST_HDFS_WEBHDFS_PORT=9870
TEST_HDFS_IPC_PORT=9820

TEST_SFTP_HOST_FOR_CONFTEST=test-sftp
TEST_SFTP_PORT_FOR_CONFTEST=2222
TEST_SFTP_HOST_FOR_WORKER=test-sftp
TEST_SFTP_PORT_FOR_WORKER=2222
TEST_SFTP_USER=syncmaster
TEST_SFTP_PASSWORD=AesujeifohgoaCu0Boosiet5aimeitho

SPARK_CONF_DIR=/app/tests/spark/hive/conf/
HADOOP_CONF_DIR=/app/tests/spark/hadoop/
HIVE_CONF_DIR=/app/tests/spark/hive/conf/
7 changes: 7 additions & 0 deletions .env.local
@@ -95,6 +95,13 @@ export TEST_HDFS_HOST=test-hive
export TEST_HDFS_WEBHDFS_PORT=9870
export TEST_HDFS_IPC_PORT=9820

export TEST_SFTP_HOST_FOR_CONFTEST=localhost
export TEST_SFTP_PORT_FOR_CONFTEST=2222
export TEST_SFTP_HOST_FOR_WORKER=test-sftp
export TEST_SFTP_PORT_FOR_WORKER=2222
export TEST_SFTP_USER=syncmaster
export TEST_SFTP_PASSWORD=AesujeifohgoaCu0Boosiet5aimeitho

export SPARK_CONF_DIR=./tests/spark/hive/conf/
export HADOOP_CONF_DIR=./tests/spark/hadoop/
export HIVE_CONF_DIR=./tests/spark/hive/conf/
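The split between *_FOR_CONFTEST and *_FOR_WORKER variables exists because the test session runs on the host (hence localhost in .env.local), while the worker resolves the test-sftp service name from inside the Docker network. A minimal sketch of a session fixture consuming these variables (the fixture name and the choice of paramiko are assumptions, not part of this commit):

    import os

    import paramiko
    import pytest


    @pytest.fixture(scope="session")
    def sftp_client():
        # Connect to the test container using the conftest-side host and port.
        transport = paramiko.Transport((
            os.environ["TEST_SFTP_HOST_FOR_CONFTEST"],
            int(os.environ["TEST_SFTP_PORT_FOR_CONFTEST"]),
        ))
        transport.connect(
            username=os.environ["TEST_SFTP_USER"],
            password=os.environ["TEST_SFTP_PASSWORD"],
        )
        client = paramiko.SFTPClient.from_transport(transport)
        yield client
        client.close()
        transport.close()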
79 changes: 79 additions & 0 deletions .github/workflows/sftp-tests.yml
@@ -0,0 +1,79 @@
name: SFTP tests
on:
workflow_call:

env:
DEFAULT_PYTHON: '3.12'

jobs:
test:
name: Run SFTP tests
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Cache jars
uses: actions/cache@v4
with:
path: ./cached_jars
key: ${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-sftp
restore-keys: |
${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-sftp
${{ runner.os }}-python-
- name: Build Worker Image
uses: docker/build-push-action@v6
with:
context: .
tags: mtsrus/syncmaster-worker:${{ github.sha }}
target: test
file: docker/Dockerfile.worker
load: true
cache-from: mtsrus/syncmaster-worker:develop

- name: Docker compose up
run: |
docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
docker compose -f docker-compose.test.yml --profile sftp up -d --wait --wait-timeout 200
env:
WORKER_IMAGE_TAG: ${{ github.sha }}

- name: Run SFTP Tests
run: |
docker compose -f ./docker-compose.test.yml --profile sftp exec -T worker coverage run -m pytest -vvv -s -m "worker and sftp"
- name: Dump worker logs on failure
if: failure()
uses: jwalton/gh-docker-logs@v2
with:
images: mtsrus/syncmaster-worker
dest: ./logs

# This is important: coverage data is exported only after the worker receives SIGTERM
- name: Shutdown
if: always()
run: |
docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
- name: Upload worker logs
uses: actions/upload-artifact@v4
if: failure()
with:
name: worker-logs-sftp
path: logs/*

- name: Upload coverage results
uses: actions/upload-artifact@v4
with:
name: coverage-sftp
path: reports/*
# https://github.com/actions/upload-artifact/issues/602
include-hidden-files: true
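The marker expression "worker and sftp" in the workflow above selects only tests carrying both markers; pytest warns about unregistered markers, so the suite presumably registers them. A sketch of such registration in a root conftest.py (assuming the project does not already declare the markers in its pytest config):

    def pytest_configure(config):
        # Register custom markers so `-m "worker and sftp"` selects tests without warnings.
        config.addinivalue_line("markers", "worker: tests that run inside the worker container")
        config.addinivalue_line("markers", "sftp: integration tests against the test-sftp service")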
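The shutdown step matters because coverage run normally writes its data file only on clean interpreter exit; coverage.py can also flush data on SIGTERM when the sigterm option is enabled in its run configuration. A two-line sketch of that setting (assumed to live in the project's .coveragerc or equivalent; it is not part of this diff):

    [run]
    sigterm = true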
6 changes: 5 additions & 1 deletion .github/workflows/tests.yml
@@ -44,6 +44,10 @@ jobs:
name: S3 tests
uses: ./.github/workflows/s3-tests.yml

sftp_tests:
name: SFTP tests
uses: ./.github/workflows/sftp-tests.yml

scheduler_tests:
name: Scheduler tests
uses: ./.github/workflows/scheduler-tests.yml
@@ -56,7 +60,7 @@
name: Tests done
runs-on: ubuntu-latest

needs: [oracle_tests, clickhouse_tests, mssql_tests, mysql_tests, hive_tests, hdfs_tests, s3_tests, unit_tests]
needs: [unit_tests, scheduler_tests, oracle_tests, clickhouse_tests, mssql_tests, mysql_tests, hive_tests, hdfs_tests, s3_tests, sftp_tests]
steps:
- name: Checkout code
uses: actions/checkout@v4
4 changes: 4 additions & 0 deletions Makefile
@@ -109,6 +109,10 @@ test-integration-s3: test-db ##@Test Run integration tests for S3
docker compose -f docker-compose.test.yml --profile s3 up -d --wait $(DOCKER_COMPOSE_ARGS)
${POETRY} run pytest ./tests/test_integration -m s3 $(PYTEST_ARGS)

test-integration-sftp: test-db ##@Test Run integration tests for SFTP
docker compose -f docker-compose.test.yml --profile sftp up -d --wait $(DOCKER_COMPOSE_ARGS)
${POETRY} run pytest ./tests/test_integration -m sftp $(PYTEST_ARGS)

test-integration: test-db ##@Test Run all integration tests
docker compose -f docker-compose.test.yml --profile all up -d --wait $(DOCKER_COMPOSE_ARGS)
${POETRY} run pytest ./tests/test_integration $(PYTEST_ARGS)
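The new target mirrors the existing S3 recipe: bring up the sftp profile, then run only the sftp-marked integration tests. Extra pytest flags pass through PYTEST_ARGS, for example:

    make test-integration-sftp PYTEST_ARGS="-vvv -x"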
1 change: 1 addition & 0 deletions README.rst
@@ -40,6 +40,7 @@ List of currently supported connections:
* MySQL
* HDFS
* S3
* SFTP

The current Data.SyncMaster implementation provides the following components:

24 changes: 19 additions & 5 deletions docker-compose.test.yml
@@ -125,7 +125,7 @@ services:
condition: service_completed_successfully
rabbitmq:
condition: service_healthy
profiles: [worker, scheduler, s3, oracle, hdfs, hive, clickhouse, mysql, mssql, all]
profiles: [worker, scheduler, s3, oracle, hdfs, hive, clickhouse, mysql, mssql, sftp, all]

test-postgres:
image: postgres
@@ -139,7 +139,7 @@
interval: 30s
timeout: 5s
retries: 3
profiles: [s3, oracle, clickhouse, mysql, mssql, hdfs, hive, all]
profiles: [s3, oracle, clickhouse, mysql, mssql, hdfs, hive, sftp, all]

test-s3:
image: bitnami/minio:latest
@@ -225,7 +225,7 @@
interval: 30s
timeout: 5s
retries: 3
profiles: [hive, hdfs, s3, all]
profiles: [hive, hdfs, s3, sftp, all]

keycloak:
image: quay.io/keycloak/keycloak:latest
@@ -263,8 +263,22 @@
HIVE_METASTORE_DB_DRIVER: org.postgresql.Driver
HIVE_METASTORE_DB_USER: test_hive
HIVE_METASTORE_DB_PASSWORD: test_hive
# writing spark dataframe to s3 xml file fails without running hive metastore server
profiles: [hive, hdfs, s3, all]
# writing a Spark dataframe to an XML file on S3 or SFTP fails without a running Hive Metastore server
profiles: [hive, hdfs, s3, sftp, all]

test-sftp:
image: ${SFTP_IMAGE:-linuxserver/openssh-server}
restart: unless-stopped
ports:
- 2222:2222
environment:
PUID: 1000
PGID: 1000
USER_NAME: syncmaster
PASSWORD_ACCESS: true
SUDO_ACCESS: true
USER_PASSWORD: AesujeifohgoaCu0Boosiet5aimeitho
profiles: [sftp, all]

volumes:
postgres_test_data:
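SyncMaster workers are built on top of onETL, so the transfer logic added in this commit presumably opens the connection through onETL's SFTP file connection. A hedged sketch against the test service above (the class location and argument names are assumed from onETL, not shown in this diff):

    import os

    from onetl.connection import SFTP

    # Worker-side settings: inside the Docker network the service resolves as `test-sftp`.
    sftp = SFTP(
        host=os.environ["TEST_SFTP_HOST_FOR_WORKER"],
        port=int(os.environ["TEST_SFTP_PORT_FOR_WORKER"]),
        user=os.environ["TEST_SFTP_USER"],
        password=os.environ["TEST_SFTP_PASSWORD"],
    )
    sftp.check()  # raises if the server is unreachable or the credentials are wrong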
1 change: 1 addition & 0 deletions docs/changelog/next_release/189.feature.rst
@@ -0,0 +1 @@
Add logic for handling SFTP transfers
(11 more of the 19 changed files not rendered)