Skip to content

Commit

Permalink
beaker
Browse files Browse the repository at this point in the history
  • Loading branch information
favyen2 committed Feb 5, 2025
1 parent b8d45af commit c331c9e
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 106 deletions.
151 changes: 48 additions & 103 deletions .github/workflows/forest_loss_driver_prediction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@ on:
- cron: '0 8 * * 1' # Run at 8:00 AM UTC every Monday

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
SERVICE_NAME: "rslearn_projects"
RSLP_PROJECT: "forest_loss_driver"
RSLP_WORKFLOW: "integrated_pipeline"
# Name to use when creating Beaker image.
BEAKER_IMAGE_NAME: "forest_loss_driver"
# After creation, it is prefixed by username, so when we delete we need to use this
# full name.
BEAKER_IMAGE_FULL_NAME: "favyen/forest_loss_driver"
# BEAKER_TOKEN is Henry's token, but it has a space at the beginning, which causes
# issues when it is used here.
# BEAKER_TOKEN_2 is Favyen's token, which does not have the leading-space issue.
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN_2 }}
BEAKER_ADDR: ${{ secrets.BEAKER_ADDR }}

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand All @@ -20,30 +29,11 @@ jobs:
permissions:
contents: read
packages: write
outputs:
ghcr_docker_image: ${{ steps.image-names.outputs.ghcr_docker_image }}
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=sha,format=long
type=sha,format=short
type=raw,value=latest,enable={{is_default_branch}}
# Avoid issues with running out of disk space.
- name: Cleanup disk space
run: |
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
Expand All @@ -52,98 +42,53 @@ jobs:
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
- name: Build and push Docker image
id: build-push
uses: docker/build-push-action@v6
# Setup Beaker CLI.
- name: Setup Beaker
uses: allenai/setup-beaker@v2
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
GIT_USERNAME=${{ secrets.GIT_USERNAME }}
GIT_TOKEN=${{ secrets.GIT_TOKEN }}
- name: Store Image Names
id: image-names
run: |-
GHCR_IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.build-push.outputs.digest }}"
GHCR_IMAGE=`echo ${GHCR_IMAGE} | tr '[:upper:]' '[:lower:]'` # docker requires that all image names be lowercase
echo "ghcr_docker_image=\"${GHCR_IMAGE}\"" >> $GITHUB_OUTPUT
token: ${{ env.BEAKER_TOKEN }}
workspace: "ai2/earth-systems"

predict:
needs: build
runs-on: ubuntu-latest-m
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# This step produces a Docker image called forest_loss_driver.
- name: Build Docker Image
working-directory: rslp/${{ env.RSLP_PROJECT }}
run: |
docker compose build
- name: Cleanup disk space
# We upload the image from the previous step to Beaker.
- name: Create Beaker Image
run: |
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
sudo docker image prune --all --force >/dev/null 2>&1 || true
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
beaker image delete "$BEAKER_IMAGE_FULL_NAME" || true
beaker image create --name "$BEAKER_IMAGE_NAME" "$DOCKER_IMAGE_NAME"
env:
# Name of Docker image created by build step.
DOCKER_IMAGE_NAME: forest_loss_driver

# Now we can launch the Beaker job.
# This runs inside a Docker image, so although `rslp common beaker_launcher`
# in theory supports uploading an image, we can't use that functionality here.
- name: Run integrated pipeline in Beaker job
run: |
docker compose -f docker-compose.yaml run \
-e BEAKER_TOKEN="${BEAKER_TOKEN}" \
-e BEAKER_ADDR="${BEAKER_ADDR}" \
-e RSLP_PREFIX="${RSLP_PREFIX}" \
test python -m rslp.main \
common
beaker_launcher
--project $RSLP_PROJECT
--workflow $RSLP_WORKFLOW
--extra_args $EXTRA_ARGS
common \
beaker_launcher \
"${RSLP_PROJECT}" \
"${RSLP_WORKFLOW}" \
"${EXTRA_ARGS}" \
"${BEAKER_IMAGE_NAME}" \
"${CLUSTERS}" \
--gpu_count 1 \
--preemptible false
env:
RSLP_PROJECT: forest-loss-driver
RSLP_WORKFLOW: integrated_pipeline
EXTRA_ARGS: |
[
"--pred_pipeline_config",
"rslp/forest_loss_driver/inference/config/forest_loss_driver_predict_pipeline_config.yaml",
"--make_tiles_args.dst_dir",
"s3://satlas-explorer-data/rslearn-public/forest_loss_driver/tiles/latest/",
]
- name:
run: |
export PIPELINE_INFERENCE_CONFIG_PATH= && \
export PRED_PIPELINE_CONFIG_ARG="--pred_pipeline_config $PIPELINE_INFERENCE_CONFIG_PATH" && \
# NOTE: The index cache dir will be copied to the VM and mounted as a volume in the Docker container
export INDEX_CACHE_DIR=${{ secrets.RSLP_PREFIX }}/datasets/forest_loss_driver/index_cache_dir && \
export TILE_STORE_ROOT_DIR=${{ secrets.RSLP_PREFIX }}/datasets/forest_loss_driver/tile_store_root_dir && \
export DATASET_EXTRACT_COMMAND="python -m rslp.main forest_loss_driver extract_dataset $PRED_PIPELINE_CONFIG_ARG" && \
export RSLP_PROJECT="forest_loss_driver" && \
bash .github/workflows/deploy_image_on_vm.sh \
--project-id ${{ secrets.GCP_PROJECT_ID }} \
--zone "us-west1-b" \
--machine-type "n2-standard-128" \
--docker-image ${{ needs.build.outputs.ghcr_docker_image }} \
--command "$DATASET_EXTRACT_COMMAND" \
--user ${{ secrets.GCP_USER }} \
--ghcr-user allenai \
--service-account ${{ secrets.FOREST_LOSS_DRIVER_INFERENCE_SERVICE_ACCOUNT }} \
--delete no \
--beaker-token ${{ secrets.BEAKER_TOKEN }} \
--beaker-addr "https://beaker.org" \
--beaker-username ${{ secrets.BEAKER_USERNAME }} \
--rslp-project $RSLP_PROJECT \
--rslp-prefix ${{ secrets.RSLP_PREFIX }} \
--gpu-count 1 \
--shared-memory "64Gib" \
--cluster ${{ secrets.BEAKER_CLUSTER_INFERENCE }} \
--priority "normal" \
--task-name "${RSLP_PROJECT}_inference_$(uuidgen | cut -c1-8)" \
--budget ${{ secrets.BEAKER_BUDGET }} \
--workspace ${{ secrets.BEAKER_WORKSPACE }} \
--extra_args_model_predict "$PRED_PIPELINE_CONFIG_ARG" && \
echo "Inference job launched!"
CLUSTERS: |
["ai2/jupiter-cirrascale-2"]
RSLP_PREFIX: ${{ secrets.RSLP_PREFIX }}
2 changes: 2 additions & 0 deletions rslp/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Pipelines common across projects."""

from .beaker_launcher import launch_job
from .worker import launch_workers, worker_pipeline

# Maps each workflow name (string key) to the object imported above from the
# sibling modules .worker and .beaker_launcher.
# NOTE(review): presumably the `rslp.main` CLI looks workflows up by these keys
# (e.g. `common beaker_launcher`) -- confirm against the dispatcher.
workflows = {
"worker": worker_pipeline,
"launch": launch_workers,
"beaker_launcher": launch_job,
}
6 changes: 4 additions & 2 deletions rslp/common/beaker_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def launch_job(
task_name: name for the Beaker job.
gpu_count: number of GPUs to assign.
shared_memory: amount of shared memory.
priority: priority of the Beake rjob.
priority: priority of the Beaker job.
task_specific_env_vars: additional task-specific environment variables to pass
to the Beaker job.
budget: the Beaker budget.
Expand All @@ -84,6 +84,8 @@ def launch_job(
logger.info("Generating task name...")
task_uuid = str(uuid.uuid4())[0:8]
unique_task_name = f"{task_name}_{task_uuid}"

# Check for existing image and create image if it doesn't exist.
try:
beaker.image.get(image)
logger.info(f"Image already exists: {image}")
Expand All @@ -93,7 +95,7 @@ def launch_job(
# Handle image upload
image_source = upload_image(image, workspace, beaker)
logger.info(f"Image uploaded: {image_source.beaker}")
# Potentially we might want to have many different tasks as part of a job but this is very simple for now

logger.info("Creating experiment spec...")
experiment_spec = ExperimentSpec.new(
budget=budget,
Expand Down
1 change: 1 addition & 0 deletions rslp/forest_loss_driver/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
FROM base-image:latest

# Install tippecanoe.
RUN apt install -y build-essential libsqlite3-dev zlib1g-dev
RUN git clone https://github.com/mapbox/tippecanoe /opt/tippecanoe
WORKDIR /opt/tippecanoe
RUN make -j
Expand Down
2 changes: 1 addition & 1 deletion rslp/forest_loss_driver/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
GEOJSON_FNAME = "forest_loss_events.geojson"

# Where to store tiles. These need to be in a publicly accessible bucket.
DEFAULT_TILE_PATH = "gs://ai2-rslearn-projects-data/forest_loss_driver/"
DEFAULT_TILE_PATH = "gs://ai2-rslearn-projects-data/forest_loss_driver/tiles/"

# A special file indicating that this dataset is ready to serve from the web app.
READY_FOR_SERVING_FNAME = "ready_for_serving"

0 comments on commit c331c9e

Please sign in to comment.