diff --git a/.github/workflows/deploy_image_on_vm.sh b/.github/workflows/deploy_image_on_vm.sh index dc3d080d..6ee8c14e 100755 --- a/.github/workflows/deploy_image_on_vm.sh +++ b/.github/workflows/deploy_image_on_vm.sh @@ -304,7 +304,7 @@ extra_args_model_predict="$extra_args_model_predict" \ --task_name $TASK_NAME \ --budget $BUDGET \ --workspace $WORKSPACE \ - --extra_args $EXTRA_ARGS" && \ + --extra_args \"$EXTRA_ARGS\"" && \ echo "INFERENCE_JOB_LAUNCH_COMMAND: $INFERENCE_JOB_LAUNCH_COMMAND" && \ echo "Launching inference job on Beaker" && \ docker run -e BEAKER_TOKEN=$BEAKER_TOKEN \ diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 563fe89f..4da963cd 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,46 @@ Overview -------- -rslearn_projects contains Ai2-specific tooling for managing remote sensing projects -built on top of rslearn, as well as project-specific code and configuration files. +rslearn_projects contains the training datasets, model weights, and corresponding code +for machine learning applications built on top of +[rslearn](https://github.com/allenai/rslearn/) at Ai2. - -Tooling -------- - -The additional tooling comes into play when training and deploying models. This is an -outline of the steps the tooling takes care of when training models: - -1. User runs e.g. `python -m rslp.launch_beaker --config_path path/to/config.yaml`. -2. Launcher uploads the code to a canonical path on Google Cloud Storage (GCS), based - on the project ID and experiment ID specified in `config.yaml`. -3. Launcher then starts a job, in this case on Beaker, to train the model. -4. `rslp.docker_entrypoint` is the entrypoint for the job, and starts by downloading - the code. The image contains a copy of the code too, but it is overwritten with the - latest code from the user's codebase. -5. It then saves W&B run ID to GCS. It also configures rslearn to write checkpoints to - a canonical folder on GCS. -6. If the job is pre-empted and resumes, it will automatically load the latest - checkpoint and W&B run ID from GCS. It will also load these in calls to `model test` - or `model predict`. +- **Model weights and Code**: Licensed under [Apache License 2.0](LICENSE). +- **Annotations**: Licensed under [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/). Setup ----- -rslp expects an environment variable specifying the GCS bucket to write prepared -rslearn datasets, model checkpoints, etc. The easiest way is to create a `.env` file. - - RSLP_PREFIX=gs://rslearn-eai - RSLP_WEKA_PREFIX=weka://dfive-default/rslearn-eai - -You will also need to setup GCP credentials that have access to this bucket. +Install rslearn: -Training additionally depends on credentials for W&B. If you train directly using -`rslp.rslearn_main`, then you will need to setup these credentials. 
If you use a -launcher like `rslp.launch_beaker`, then it isn't needed since the credentials are -already configured as secrets on the platform, but you would need to setup your Beaker -or other platform credentials to be able to launch the jobs. + git clone https://github.com/allenai/rslearn.git + cd rslearn + pip install .[extra] -TODO: update GCP/W&B to use service accounts. +Install requirements: -Currently, until https://github.com/allenai/rslearn/issues/33 is resolved, model config -files use S3-compatable API to access GCS rather than GCS directly. Therefore, you need -to set up environment variables to provide the appropriate credentials: - - S3_ACCESS_KEY_ID=GOOG... - S3_SECRET_ACCESS_KEY=... - -You can create these credentials at -https://console.cloud.google.com/storage/settings;tab=interoperability?hl=en&project=skylight-proto-1 -under "Access keys for your user account". - - -Usage ------ + cd .. + git clone https://github.com/allenai/rslearn_projects.git + cd rslearn_projects + pip install -r requirements.txt -Create an environment for rslearn and setup with rslearn_projects requirements: +rslearn_projects includes tooling that expects model checkpoints and auxiliary files to +be stored in an `RSLP_PREFIX` directory. Create a file `.env` to set the `RSLP_PREFIX` +environment variable: - conda create -n rslearn python=3.12 - conda activate rslearn - pip install -r rslearn/requirements.txt -r rslearn/extra_requirements.txt - pip install -r rslearn_projects/requirements.txt + mkdir project_data + echo "RSLP_PREFIX=project_data/" > .env -For development it is easier to use PYTHONPATH or install rslearn and rslearn_projects -in editable mode, e.g.: - export PYTHONPATH=.:/path/to/rslearn/rslearn - -Execute a data processing pipeline: - - python -m rslp.main maldives_ecosystem_mapping data --dp_config.workers 32 - -Launch training on Beaker: - - python -m rslp.main maldives_ecosystem_mapping train_maxar - -Manually train locally: - - python -m rslp.rslearn_main model fit --config_path data/maldives_ecosystem_mapping/config.yaml - - -Projects --------- +Applications +------------ -- [Forest Loss Driver](rslp/forest_loss_driver/README.md) +- [Sentinel-2 Vessel Detection](docs/sentinel2_vessels.md) +- [Sentinel-2 Vessel Attribute Prediction](docs/sentinel2_vessel_attribute.md) +- [Landsat Vessel Detection](docs/landsat_vessels.md) +- [Satlas: Solar Farm Segmentation](docs/satlas_solar_farm.md) +- [Satlas: Wind Turbine Detection](docs/satlas_wind_turbine.md) +- [Satlas: Marine Infrastructure Detection](docs/satlas_marine_infra.md) +- [Forest Loss Driver Classification](docs/forest_loss_driver.md) +- [Maldives Ecosystem Mapping](docs/maldives_ecosystem_mapping.md) diff --git a/ai2_docs/README.md b/ai2_docs/README.md new file mode 100644 index 00000000..563fe89f --- /dev/null +++ b/ai2_docs/README.md @@ -0,0 +1,90 @@ +Overview +-------- + +rslearn_projects contains Ai2-specific tooling for managing remote sensing projects +built on top of rslearn, as well as project-specific code and configuration files. + + +Tooling +------- + +The additional tooling comes into play when training and deploying models. This is an +outline of the steps the tooling takes care of when training models: + +1. User runs e.g. `python -m rslp.launch_beaker --config_path path/to/config.yaml`. +2. Launcher uploads the code to a canonical path on Google Cloud Storage (GCS), based + on the project ID and experiment ID specified in `config.yaml`. +3. 
Launcher then starts a job, in this case on Beaker, to train the model. +4. `rslp.docker_entrypoint` is the entrypoint for the job, and starts by downloading + the code. The image contains a copy of the code too, but it is overwritten with the + latest code from the user's codebase. +5. It then saves the W&B run ID to GCS. It also configures rslearn to write checkpoints to + a canonical folder on GCS. +6. If the job is pre-empted and resumes, it will automatically load the latest + checkpoint and the W&B run ID from GCS. It will also load these in calls to `model test` + or `model predict`. + + +Setup +----- + +rslp expects an environment variable specifying the GCS bucket to write prepared +rslearn datasets, model checkpoints, etc. The easiest way is to create a `.env` file. + + RSLP_PREFIX=gs://rslearn-eai + RSLP_WEKA_PREFIX=weka://dfive-default/rslearn-eai + +You will also need to set up GCP credentials that have access to this bucket. + +Training additionally depends on credentials for W&B. If you train directly using +`rslp.rslearn_main`, then you will need to set up these credentials. If you use a +launcher like `rslp.launch_beaker`, then it isn't needed since the credentials are +already configured as secrets on the platform, but you would need to set up your Beaker +or other platform credentials to be able to launch the jobs. + +TODO: update GCP/W&B to use service accounts. + +Currently, until https://github.com/allenai/rslearn/issues/33 is resolved, model config +files use the S3-compatible API to access GCS rather than GCS directly. Therefore, you need +to set up environment variables to provide the appropriate credentials: + + S3_ACCESS_KEY_ID=GOOG... + S3_SECRET_ACCESS_KEY=... + +You can create these credentials at +https://console.cloud.google.com/storage/settings;tab=interoperability?hl=en&project=skylight-proto-1 +under "Access keys for your user account".
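+
+As a quick sanity check of these interoperability credentials, here is a minimal
+sketch (assuming `boto3` is installed, which is not an rslp requirement) that lists a
+few objects through the GCS S3-compatible endpoint:
+
+    import os
+
+    import boto3
+
+    # Talk to GCS through its S3-compatible (XML interoperability) endpoint
+    # using the HMAC credentials from the environment variables above.
+    client = boto3.client(
+        "s3",
+        endpoint_url="https://storage.googleapis.com",
+        aws_access_key_id=os.environ["S3_ACCESS_KEY_ID"],
+        aws_secret_access_key=os.environ["S3_SECRET_ACCESS_KEY"],
+    )
+    # List a few objects from the bucket referenced by RSLP_PREFIX.
+    response = client.list_objects_v2(Bucket="rslearn-eai", MaxKeys=5)
+    for obj in response.get("Contents", []):
+        print(obj["Key"])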
+ + +Usage +----- + +Create an environment for rslearn and set it up with the rslearn_projects requirements: + + conda create -n rslearn python=3.12 + conda activate rslearn + pip install -r rslearn/requirements.txt -r rslearn/extra_requirements.txt + pip install -r rslearn_projects/requirements.txt + +For development it is easier to use PYTHONPATH or install rslearn and rslearn_projects +in editable mode, e.g.: + + export PYTHONPATH=.:/path/to/rslearn/rslearn + +Execute a data processing pipeline: + + python -m rslp.main maldives_ecosystem_mapping data --dp_config.workers 32 + +Launch training on Beaker: + + python -m rslp.main maldives_ecosystem_mapping train_maxar + +Manually train locally: + + python -m rslp.rslearn_main model fit --config_path data/maldives_ecosystem_mapping/config.yaml + + +Projects +-------- + +- [Forest Loss Driver](rslp/forest_loss_driver/README.md) diff --git a/docs/batch_inference.md b/ai2_docs/batch_inference.md similarity index 100% rename from docs/batch_inference.md rename to ai2_docs/batch_inference.md diff --git a/docs/coding_best_practices_brainstorm.md b/ai2_docs/coding_best_practices_brainstorm.md similarity index 100% rename from docs/coding_best_practices_brainstorm.md rename to ai2_docs/coding_best_practices_brainstorm.md diff --git a/docs/landsat_vessels/api_use.md b/ai2_docs/landsat_vessels/api_use.md similarity index 100% rename from docs/landsat_vessels/api_use.md rename to ai2_docs/landsat_vessels/api_use.md diff --git a/docs/landsat_vessels/images/missed_vessels_B8.png b/ai2_docs/landsat_vessels/images/missed_vessels_B8.png similarity index 100% rename from docs/landsat_vessels/images/missed_vessels_B8.png rename to ai2_docs/landsat_vessels/images/missed_vessels_B8.png diff --git a/docs/landsat_vessels/images/missed_vessels_RGB.png b/ai2_docs/landsat_vessels/images/missed_vessels_RGB.png similarity index 100% rename from docs/landsat_vessels/images/missed_vessels_RGB.png rename to ai2_docs/landsat_vessels/images/missed_vessels_RGB.png diff --git a/docs/landsat_vessels/model_summary.md b/ai2_docs/landsat_vessels/model_summary.md similarity index 93% rename from docs/landsat_vessels/model_summary.md rename to ai2_docs/landsat_vessels/model_summary.md index c81a37ce..9092b4f1 100644 --- a/docs/landsat_vessels/model_summary.md +++ b/ai2_docs/landsat_vessels/model_summary.md @@ -30,8 +30,8 @@ Note: The evaluation metrics are reported for the two-stage model (detector + cl --- ## Model Configurations -- **Detector**: `rslearn_projects/data/landsat_vessels/config.yaml` -- **Classifier**: `rslearn_projects/landsat/recheck_landsat_labels/phase123_config.yaml` +- **Detector**: `rslearn_projects/data/landsat_vessels/config_detector.yaml` +- **Classifier**: `rslearn_projects/data/landsat_vessels/config_classifier.yaml` - **Filters**: marine infrastructure `rslearn_projects/rslp/utils/filter.py` --- diff --git a/docs/landsat_vessels/train_eval.md b/ai2_docs/landsat_vessels/train_eval.md similarity index 96% rename from docs/landsat_vessels/train_eval.md rename to ai2_docs/landsat_vessels/train_eval.md index 0c3b8da6..31291c8a 100644 --- a/docs/landsat_vessels/train_eval.md +++ b/ai2_docs/landsat_vessels/train_eval.md @@ -12,17 +12,17 @@ This detects vessels in Landsat imagery using two models: The object detector can be trained like this: - python -m rslp.rslearn_main model fit --config data/landsat_vessels/config.yaml + python -m rslp.rslearn_main model fit --config data/landsat_vessels/config_detector.yaml The dataset was originally labeled in siv and has been
converted to rslearn dataset using the code in `landsat/existing_dataset_to_utm/`. The classifier can be trained like this: - python -m rslp.rslearn_main model fit --config landsat/recheck_landsat_labels/phase123_config.yaml + python -m rslp.rslearn_main model fit --config data/landsat_vessels/config_classifier.yaml The data collection process for the classifier is described in -`landsat/recheck_landsat_labels/README.md`. +`one_off_projects/landsat/recheck_landsat_labels/README.md`. --- diff --git a/data/landsat_vessels/config_classifier.yaml b/data/landsat_vessels/config_classifier.yaml new file mode 100644 index 00000000..1288b444 --- /dev/null +++ b/data/landsat_vessels/config_classifier.yaml @@ -0,0 +1,105 @@ +model: + class_path: rslearn.train.lightning_module.RslearnLightningModule + init_args: + model: + class_path: rslearn.models.multitask.MultiTaskModel + init_args: + encoder: + - class_path: rslearn.models.swin.Swin + init_args: + input_channels: 7 + output_layers: [1, 3, 5, 7] + pretrained: true + decoders: + class: + - class_path: rslearn.models.pooling_decoder.PoolingDecoder + init_args: + in_channels: 1024 + out_channels: 2 + - class_path: rslearn.train.tasks.classification.ClassificationHead + lr: 0.0001 + plateau_factor: 0.1 + plateau_patience: 10 + plateau_min_lr: 0 + plateau_cooldown: 0 + restore_config: + restore_path: gcs://rslearn-eai/datasets/landsat_vessel_detection/artifacts/2024-03-13-landsat-vessels/vessel02_satlas_freeze_crop512_nosatlas_b8first2/best.pth + remap_prefixes: + - ["backbone.backbone.", "encoder.0.model."] +data: + class_path: rslearn.train.data_module.RslearnDataModule + init_args: + path: gcs://rslearn-eai/datasets/landsat_vessel_detection/classifier/dataset_20240905/ + inputs: + image: + data_type: "raster" + layers: ["landsat"] + bands: ["B8", "B2", "B3", "B4", "B5", "B6", "B7"] + passthrough: true + label: + data_type: "vector" + layers: ["label"] + is_target: true + task: + class_path: rslearn.train.tasks.multi_task.MultiTask + init_args: + tasks: + class: + class_path: rslearn.train.tasks.classification.ClassificationTask + init_args: + property_name: "label" + classes: ["correct", "incorrect"] + allow_invalid: true + skip_unknown_categories: true + prob_property: "prob" + positive_class: "correct" + positive_class_threshold: 0.85 + input_mapping: + class: + label: "targets" + batch_size: 16 + num_workers: 16 + default_config: + transforms: + - class_path: rslearn.train.transforms.normalize.Normalize + init_args: + mean: 0 + std: 255 + - class_path: rslearn.train.transforms.pad.Pad + init_args: + mode: "center" + size: 32 + - class_path: rslearn.train.transforms.flip.Flip + groups: ["selected_copy", "phase2a_completed", "phase3a_selected"] + train_config: + tags: + split: train + sampler: + class_path: rslearn.train.dataset.WeightedRandomSamplerFactory + init_args: + option_key: "weight" + num_samples: 1000 + val_config: + groups: ["phase2a_completed"] + tags: + split: val + test_config: + groups: ["phase2a_completed"] + tags: + split: val + predict_config: + groups: ["classify_predict"] + skip_targets: true +trainer: + max_epochs: 64 + callbacks: + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: "epoch" + - class_path: rslearn.train.prediction_writer.RslearnWriter + init_args: + path: gcs://rslearn-eai/datasets/landsat_vessel_detection/classifier/dataset_20240905/ + output_layer: output + selector: ["class"] +rslp_project: rslearn-landsat-recheck +rslp_experiment: phase123_20240919_01_copy diff 
--git a/data/landsat_vessels/config.yaml b/data/landsat_vessels/config_detector.yaml similarity index 100% rename from data/landsat_vessels/config.yaml rename to data/landsat_vessels/config_detector.yaml diff --git a/docs/images/landsat_vessels/prediction.png b/docs/images/landsat_vessels/prediction.png new file mode 100644 index 00000000..6a66d831 Binary files /dev/null and b/docs/images/landsat_vessels/prediction.png differ diff --git a/docs/images/sentinel2_vessels/prediction.png b/docs/images/sentinel2_vessels/prediction.png new file mode 100644 index 00000000..c932f234 Binary files /dev/null and b/docs/images/sentinel2_vessels/prediction.png differ diff --git a/docs/landsat_vessels.md b/docs/landsat_vessels.md new file mode 100644 index 00000000..ca800087 --- /dev/null +++ b/docs/landsat_vessels.md @@ -0,0 +1,91 @@ +Landsat Vessel Detection +--------------------------- + +The Landsat vessel detection model detects ships in Landsat 8/9 scenes. We use Level-1 data since they are released with a lower latency, and latency is +important for [Skylight](https://www.skylight.global/) (which is the primary use of +this model within Ai2). + +The model consists of a detector and a classifier: the detector detects ship-like objects, and the classifier refines these detections by pruning ones that it is confident are not ships. The detector is trained on a dataset consisting of 7,954 Landsat patches (ranging from 384x384 to 768x768) with 18,509 ship labels. The classifier is trained on a dataset consisting of 1,733 annotated detections, with each detection represented as a 64x64 patch centered at the position of a detected ship. See our paper for more details about the model and dataset. +
+![Image showing a Landsat image with predicted positions of ships from the model overlaid.](./images/landsat_vessels/prediction.png)
+ + +Inference +--------- + +First, download the detector and classifier checkpoints to the `RSLP_PREFIX` directory. + + cd rslearn_projects + mkdir -p project_data/projects/landsat_vessels/data_20240924_model_20240924_imagenet_patch512_flip_03/checkpoints/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/landsat_vessels/detector/best.ckpt -O project_data/projects/landsat_vessels/data_20240924_model_20240924_imagenet_patch512_flip_03/checkpoints/last.ckpt + + mkdir -p project_data/projects/rslearn-landsat-recheck/phase123_20240919_01_copy/checkpoints/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/landsat_vessels/classifer/best.ckpt -O project_data/projects/rslearn-landsat-recheck/phase123_20240919_01_copy/checkpoints/last.ckpt + +The easiest way to apply the model is to use the prediction pipeline in `rslp/landsat_vessels/predict_pipeline.py`. You can download the Landsat scene files, e.g. from USGS EarthExplorer or AWS, and then create a configuration file for the prediction pipeline; here is an example: + +```json { "image_files": { "B2": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B2.TIF", "B3": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B3.TIF", "B4": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B4.TIF", "B5": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B5.TIF", "B6": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B6.TIF", "B7": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B7.TIF", "B8": "/home/data/LC08_L1TP_125059_20240727_20240801_02_T1_B8.TIF" }, "scratch_path": "/home/data/scratch/", "json_path": "/home/data/vessels.json", "crop_path": "/home/data/crops/" } ``` + +This specifies the arguments to +`rslp.landsat_vessels.predict_pipeline.predict_pipeline` via `jsonargparse`. + +Now we can run the pipeline: + + python -m rslp.main landsat_vessels predict --config /path/to/config.json + +Here, `scratch_path` saves the rslearn dataset, `crop_path` saves the cropped RGB images centered around the detected ships, and `json_path` saves the JSON output of the detected ships, all of which are optional, depending on whether the user wants to save the intermediate results. + +The prediction pipeline also accepts a Landsat scene ID and automatically downloads the scene images from [AWS](https://aws.amazon.com/marketplace/pp/prodview-ivr4jeq6flk7u#resources). You will need to set up your AWS account for accessing Landsat data. Use the command below to run the pipeline with a scene ID: + + python -m rslp.main landsat_vessels predict --scene_id LC08_L1TP_125059_20240727_20240801_02_T1 + + +Training +-------- + +First, download the training dataset for the detector: + + cd rslearn_projects + mkdir -p project_data/datasets/landsat_vessels/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/landsat_vessels/landsat_vessels_detector.tar -O project_data/datasets/landsat_vessels_detector.tar + tar xvf project_data/datasets/landsat_vessels_detector.tar --directory project_data/datasets/landsat_vessels/ + +It is an rslearn dataset consisting of window folders like `windows/labels_utm/41984_2354176_f7c057a567ee40b694d0a77ea59ef81a_6359/`. Inside each window folder: + +- `layers/landsat/` contains different Landsat bands used by the model. +- `layers/label/data.geojson` contains the positions of ships. These are offset from + the bounds of the window which are in `metadata.json`, so subtract the window's + bounds to get pixel coordinates relative to the image (see the sketch below).
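+
+For example, here is a minimal sketch of that conversion (it assumes the window
+bounds are stored under a `bounds` key in `metadata.json`; check the actual rslearn
+window format if this differs):
+
+    import json
+    from pathlib import Path
+
+    window = Path("windows/labels_utm/41984_2354176_f7c057a567ee40b694d0a77ea59ef81a_6359")
+    # Window bounds are [x1, y1, x2, y2] in the window's projection (assumed key).
+    bounds = json.loads((window / "metadata.json").read_text())["bounds"]
+    labels = json.loads((window / "layers/label/data.geojson").read_text())
+    for feature in labels["features"]:
+        x, y = feature["geometry"]["coordinates"][:2]
+        # Subtract the window origin to get image-relative pixel coordinates.
+        print(x - bounds[0], y - bounds[1])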
+ +Use the command below to train the detector. Note that Weights & Biases is needed. You can +disable W&B with `--no_log true` but then it may be difficult to track the metrics. + + python -m rslp.rslearn_main model fit --config data/landsat_vessels/config_detector.yaml --data.init_args.path project_data/datasets/landsat_vessels/dataset_20240924/ + +Second, download the training dataset for the classifier: + + wget https://storage.googleapis.com/ai2-rslearn-projects-data/landsat_vessels/landsat_vessels_classifier.tar -O project_data/datasets/landsat_vessels_classifier.tar + tar xvf project_data/datasets/landsat_vessels_classifier.tar --directory project_data/datasets/landsat_vessels/ + +Use the command below to train the classifier. + + python -m rslp.rslearn_main model fit --config data/landsat_vessels/config_classifier.yaml --data.init_args.path project_data/datasets/landsat_vessels/dataset_20240905/ diff --git a/docs/satlas_marine_infra.md b/docs/satlas_marine_infra.md new file mode 100644 index 00000000..2a1e9653 --- /dev/null +++ b/docs/satlas_marine_infra.md @@ -0,0 +1,115 @@ +Satlas Marine Infrastructure +---------------------------- + +The Satlas marine infrastructure model uses Sentinel-2 L1C scenes to predict the +locations of off-shore wind turbines and off-shore platforms. Note that the off-shore +platform category is a catch-all for human-made objects in the ocean that are not wind +turbines. + +It takes as input four mosaics of Sentinel-2 images, where each mosaic should be constructed +using Sentinel-2 scenes from a distinct 30-day period. + +The model consists of a SatlasPretrain backbone to extract features from the image time +series, paired with a Faster R-CNN decoder to predict bounding boxes. Note that the +actual labels are points but the model is trained to predict bounding boxes. + +It is trained on a dataset consisting of 7,197 image patches (ranging from 300x300 to +1000x1000) with 8,791 turbine labels and 4,459 platform labels. + + +Inference +--------- + +First, download the model checkpoint to the `RSLP_PREFIX` directory. + + cd rslearn_projects + mkdir -p project_data/projects/satlas_marine_infra/data_20241210_run_20241210_00/checkpoints/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/satlas_marine_infra/best.ckpt -O project_data/projects/satlas_marine_infra/data_20241210_run_20241210_00/checkpoints/last.ckpt + +The Satlas prediction pipeline applies the model on a bounding box in a UTM projection +at 10 m/pixel. Given a longitude and latitude where you want to apply the model, you +can use the code below to identify a suitable bounding box: + + import json + import shapely + from rslearn.const import WGS84_PROJECTION + from rslearn.utils.geometry import STGeometry + from rslearn.utils.get_utm_ups_crs import get_utm_ups_projection + + longitude = 120.148 + latitude = 24.007 + window_size = 4096 + + src_geom = STGeometry(WGS84_PROJECTION, shapely.Point(longitude, latitude), None) + dst_projection = get_utm_ups_projection(longitude, latitude, 10, -10) + dst_geom = src_geom.to_projection(dst_projection) + center_point = ( + int(dst_geom.shp.x) // 2048 * 2048, + int(dst_geom.shp.y) // 2048 * 2048, + ) + bounds = ( + center_point[0] - window_size // 2, + center_point[1] - window_size // 2, + center_point[0] + window_size // 2, + center_point[1] + window_size // 2, + ) + print(json.dumps(dst_projection.serialize())) + print(json.dumps(bounds)) + +Run the prediction pipeline.
The argument after the projection and bounds specifies the +time range. It should be a seven-month range to give enough options to pick the four +30-day mosaics; note that the timestamps are ISO 8601 formatted. + + mkdir out_dir + python -m rslp.main satlas predict MARINE_INFRA '{"crs": "EPSG:32651", "x_resolution": 10, "y_resolution": -10}' '[18432, -268288, 22528, -264192]' '["2024-01-01T00:00:00+00:00", "2024-08-01T00:00:00+00:00"]' out_dir/ scratch_dir/ --use_rtree_index false + +You may need to delete the "scratch_dir" directory if it exists already. This is used +to store a temporary rslearn dataset for ingesting the Sentinel-2 input images. + +This generates a GeoJSON in out_dir, but it is in pixel coordinates. Convert to +longitude/latitude coordinates using this script (which can also be used to merge +multiple GeoJSONs produced by the prediction pipeline): + + mkdir merged_dir + python -m rslp.main satlas merge_points MARINE_INFRA 2024-01 out_dir/ merged_dir/ + +Now you can open the GeoJSON to view predicted positions of marine infrastructure, e.g. +in QGIS: + + qgis merged_dir/2024-01.geojson + + +Training +-------- + +First, download the training dataset: + + cd rslearn_projects + mkdir -p project_data/datasets/satlas_marine_infra/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/satlas_marine_infra/satlas_marine_infra.tar -O project_data/datasets/satlas_marine_infra.tar + tar xvf project_data/datasets/satlas_marine_infra.tar --directory project_data/datasets/satlas_marine_infra/ + +It is an rslearn dataset consisting of window folders like +`windows/label/2102272_1262592/`. Inside each window folder: + +- `layers/sentinel2{.1,.2,.3}/` contains the four input Sentinel-2 mosaics. +- `layers/label/data.geojson` contains the positions of marine infrastructure. These + are offset from the bounds of the window which are in `metadata.json`, so subtract + the window's bounds to get pixel coordinates relative to the image. +- `layers/mask/mask/image.png` contains a mask specifying the valid portion of the + window. The labels were originally annotated in WebMercator projection, but have been + re-projected to UTM in this dataset; the transformation results in a non-rectangular + extent, so the window corresponds to the rectangular bounds of that extent while the + mask specifies the extent within those bounds. This is used in the mask step in the + model configuration file `data/satlas_marine_infra/config.yaml` to black out the + other parts of the input image. + +Use the command below to train the model. Note that Weights & Biases is needed. You can +disable W&B with `--no_log true` but then it may be difficult to track the metrics. + + python -m rslp.rslearn_main model fit --config data/satlas_marine_infra/config.yaml --data.init_args.path project_data/datasets/satlas_marine_infra/ + +To visualize outputs on the validation set: + + mkdir vis + python -m rslp.rslearn_main model test --config data/satlas_marine_infra/config.yaml --data.init_args.path project_data/datasets/satlas_marine_infra/ --model.init_args.visualize_dir=vis/ --load_best true diff --git a/docs/sentinel2_vessels.md b/docs/sentinel2_vessels.md new file mode 100644 index 00000000..0dfd11a9 --- /dev/null +++ b/docs/sentinel2_vessels.md @@ -0,0 +1,66 @@ +Sentinel-2 Vessel Detection +--------------------------- + +The Sentinel-2 vessel detection model detects ships in Sentinel-2 L1C scenes.
We use +L1C instead of L2A since L1C scenes are released with a lower latency, and latency is +important for [Skylight](https://www.skylight.global/) (which is the primary use of +this model within Ai2). + +It is trained on a dataset consisting of 43,443 image patches (ranging from 300x300 to +1000x1000) with 37,145 ship labels. See [our paper](https://arxiv.org/pdf/2312.03207) +for more details about the model and dataset. + +![Image showing a Sentinel-2 image with predicted positions of ships from the model overlaid.](./images/sentinel2_vessels/prediction.png) + + +Inference +--------- + +First, download the model checkpoint to the `RSLP_PREFIX` directory. + + cd rslearn_projects + mkdir -p project_data/projects/sentinel2_vessels/data_20240927_satlaspretrain_patch512_00/checkpoints/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/sentinel2_vessels/best.ckpt -O project_data/projects/sentinel2_vessels/data_20240927_satlaspretrain_patch512_00/checkpoints/last.ckpt + +The easiest way to apply the model is to use the prediction pipeline in +`rslp/sentinel2_vessels/predict_pipeline.py`. It accepts a Sentinel-2 scene ID and +automatically downloads the scene images from a +[public Google Cloud Storage bucket](https://cloud.google.com/storage/docs/public-datasets/sentinel-2). + + mkdir output_crops + mkdir scratch_dir + python -m rslp.main sentinel2_vessels predict '[{"scene_id": "S2A_MSIL1C_20180904T110621_N0206_R137_T30UYD_20180904T133425", "json_path": "out.json", "crop_path": "output_crops/"}]' scratch_dir/ + +Then, `out.json` will contain a JSON list of detected ships while `output_crops` will +contain corresponding crops centered around those ships (showing the RGB B4/B3/B2 +bands). + + +Training +-------- + +First, download the training dataset: + + cd rslearn_projects + mkdir -p project_data/datasets/sentinel2_vessels/ + wget https://storage.googleapis.com/ai2-rslearn-projects-data/sentinel2_vessels/sentinel2_vessels.tar -O project_data/datasets/sentinel2_vessels.tar + tar xvf project_data/datasets/sentinel2_vessels.tar --directory project_data/datasets/sentinel2_vessels/ + +It is an rslearn dataset consisting of window folders like +`windows/sargassum_train/1186117_1897173_158907/`. Inside each window folder: + +- `layers/sentinel2/` contains different Sentinel-2 bands used by the model, such as + `layers/sentinel2/R_G_B/image.png`. +- `layers/label/data.geojson` contains the positions of ships. These are offset from + the bounds of the window which are in `metadata.json`, so subtract the window's + bounds to get pixel coordinates relative to the image. + +Use the command below to train the model. Note that Weights & Biases is needed. You can +disable W&B with `--no_log true` but then it may be difficult to track the metrics.
+ + python -m rslp.rslearn_main model fit --config data/sentinel2_vessels/config.yaml --data.init_args.path project_data/datasets/sentinel2_vessels/ + +To visualize outputs on the validation set: + + mkdir vis + python -m rslp.rslearn_main model test --config data/sentinel2_vessels/config.yaml --data.init_args.path project_data/datasets/sentinel2_vessels/ --model.init_args.visualize_dir vis/ --load_best true diff --git a/landsat/confirm_same_performance_as_integration/README.md b/one_off_projects/landsat/confirm_same_performance_as_integration/README.md similarity index 100% rename from landsat/confirm_same_performance_as_integration/README.md rename to one_off_projects/landsat/confirm_same_performance_as_integration/README.md diff --git a/landsat/confirm_same_performance_as_integration/rslearn_to_multisat.py b/one_off_projects/landsat/confirm_same_performance_as_integration/rslearn_to_multisat.py similarity index 100% rename from landsat/confirm_same_performance_as_integration/rslearn_to_multisat.py rename to one_off_projects/landsat/confirm_same_performance_as_integration/rslearn_to_multisat.py diff --git a/landsat/existing_dataset_to_utm/add_label_layer.py b/one_off_projects/landsat/existing_dataset_to_utm/add_label_layer.py similarity index 100% rename from landsat/existing_dataset_to_utm/add_label_layer.py rename to one_off_projects/landsat/existing_dataset_to_utm/add_label_layer.py diff --git a/landsat/existing_dataset_to_utm/config.json b/one_off_projects/landsat/existing_dataset_to_utm/config.json similarity index 100% rename from landsat/existing_dataset_to_utm/config.json rename to one_off_projects/landsat/existing_dataset_to_utm/config.json diff --git a/landsat/existing_dataset_to_utm/prepare_windows.py b/one_off_projects/landsat/existing_dataset_to_utm/prepare_windows.py similarity index 100% rename from landsat/existing_dataset_to_utm/prepare_windows.py rename to one_off_projects/landsat/existing_dataset_to_utm/prepare_windows.py diff --git a/landsat/existing_dataset_to_utm/reformat_multisat.py b/one_off_projects/landsat/existing_dataset_to_utm/reformat_multisat.py similarity index 100% rename from landsat/existing_dataset_to_utm/reformat_multisat.py rename to one_off_projects/landsat/existing_dataset_to_utm/reformat_multisat.py diff --git a/landsat/quick_vis_script.py b/one_off_projects/landsat/quick_vis_script.py similarity index 100% rename from landsat/quick_vis_script.py rename to one_off_projects/landsat/quick_vis_script.py diff --git a/landsat/random_landsat/convert_to_multisat_dataset.py b/one_off_projects/landsat/random_landsat/convert_to_multisat_dataset.py similarity index 100% rename from landsat/random_landsat/convert_to_multisat_dataset.py rename to one_off_projects/landsat/random_landsat/convert_to_multisat_dataset.py diff --git a/landsat/random_landsat/convert_windows_to_utm.py b/one_off_projects/landsat/random_landsat/convert_windows_to_utm.py similarity index 100% rename from landsat/random_landsat/convert_windows_to_utm.py rename to one_off_projects/landsat/random_landsat/convert_windows_to_utm.py diff --git a/landsat/random_landsat/random_landsat_windows.py b/one_off_projects/landsat/random_landsat/random_landsat_windows.py similarity index 100% rename from landsat/random_landsat/random_landsat_windows.py rename to one_off_projects/landsat/random_landsat/random_landsat_windows.py diff --git a/landsat/recheck_landsat_labels/README.md b/one_off_projects/landsat/recheck_landsat_labels/README.md similarity index 100% rename from 
landsat/recheck_landsat_labels/README.md rename to one_off_projects/landsat/recheck_landsat_labels/README.md diff --git a/landsat/recheck_landsat_labels/phase123_config.yaml b/one_off_projects/landsat/recheck_landsat_labels/phase123_config.yaml similarity index 100% rename from landsat/recheck_landsat_labels/phase123_config.yaml rename to one_off_projects/landsat/recheck_landsat_labels/phase123_config.yaml diff --git a/landsat/recheck_landsat_labels/phase123_config_hparams.yaml b/one_off_projects/landsat/recheck_landsat_labels/phase123_config_hparams.yaml similarity index 100% rename from landsat/recheck_landsat_labels/phase123_config_hparams.yaml rename to one_off_projects/landsat/recheck_landsat_labels/phase123_config_hparams.yaml diff --git a/landsat/recheck_landsat_labels/phase12_config.yaml b/one_off_projects/landsat/recheck_landsat_labels/phase12_config.yaml similarity index 100% rename from landsat/recheck_landsat_labels/phase12_config.yaml rename to one_off_projects/landsat/recheck_landsat_labels/phase12_config.yaml diff --git a/landsat/recheck_landsat_labels/phase1_assign_split.py b/one_off_projects/landsat/recheck_landsat_labels/phase1_assign_split.py similarity index 100% rename from landsat/recheck_landsat_labels/phase1_assign_split.py rename to one_off_projects/landsat/recheck_landsat_labels/phase1_assign_split.py diff --git a/landsat/recheck_landsat_labels/phase1_config.json b/one_off_projects/landsat/recheck_landsat_labels/phase1_config.json similarity index 100% rename from landsat/recheck_landsat_labels/phase1_config.json rename to one_off_projects/landsat/recheck_landsat_labels/phase1_config.json diff --git a/landsat/recheck_landsat_labels/phase1_config.yaml b/one_off_projects/landsat/recheck_landsat_labels/phase1_config.yaml similarity index 100% rename from landsat/recheck_landsat_labels/phase1_config.yaml rename to one_off_projects/landsat/recheck_landsat_labels/phase1_config.yaml diff --git a/landsat/recheck_landsat_labels/phase1_get_1000.py b/one_off_projects/landsat/recheck_landsat_labels/phase1_get_1000.py similarity index 100% rename from landsat/recheck_landsat_labels/phase1_get_1000.py rename to one_off_projects/landsat/recheck_landsat_labels/phase1_get_1000.py diff --git a/landsat/recheck_landsat_labels/phase1_index.html b/one_off_projects/landsat/recheck_landsat_labels/phase1_index.html similarity index 100% rename from landsat/recheck_landsat_labels/phase1_index.html rename to one_off_projects/landsat/recheck_landsat_labels/phase1_index.html diff --git a/landsat/recheck_landsat_labels/phase1_server.py b/one_off_projects/landsat/recheck_landsat_labels/phase1_server.py similarity index 100% rename from landsat/recheck_landsat_labels/phase1_server.py rename to one_off_projects/landsat/recheck_landsat_labels/phase1_server.py diff --git a/landsat/recheck_landsat_labels/phase2_config.json b/one_off_projects/landsat/recheck_landsat_labels/phase2_config.json similarity index 100% rename from landsat/recheck_landsat_labels/phase2_config.json rename to one_off_projects/landsat/recheck_landsat_labels/phase2_config.json diff --git a/landsat/recheck_landsat_labels/phase2_config.yaml b/one_off_projects/landsat/recheck_landsat_labels/phase2_config.yaml similarity index 100% rename from landsat/recheck_landsat_labels/phase2_config.yaml rename to one_off_projects/landsat/recheck_landsat_labels/phase2_config.yaml diff --git a/landsat/recheck_landsat_labels/phase2_get_3000.py b/one_off_projects/landsat/recheck_landsat_labels/phase2_get_3000.py similarity index 100% rename from 
landsat/recheck_landsat_labels/phase2_get_3000.py rename to one_off_projects/landsat/recheck_landsat_labels/phase2_get_3000.py diff --git a/landsat/recheck_landsat_labels/phase2_index.html b/one_off_projects/landsat/recheck_landsat_labels/phase2_index.html similarity index 100% rename from landsat/recheck_landsat_labels/phase2_index.html rename to one_off_projects/landsat/recheck_landsat_labels/phase2_index.html diff --git a/landsat/recheck_landsat_labels/phase2_make_overview_windows.py b/one_off_projects/landsat/recheck_landsat_labels/phase2_make_overview_windows.py similarity index 100% rename from landsat/recheck_landsat_labels/phase2_make_overview_windows.py rename to one_off_projects/landsat/recheck_landsat_labels/phase2_make_overview_windows.py diff --git a/landsat/recheck_landsat_labels/phase2_overview_rgb.py b/one_off_projects/landsat/recheck_landsat_labels/phase2_overview_rgb.py similarity index 100% rename from landsat/recheck_landsat_labels/phase2_overview_rgb.py rename to one_off_projects/landsat/recheck_landsat_labels/phase2_overview_rgb.py diff --git a/landsat/recheck_landsat_labels/phase2_pansharpen.py b/one_off_projects/landsat/recheck_landsat_labels/phase2_pansharpen.py similarity index 100% rename from landsat/recheck_landsat_labels/phase2_pansharpen.py rename to one_off_projects/landsat/recheck_landsat_labels/phase2_pansharpen.py diff --git a/landsat/recheck_landsat_labels/phase2_server.py b/one_off_projects/landsat/recheck_landsat_labels/phase2_server.py similarity index 100% rename from landsat/recheck_landsat_labels/phase2_server.py rename to one_off_projects/landsat/recheck_landsat_labels/phase2_server.py diff --git a/landsat/recheck_landsat_labels/phase3_get_750.py b/one_off_projects/landsat/recheck_landsat_labels/phase3_get_750.py similarity index 100% rename from landsat/recheck_landsat_labels/phase3_get_750.py rename to one_off_projects/landsat/recheck_landsat_labels/phase3_get_750.py diff --git a/landsat/recheck_landsat_labels/phase3a_selected.csv b/one_off_projects/landsat/recheck_landsat_labels/phase3a_selected.csv similarity index 100% rename from landsat/recheck_landsat_labels/phase3a_selected.csv rename to one_off_projects/landsat/recheck_landsat_labels/phase3a_selected.csv diff --git a/pyproject.toml b/pyproject.toml index fe48d9f1..5cf08ac0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,8 @@ authors = [ {name = "Favyen Bastani", email = "favyenb@allenai.org"}, {name = "Patrick Beukema", email = "patrickb@allenai.org"}, {name = "Henry Herzog", email = "henryh@allenai.org"}, + {name = "Yawen Zhang", email = "yawenz@allenai.org"}, + {name = "Mike Jacobi", email = "mikej@allenai.org"}, ] readme = "README.md" requires-python = ">=3.10" diff --git a/rslp/landsat_vessels/config.py b/rslp/landsat_vessels/config.py index 96a5b613..510eb82a 100644 --- a/rslp/landsat_vessels/config.py +++ b/rslp/landsat_vessels/config.py @@ -6,13 +6,9 @@ LANDSAT_LAYER_NAME = "landsat" LANDSAT_RESOLUTION = 15 -# Detector config +# Data config LOCAL_FILES_DATASET_CONFIG = "data/landsat_vessels/predict_dataset_config.json" AWS_DATASET_CONFIG = "data/landsat_vessels/predict_dataset_config_aws.json" -DETECT_MODEL_CONFIG = "data/landsat_vessels/config.yaml" -DETECT_MODEL_EVAL_CONFIG = ( - "data/landsat_vessels/config_eval.yaml" # config for evaluation -) # Extract Landsat bands from local config file with open(LOCAL_FILES_DATASET_CONFIG) as f: @@ -21,8 +17,9 @@ band["bands"][0] for band in json_data["layers"][LANDSAT_LAYER_NAME]["band_sets"] ] -# Classifier config 
-CLASSIFY_MODEL_CONFIG = "landsat/recheck_landsat_labels/phase123_config.yaml" +# Model config +DETECT_MODEL_CONFIG = "data/landsat_vessels/config_detector.yaml" +CLASSIFY_MODEL_CONFIG = "data/landsat_vessels/config_classifier.yaml" CLASSIFY_WINDOW_SIZE = 64 # Filter config diff --git a/rslp/launch_beaker.py b/rslp/launch_beaker.py index f3800844..8d39ac53 100644 --- a/rslp/launch_beaker.py +++ b/rslp/launch_beaker.py @@ -23,6 +23,7 @@ def launch_job( workspace: str = DEFAULT_WORKSPACE, username: str | None = None, gpus: int = 1, + shared_memory: str = "256GiB", ) -> None: """Launch training for the specified config on Beaker. @@ -37,6 +38,7 @@ def launch_job( workspace: the Beaker workspace to run the job in. username: optional W&B username to associate with the W&B run for this job. gpus: number of GPUs to use. + shared_memory: shared memory resource string to use, e.g. "256GiB". """ hparams_configs_dir = None @@ -104,7 +106,7 @@ def launch_job( preemptible=True, datasets=[launcher_lib.create_gcp_credentials_mount()], env_vars=env_vars, - resources=TaskResources(gpu_count=gpus), + resources=TaskResources(gpu_count=gpus, shared_memory=shared_memory), ) unique_id = str(uuid.uuid4())[0:8] beaker.experiment.create(f"{project_id}_{experiment_id}_{unique_id}", spec) @@ -167,6 +169,12 @@ def launch_job( help="Number of GPUs", default=1, ) + parser.add_argument( + "--shared_memory", + type=str, + help="Shared memory", + default="256GiB", + ) args = parser.parse_args() launch_job( config_path=args.config_path, @@ -177,4 +185,5 @@ def launch_job( workspace=args.workspace, username=args.username, gpus=args.gpus, + shared_memory=args.shared_memory, ) diff --git a/rslp/lightning_cli.py b/rslp/lightning_cli.py index ec7b859c..b05c301d 100644 --- a/rslp/lightning_cli.py +++ b/rslp/lightning_cli.py @@ -107,7 +107,7 @@ def on_fit_start(self, trainer: Trainer, pl_module: LightningModule) -> None: self.project_id, self.experiment_id, self.run_id, wandb_id ) - if self.config_str is not None: + if self.config_str is not None and "rslp_project" not in wandb.config: wandb.config.update(json.loads(self.config_str))
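
For reference, the `lightning_cli.py` change above appears intended to make the W&B config upload one-time: on a resumed run, `wandb.config` already contains the launch-time keys (including `rslp_project`), so the update is skipped rather than re-uploading a possibly conflicting config. A minimal sketch of the guarded update (assuming a W&B run is initialized; the project name and config values here are hypothetical):

    import json

    import wandb

    wandb.init(project="example")  # hypothetical run
    config_str = json.dumps({"rslp_project": "sentinel2_vessels", "lr": 1e-4})
    # Upload the launch-time config only once; when the run resumes,
    # "rslp_project" is already present in wandb.config and the update
    # is skipped.
    if config_str is not None and "rslp_project" not in wandb.config:
        wandb.config.update(json.loads(config_str))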