diff --git a/.gitmodules b/.gitmodules index 5f7212dce..a24788cb8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,12 @@ -[submodule "submodules/Catch2"] - path = submodules/Catch2 +[submodule "components/core/submodules/Catch2"] + path = components/core/submodules/Catch2 url = https://github.com/catchorg/Catch2.git -[submodule "submodules/date"] - path = submodules/date +[submodule "components/core/submodules/date"] + path = components/core/submodules/date url = https://github.com/HowardHinnant/date.git -[submodule "submodules/yaml-cpp"] - path = submodules/yaml-cpp +[submodule "components/core/submodules/yaml-cpp"] + path = components/core/submodules/yaml-cpp url = https://github.com/jbeder/yaml-cpp.git -[submodule "submodules/json"] - path = submodules/json +[submodule "components/core/submodules/json"] + path = components/core/submodules/json url = https://github.com/nlohmann/json.git diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md index f913393d2..9a4719c0e 100644 --- a/README.md +++ b/README.md @@ -1,183 +1,29 @@ # CLP -Compressed Log Processor (CLP) is a tool capable of losslessly compressing text logs and searching the compressed logs without decompression. -To learn more about it, you can read our [paper](https://www.usenix.org/system/files/osdi21-rodrigues.pdf). - -🔔 clp-core is part of a larger CLP package that can be built from [clp-packager](https://github.com/y-scope/clp-packager). - -## Contents - -* [Getting Started](#getting-started) -* [Requirements](#requirements) -* [Building](#building) - * [Source Dependencies](#source-dependencies) - * [Packages](#packages) - * [Libraries](#libraries) - * [Build](#build) -* [Running](#running) - * [`clp`](#clp) - * [`clg`](#clg) -* [Parallel Compression](#parallel-compression) -* [Next Steps](#next-steps) - +Compressed Log Processor (CLP) is a tool capable of losslessly compressing text logs and searching +the compressed logs without decompression. To learn more about it, you can read our +[paper](https://www.usenix.org/system/files/osdi21-rodrigues.pdf). ## Getting Started -CLP is currently released as source, so you'll need to build it before running it. - -## Requirements - -* We have built and tested CLP on **Ubuntu 18.04 (bionic)** and **Ubuntu 20.04 (focal)**. - * If you have trouble building for another OS, file an issue and we may be able to help. -* A compiler that supports c++14 - -## Building - -* To build, we require some source dependencies, packages from package managers, and libraries built from source. - -### Source Dependencies - -We use both git submodules and third-party source packages. 
To download all, you can run this script: -```shell -tools/scripts/deps-download/download-all.sh -``` - -This will download: -* [Catch2](https://github.com/catchorg/Catch2.git) (v2.13.6) -* [date](https://github.com/HowardHinnant/date.git) (v3.0.1) -* [json](https://github.com/nlohmann/json.git) (v3.10.2) -* [SQLite3](https://www.sqlite.org/download.html) (v3.36.0) -* [yaml-cpp](https://github.com/jbeder/yaml-cpp.git) (v0.7.0) - -### Packages - -If you're using apt-get, you can use the following command to install all: -```shell -sudo apt-get install -y ca-certificates checkinstall cmake build-essential \ -libboost-filesystem-dev libboost-iostreams-dev libboost-program-options-dev \ -libssl-dev pkg-config rsync wget zlib1g-dev -``` - -This will download: -* ca-certificates -* checkinstall -* cmake -* build-essential -* libboost-filesystem-dev -* libboost-iostreams-dev -* libboost-program-options-dev -* libssl-dev -* pkg-config -* rsync -* wget -* zlib1g-dev - -### Libraries - -The latest versions of some packages are not offered by apt repositories, -so we've included some scripts to download, compile, and install them: -```shell -./tools/scripts/lib_install/fmtlib.sh 8.0.1 -./tools/scripts/lib_install/libarchive.sh 3.5.1 -./tools/scripts/lib_install/lz4.sh 1.8.2 -./tools/scripts/lib_install/mariadb-connector-c.sh 3.2.3 -./tools/scripts/lib_install/spdlog.sh 1.9.2 -./tools/scripts/lib_install/zstandard.sh 1.4.9 -``` - -### Build - -* Configure the cmake project: - ```shell - mkdir build - cd build - cmake ../ - ``` - -* Build: - ```shell - make - ``` - -## Running - -* CLP contains two executables: `clp` and `clg` - * `clp` is used for compressing and extracting logs - * `clg` is used for performing wildcard searches on the compressed logs - -### `clp` - -To compress some logs: -```shell -./clp c archives-dir /home/my/logs -``` -* `archives-dir` is where compressed logs should be output - * `clp` will create a number of files and directories within, so it's best if this directory is empty - * You can use the same directory repeatedly and `clp` will add to the compressed logs within. -* `/home/my/logs` is any log file or directory containing log files - -To decompress those logs: -```shell -./clp x archive-dir decompressed -``` -* `archives-dir` is where the compressed logs were previously stored -* `decompressed` is a directory where they will be decompressed to - -You can also decompress a specific file: -```shell -./clp x archive-dir decompressed /my/file/path.log -``` -* `/my/file/path.log` is the uncompressed file's path (the one that was passed to `clp` for compression) - -More usage instructions can be found by running: -```shell -./clp --help -``` - -### `clg` - -To search the compressed logs: -```shell -./clg archives-dir " a *wildcard* search phrase " -``` -* `archives-dir` is where the compressed logs were previously stored -* The search phrase can contain the `*` wildcard which matches 0 or more characters, or the `?` wildcard which matches any single character. - -Similar to `clp`, `clg` can search a single file: -```shell -./clg archives-dir " a *wildcard* search phrase " /my/file/path.log -``` -* `/my/file/path.log` is the uncompressed file's path (the one that was passed to `clp` for compression) - -More usage instructions can be found by running: -```shell -./clg --help -``` - -## Parallel Compression - -By default, `clp` uses an embedded SQLite database, so each directory containing archives can only -be accessed by a single `clp` instance. 
+You can download a release from the [releases](TODO) page or you can build the latest by using the +[packager](tools/packager/README.md). -To enable parallel compression to the same archives directory, `clp`/`clg` can be configured to -use a MySQL-type database (MariaDB) as follows: +## Project Structure -* Install and configure MariaDB using the instructions for your platform -* Create a user that has privileges to create databases, create tables, insert records, and delete - records. -* Copy and change `config/metadata-db.yml`, setting the type to `mysql` and uncommenting the MySQL - parameters. -* Install the MariaDB and PyYAML Python packages `pip3 install mariadb PyYAML` - * This is necessary to run the database initialization script. If you prefer, you can run the - SQL statements in `tools/scripts/db/init-db.py` directly. -* Run `tools/scripts/db/init-db.py` with the updated config file. This will initialize the - database CLP requires. -* Run `clp` or `clg` as before, with the addition of the `--db-config-file` option pointing at - the updated config file. -* To compress in parallel, simply run another instance of `clp` concurrently. +CLP is currently split across a few different components in the [components](components) +directory: -Note that currently, decompression (`clp x`) and search (`clg`) can only be run with a single -instance. We are in the process of open-sourcing parallelizable versions of these as well. +* [clp-py-utils](components/clp-py-utils) contains Python utilities common to several of the + other components. +* [compression-job-handler](components/compression-job-handler) contains code to submit + compression jobs to a cluster. +* [core](components/core) contains code to compress uncompressed logs, decompress compressed + logs, and search compressed logs. +* [job-orchestration](components/job-orchestration) contains code to schedule compression jobs on + the cluster. +* [package-template](components/package-template) contains the base directory structure and files of the + CLP package. ## Next Steps diff --git a/components/clp-py-utils/LICENSE b/components/clp-py-utils/LICENSE new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/components/clp-py-utils/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/components/clp-py-utils/README.md b/components/clp-py-utils/README.md new file mode 100644 index 000000000..57db586bc --- /dev/null +++ b/components/clp-py-utils/README.md @@ -0,0 +1,10 @@ +# CLP Python Utilities + +This python module contains utilities imported by other Python modules in the CLP package. 
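+
+For example, once installed (see Installation below), another CLP component might use these utilities roughly as in this minimal sketch (the config file path is illustrative):
+
+```python
+import pathlib
+
+from clp_py_utils.clp_config import CLPConfig
+from clp_py_utils.core import read_yaml_config_file
+from clp_py_utils.pretty_size import pretty_size
+
+# Parse and validate a CLP package config file (the path below is hypothetical)
+clp_config = CLPConfig.parse_obj(read_yaml_config_file(pathlib.Path('etc/clp-config.yaml')))
+
+# Print one of the archive output size targets in human-readable form
+print(pretty_size(clp_config.archive_output.target_archive_size))
+```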
+ +## Installation + +```bash +pip3 install -r requirements.txt --target /lib/python3/site-packages +cp -R clp_py_utils /lib/python3/site-packages +``` diff --git a/components/clp-py-utils/clp_py_utils/__init__.py b/components/clp-py-utils/clp_py_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py new file mode 100644 index 000000000..c00d0d8cc --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/clp_config.py @@ -0,0 +1,176 @@ +import typing + +from pydantic import BaseModel, validator + +from clp_py_utils.pretty_size import pretty_size + + +class Database(BaseModel): + type: str + host: str + port: int + username: str + password: str + name: str + ssl_cert: typing.Optional[str] = None + auto_commit: bool = False + compress: bool = True + + @validator('type') + def validate_database_type(cls, field): + supported_database_type = ['mysql', 'mariadb', 'bundled'] + if field not in supported_database_type: + raise ValueError(f'must be one of the following {"|".join(supported_database_type)}') + return field + + def get_mysql_connection_params(self): + # Currently, mysql's connector parameters are the same as mariadb's + connection_params = { + 'host': self.host, + 'port': self.port, + 'user': self.username, + 'password': self.password, + 'database': self.name, + 'compress': self.compress, + 'autocommit': self.auto_commit + } + if self.ssl_cert: + connection_params['ssl_cert'] = self.ssl_cert + return connection_params + + def get_mariadb_connection_params(self): + # Currently, mariadb's connector parameters are the same as mysql's + connection_params = { + 'host': self.host, + 'port': self.port, + 'user': self.username, + 'password': self.password, + 'database': self.name, + 'compress': self.compress, + 'autocommit': self.auto_commit + } + if self.ssl_cert: + connection_params['ssl_cert'] = self.ssl_cert + return connection_params + + def get_clp_connection_params_and_type(self): + connection_params_and_type = { + 'type': 'mysql', # hard-coded to 'mysql' since CLP only supports 'mysql' for the global database + 'host': self.host, + 'port': self.port, + 'username': self.username, + 'password': self.password, + 'name': self.name, + 'compress': self.compress, + 'autocommit': self.auto_commit + } + if self.ssl_cert: + connection_params_and_type['ssl_cert'] = self.ssl_cert + return connection_params_and_type + + +class Scheduler(BaseModel): + host: str + username: str + password: str + jobs_poll_delay: int + + +class ArchiveOutput(BaseModel): + type: str # Support only 'fs' type for now + directory: str + storage_is_node_specific: bool = False + target_archive_size: int + target_dictionaries_size: int + target_encoded_file_size: int + target_segment_size: int + + @validator('type') + def validate_type(cls, field): + if 'fs' != field: + raise ValueError('only fs type is supported in the opensource distribution') + return field + + @validator('target_archive_size') + def validate_target_archive_size(cls, field): + if field <= 0: + raise ValueError('target_archive_size parameter must be greater than 0') + return field + + @validator('target_dictionaries_size') + def validate_target_dictionaries_size(cls, field): + if field <= 0: + raise ValueError('target_dictionaries_size parameter must be greater than 0') + return field + + @validator('target_encoded_file_size') + def validate_target_encoded_file_size(cls, field): + if field <= 0: + raise ValueError(f'target_encoded_file_size
parameter must be greater than 0') + return field + + @validator('target_segment_size') + def validate_target_segment_size(cls, field): + if field <= 0: + raise ValueError('target_segment_size parameter must be greater than 0') + return field + + +class CLPConfig(BaseModel): + input_logs_dfs_path: str + database: Database + scheduler: Scheduler + archive_output: ArchiveOutput + data_directory: str + logs_directory: str + + def generate_config_file_content_with_comments(self): + file_content = [ + f'# A path containing any logs you wish to compress. Must be reachable by all workers.', + f'# - This path will be exposed inside the docker container.', + f'# - This path should not be any path that exists in the container image (an Ubuntu image) (e.g., /var/log).', + f'# - Limitation: Docker does not follow symlinks outside the build context, so we recommend avoiding symbolic links', + f'input_logs_dfs_path: {self.input_logs_dfs_path}', + f'', + f'database:', + f' type: {self.database.type}', + f' host: {self.database.host}', + f' port: {self.database.port}', + f' username: {self.database.username}', + f' password: {self.database.password}', + f' name: {self.database.name}', + f'', + f'scheduler:', + f' host: {self.scheduler.host}', + f' username: {self.scheduler.username}', + f' password: {self.scheduler.password}', + f' jobs_poll_delay: {self.scheduler.jobs_poll_delay} # Seconds', + f'', + f'# Where archives should be output to', + f'# Note: Only one output type may be specified', + f'archive_output:', + f' type: {self.archive_output.type}', + f' directory: "{self.archive_output.directory}"', + f'', + f' storage_is_node_specific: {self.archive_output.storage_is_node_specific}', + f'', + f' # How much data CLP should try to compress into each archive', + f' target_archive_size: {self.archive_output.target_archive_size} # {pretty_size(self.archive_output.target_archive_size)}', + f'', + f' # How large the dictionaries should be allowed to get before the archive is closed and a new one is created', + f' target_dictionaries_size: {self.archive_output.target_dictionaries_size} # {pretty_size(self.archive_output.target_dictionaries_size)}', + f'', + f' # How large each encoded file should be before being split into a new encoded file', + f' target_encoded_file_size: {self.archive_output.target_encoded_file_size} # {pretty_size(self.archive_output.target_encoded_file_size)}', + f'', + f' # How much data CLP should try to fit into each segment within an archive', + f' target_segment_size: {self.archive_output.target_segment_size} # {pretty_size(self.archive_output.target_segment_size)}', + f'', + f'# Location where other data is stored', + f'data_directory: "{self.data_directory}"', + f'', + f'# Location where logs are stored', + f'logs_directory: "{self.logs_directory}"', + f'', + ] + return '\n'.join(file_content) diff --git a/components/clp-py-utils/clp_py_utils/clp_io_config.py b/components/clp-py-utils/clp_py_utils/clp_io_config.py new file mode 100644 index 000000000..0be4c9164 --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/clp_io_config.py @@ -0,0 +1,30 @@ +import typing + +from pydantic import BaseModel + + +class PathsToCompress(BaseModel): + file_paths: typing.List[str] + group_ids: typing.List[int] + st_sizes: typing.List[int] + empty_directories: typing.List[str] = None + + +class InputConfig(BaseModel): + type: str + list_path: str + path_prefix_to_remove: str = None + + +class OutputConfig(BaseModel): + type: str + target_archive_size: int + target_dictionaries_size: int +
target_segment_size: int + target_encoded_file_size: int + storage_is_node_specific: bool + + +class ClpIoConfig(BaseModel): + input: InputConfig + output: OutputConfig diff --git a/components/clp-py-utils/clp_py_utils/clp_package_config.py b/components/clp-py-utils/clp_py_utils/clp_package_config.py new file mode 100644 index 000000000..4c4d71ae0 --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/clp_package_config.py @@ -0,0 +1,60 @@ +from pydantic import BaseModel, validator + +from clp_py_utils.pretty_size import pretty_size + + +# Limited set of configurations operation found in clp_config.py +class ArchiveOutput(BaseModel): + target_archive_size: int + target_dictionaries_size: int + target_encoded_file_size: int + target_segment_size: int + + @validator('target_archive_size') + def validate_target_archive_size(cls, field): + if field <= 0: + raise ValueError('target_archive_size parameter must be greater than 0') + return field + + @validator('target_dictionaries_size') + def validate_target_dictionaries_size(cls, field): + if field <= 0: + raise ValueError('target_dictionaries_size parameter must be greater than 0') + return field + + @validator('target_encoded_file_size') + def validate_target_encoded_file_size(cls, field): + if field <= 0: + raise ValueError('target_encoded_file_size parameter must be greater than 0') + return field + + @validator('target_segment_size') + def validate_target_segment_size(cls, field): + if field <= 0: + raise ValueError('target_segment_size parameter must be greater than 0') + return field + + +class CLPPackageConfig(BaseModel): + cluster_name: str + archive_output: ArchiveOutput + + def generate_package_config_file_content_with_comments(self): + file_content = [ + f'cluster_name: {self.cluster_name}', + f'', + f'archive_output:', + f' # How much data CLP should try to compress into each archive', + f' target_archive_size: {str(self.archive_output.target_archive_size)} # {pretty_size(self.archive_output.target_archive_size)}', + f'', + f' # How large the dictionaries should be allowed to get before the archive is closed and a new one is created', + f' target_dictionaries_size: {str(self.archive_output.target_dictionaries_size)} # {pretty_size(self.archive_output.target_dictionaries_size)}', + f'', + f' # How large each encoded file should be before being split into a new encoded file', + f' target_encoded_file_size: {str(self.archive_output.target_encoded_file_size)} # {pretty_size(self.archive_output.target_encoded_file_size)}', + f'', + f' # How much data CLP should try to fit into each segment within an archive', + f' target_segment_size: {str(self.archive_output.target_segment_size)} # {pretty_size(self.archive_output.target_segment_size)}', + f'' + ] + return '\n'.join(file_content) diff --git a/components/clp-py-utils/clp_py_utils/compression.py b/components/clp-py-utils/clp_py_utils/compression.py new file mode 100644 index 000000000..dbad6d10f --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/compression.py @@ -0,0 +1,141 @@ +import pathlib +import typing + +import Levenshtein + +# Constants +FILE_GROUPING_MIN_LEVENSHTEIN_RATIO = 0.6 + + +class FileMetadata: + __slots__ = ('path', 'size', 'estimated_uncompressed_size') + + def __init__(self, path: pathlib.Path, size: int): + self.path = path + self.size = size + self.estimated_uncompressed_size = size + + filename = path.name + if any(filename.endswith(extension) for extension in ['.gz', '.gzip', '.tgz', '.tar.gz']): + self.estimated_uncompressed_size *= 13 + elif 
any(filename.endswith(extension) for extension in ['.zstd', '.zstandard', '.tar.zstd', '.tar.zstandard']): + self.estimated_uncompressed_size *= 8 + + +class FilesPartition: + def __init__(self): + self.__files = [] + self.__file_paths = [] + self.__group_ids = [] + self.__st_sizes = [] + self.__total_file_size = 0 + + def add_file(self, file_metadata: FileMetadata, group_id: int): + self.__files.append(file_metadata) + self.__file_paths.append(str(file_metadata.path)) + self.__group_ids.append(group_id) + self.__st_sizes.append(file_metadata.size) + self.__total_file_size += file_metadata.estimated_uncompressed_size + + def add_file_if_empty(self, file_metadata: FileMetadata, group_id: int): + if file_metadata.estimated_uncompressed_size > 0: + return False + + self.__files.append(file_metadata) + self.__file_paths.append(str(file_metadata.path)) + self.__group_ids.append(group_id) + self.__st_sizes.append(file_metadata.size) + return True + + def pop_files(self): + files = self.__files + file_paths = self.__file_paths + group_ids = self.__group_ids + st_sizes = self.__st_sizes + total_file_size = self.__total_file_size + + self.__files = [] + self.__file_paths = [] + self.__group_ids = [] + self.__st_sizes = [] + self.__total_file_size = 0 + + return files, file_paths, group_ids, st_sizes, total_file_size + + def get_total_file_size(self): + return self.__total_file_size + + def contains_files(self): + return len(self.__files) > 0 + + +def file_paths_in_same_group(a: pathlib.Path, b: pathlib.Path): + return Levenshtein.ratio(str(a.name), str(b.name)) >= FILE_GROUPING_MIN_LEVENSHTEIN_RATIO + + +def group_files_by_similar_filenames(files: typing.List[FileMetadata]): + groups = [] + + if len(files) == 0: + return groups + + current_group_id = 0 + current_group = {'id': current_group_id, 'files': []} + groups.append(current_group) + + # Sort by filename + files.sort(key=lambda x: x.path.name) + + file_ix = 0 + file = files[file_ix] + current_group['files'].append(file) + last_file_path = file.path + + for file_ix in range(1, len(files)): + file = files[file_ix] + if not file_paths_in_same_group(last_file_path, file.path): + current_group_id += 1 + current_group = {'id': current_group_id, 'files': []} + groups.append(current_group) + + current_group['files'].append(file) + last_file_path = file.path + + return groups + + +def validate_path_and_get_info(required_parent_dir: pathlib.Path, path: pathlib.Path): + file = None + empty_directory = None + + # Verify that path is absolute + if not path.is_absolute(): + raise ValueError(f'"{path}" is not absolute.') + + # Verify that path exists + if not path.exists(): + raise ValueError(f'"{path}" does not exist.') + + # Verify that path points to a file/dir within required parent dir + try: + path.resolve().relative_to(required_parent_dir) + except ValueError: + raise ValueError(f'"{path}" is not within {required_parent_dir}') + + # Convert path to a path within required parent dir if necessary + # (e.g., if path is a symlink outside parent dir, but points to a file/dir inside parent dir) + try: + path.relative_to(required_parent_dir) + except ValueError: + # Not within parent dir, so resolve it + path = path.resolve() + + if path.is_dir(): + # Check if directory is empty + if next(path.iterdir(), None) is None: + empty_directory = str(path) + else: + file_size = path.stat().st_size + file = FileMetadata(path, file_size) + + return file, empty_directory diff --git a/components/clp-py-utils/clp_py_utils/core.py 
b/components/clp-py-utils/clp_py_utils/core.py new file mode 100644 index 000000000..db202d432 --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/core.py @@ -0,0 +1,11 @@ +import pathlib + +import yaml + + +def read_yaml_config_file(yaml_config_file_path: pathlib.Path): + with open(yaml_config_file_path, 'r') as yaml_config_file: + config = yaml.safe_load(yaml_config_file) + if config is None: + raise Exception(f'Unable to parse configuration from {yaml_config_file_path}.') + return config diff --git a/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py b/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py new file mode 100644 index 000000000..ce072182d --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import argparse +import logging +import sys +from contextlib import closing + +from pydantic import ValidationError +from sql_adapter import SQL_Adapter + +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.core import read_yaml_config_file + +# Setup logging +# Create logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Setup CLP metadata tables compression and search.') + args_parser.add_argument('--config', required=True, help='CLP package config file.') + parsed_args = args_parser.parse_args(argv[1:]) + + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(parsed_args.config)) + sql_adapter = SQL_Adapter(clp_config.database) + with closing(sql_adapter.create_connection()) as metadata_db, \ + closing(metadata_db.cursor(dictionary=True)) as metadata_db_cursor: + metadata_db_cursor.execute(""" + CREATE TABLE IF NOT EXISTS `archives` ( + `pagination_id` BIGINT unsigned NOT NULL AUTO_INCREMENT, + `id` VARCHAR(64) NOT NULL, + `storage_id` VARCHAR(64) NOT NULL, + `uncompressed_size` BIGINT NOT NULL, + `size` BIGINT NOT NULL, + `creator_id` VARCHAR(64) NOT NULL, + `creation_ix` INT NOT NULL, + KEY `archives_creation_order` (`creator_id`,`creation_ix`) USING BTREE, + UNIQUE KEY `archive_id` (`id`) USING BTREE, + PRIMARY KEY (`pagination_id`) + ); + """ + ) + + metadata_db_cursor.execute(""" + CREATE TABLE IF NOT EXISTS `files` ( + `id` VARCHAR(64) NOT NULL, + `orig_file_id` VARCHAR(64) NOT NULL, + `path` VARCHAR(12288) NOT NULL, + `begin_timestamp` BIGINT NOT NULL, + `end_timestamp` BIGINT NOT NULL, + `num_uncompressed_bytes` BIGINT NOT NULL, + `num_messages` BIGINT NOT NULL, + `archive_id` VARCHAR(64) NOT NULL, + KEY `files_path` (path(768)) USING BTREE, + KEY `files_archive_id` (`archive_id`) USING BTREE, + PRIMARY KEY (`id`) + ) ROW_FORMAT=DYNAMIC + ; + """ + ) + + metadata_db.commit() + logger.info('Successfully created clp metadata tables for compression and search') + + except ValidationError as err: + logger.error(err) + return -1 + except Exception as ex: + logger.error(ex) + return -1 + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py b/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py new file mode 100644 index 000000000..68161c229 --- /dev/null +++ 
b/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +import argparse +import logging +import sys +from contextlib import closing + +from pydantic import ValidationError +from sql_adapter import SQL_Adapter + +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.core import read_yaml_config_file + +# Setup logging +# Create logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Setup metadata tables for job orchestration.') + args_parser.add_argument('--config', required=True, help='CLP package config file.') + parsed_args = args_parser.parse_args(argv[1:]) + + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(parsed_args.config)) + sql_adapter = SQL_Adapter(clp_config.database) + with closing(sql_adapter.create_connection()) as scheduling_db, \ + closing(scheduling_db.cursor(dictionary=True)) as scheduling_db_cursor: + scheduling_db_cursor.execute(""" + CREATE TABLE IF NOT EXISTS `compression_jobs` ( + `job_id` INT NOT NULL AUTO_INCREMENT, + `job_status` VARCHAR(16) NOT NULL DEFAULT 'SCHEDULING', + `job_status_msg` VARCHAR(255) NOT NULL DEFAULT '', + `job_creation_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + `job_start_time` DATETIME NULL DEFAULT NULL, + `job_duration` INT NULL DEFAULT NULL, + `job_original_size` BIGINT NOT NULL DEFAULT '0', + `job_uncompressed_size` BIGINT NOT NULL DEFAULT '0', + `job_compressed_size` BIGINT NOT NULL DEFAULT '0', + `num_tasks` INT NOT NULL DEFAULT '0', + `num_tasks_completed` INT NOT NULL DEFAULT '0', + `clp_binary_version` INT NULL DEFAULT NULL, + `clp_config` VARBINARY(60000) NOT NULL, + PRIMARY KEY (`job_id`) USING BTREE, + INDEX `JOB_STATUS` (`job_status`) USING BTREE + ) ROW_FORMAT=DYNAMIC + ; + """ + ) + + scheduling_db_cursor.execute(""" + CREATE TABLE IF NOT EXISTS `compression_tasks` ( + `task_id` BIGINT NOT NULL AUTO_INCREMENT, + `task_status` VARCHAR(16) NOT NULL DEFAULT 'SUBMITTED', + `task_scheduled_time` DATETIME NULL DEFAULT NULL, + `task_start_time` DATETIME NULL DEFAULT NULL, + `task_duration` SMALLINT NULL DEFAULT NULL, + `job_id` INT NOT NULL, + `clp_paths_to_compress` VARBINARY(60000) NOT NULL, + `partition_original_size` BIGINT NOT NULL, + `partition_uncompressed_size` BIGINT NULL DEFAULT NULL, + `partition_compressed_size` BIGINT NULL DEFAULT NULL, + PRIMARY KEY (`task_id`) USING BTREE, + INDEX `job_id` (`job_id`) USING BTREE, + INDEX `TASK_STATUS` (`task_status`) USING BTREE, + INDEX `TASK_START_TIME` (`task_start_time`) USING BTREE, + CONSTRAINT `compression_tasks` FOREIGN KEY (`job_id`) + REFERENCES `compression_jobs` (`job_id`) ON UPDATE NO ACTION ON DELETE NO ACTION + ) ROW_FORMAT=DYNAMIC + ; + """ + ) + + scheduling_db.commit() + logger.info('Successfully created compression_jobs and compression_tasks orchestration tables') + + except ValidationError as err: + logger.error(err) + return -1 + except Exception as ex: + logger.error(ex) + return -1 + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/clp-py-utils/clp_py_utils/pretty_size.py b/components/clp-py-utils/clp_py_utils/pretty_size.py new file mode 100644 index 000000000..c69c5ee59 --- 
/dev/null +++ b/components/clp-py-utils/clp_py_utils/pretty_size.py @@ -0,0 +1,6 @@ +def pretty_size(num, suffix='B'): + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + if abs(num) < 1024.0: + return '%3.2f%s%s' % (num, unit, suffix) + num /= 1024.0 + return '%.2f%s%s' % (num, 'Yi', suffix) diff --git a/components/clp-py-utils/clp_py_utils/sql_adapter.py b/components/clp-py-utils/clp_py_utils/sql_adapter.py new file mode 100644 index 000000000..a42fa79c9 --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/sql_adapter.py @@ -0,0 +1,43 @@ +import logging + +import mariadb +import mysql.connector +from mysql.connector import errorcode + +from clp_py_utils.clp_config import Database + + +class SQL_Adapter: + def __init__(self, database_config: Database): + self.database_config = database_config + + def create_mysql_connection(self) -> mysql.connector.MySQLConnection: + try: + connection = mysql.connector.connect(**self.database_config.get_mysql_connection_params()) + except mysql.connector.Error as err: + if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: + logging.error('Database access denied.') + elif err.errno == errorcode.ER_BAD_DB_ERROR: + logging.error(f'Specified database "{self.database_config.name}" does not exist.') + else: + logging.error(err) + raise err + else: + return connection + + def create_mariadb_connection(self) -> mariadb.connection: + try: + connection = mariadb.connect(**self.database_config.get_mysql_connection_params()) + except mariadb.Error as err: + logging.error(f'Error connecting to MariaDB: {err}') + raise err + else: + return connection + + def create_connection(self): + if 'mysql' == self.database_config.type: + return self.create_mysql_connection() + elif 'mariadb' == self.database_config.type: + return self.create_mariadb_connection() + else: + raise NotImplementedError diff --git a/components/clp-py-utils/requirements.txt b/components/clp-py-utils/requirements.txt new file mode 100644 index 000000000..c5a0c915f --- /dev/null +++ b/components/clp-py-utils/requirements.txt @@ -0,0 +1,5 @@ +python-Levenshtein +PyYAML==5.4 +pydantic==1.8.2 +mysql-connector-python==8.0.26 +mariadb~=1.0.7 diff --git a/components/compression-job-handler/.gitignore b/components/compression-job-handler/.gitignore new file mode 100644 index 000000000..4b0a0fbd9 --- /dev/null +++ b/components/compression-job-handler/.gitignore @@ -0,0 +1 @@ +clp-config.yaml \ No newline at end of file diff --git a/components/compression-job-handler/LICENSE b/components/compression-job-handler/LICENSE new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/components/compression-job-handler/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
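For orientation before the compression-job-handler files below, here is a minimal usage sketch for the `clp_py_utils` helpers added earlier in this diff (`pretty_size` and `SQL_Adapter`); it assumes the `clp_py_utils` package is importable, and the sample values are purely illustrative.

```python
# Minimal usage sketch for the clp_py_utils helpers added above; values are illustrative.
from clp_py_utils.pretty_size import pretty_size

print(pretty_size(3 * 1024 ** 3))  # "3.00GB"
print(pretty_size(1_234_567))      # "1.18MB"

# SQL_Adapter dispatches on the configured database type:
#   SQL_Adapter(database_config).create_connection()
# returns a mysql.connector connection when database_config.type is 'mysql',
# a mariadb connection when it is 'mariadb', and raises NotImplementedError otherwise.
```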
\ No newline at end of file diff --git a/components/compression-job-handler/README.md b/components/compression-job-handler/README.md new file mode 100644 index 000000000..423299f51 --- /dev/null +++ b/components/compression-job-handler/README.md @@ -0,0 +1,32 @@ +# CLP Compression Job Handler + +This Python module submits compression jobs to the CLP compression scheduler. + +## Installation + +```bash +pip3 install -r requirements.txt --target /lib/python3/site-packages +cp -R clp_py_utils /lib/python3/site-packages +``` + +## Usage + +Below are a few ways to use this module. + +### Docker compression wrapper + +```bash +/sbin/compress +``` + +### Native compression wrapper + +```bash +/sbin/native/compress +``` + +### Standalone + +```bash +PYTHONPATH= python3 -m compression_job_handler +``` diff --git a/components/compression-job-handler/compression_job_handler/__init__.py b/components/compression-job-handler/compression_job_handler/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/compression-job-handler/compression_job_handler/compression_job_handler.py b/components/compression-job-handler/compression_job_handler/compression_job_handler.py new file mode 100644 index 000000000..e88983615 --- /dev/null +++ b/components/compression-job-handler/compression_job_handler/compression_job_handler.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +import argparse +import logging +import pathlib +import sys +import time +import typing +from contextlib import closing + +import msgpack +import mysql.connector +import zstandard +import zstandard as zstd +from pydantic import ValidationError + +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.clp_io_config import PathsToCompress, InputConfig, OutputConfig, ClpIoConfig +from clp_py_utils.compression import FileMetadata, FilesPartition, \ + group_files_by_similar_filenames, validate_path_and_get_info +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.pretty_size import pretty_size +from clp_py_utils.sql_adapter import SQL_Adapter +from .utils.common import JobCompletionStatus + +# Setup logging +# Create logger +logger = logging.getLogger('compression-job-handler') +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +class PathsToCompressBuffer: + def __init__(self, scheduler_db_cursor, maintain_file_ordering: bool, + empty_directories_allowed: bool, target_archive_size: int, + file_size_to_trigger_compression, scheduling_job_id: int, zstd_cctx): + self.__files: typing.List[FileMetadata] = [] + self.__maintain_file_ordering: bool = maintain_file_ordering + if empty_directories_allowed: + self.__empty_directories: typing.Optional[typing.List[str]] = [] + else: + self.__empty_directories: typing.Optional[typing.List[str]] = None + self.__total_file_size: int = 0 + self.__target_archive_size: int = target_archive_size + self.__file_size_to_trigger_compression: int = file_size_to_trigger_compression + self.__scheduling_job_id: int = scheduling_job_id + self.scheduling_job_id: int = scheduling_job_id + self.__zstd_cctx = zstd_cctx + + self.__scheduler_db_cursor = scheduler_db_cursor + self.num_tasks = 0 + + def add_file(self, file: FileMetadata): + self.__files.append(file) + self.__total_file_size += file.estimated_uncompressed_size + + if 
self.__total_file_size >= self.__file_size_to_trigger_compression: + self.__partition_and_compress(False) + + def add_empty_directory(self, path: pathlib.Path): + if self.__empty_directories is None: + return + self.__empty_directories.append(str(path)) + + def flush(self): + self.__partition_and_compress(True) + + def contains_paths(self): + return len(self.__files) > 0 or ( + self.__empty_directories and len(self.__empty_directories) > 0) + + def __submit_partition_for_compression(self, partition: FilesPartition): + files, file_paths, group_ids, st_sizes, partition_total_file_size = partition.pop_files() + paths_to_compress = PathsToCompress(file_paths=file_paths, group_ids=group_ids, st_sizes=st_sizes) + + if self.__empty_directories is not None and len(self.__empty_directories) > 0: + paths_to_compress.empty_directories = self.__empty_directories + self.__empty_directories = [] + + # Note: partition_total_file_size => estimated size, aggregate + # the st_size => real original size + self.__scheduler_db_cursor.execute( + f'INSERT INTO compression_tasks ' + f'(job_id, partition_original_size, clp_paths_to_compress) ' + f'VALUES({str(self.__scheduling_job_id)}, {str(sum(st_sizes))}, %s);', + (self.__zstd_cctx.compress(msgpack.packb(paths_to_compress.dict(exclude_none=True))),) + ) + self.num_tasks += 1 + + return partition_total_file_size + + def add_files(self, target_num_archives: int, target_archive_size: int, files): + target_num_archives = min(len(files), target_num_archives) + + groups = group_files_by_similar_filenames(files) + next_file_ix_per_group = [0 for _ in range(len(groups))] + + partitions = [FilesPartition() for _ in range(target_num_archives)] + + # Distribute files across partitions in round-robin order; full partitions are skipped + next_partition_ix = 0 + group_ix = 0 + while len(groups) > 0: + group_file_ix = next_file_ix_per_group[group_ix] + group_id = groups[group_ix]['id'] + group_files = groups[group_ix]['files'] + + file = group_files[group_file_ix] + + # Look for a partition with space + while True: + partition = partitions[next_partition_ix] + next_partition_ix = (next_partition_ix + 1) % target_num_archives + if partition.get_total_file_size() < target_archive_size: + break + + partition.add_file(file, group_id) + + group_file_ix += 1 + if len(group_files) == group_file_ix: + groups.pop(group_ix) + next_file_ix_per_group.pop(group_ix) + else: + next_file_ix_per_group[group_ix] = group_file_ix + group_ix += 1 + if len(groups) > 0: + group_ix %= len(groups) + + # Compress partitions + for partition in partitions: + self.__submit_partition_for_compression(partition) + + def __partition_and_compress(self, flush_buffer: bool): + if not flush_buffer and self.__total_file_size < self.__target_archive_size: + # Not enough data for a full partition and we don't need to exhaust the buffer + return + if not self.contains_paths(): + # Nothing to compress + return + + partition = FilesPartition() + + if self.__maintain_file_ordering: + # NOTE: grouping by filename is not supported when maintaining file ordering, + # so we give each file its own group ID to maintain ordering + + group_ix = 0 + # Compress full partitions + if self.__total_file_size >= self.__target_archive_size: + file_ix = 0 + for file_ix, file in enumerate(self.__files): + partition.add_file(file, group_ix) + group_ix += 1 + + # Compress partition if ready + if partition.get_total_file_size() >= self.__target_archive_size: + self.__total_file_size -= self.__submit_partition_for_compression( + partition) 
+ if self.__total_file_size < self.__target_archive_size: + # Not enough files to fill a partition, so break + break + # Pop compressed files + self.__files = self.__files[file_ix + 1:] + + # Compress remaining partial partition if necessary + if flush_buffer and self.contains_paths(): + for file in self.__files: + partition.add_file(file, group_ix) + group_ix += 1 + self.__total_file_size -= self.__submit_partition_for_compression(partition) + self.__files = [] + else: + groups = group_files_by_similar_filenames(self.__files) + next_file_ix_per_group = [0 for _ in range(len(groups))] + + group_ix = 0 + while len(groups) > 0: + group_file_ix = next_file_ix_per_group[group_ix] + group_id = groups[group_ix]['id'] + group_files = groups[group_ix]['files'] + + file = group_files[group_file_ix] + + partition.add_file(file, group_id) + + group_file_ix += 1 + if len(group_files) == group_file_ix: + groups.pop(group_ix) + next_file_ix_per_group.pop(group_ix) + else: + next_file_ix_per_group[group_ix] = group_file_ix + group_ix += 1 + if len(groups) > 0: + group_ix %= len(groups) + + # Compress partition if ready + if partition.get_total_file_size() >= self.__target_archive_size: + self.__total_file_size -= self.__submit_partition_for_compression(partition) + if not flush_buffer and self.__total_file_size < self.__target_archive_size: + # Not enough files to fill a partition and + # we don't need to exhaust the buffer, so break + break + + # Compress partial partition + if partition.contains_files(): + self.__total_file_size -= self.__submit_partition_for_compression(partition) + self.__files = [] + + # Pop compressed files + remaining_files = [] + for group_ix, group in enumerate(groups): + group_files = group['files'] + group_file_ix = next_file_ix_per_group[group_ix] + for i in range(group_file_ix, len(group_files)): + remaining_files.append(group_files[i]) + self.__files = remaining_files + + # Compress any remaining empty directories + if flush_buffer and self.contains_paths(): + self.__total_file_size -= self.__submit_partition_for_compression(partition) + self.__files = [] + + +def handle_job(scheduling_db, scheduling_db_cursor, clp_io_config: ClpIoConfig, logs_dir_abs: str, + fs_logs_required_parent_dir: pathlib.Path, zstd_cctx: zstandard.ZstdCompressor, + no_progress_reporting: bool) -> JobCompletionStatus: + job_logger = None + all_worker_jobs_successful = True + + try: + job_completed_with_errors = False + if 'fs' == clp_io_config.input.type: + # Create new job in the sql database + scheduling_db_cursor.execute( + 'INSERT INTO compression_jobs (clp_config) VALUES (%s);', + (zstd_cctx.compress(msgpack.packb(clp_io_config.dict(exclude_none=True, exclude_unset=True))),) + ) + scheduling_db.commit() + scheduling_job_id = scheduling_db_cursor.lastrowid + + # Create job-specific logger + job_str = f'job-{scheduling_job_id}' + job_logger = logging.getLogger(job_str) + job_logger.setLevel(logging.INFO) + combined_log_file_path = f'{logs_dir_abs}/{job_str}.log' + job_logger_file_handler = logging.FileHandler(combined_log_file_path) + job_logger_file_handler.setFormatter(logging_formatter) + job_logger.addHandler(logging_console_handler) + job_logger.addHandler(job_logger_file_handler) + + job_logger.debug(f'Starting job {scheduling_job_id}') + + paths_to_compress_buffer = PathsToCompressBuffer( + scheduler_db_cursor=scheduling_db_cursor, + maintain_file_ordering=False, + empty_directories_allowed=True, + target_archive_size=clp_io_config.output.target_archive_size, + 
file_size_to_trigger_compression=clp_io_config.output.target_archive_size * 2, + scheduling_job_id=scheduling_job_id, + zstd_cctx=zstd_cctx + ) + + # Compress all files at once to try and satisfy the target number of archives + job_logger.info("Iterating and partitioning files into tasks.") + # TODO: Handle file not found + with open(pathlib.Path(clp_io_config.input.list_path).resolve(), 'r') as f: + for path_idx, path in enumerate(f, start=1): + stripped_path = path.strip() + if '' == stripped_path: + # Skip empty paths + continue + path = pathlib.Path(stripped_path) + + try: + file, empty_directory = validate_path_and_get_info(fs_logs_required_parent_dir, path) + except ValueError as ex: + job_logger.error(str(ex)) + job_completed_with_errors = True + continue + + if file: + paths_to_compress_buffer.add_file(file) + elif empty_directory: + paths_to_compress_buffer.add_empty_directory(empty_directory) + + if path.is_dir(): + for internal_path in path.rglob('*'): + try: + file, empty_directory = validate_path_and_get_info( + fs_logs_required_parent_dir, internal_path) + except ValueError as ex: + job_logger.error(str(ex)) + job_completed_with_errors = True + continue + + if file: + paths_to_compress_buffer.add_file(file) + elif empty_directory: + paths_to_compress_buffer.add_empty_directory(empty_directory) + + if path_idx % 10000 == 0: + scheduling_db.commit() + + paths_to_compress_buffer.flush() + + # Ensure all of the scheduled task and the total number of tasks + # in the job row has been updated and committed + scheduling_db_cursor.execute( + f'UPDATE compression_jobs ' + f'SET num_tasks={paths_to_compress_buffer.num_tasks}, job_status="SCHEDULED" ' + f'WHERE job_id={scheduling_job_id};' + ) + scheduling_db.commit() + + # TODO: what happens when commit fails, log error and crash ASAP + + # Wait for jobs to finish + job_logger.info(f'Waiting for {paths_to_compress_buffer.num_tasks} task(s) to finish.') + + # Simply poll the job_status in the job scheduling table + if no_progress_reporting: + polling_query = \ + f'SELECT job_status, job_status_msg FROM compression_jobs ' \ + f'WHERE job_id={scheduling_job_id};' + else: + polling_query = \ + f'SELECT job_status, job_status_msg, job_uncompressed_size, job_compressed_size ' \ + f'FROM compression_jobs WHERE job_id={scheduling_job_id};' + + completion_query = \ + f'SELECT job_duration, job_uncompressed_size, job_compressed_size ' \ + f'FROM compression_jobs WHERE job_id={scheduling_job_id};' + + job_last_uncompressed_size = 0 + while True: + scheduling_db_cursor.execute(polling_query) + + # Using fetchall() here t + results = scheduling_db_cursor.fetchall() + if len(results) > 1: + logging.error("Duplicated job_id") + logging.error(str(results)) + if len(results) == 0: + time.sleep(1) + continue + if isinstance(scheduling_db, mysql.connector.MySQLConnection): + scheduling_db.commit() # clear the query cache + + job_row = results[0] + job_status = job_row['job_status'] + + if not no_progress_reporting: + job_uncompressed_size = job_row['job_uncompressed_size'] + job_compressed_size = job_row['job_compressed_size'] + if job_uncompressed_size > 0: + compression_ratio = float(job_uncompressed_size) / job_compressed_size + if job_last_uncompressed_size < job_uncompressed_size: + job_logger.info( + f'Compressed {pretty_size(job_uncompressed_size)} into ' + f'{pretty_size(job_compressed_size)} ({compression_ratio:.2f})') + job_last_uncompressed_size = job_uncompressed_size + + if job_status == 'SCHEDULED': + pass # Simply wait another iteration + 
elif job_status == 'COMPLETED': + # All tasks in the job is done + if not no_progress_reporting: + scheduling_db_cursor.execute(completion_query) + job_row = scheduling_db_cursor.fetchone() + if job_row['job_duration']: + speed = job_row['job_uncompressed_size'] / job_row['job_duration'] + job_logger.info( + f'Compression finished. Runtime: {str(job_row["job_duration"])}s. ' + f'Speed: {pretty_size(speed)}/s.') + break # Done + elif job_status == 'FAILED': + # One or more tasks in the job has failed + job_logger.error(f'Compression failed. See log file in {job_row["job_status_msg"]}') + break # Done + else: + job_logger.info(f'handler for job_status "{job_status}" is not implemented') + raise NotImplementedError + + scheduling_db.commit() # clear the query cache + time.sleep(1) + + except Exception as ex: + if job_logger: + job_logger.exception(f'Exception while processing {job_str}.') + job_logger.error(ex) + all_worker_jobs_successful = False + finally: + if job_logger: + job_logger.removeHandler(job_logger_file_handler) + job_logger_file_handler.flush() + job_logger_file_handler.close() + + if not all_worker_jobs_successful: + return JobCompletionStatus.FAILED + elif job_completed_with_errors: + return JobCompletionStatus.SUCCEEDED_WITH_ERRORS + + logger.debug(f'Finished job {job_str}') + + return JobCompletionStatus.SUCCEEDED + + +def handle_jobs(sql_adapter: SQL_Adapter, clp_io_config: ClpIoConfig, logs_dir_abs: str, + fs_logs_required_parent_dir: pathlib.Path, no_progress_reporting: bool): + logger.info('compression-job-handler started.') + + # Instantiate zstdandard compression context + zstd_cctx = zstd.ZstdCompressor(level=3) + + # Connect to SQL Database + with closing(sql_adapter.create_connection()) as scheduling_db, \ + closing(scheduling_db.cursor(dictionary=True)) as scheduling_db_cursor: + # Execute new compression job + handle_job(scheduling_db=scheduling_db, scheduling_db_cursor=scheduling_db_cursor, clp_io_config=clp_io_config, + logs_dir_abs=logs_dir_abs, fs_logs_required_parent_dir=fs_logs_required_parent_dir, + zstd_cctx=zstd_cctx, no_progress_reporting=no_progress_reporting) + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Wait for and run compression jobs.') + args_parser.add_argument('--fs-logs-required-parent-dir', default="/nonexistent", + help='The required parent for any logs ingested from the filesystem.') + args_parser.add_argument('--no-progress-reporting', action='store_true', help='Disables progress reporting.') + args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') + args_parser.add_argument('--log-list-path', required=True, help='File containing list of input files to compress') + parsed_args = args_parser.parse_args(argv[1:]) + + # Load configuration + config_path = pathlib.Path(parsed_args.config) + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(config_path)) + except ValidationError as err: + logger.error(err) + except Exception as ex: + # read_yaml_config_file already logs the parsing error inside + pass + else: + # Configure file system directory locations # TODO: refactor with better comment + fs_logs_required_parent_dir = pathlib.Path(parsed_args.fs_logs_required_parent_dir) + + sql_adapter = SQL_Adapter(clp_config.database) + + clp_io_config = ClpIoConfig( + input=InputConfig(type='fs', list_path=str(pathlib.Path(parsed_args.log_list_path).resolve())), + output=OutputConfig.parse_obj(clp_config.archive_output) + ) + + logs_directory_abs = 
str(pathlib.Path(clp_config.logs_directory).resolve()) + + handle_jobs(sql_adapter=sql_adapter, clp_io_config=clp_io_config, logs_dir_abs=logs_directory_abs, + fs_logs_required_parent_dir=fs_logs_required_parent_dir, + no_progress_reporting=parsed_args.no_progress_reporting) + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/compression-job-handler/compression_job_handler/utils/__init__.py b/components/compression-job-handler/compression_job_handler/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/compression-job-handler/compression_job_handler/utils/common.py b/components/compression-job-handler/compression_job_handler/utils/common.py new file mode 100644 index 000000000..ce119402e --- /dev/null +++ b/components/compression-job-handler/compression_job_handler/utils/common.py @@ -0,0 +1,7 @@ +import enum + + +class JobCompletionStatus(enum.IntEnum): + SUCCEEDED = 0 + FAILED = 1 + SUCCEEDED_WITH_ERRORS = 2 diff --git a/components/compression-job-handler/requirements.txt b/components/compression-job-handler/requirements.txt new file mode 100644 index 000000000..444771b39 --- /dev/null +++ b/components/compression-job-handler/requirements.txt @@ -0,0 +1,8 @@ +python-Levenshtein +PyYAML==5.4 + +setuptools~=45.2.0 +msgpack~=1.0.2 +zstandard~=0.15.2 +mysql-connector-python==8.0.26 +pydantic==1.8.2 diff --git a/.gitignore b/components/core/.gitignore similarity index 100% rename from .gitignore rename to components/core/.gitignore diff --git a/CHANGELOG.md b/components/core/CHANGELOG.md similarity index 100% rename from CHANGELOG.md rename to components/core/CHANGELOG.md diff --git a/CMakeLists.txt b/components/core/CMakeLists.txt similarity index 100% rename from CMakeLists.txt rename to components/core/CMakeLists.txt diff --git a/LICENSE.md b/components/core/LICENSE.md similarity index 100% rename from LICENSE.md rename to components/core/LICENSE.md diff --git a/components/core/README.md b/components/core/README.md new file mode 100644 index 000000000..296e61601 --- /dev/null +++ b/components/core/README.md @@ -0,0 +1,172 @@ +# CLP Core + +CLP's core is the low-level component that performs compression, decompression, and search. + +## Contents + +* [Getting Started](#getting-started) +* [Requirements](#requirements) +* [Building](#building) + * [Source Dependencies](#source-dependencies) + * [Packages](#packages) + * [Libraries](#libraries) + * [Build](#build) +* [Running](#running) + * [`clp`](#clp) + * [`clg`](#clg) +* [Parallel Compression](#parallel-compression) +* [Next Steps](#next-steps) + +## Requirements + +* We have built and tested CLP on **Ubuntu 18.04 (bionic)** and **Ubuntu 20.04 (focal)**. + * If you have trouble building for another OS, file an issue and we may be able to help. +* A compiler that supports c++14 + +## Building + +* To build, we require some source dependencies, packages from package managers, and libraries built from source. + +### Source Dependencies + +We use both git submodules and third-party source packages. 
To download all, you can run this script:
+```shell
+tools/scripts/deps-download/download-all.sh
+```
+
+This will download:
+* [Catch2](https://github.com/catchorg/Catch2.git) (v2.13.6)
+* [date](https://github.com/HowardHinnant/date.git) (v3.0.1)
+* [json](https://github.com/nlohmann/json.git) (v3.10.2)
+* [SQLite3](https://www.sqlite.org/download.html) (v3.36.0)
+* [yaml-cpp](https://github.com/jbeder/yaml-cpp.git) (v0.7.0)
+
+### Packages
+
+If you're using apt-get, you can use the following command to install all of them:
+```shell
+sudo apt-get install -y ca-certificates checkinstall cmake build-essential \
+libboost-filesystem-dev libboost-iostreams-dev libboost-program-options-dev \
+libssl-dev pkg-config rsync wget zlib1g-dev
+```
+
+This will install:
+* ca-certificates
+* checkinstall
+* cmake
+* build-essential
+* libboost-filesystem-dev
+* libboost-iostreams-dev
+* libboost-program-options-dev
+* libssl-dev
+* pkg-config
+* rsync
+* wget
+* zlib1g-dev
+
+### Libraries
+
+The latest versions of some packages are not offered by apt repositories,
+so we've included some scripts to download, compile, and install them:
+```shell
+./tools/scripts/lib_install/fmtlib.sh 8.0.1
+./tools/scripts/lib_install/libarchive.sh 3.5.1
+./tools/scripts/lib_install/lz4.sh 1.8.2
+./tools/scripts/lib_install/mariadb-connector-c.sh 3.2.3
+./tools/scripts/lib_install/spdlog.sh 1.9.2
+./tools/scripts/lib_install/zstandard.sh 1.4.9
+```
+
+### Build
+
+* Configure the CMake project:
+  ```shell
+  mkdir build
+  cd build
+  cmake ../
+  ```
+
+* Build:
+  ```shell
+  make
+  ```
+
+## Running
+
+* CLP contains two executables: `clp` and `clg`
+  * `clp` is used for compressing and extracting logs
+  * `clg` is used for performing wildcard searches on the compressed logs
+
+### `clp`
+
+To compress some logs:
+```shell
+./clp c archives-dir /home/my/logs
+```
+* `archives-dir` is where compressed logs should be output
+  * `clp` will create a number of files and directories within, so it's best if this directory is empty
+  * You can use the same directory repeatedly and `clp` will add to the compressed logs within.
+* `/home/my/logs` is any log file or directory containing log files
+
+To decompress those logs:
+```shell
+./clp x archives-dir decompressed
+```
+* `archives-dir` is where the compressed logs were previously stored
+* `decompressed` is a directory where they will be decompressed to
+
+You can also decompress a specific file:
+```shell
+./clp x archives-dir decompressed /my/file/path.log
+```
+* `/my/file/path.log` is the uncompressed file's path (the one that was passed to `clp` for compression)
+
+More usage instructions can be found by running:
+```shell
+./clp --help
+```
+
+### `clg`
+
+To search the compressed logs:
+```shell
+./clg archives-dir " a *wildcard* search phrase "
+```
+* `archives-dir` is where the compressed logs were previously stored
+* The search phrase can contain the `*` wildcard which matches 0 or more characters, or the `?` wildcard which matches any single character.
+
+Similar to `clp`, `clg` can search a single file:
+```shell
+./clg archives-dir " a *wildcard* search phrase " /my/file/path.log
+```
+* `/my/file/path.log` is the uncompressed file's path (the one that was passed to `clp` for compression)
+
+More usage instructions can be found by running:
+```shell
+./clg --help
+```
+
+## Parallel Compression
+
+By default, `clp` uses an embedded SQLite database, so each directory containing archives can only
+be accessed by a single `clp` instance.
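To make the single-writer limitation above concrete, here is a small, self-contained sketch using plain Python `sqlite3` (not CLP code); the table name and paths are invented for illustration.

```python
# Illustrative only: two connections to one SQLite file; the second writer is locked out.
import os
import sqlite3
import tempfile

db_path = os.path.join(tempfile.mkdtemp(), "metadata.db")  # stand-in for an archives directory's DB

# First "clp instance": open the database and hold a write transaction.
writer = sqlite3.connect(db_path, timeout=0.5, isolation_level=None)
writer.execute("CREATE TABLE archives (id INTEGER PRIMARY KEY, path TEXT)")
writer.execute("BEGIN IMMEDIATE")  # acquire the write lock, like an in-progress compression
writer.execute("INSERT INTO archives (path) VALUES ('archive-0')")

# Second "clp instance": attempting to write concurrently fails once the timeout expires.
other = sqlite3.connect(db_path, timeout=0.5, isolation_level=None)
try:
    other.execute("BEGIN IMMEDIATE")
except sqlite3.OperationalError as err:
    print(f"second writer blocked: {err}")  # typically "database is locked"

writer.execute("COMMIT")
```

The MariaDB-backed configuration described next lifts this restriction for compression.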
+ +To enable parallel compression to the same archives directory, `clp`/`clg` can be configured to +use a MySQL-type database (MariaDB) as follows: + +* Install and configure MariaDB using the instructions for your platform +* Create a user that has privileges to create databases, create tables, insert records, and delete + records. +* Copy and change `config/metadata-db.yml`, setting the type to `mysql` and uncommenting the MySQL + parameters. +* Install the MariaDB and PyYAML Python packages `pip3 install mariadb PyYAML` + * This is necessary to run the database initialization script. If you prefer, you can run the + SQL statements in `tools/scripts/db/init-db.py` directly. +* Run `tools/scripts/db/init-db.py` with the updated config file. This will initialize the + database CLP requires. +* Run `clp` or `clg` as before, with the addition of the `--db-config-file` option pointing at + the updated config file. +* To compress in parallel, simply run another instance of `clp` concurrently. + +Note that currently, decompression (`clp x`) and search (`clg`) can only be run with a single +instance. We are in the process of open-sourcing parallelizable versions of these as well. diff --git a/cmake/Modules/FindLZ4.cmake b/components/core/cmake/Modules/FindLZ4.cmake similarity index 100% rename from cmake/Modules/FindLZ4.cmake rename to components/core/cmake/Modules/FindLZ4.cmake diff --git a/cmake/Modules/FindLibArchive.cmake b/components/core/cmake/Modules/FindLibArchive.cmake similarity index 100% rename from cmake/Modules/FindLibArchive.cmake rename to components/core/cmake/Modules/FindLibArchive.cmake diff --git a/cmake/Modules/FindLibraryDependencies.cmake b/components/core/cmake/Modules/FindLibraryDependencies.cmake similarity index 100% rename from cmake/Modules/FindLibraryDependencies.cmake rename to components/core/cmake/Modules/FindLibraryDependencies.cmake diff --git a/cmake/Modules/FindMariaDBClient.cmake b/components/core/cmake/Modules/FindMariaDBClient.cmake similarity index 100% rename from cmake/Modules/FindMariaDBClient.cmake rename to components/core/cmake/Modules/FindMariaDBClient.cmake diff --git a/cmake/Modules/FindOpenSSL.cmake b/components/core/cmake/Modules/FindOpenSSL.cmake similarity index 100% rename from cmake/Modules/FindOpenSSL.cmake rename to components/core/cmake/Modules/FindOpenSSL.cmake diff --git a/cmake/Modules/FindZStd.cmake b/components/core/cmake/Modules/FindZStd.cmake similarity index 100% rename from cmake/Modules/FindZStd.cmake rename to components/core/cmake/Modules/FindZStd.cmake diff --git a/cmake/Modules/LibFindMacros.cmake b/components/core/cmake/Modules/LibFindMacros.cmake similarity index 100% rename from cmake/Modules/LibFindMacros.cmake rename to components/core/cmake/Modules/LibFindMacros.cmake diff --git a/config/metadata-db.yml b/components/core/config/metadata-db.yml similarity index 100% rename from config/metadata-db.yml rename to components/core/config/metadata-db.yml diff --git a/src/CommandLineArgumentsBase.cpp b/components/core/src/CommandLineArgumentsBase.cpp similarity index 100% rename from src/CommandLineArgumentsBase.cpp rename to components/core/src/CommandLineArgumentsBase.cpp diff --git a/src/CommandLineArgumentsBase.hpp b/components/core/src/CommandLineArgumentsBase.hpp similarity index 100% rename from src/CommandLineArgumentsBase.hpp rename to components/core/src/CommandLineArgumentsBase.hpp diff --git a/src/Defs.h b/components/core/src/Defs.h similarity index 100% rename from src/Defs.h rename to components/core/src/Defs.h 
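As a companion to the Parallel Compression steps above, the following hypothetical sketch checks that the MySQL-type metadata database is reachable, using the `mariadb` and `PyYAML` packages those steps install; the YAML keys are assumptions and may not match the exact layout of `config/metadata-db.yml`.

```python
# Hypothetical connectivity check for the MySQL-type metadata database; adjust the keys
# below to match your edited copy of config/metadata-db.yml.
import mariadb
import yaml

with open("config/metadata-db.yml") as f:
    cfg = yaml.safe_load(f)

connection = mariadb.connect(
    host=cfg.get("host", "127.0.0.1"),
    port=int(cfg.get("port", 3306)),
    user=cfg.get("username", "clp"),
    password=cfg.get("password", ""),
)
cursor = connection.cursor()
cursor.execute("SELECT VERSION()")
print("Connected to MariaDB server version", cursor.fetchone()[0])
connection.close()
```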
diff --git a/src/DictionaryEntry.cpp b/components/core/src/DictionaryEntry.cpp similarity index 100% rename from src/DictionaryEntry.cpp rename to components/core/src/DictionaryEntry.cpp diff --git a/src/DictionaryEntry.hpp b/components/core/src/DictionaryEntry.hpp similarity index 100% rename from src/DictionaryEntry.hpp rename to components/core/src/DictionaryEntry.hpp diff --git a/src/DictionaryReader.cpp b/components/core/src/DictionaryReader.cpp similarity index 100% rename from src/DictionaryReader.cpp rename to components/core/src/DictionaryReader.cpp diff --git a/src/DictionaryReader.hpp b/components/core/src/DictionaryReader.hpp similarity index 100% rename from src/DictionaryReader.hpp rename to components/core/src/DictionaryReader.hpp diff --git a/src/DictionaryWriter.cpp b/components/core/src/DictionaryWriter.cpp similarity index 100% rename from src/DictionaryWriter.cpp rename to components/core/src/DictionaryWriter.cpp diff --git a/src/DictionaryWriter.hpp b/components/core/src/DictionaryWriter.hpp similarity index 100% rename from src/DictionaryWriter.hpp rename to components/core/src/DictionaryWriter.hpp diff --git a/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp similarity index 100% rename from src/EncodedVariableInterpreter.cpp rename to components/core/src/EncodedVariableInterpreter.cpp diff --git a/src/EncodedVariableInterpreter.hpp b/components/core/src/EncodedVariableInterpreter.hpp similarity index 100% rename from src/EncodedVariableInterpreter.hpp rename to components/core/src/EncodedVariableInterpreter.hpp diff --git a/src/ErrorCode.hpp b/components/core/src/ErrorCode.hpp similarity index 100% rename from src/ErrorCode.hpp rename to components/core/src/ErrorCode.hpp diff --git a/src/FileReader.cpp b/components/core/src/FileReader.cpp similarity index 100% rename from src/FileReader.cpp rename to components/core/src/FileReader.cpp diff --git a/src/FileReader.hpp b/components/core/src/FileReader.hpp similarity index 100% rename from src/FileReader.hpp rename to components/core/src/FileReader.hpp diff --git a/src/FileWriter.cpp b/components/core/src/FileWriter.cpp similarity index 100% rename from src/FileWriter.cpp rename to components/core/src/FileWriter.cpp diff --git a/src/FileWriter.hpp b/components/core/src/FileWriter.hpp similarity index 100% rename from src/FileWriter.hpp rename to components/core/src/FileWriter.hpp diff --git a/src/GlobalMetadataDB.cpp b/components/core/src/GlobalMetadataDB.cpp similarity index 100% rename from src/GlobalMetadataDB.cpp rename to components/core/src/GlobalMetadataDB.cpp diff --git a/src/GlobalMetadataDB.hpp b/components/core/src/GlobalMetadataDB.hpp similarity index 100% rename from src/GlobalMetadataDB.hpp rename to components/core/src/GlobalMetadataDB.hpp diff --git a/src/GlobalMetadataDBConfig.cpp b/components/core/src/GlobalMetadataDBConfig.cpp similarity index 100% rename from src/GlobalMetadataDBConfig.cpp rename to components/core/src/GlobalMetadataDBConfig.cpp diff --git a/src/GlobalMetadataDBConfig.hpp b/components/core/src/GlobalMetadataDBConfig.hpp similarity index 100% rename from src/GlobalMetadataDBConfig.hpp rename to components/core/src/GlobalMetadataDBConfig.hpp diff --git a/src/GlobalMySQLMetadataDB.cpp b/components/core/src/GlobalMySQLMetadataDB.cpp similarity index 100% rename from src/GlobalMySQLMetadataDB.cpp rename to components/core/src/GlobalMySQLMetadataDB.cpp diff --git a/src/GlobalMySQLMetadataDB.hpp b/components/core/src/GlobalMySQLMetadataDB.hpp 
similarity index 100% rename from src/GlobalMySQLMetadataDB.hpp rename to components/core/src/GlobalMySQLMetadataDB.hpp diff --git a/src/GlobalSQLiteMetadataDB.cpp b/components/core/src/GlobalSQLiteMetadataDB.cpp similarity index 100% rename from src/GlobalSQLiteMetadataDB.cpp rename to components/core/src/GlobalSQLiteMetadataDB.cpp diff --git a/src/GlobalSQLiteMetadataDB.hpp b/components/core/src/GlobalSQLiteMetadataDB.hpp similarity index 100% rename from src/GlobalSQLiteMetadataDB.hpp rename to components/core/src/GlobalSQLiteMetadataDB.hpp diff --git a/src/Grep.cpp b/components/core/src/Grep.cpp similarity index 100% rename from src/Grep.cpp rename to components/core/src/Grep.cpp diff --git a/src/Grep.hpp b/components/core/src/Grep.hpp similarity index 100% rename from src/Grep.hpp rename to components/core/src/Grep.hpp diff --git a/src/LibarchiveFileReader.cpp b/components/core/src/LibarchiveFileReader.cpp similarity index 100% rename from src/LibarchiveFileReader.cpp rename to components/core/src/LibarchiveFileReader.cpp diff --git a/src/LibarchiveFileReader.hpp b/components/core/src/LibarchiveFileReader.hpp similarity index 100% rename from src/LibarchiveFileReader.hpp rename to components/core/src/LibarchiveFileReader.hpp diff --git a/src/LibarchiveReader.cpp b/components/core/src/LibarchiveReader.cpp similarity index 100% rename from src/LibarchiveReader.cpp rename to components/core/src/LibarchiveReader.cpp diff --git a/src/LibarchiveReader.hpp b/components/core/src/LibarchiveReader.hpp similarity index 100% rename from src/LibarchiveReader.hpp rename to components/core/src/LibarchiveReader.hpp diff --git a/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp similarity index 100% rename from src/LogTypeDictionaryEntry.cpp rename to components/core/src/LogTypeDictionaryEntry.cpp diff --git a/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp similarity index 100% rename from src/LogTypeDictionaryEntry.hpp rename to components/core/src/LogTypeDictionaryEntry.hpp diff --git a/src/LogTypeDictionaryReader.cpp b/components/core/src/LogTypeDictionaryReader.cpp similarity index 100% rename from src/LogTypeDictionaryReader.cpp rename to components/core/src/LogTypeDictionaryReader.cpp diff --git a/src/LogTypeDictionaryReader.hpp b/components/core/src/LogTypeDictionaryReader.hpp similarity index 100% rename from src/LogTypeDictionaryReader.hpp rename to components/core/src/LogTypeDictionaryReader.hpp diff --git a/src/LogTypeDictionaryWriter.cpp b/components/core/src/LogTypeDictionaryWriter.cpp similarity index 100% rename from src/LogTypeDictionaryWriter.cpp rename to components/core/src/LogTypeDictionaryWriter.cpp diff --git a/src/LogTypeDictionaryWriter.hpp b/components/core/src/LogTypeDictionaryWriter.hpp similarity index 100% rename from src/LogTypeDictionaryWriter.hpp rename to components/core/src/LogTypeDictionaryWriter.hpp diff --git a/src/MessageParser.cpp b/components/core/src/MessageParser.cpp similarity index 100% rename from src/MessageParser.cpp rename to components/core/src/MessageParser.cpp diff --git a/src/MessageParser.hpp b/components/core/src/MessageParser.hpp similarity index 100% rename from src/MessageParser.hpp rename to components/core/src/MessageParser.hpp diff --git a/src/MySQLDB.cpp b/components/core/src/MySQLDB.cpp similarity index 100% rename from src/MySQLDB.cpp rename to components/core/src/MySQLDB.cpp diff --git a/src/MySQLDB.hpp b/components/core/src/MySQLDB.hpp similarity index 100% rename from 
src/MySQLDB.hpp rename to components/core/src/MySQLDB.hpp diff --git a/src/MySQLParamBindings.cpp b/components/core/src/MySQLParamBindings.cpp similarity index 100% rename from src/MySQLParamBindings.cpp rename to components/core/src/MySQLParamBindings.cpp diff --git a/src/MySQLParamBindings.hpp b/components/core/src/MySQLParamBindings.hpp similarity index 100% rename from src/MySQLParamBindings.hpp rename to components/core/src/MySQLParamBindings.hpp diff --git a/src/MySQLPreparedStatement.cpp b/components/core/src/MySQLPreparedStatement.cpp similarity index 100% rename from src/MySQLPreparedStatement.cpp rename to components/core/src/MySQLPreparedStatement.cpp diff --git a/src/MySQLPreparedStatement.hpp b/components/core/src/MySQLPreparedStatement.hpp similarity index 100% rename from src/MySQLPreparedStatement.hpp rename to components/core/src/MySQLPreparedStatement.hpp diff --git a/src/PageAllocatedVector.cpp b/components/core/src/PageAllocatedVector.cpp similarity index 100% rename from src/PageAllocatedVector.cpp rename to components/core/src/PageAllocatedVector.cpp diff --git a/src/PageAllocatedVector.hpp b/components/core/src/PageAllocatedVector.hpp similarity index 100% rename from src/PageAllocatedVector.hpp rename to components/core/src/PageAllocatedVector.hpp diff --git a/src/ParsedMessage.cpp b/components/core/src/ParsedMessage.cpp similarity index 100% rename from src/ParsedMessage.cpp rename to components/core/src/ParsedMessage.cpp diff --git a/src/ParsedMessage.hpp b/components/core/src/ParsedMessage.hpp similarity index 100% rename from src/ParsedMessage.hpp rename to components/core/src/ParsedMessage.hpp diff --git a/src/Profiler.cpp b/components/core/src/Profiler.cpp similarity index 100% rename from src/Profiler.cpp rename to components/core/src/Profiler.cpp diff --git a/src/Profiler.hpp b/components/core/src/Profiler.hpp similarity index 100% rename from src/Profiler.hpp rename to components/core/src/Profiler.hpp diff --git a/src/Query.cpp b/components/core/src/Query.cpp similarity index 100% rename from src/Query.cpp rename to components/core/src/Query.cpp diff --git a/src/Query.hpp b/components/core/src/Query.hpp similarity index 100% rename from src/Query.hpp rename to components/core/src/Query.hpp diff --git a/src/ReaderInterface.cpp b/components/core/src/ReaderInterface.cpp similarity index 100% rename from src/ReaderInterface.cpp rename to components/core/src/ReaderInterface.cpp diff --git a/src/ReaderInterface.hpp b/components/core/src/ReaderInterface.hpp similarity index 100% rename from src/ReaderInterface.hpp rename to components/core/src/ReaderInterface.hpp diff --git a/src/SQLiteDB.cpp b/components/core/src/SQLiteDB.cpp similarity index 100% rename from src/SQLiteDB.cpp rename to components/core/src/SQLiteDB.cpp diff --git a/src/SQLiteDB.hpp b/components/core/src/SQLiteDB.hpp similarity index 100% rename from src/SQLiteDB.hpp rename to components/core/src/SQLiteDB.hpp diff --git a/src/SQLitePreparedStatement.cpp b/components/core/src/SQLitePreparedStatement.cpp similarity index 100% rename from src/SQLitePreparedStatement.cpp rename to components/core/src/SQLitePreparedStatement.cpp diff --git a/src/SQLitePreparedStatement.hpp b/components/core/src/SQLitePreparedStatement.hpp similarity index 100% rename from src/SQLitePreparedStatement.hpp rename to components/core/src/SQLitePreparedStatement.hpp diff --git a/src/Stopwatch.cpp b/components/core/src/Stopwatch.cpp similarity index 100% rename from src/Stopwatch.cpp rename to components/core/src/Stopwatch.cpp 
diff --git a/src/Stopwatch.hpp b/components/core/src/Stopwatch.hpp similarity index 100% rename from src/Stopwatch.hpp rename to components/core/src/Stopwatch.hpp diff --git a/src/TimestampPattern.cpp b/components/core/src/TimestampPattern.cpp similarity index 100% rename from src/TimestampPattern.cpp rename to components/core/src/TimestampPattern.cpp diff --git a/src/TimestampPattern.hpp b/components/core/src/TimestampPattern.hpp similarity index 100% rename from src/TimestampPattern.hpp rename to components/core/src/TimestampPattern.hpp diff --git a/src/TraceableException.cpp b/components/core/src/TraceableException.cpp similarity index 100% rename from src/TraceableException.cpp rename to components/core/src/TraceableException.cpp diff --git a/src/TraceableException.hpp b/components/core/src/TraceableException.hpp similarity index 100% rename from src/TraceableException.hpp rename to components/core/src/TraceableException.hpp diff --git a/src/Utils.cpp b/components/core/src/Utils.cpp similarity index 100% rename from src/Utils.cpp rename to components/core/src/Utils.cpp diff --git a/src/Utils.hpp b/components/core/src/Utils.hpp similarity index 100% rename from src/Utils.hpp rename to components/core/src/Utils.hpp diff --git a/src/VariableDictionaryEntry.cpp b/components/core/src/VariableDictionaryEntry.cpp similarity index 100% rename from src/VariableDictionaryEntry.cpp rename to components/core/src/VariableDictionaryEntry.cpp diff --git a/src/VariableDictionaryEntry.hpp b/components/core/src/VariableDictionaryEntry.hpp similarity index 100% rename from src/VariableDictionaryEntry.hpp rename to components/core/src/VariableDictionaryEntry.hpp diff --git a/src/VariableDictionaryReader.cpp b/components/core/src/VariableDictionaryReader.cpp similarity index 100% rename from src/VariableDictionaryReader.cpp rename to components/core/src/VariableDictionaryReader.cpp diff --git a/src/VariableDictionaryReader.hpp b/components/core/src/VariableDictionaryReader.hpp similarity index 100% rename from src/VariableDictionaryReader.hpp rename to components/core/src/VariableDictionaryReader.hpp diff --git a/src/VariableDictionaryWriter.cpp b/components/core/src/VariableDictionaryWriter.cpp similarity index 100% rename from src/VariableDictionaryWriter.cpp rename to components/core/src/VariableDictionaryWriter.cpp diff --git a/src/VariableDictionaryWriter.hpp b/components/core/src/VariableDictionaryWriter.hpp similarity index 100% rename from src/VariableDictionaryWriter.hpp rename to components/core/src/VariableDictionaryWriter.hpp diff --git a/src/Writer.cpp b/components/core/src/Writer.cpp similarity index 100% rename from src/Writer.cpp rename to components/core/src/Writer.cpp diff --git a/src/Writer.hpp b/components/core/src/Writer.hpp similarity index 100% rename from src/Writer.hpp rename to components/core/src/Writer.hpp diff --git a/src/WriterInterface.cpp b/components/core/src/WriterInterface.cpp similarity index 100% rename from src/WriterInterface.cpp rename to components/core/src/WriterInterface.cpp diff --git a/src/WriterInterface.hpp b/components/core/src/WriterInterface.hpp similarity index 100% rename from src/WriterInterface.hpp rename to components/core/src/WriterInterface.hpp diff --git a/src/clg/CommandLineArguments.cpp b/components/core/src/clg/CommandLineArguments.cpp similarity index 100% rename from src/clg/CommandLineArguments.cpp rename to components/core/src/clg/CommandLineArguments.cpp diff --git a/src/clg/CommandLineArguments.hpp 
b/components/core/src/clg/CommandLineArguments.hpp similarity index 100% rename from src/clg/CommandLineArguments.hpp rename to components/core/src/clg/CommandLineArguments.hpp diff --git a/src/clg/clg.cpp b/components/core/src/clg/clg.cpp similarity index 100% rename from src/clg/clg.cpp rename to components/core/src/clg/clg.cpp diff --git a/src/clp/CommandLineArguments.cpp b/components/core/src/clp/CommandLineArguments.cpp similarity index 100% rename from src/clp/CommandLineArguments.cpp rename to components/core/src/clp/CommandLineArguments.cpp diff --git a/src/clp/CommandLineArguments.hpp b/components/core/src/clp/CommandLineArguments.hpp similarity index 100% rename from src/clp/CommandLineArguments.hpp rename to components/core/src/clp/CommandLineArguments.hpp diff --git a/src/clp/FileCompressor.cpp b/components/core/src/clp/FileCompressor.cpp similarity index 100% rename from src/clp/FileCompressor.cpp rename to components/core/src/clp/FileCompressor.cpp diff --git a/src/clp/FileCompressor.hpp b/components/core/src/clp/FileCompressor.hpp similarity index 100% rename from src/clp/FileCompressor.hpp rename to components/core/src/clp/FileCompressor.hpp diff --git a/src/clp/FileDecompressor.cpp b/components/core/src/clp/FileDecompressor.cpp similarity index 100% rename from src/clp/FileDecompressor.cpp rename to components/core/src/clp/FileDecompressor.cpp diff --git a/src/clp/FileDecompressor.hpp b/components/core/src/clp/FileDecompressor.hpp similarity index 100% rename from src/clp/FileDecompressor.hpp rename to components/core/src/clp/FileDecompressor.hpp diff --git a/src/clp/FileToCompress.cpp b/components/core/src/clp/FileToCompress.cpp similarity index 100% rename from src/clp/FileToCompress.cpp rename to components/core/src/clp/FileToCompress.cpp diff --git a/src/clp/FileToCompress.hpp b/components/core/src/clp/FileToCompress.hpp similarity index 100% rename from src/clp/FileToCompress.hpp rename to components/core/src/clp/FileToCompress.hpp diff --git a/src/clp/StructuredFileToCompress.cpp b/components/core/src/clp/StructuredFileToCompress.cpp similarity index 100% rename from src/clp/StructuredFileToCompress.cpp rename to components/core/src/clp/StructuredFileToCompress.cpp diff --git a/src/clp/StructuredFileToCompress.hpp b/components/core/src/clp/StructuredFileToCompress.hpp similarity index 100% rename from src/clp/StructuredFileToCompress.hpp rename to components/core/src/clp/StructuredFileToCompress.hpp diff --git a/src/clp/clp.cpp b/components/core/src/clp/clp.cpp similarity index 100% rename from src/clp/clp.cpp rename to components/core/src/clp/clp.cpp diff --git a/src/clp/compression.cpp b/components/core/src/clp/compression.cpp similarity index 100% rename from src/clp/compression.cpp rename to components/core/src/clp/compression.cpp diff --git a/src/clp/compression.hpp b/components/core/src/clp/compression.hpp similarity index 100% rename from src/clp/compression.hpp rename to components/core/src/clp/compression.hpp diff --git a/src/clp/decompression.cpp b/components/core/src/clp/decompression.cpp similarity index 100% rename from src/clp/decompression.cpp rename to components/core/src/clp/decompression.cpp diff --git a/src/clp/decompression.hpp b/components/core/src/clp/decompression.hpp similarity index 100% rename from src/clp/decompression.hpp rename to components/core/src/clp/decompression.hpp diff --git a/src/clp/utils.cpp b/components/core/src/clp/utils.cpp similarity index 100% rename from src/clp/utils.cpp rename to components/core/src/clp/utils.cpp diff 
--git a/src/clp/utils.hpp b/components/core/src/clp/utils.hpp similarity index 100% rename from src/clp/utils.hpp rename to components/core/src/clp/utils.hpp diff --git a/src/database_utils.cpp b/components/core/src/database_utils.cpp similarity index 100% rename from src/database_utils.cpp rename to components/core/src/database_utils.cpp diff --git a/src/database_utils.hpp b/components/core/src/database_utils.hpp similarity index 100% rename from src/database_utils.hpp rename to components/core/src/database_utils.hpp diff --git a/src/dictionary_utils.cpp b/components/core/src/dictionary_utils.cpp similarity index 100% rename from src/dictionary_utils.cpp rename to components/core/src/dictionary_utils.cpp diff --git a/src/dictionary_utils.hpp b/components/core/src/dictionary_utils.hpp similarity index 100% rename from src/dictionary_utils.hpp rename to components/core/src/dictionary_utils.hpp diff --git a/src/streaming_archive/Constants.hpp b/components/core/src/streaming_archive/Constants.hpp similarity index 100% rename from src/streaming_archive/Constants.hpp rename to components/core/src/streaming_archive/Constants.hpp diff --git a/src/streaming_archive/MetadataDB.cpp b/components/core/src/streaming_archive/MetadataDB.cpp similarity index 100% rename from src/streaming_archive/MetadataDB.cpp rename to components/core/src/streaming_archive/MetadataDB.cpp diff --git a/src/streaming_archive/MetadataDB.hpp b/components/core/src/streaming_archive/MetadataDB.hpp similarity index 100% rename from src/streaming_archive/MetadataDB.hpp rename to components/core/src/streaming_archive/MetadataDB.hpp diff --git a/src/streaming_archive/reader/Archive.cpp b/components/core/src/streaming_archive/reader/Archive.cpp similarity index 100% rename from src/streaming_archive/reader/Archive.cpp rename to components/core/src/streaming_archive/reader/Archive.cpp diff --git a/src/streaming_archive/reader/Archive.hpp b/components/core/src/streaming_archive/reader/Archive.hpp similarity index 100% rename from src/streaming_archive/reader/Archive.hpp rename to components/core/src/streaming_archive/reader/Archive.hpp diff --git a/src/streaming_archive/reader/File.cpp b/components/core/src/streaming_archive/reader/File.cpp similarity index 100% rename from src/streaming_archive/reader/File.cpp rename to components/core/src/streaming_archive/reader/File.cpp diff --git a/src/streaming_archive/reader/File.hpp b/components/core/src/streaming_archive/reader/File.hpp similarity index 100% rename from src/streaming_archive/reader/File.hpp rename to components/core/src/streaming_archive/reader/File.hpp diff --git a/src/streaming_archive/reader/Message.cpp b/components/core/src/streaming_archive/reader/Message.cpp similarity index 100% rename from src/streaming_archive/reader/Message.cpp rename to components/core/src/streaming_archive/reader/Message.cpp diff --git a/src/streaming_archive/reader/Message.hpp b/components/core/src/streaming_archive/reader/Message.hpp similarity index 100% rename from src/streaming_archive/reader/Message.hpp rename to components/core/src/streaming_archive/reader/Message.hpp diff --git a/src/streaming_archive/reader/Segment.cpp b/components/core/src/streaming_archive/reader/Segment.cpp similarity index 100% rename from src/streaming_archive/reader/Segment.cpp rename to components/core/src/streaming_archive/reader/Segment.cpp diff --git a/src/streaming_archive/reader/Segment.hpp b/components/core/src/streaming_archive/reader/Segment.hpp similarity index 100% rename from 
src/streaming_archive/reader/Segment.hpp rename to components/core/src/streaming_archive/reader/Segment.hpp diff --git a/src/streaming_archive/reader/SegmentManager.cpp b/components/core/src/streaming_archive/reader/SegmentManager.cpp similarity index 100% rename from src/streaming_archive/reader/SegmentManager.cpp rename to components/core/src/streaming_archive/reader/SegmentManager.cpp diff --git a/src/streaming_archive/reader/SegmentManager.hpp b/components/core/src/streaming_archive/reader/SegmentManager.hpp similarity index 100% rename from src/streaming_archive/reader/SegmentManager.hpp rename to components/core/src/streaming_archive/reader/SegmentManager.hpp diff --git a/src/streaming_archive/writer/Archive.cpp b/components/core/src/streaming_archive/writer/Archive.cpp similarity index 100% rename from src/streaming_archive/writer/Archive.cpp rename to components/core/src/streaming_archive/writer/Archive.cpp diff --git a/src/streaming_archive/writer/Archive.hpp b/components/core/src/streaming_archive/writer/Archive.hpp similarity index 100% rename from src/streaming_archive/writer/Archive.hpp rename to components/core/src/streaming_archive/writer/Archive.hpp diff --git a/src/streaming_archive/writer/File.cpp b/components/core/src/streaming_archive/writer/File.cpp similarity index 100% rename from src/streaming_archive/writer/File.cpp rename to components/core/src/streaming_archive/writer/File.cpp diff --git a/src/streaming_archive/writer/File.hpp b/components/core/src/streaming_archive/writer/File.hpp similarity index 100% rename from src/streaming_archive/writer/File.hpp rename to components/core/src/streaming_archive/writer/File.hpp diff --git a/src/streaming_archive/writer/InMemoryFile.cpp b/components/core/src/streaming_archive/writer/InMemoryFile.cpp similarity index 100% rename from src/streaming_archive/writer/InMemoryFile.cpp rename to components/core/src/streaming_archive/writer/InMemoryFile.cpp diff --git a/src/streaming_archive/writer/InMemoryFile.hpp b/components/core/src/streaming_archive/writer/InMemoryFile.hpp similarity index 100% rename from src/streaming_archive/writer/InMemoryFile.hpp rename to components/core/src/streaming_archive/writer/InMemoryFile.hpp diff --git a/src/streaming_archive/writer/OnDiskFile.cpp b/components/core/src/streaming_archive/writer/OnDiskFile.cpp similarity index 100% rename from src/streaming_archive/writer/OnDiskFile.cpp rename to components/core/src/streaming_archive/writer/OnDiskFile.cpp diff --git a/src/streaming_archive/writer/OnDiskFile.hpp b/components/core/src/streaming_archive/writer/OnDiskFile.hpp similarity index 100% rename from src/streaming_archive/writer/OnDiskFile.hpp rename to components/core/src/streaming_archive/writer/OnDiskFile.hpp diff --git a/src/streaming_archive/writer/Segment.cpp b/components/core/src/streaming_archive/writer/Segment.cpp similarity index 100% rename from src/streaming_archive/writer/Segment.cpp rename to components/core/src/streaming_archive/writer/Segment.cpp diff --git a/src/streaming_archive/writer/Segment.hpp b/components/core/src/streaming_archive/writer/Segment.hpp similarity index 100% rename from src/streaming_archive/writer/Segment.hpp rename to components/core/src/streaming_archive/writer/Segment.hpp diff --git a/src/streaming_compression/Compressor.cpp b/components/core/src/streaming_compression/Compressor.cpp similarity index 100% rename from src/streaming_compression/Compressor.cpp rename to components/core/src/streaming_compression/Compressor.cpp diff --git 
a/src/streaming_compression/Compressor.hpp b/components/core/src/streaming_compression/Compressor.hpp similarity index 100% rename from src/streaming_compression/Compressor.hpp rename to components/core/src/streaming_compression/Compressor.hpp diff --git a/src/streaming_compression/Constants.hpp b/components/core/src/streaming_compression/Constants.hpp similarity index 100% rename from src/streaming_compression/Constants.hpp rename to components/core/src/streaming_compression/Constants.hpp diff --git a/src/streaming_compression/Decompressor.cpp b/components/core/src/streaming_compression/Decompressor.cpp similarity index 100% rename from src/streaming_compression/Decompressor.cpp rename to components/core/src/streaming_compression/Decompressor.cpp diff --git a/src/streaming_compression/Decompressor.hpp b/components/core/src/streaming_compression/Decompressor.hpp similarity index 100% rename from src/streaming_compression/Decompressor.hpp rename to components/core/src/streaming_compression/Decompressor.hpp diff --git a/src/streaming_compression/passthrough/Compressor.cpp b/components/core/src/streaming_compression/passthrough/Compressor.cpp similarity index 100% rename from src/streaming_compression/passthrough/Compressor.cpp rename to components/core/src/streaming_compression/passthrough/Compressor.cpp diff --git a/src/streaming_compression/passthrough/Compressor.hpp b/components/core/src/streaming_compression/passthrough/Compressor.hpp similarity index 100% rename from src/streaming_compression/passthrough/Compressor.hpp rename to components/core/src/streaming_compression/passthrough/Compressor.hpp diff --git a/src/streaming_compression/passthrough/Decompressor.cpp b/components/core/src/streaming_compression/passthrough/Decompressor.cpp similarity index 100% rename from src/streaming_compression/passthrough/Decompressor.cpp rename to components/core/src/streaming_compression/passthrough/Decompressor.cpp diff --git a/src/streaming_compression/passthrough/Decompressor.hpp b/components/core/src/streaming_compression/passthrough/Decompressor.hpp similarity index 100% rename from src/streaming_compression/passthrough/Decompressor.hpp rename to components/core/src/streaming_compression/passthrough/Decompressor.hpp diff --git a/src/streaming_compression/zstd/Compressor.cpp b/components/core/src/streaming_compression/zstd/Compressor.cpp similarity index 100% rename from src/streaming_compression/zstd/Compressor.cpp rename to components/core/src/streaming_compression/zstd/Compressor.cpp diff --git a/src/streaming_compression/zstd/Compressor.hpp b/components/core/src/streaming_compression/zstd/Compressor.hpp similarity index 100% rename from src/streaming_compression/zstd/Compressor.hpp rename to components/core/src/streaming_compression/zstd/Compressor.hpp diff --git a/src/streaming_compression/zstd/Constants.hpp b/components/core/src/streaming_compression/zstd/Constants.hpp similarity index 100% rename from src/streaming_compression/zstd/Constants.hpp rename to components/core/src/streaming_compression/zstd/Constants.hpp diff --git a/src/streaming_compression/zstd/Decompressor.cpp b/components/core/src/streaming_compression/zstd/Decompressor.cpp similarity index 100% rename from src/streaming_compression/zstd/Decompressor.cpp rename to components/core/src/streaming_compression/zstd/Decompressor.cpp diff --git a/src/streaming_compression/zstd/Decompressor.hpp b/components/core/src/streaming_compression/zstd/Decompressor.hpp similarity index 100% rename from 
src/streaming_compression/zstd/Decompressor.hpp rename to components/core/src/streaming_compression/zstd/Decompressor.hpp diff --git a/src/version.hpp b/components/core/src/version.hpp similarity index 100% rename from src/version.hpp rename to components/core/src/version.hpp diff --git a/submodules/Catch2 b/components/core/submodules/Catch2 similarity index 100% rename from submodules/Catch2 rename to components/core/submodules/Catch2 diff --git a/submodules/date b/components/core/submodules/date similarity index 100% rename from submodules/date rename to components/core/submodules/date diff --git a/submodules/json b/components/core/submodules/json similarity index 100% rename from submodules/json rename to components/core/submodules/json diff --git a/submodules/yaml-cpp b/components/core/submodules/yaml-cpp similarity index 100% rename from submodules/yaml-cpp rename to components/core/submodules/yaml-cpp diff --git a/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp similarity index 100% rename from tests/test-EncodedVariableInterpreter.cpp rename to components/core/tests/test-EncodedVariableInterpreter.cpp diff --git a/tests/test-Grep.cpp b/components/core/tests/test-Grep.cpp similarity index 100% rename from tests/test-Grep.cpp rename to components/core/tests/test-Grep.cpp diff --git a/tests/test-Segment.cpp b/components/core/tests/test-Segment.cpp similarity index 100% rename from tests/test-Segment.cpp rename to components/core/tests/test-Segment.cpp diff --git a/tests/test-Stopwatch.cpp b/components/core/tests/test-Stopwatch.cpp similarity index 100% rename from tests/test-Stopwatch.cpp rename to components/core/tests/test-Stopwatch.cpp diff --git a/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp similarity index 100% rename from tests/test-StreamingCompression.cpp rename to components/core/tests/test-StreamingCompression.cpp diff --git a/tests/test-TimestampPattern.cpp b/components/core/tests/test-TimestampPattern.cpp similarity index 100% rename from tests/test-TimestampPattern.cpp rename to components/core/tests/test-TimestampPattern.cpp diff --git a/tests/test-Utils.cpp b/components/core/tests/test-Utils.cpp similarity index 100% rename from tests/test-Utils.cpp rename to components/core/tests/test-Utils.cpp diff --git a/tests/test-main.cpp b/components/core/tests/test-main.cpp similarity index 100% rename from tests/test-main.cpp rename to components/core/tests/test-main.cpp diff --git a/tools/docker-images/clp-env-base-bionic/Dockerfile b/components/core/tools/docker-images/clp-env-base-bionic/Dockerfile similarity index 100% rename from tools/docker-images/clp-env-base-bionic/Dockerfile rename to components/core/tools/docker-images/clp-env-base-bionic/Dockerfile diff --git a/tools/docker-images/clp-env-base-bionic/build.sh b/components/core/tools/docker-images/clp-env-base-bionic/build.sh similarity index 100% rename from tools/docker-images/clp-env-base-bionic/build.sh rename to components/core/tools/docker-images/clp-env-base-bionic/build.sh diff --git a/tools/docker-images/clp-env-base-centos7.4/Dockerfile b/components/core/tools/docker-images/clp-env-base-centos7.4/Dockerfile similarity index 100% rename from tools/docker-images/clp-env-base-centos7.4/Dockerfile rename to components/core/tools/docker-images/clp-env-base-centos7.4/Dockerfile diff --git a/tools/docker-images/clp-env-base-centos7.4/build.sh b/components/core/tools/docker-images/clp-env-base-centos7.4/build.sh 
similarity index 100% rename from tools/docker-images/clp-env-base-centos7.4/build.sh rename to components/core/tools/docker-images/clp-env-base-centos7.4/build.sh diff --git a/tools/docker-images/clp-env-base-centos7.4/install-boost.sh b/components/core/tools/docker-images/clp-env-base-centos7.4/install-boost.sh similarity index 100% rename from tools/docker-images/clp-env-base-centos7.4/install-boost.sh rename to components/core/tools/docker-images/clp-env-base-centos7.4/install-boost.sh diff --git a/tools/docker-images/clp-env-base-centos7.4/install-cmake.sh b/components/core/tools/docker-images/clp-env-base-centos7.4/install-cmake.sh similarity index 100% rename from tools/docker-images/clp-env-base-centos7.4/install-cmake.sh rename to components/core/tools/docker-images/clp-env-base-centos7.4/install-cmake.sh diff --git a/tools/docker-images/clp-env-base-centos7.4/install-gcc.sh b/components/core/tools/docker-images/clp-env-base-centos7.4/install-gcc.sh similarity index 100% rename from tools/docker-images/clp-env-base-centos7.4/install-gcc.sh rename to components/core/tools/docker-images/clp-env-base-centos7.4/install-gcc.sh diff --git a/tools/docker-images/clp-env-base-focal/Dockerfile b/components/core/tools/docker-images/clp-env-base-focal/Dockerfile similarity index 100% rename from tools/docker-images/clp-env-base-focal/Dockerfile rename to components/core/tools/docker-images/clp-env-base-focal/Dockerfile diff --git a/tools/docker-images/clp-env-base-focal/build.sh b/components/core/tools/docker-images/clp-env-base-focal/build.sh similarity index 100% rename from tools/docker-images/clp-env-base-focal/build.sh rename to components/core/tools/docker-images/clp-env-base-focal/build.sh diff --git a/tools/scripts/db/init-db.py b/components/core/tools/scripts/db/init-db.py similarity index 100% rename from tools/scripts/db/init-db.py rename to components/core/tools/scripts/db/init-db.py diff --git a/tools/scripts/deps-download/download-all.sh b/components/core/tools/scripts/deps-download/download-all.sh similarity index 82% rename from tools/scripts/deps-download/download-all.sh rename to components/core/tools/scripts/deps-download/download-all.sh index 1b404f819..67dc1d895 100755 --- a/tools/scripts/deps-download/download-all.sh +++ b/components/core/tools/scripts/deps-download/download-all.sh @@ -1,7 +1,7 @@ #!/bin/bash script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -project_root_dir=${script_dir}/../../../ +project_root_dir=${script_dir}/../../../../../ cd ${project_root_dir} git submodule update --init --recursive diff --git a/tools/scripts/deps-download/download-dep.py b/components/core/tools/scripts/deps-download/download-dep.py similarity index 100% rename from tools/scripts/deps-download/download-dep.py rename to components/core/tools/scripts/deps-download/download-dep.py diff --git a/tools/scripts/deps-download/sqlite3.json b/components/core/tools/scripts/deps-download/sqlite3.json similarity index 100% rename from tools/scripts/deps-download/sqlite3.json rename to components/core/tools/scripts/deps-download/sqlite3.json diff --git a/tools/scripts/lib_install/fmtlib.sh b/components/core/tools/scripts/lib_install/fmtlib.sh similarity index 100% rename from tools/scripts/lib_install/fmtlib.sh rename to components/core/tools/scripts/lib_install/fmtlib.sh diff --git a/tools/scripts/lib_install/libarchive.sh b/components/core/tools/scripts/lib_install/libarchive.sh similarity index 100% rename from tools/scripts/lib_install/libarchive.sh rename to 
components/core/tools/scripts/lib_install/libarchive.sh diff --git a/tools/scripts/lib_install/lz4.sh b/components/core/tools/scripts/lib_install/lz4.sh similarity index 100% rename from tools/scripts/lib_install/lz4.sh rename to components/core/tools/scripts/lib_install/lz4.sh diff --git a/tools/scripts/lib_install/mariadb-connector-c.sh b/components/core/tools/scripts/lib_install/mariadb-connector-c.sh similarity index 100% rename from tools/scripts/lib_install/mariadb-connector-c.sh rename to components/core/tools/scripts/lib_install/mariadb-connector-c.sh diff --git a/tools/scripts/lib_install/spdlog.sh b/components/core/tools/scripts/lib_install/spdlog.sh similarity index 100% rename from tools/scripts/lib_install/spdlog.sh rename to components/core/tools/scripts/lib_install/spdlog.sh diff --git a/tools/scripts/lib_install/zstandard.sh b/components/core/tools/scripts/lib_install/zstandard.sh similarity index 100% rename from tools/scripts/lib_install/zstandard.sh rename to components/core/tools/scripts/lib_install/zstandard.sh diff --git a/components/job-orchestration/LICENSE b/components/job-orchestration/LICENSE new file mode 100644 index 000000000..3340c889f --- /dev/null +++ b/components/job-orchestration/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 YScope Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/components/job-orchestration/README.md b/components/job-orchestration/README.md new file mode 100644 index 000000000..ab24cfbc1 --- /dev/null +++ b/components/job-orchestration/README.md @@ -0,0 +1,33 @@ +# CLP Job Orchestration + +This Python module contains CLP's scheduler and worker to handle distributed compression. +CLP's Compression Job Handler can be used to interface and submit compression jobs to the CLP scheduler. 
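
For illustration only, here is a minimal sketch of how a job handler might enqueue a single compression task, mirroring what the scheduler does internally with `compress.apply_async`. The broker URL and the payload values below are placeholders, not part of this component:

```python
import os

from celery import Celery

# Assumes a reachable RabbitMQ broker; replace the URL for your deployment.
broker_url = os.getenv('BROKER_URL', 'amqp://guest:guest@localhost:5672//')
app = Celery('clp_scheduler', broker=broker_url)

# Placeholder payloads; a real job handler fills these in from the
# compression_jobs / compression_tasks tables in the CLP metadata database.
clp_io_config_json = '{}'          # serialized ClpIoConfig
paths_to_compress_json = '{}'      # serialized PathsToCompress
database_connection_params = {}    # metadata database connection parameters

# The task name and queue match job_orchestration.executor.celeryconfig.
result = app.send_task(
    'job_orchestration.executor.compression.task.compress',
    args=(1, 1, clp_io_config_json, paths_to_compress_json, database_connection_params),
    queue='compression',
)
```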
+ +## Installation + +```bash +pip3 install -r requirements.txt --target /lib/python3/site-packages +cp -R clp_py_utils /lib/python3/site-packages +``` + +## Usage + +### Running the `scheduler` + +```bash +PYTHONPATH= \ + BROKER_URL=amqp://:@: \ + python3 -m job_orchestration.scheduler.scheduler --config +``` + +### Running the `executor` + +```bash +PYTHONPATH= \ + CLP_HOME= \ + CLP_DATA_DIR= \ + CLP_LOGS_DIR= \ + BROKER_URL=amqp://:@: \ + RESULT_BACKEND=rpc://:@: \ + celery -A executor worker --loglevel INFO -Q compression +``` diff --git a/components/job-orchestration/job_orchestration/executor/__init__.py b/components/job-orchestration/job_orchestration/executor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/job-orchestration/job_orchestration/executor/celery.py b/components/job-orchestration/job_orchestration/executor/celery.py new file mode 100644 index 000000000..769b8a2d3 --- /dev/null +++ b/components/job-orchestration/job_orchestration/executor/celery.py @@ -0,0 +1,9 @@ +from celery import Celery + +from . import celeryconfig + +app = Celery('clp_scheduler') +app.config_from_object(celeryconfig) + +if '__main__' == __name__: + app.start() diff --git a/components/job-orchestration/job_orchestration/executor/celeryconfig.py b/components/job-orchestration/job_orchestration/executor/celeryconfig.py new file mode 100644 index 000000000..768b5f4ec --- /dev/null +++ b/components/job-orchestration/job_orchestration/executor/celeryconfig.py @@ -0,0 +1,10 @@ +import os +result_persistent = True +worker_prefetch_multiplier = 1 +task_queue_max_priority = 3 +imports = 'job_orchestration.executor.compression.task' +task_routes = {'job_orchestration.executor.compression.task.compress': 'compression'} +task_create_missing_queues = True + +broker_url = os.getenv('BROKER_URL') +result_backend = os.getenv('RESULT_BACKEND') \ No newline at end of file diff --git a/components/job-orchestration/job_orchestration/executor/compression/__init__.py b/components/job-orchestration/job_orchestration/executor/compression/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py b/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py new file mode 100644 index 000000000..00e3d95ee --- /dev/null +++ b/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py @@ -0,0 +1,151 @@ +""" +This module is specifically to hold the remote method, easing the process of +figuring out what imports it requires. 
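The compress function defined here shells out to the `clp` binary under the given CLP home directory and totals the per-archive statistics that the binary prints while compressing.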
+""" +import json +import pathlib +import subprocess +import sys + +import celery.utils.nodenames +import yaml +from celery.utils.log import get_task_logger + +from clp_py_utils.clp_io_config import ClpIoConfig, PathsToCompress + + +def compress(clp_config: ClpIoConfig, clp_home_str: str, data_dir_str: str, logs_dir_str: str, + job_id_str: str, task_id_str: str, paths_to_compress: PathsToCompress, database_connection_params): + """ + Compresses files from an FS into archives on an FS + + :param clp_config: ClpIoConfig + :param clp_home_str: + :param data_dir_str: + :param logs_dir_str: + :param job_id_str: + :param task_id_str: + :param paths_to_compress: PathToCompress + :param database_connection_params: + :return: tuple -- (whether compression was successful, output messages) + """ + # Setup logging + logger = get_task_logger(__name__) + + instance_id_str = f'job-{job_id_str}-task-{task_id_str}' + + clp_home = pathlib.Path(clp_home_str) + + # Add clp package to sys.path + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + logger.error('Failed to load python3 packages bundled with CLP.') + return False, 0 + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + # Expand parameters + path_prefix_to_remove = clp_config.input.path_prefix_to_remove + + file_paths = paths_to_compress.file_paths + + data_dir = pathlib.Path(data_dir_str).resolve() + logs_dir = pathlib.Path(logs_dir_str).resolve() + + # Generate database config file for clp + db_config_file_path = data_dir / f'{instance_id_str}-db-config.yml' + db_config_file = open(db_config_file_path, 'w') + yaml.safe_dump(database_connection_params, db_config_file) + db_config_file.close() + + # Start assembling compression command + archives_dir = data_dir / 'archives' + compression_cmd = [ + str(clp_home / 'bin' / 'clp'), + 'c', str(archives_dir), + '--print-archive-stats-progress', + '--target-dictionaries-size', + str(clp_config.output.target_dictionaries_size), + '--target-segment-size', str(clp_config.output.target_segment_size), + '--target-encoded-file-size', str(clp_config.output.target_encoded_file_size), + '--storage-id', + '--db-config-file', str(db_config_file_path) + ] + if clp_config.output.storage_is_node_specific: + compression_cmd.append(celery.utils.nodenames.gethostname()) + else: + # Mark as globally-accessible + compression_cmd.append('*') + if path_prefix_to_remove: + compression_cmd.append('--remove-path-prefix') + compression_cmd.append(path_prefix_to_remove) + + # Prepare list of paths to compress for clp + log_list_path = data_dir / f'{instance_id_str}-log-paths.txt' + with open(log_list_path, 'w') as file: + if len(file_paths) > 0: + for path_str in file_paths: + file.write(path_str) + file.write('\n') + if paths_to_compress.empty_directories and len(paths_to_compress.empty_directories) > 0: + # Prepare list of paths to compress for clp + for path_str in paths_to_compress.empty_directories: + file.write(path_str) + file.write('\n') + + compression_cmd.append('--files-from') + compression_cmd.append(str(log_list_path)) + + # Open stderr log file + stderr_log_path = logs_dir / f'{instance_id_str}-stderr.log' + stderr_log_file = open(stderr_log_path, 'w') + + # Start compression + logger.debug('Compressing...') + compression_successful = False + proc = subprocess.Popen(compression_cmd, close_fds=True, stdout=subprocess.PIPE, + stderr=stderr_log_file) + + # Compute the total amount of data compressed + 
last_archive_stats = None + total_uncompressed_size = 0 + total_compressed_size = 0 + while True: + line = proc.stdout.readline() + if not line: + break + stats = json.loads(line.decode('ascii')) + if last_archive_stats is not None and stats['id'] != last_archive_stats['id']: + # We've started a new archive so add the previous archive's last + # reported size to the total + total_uncompressed_size += last_archive_stats['uncompressed_size'] + total_compressed_size += last_archive_stats['size'] + last_archive_stats = stats + if last_archive_stats is not None: + # Add the last archive's last reported size + total_uncompressed_size += last_archive_stats['uncompressed_size'] + total_compressed_size += last_archive_stats['size'] + + # Wait for compression to finish + return_code = proc.wait() + if 0 != return_code: + logger.error(f'Failed to compress, return_code={str(return_code)}') + else: + compression_successful = True + + # Remove generated temporary files + if log_list_path: + log_list_path.unlink() + db_config_file_path.unlink() + logger.debug('Compressed.') + + # Close stderr log file + stderr_log_file.close() + + if compression_successful: + return compression_successful, { + 'total_uncompressed_size': total_uncompressed_size, + 'total_compressed_size': total_compressed_size, + } + else: + return compression_successful, {'error_message': f'See logs {stderr_log_path}'} diff --git a/components/job-orchestration/job_orchestration/executor/compression/task.py b/components/job-orchestration/job_orchestration/executor/compression/task.py new file mode 100644 index 000000000..3460c9818 --- /dev/null +++ b/components/job-orchestration/job_orchestration/executor/compression/task.py @@ -0,0 +1,63 @@ +import json +import os +from contextlib import closing + +import pika +from celery.utils.log import get_task_logger + +from job_orchestration.executor.celery import app +from . 
import fs_to_fs_compress_method + +logger = get_task_logger(__name__) + +from clp_py_utils.clp_io_config import ClpIoConfig, PathsToCompress + + +@app.task() +def compress(job_id: int, task_id: int, clp_io_config_json: str, paths_to_compress_json: str, + database_connection_params): + clp_home = os.getenv('CLP_HOME') + data_dir = os.getenv('CLP_DATA_DIR') + logs_dir = os.getenv('CLP_LOGS_DIR') + celery_broker_url = os.getenv('BROKER_URL') + + logger.debug(f'CLP_HOME: {clp_home}') + logger.info(f'COMPRESSING job_id={job_id} task_id={task_id}') + + clp_io_config = ClpIoConfig.parse_raw(clp_io_config_json) + paths_to_compress = PathsToCompress.parse_raw(paths_to_compress_json) + + message = {'job_id': job_id, 'task_id': task_id, 'status': 'COMPRESSING'} + + with closing(pika.BlockingConnection(pika.URLParameters(celery_broker_url))) as conn: + with closing(conn.channel()) as channel: + channel.tx_select() + channel.queue_declare('results') + + channel.basic_publish(exchange='', routing_key='results', + body=json.dumps(message).encode('utf-8')) + channel.tx_commit() + logger.info(f'COMPRESSION STARTED job_id={job_id} task_id={task_id}') + + if 'fs' == clp_io_config.input.type and 'fs' == clp_io_config.output.type: + compression_successful, worker_output = \ + fs_to_fs_compress_method.compress( + clp_io_config, clp_home, data_dir, logs_dir, str(job_id), str(task_id), + paths_to_compress, database_connection_params) + else: + raise NotImplementedError + + if compression_successful: + message['status'] = 'COMPLETED' + message['total_uncompressed_size'] = worker_output['total_uncompressed_size'] + message['total_compressed_size'] = worker_output['total_compressed_size'] + else: + message['status'] = 'FAILED' + message['error_message'] = worker_output['error_message'] + + with closing(pika.BlockingConnection(pika.URLParameters(celery_broker_url))) as conn: + with closing(conn.channel()) as channel: + channel.tx_select() + channel.basic_publish(exchange='', routing_key='results', body=json.dumps(message).encode('utf-8')) + channel.tx_commit() + logger.info(f'COMPRESSION COMPLETED job_id={job_id} task_id={task_id}') diff --git a/components/job-orchestration/job_orchestration/scheduler/__init__.py b/components/job-orchestration/job_orchestration/scheduler/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/job-orchestration/job_orchestration/scheduler/results_consumer.py b/components/job-orchestration/job_orchestration/scheduler/results_consumer.py new file mode 100644 index 000000000..0db8c6185 --- /dev/null +++ b/components/job-orchestration/job_orchestration/scheduler/results_consumer.py @@ -0,0 +1,368 @@ +import functools +import logging +import time + +import pika +from pika.exchange_type import ExchangeType + +LOG_FORMAT = ('%(levelname) -10s %(asctime)s %(name) -30s %(funcName) ' + '-35s %(lineno) -5d: %(message)s') +LOGGER = logging.getLogger(__name__) + + +class ResultsConsumer(object): + """This is an example consumer that will handle unexpected interactions + with RabbitMQ such as channel and connection closures. + If RabbitMQ closes the connection, this class will stop and indicate + that reconnection is necessary. You should look at the output, as + there are limited reasons why the connection may be closed, which + usually are tied to permission related issues or socket timeouts. + If the channel is closed, it will indicate a problem with one of the + commands that were issued and that should surface in the output as well. 
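    In this component, the consumer listens on the 'results' queue for the JSON
    task-status updates published by the compression workers and hands each
    message to the scheduler's callback.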
+ """ + EXCHANGE = 'results' + EXCHANGE_TYPE = ExchangeType.topic + QUEUE = 'results' + ROUTING_KEY = 'results' + + def __init__(self, amqp_url, on_messge_callback): + """Create a new instance of the consumer class, passing in the AMQP + URL used to connect to RabbitMQ. + :param str amqp_url: The AMQP url to connect with + """ + self.should_reconnect = False + self.was_consuming = False + + self._connection = None + self._channel = None + self._closing = False + self._consumer_tag = None + self._url = amqp_url + self._consuming = False + # In production, experiment with higher prefetch values + # for higher consumer throughput + self._prefetch_count = 1 + self.on_message = on_messge_callback + + def connect(self): + """This method connects to RabbitMQ, returning the connection handle. + When the connection is established, the on_connection_open method + will be invoked by pika. + :rtype: pika.SelectConnection + """ + LOGGER.info('Connecting to %s', self._url) + return pika.SelectConnection( + parameters=pika.URLParameters(self._url), + on_open_callback=self.on_connection_open, + on_open_error_callback=self.on_connection_open_error, + on_close_callback=self.on_connection_closed) + + def close_connection(self): + self._consuming = False + if self._connection.is_closing or self._connection.is_closed: + LOGGER.info('Connection is closing or already closed') + else: + LOGGER.info('Closing connection') + self._connection.close() + + def on_connection_open(self, _unused_connection): + """This method is called by pika once the connection to RabbitMQ has + been established. It passes the handle to the connection object in + case we need it, but in this case, we'll just mark it unused. + :param pika.SelectConnection _unused_connection: The connection + """ + LOGGER.info('Connection opened') + self.open_channel() + + def on_connection_open_error(self, _unused_connection, err): + """This method is called by pika if the connection to RabbitMQ + can't be established. + :param pika.SelectConnection _unused_connection: The connection + :param Exception err: The error + """ + LOGGER.error('Connection open failed: %s', err) + self.reconnect() + + def on_connection_closed(self, _unused_connection, reason): + """This method is invoked by pika when the connection to RabbitMQ is + closed unexpectedly. Since it is unexpected, we will reconnect to + RabbitMQ if it disconnects. + :param pika.connection.Connection connection: The closed connection obj + :param Exception reason: exception representing reason for loss of + connection. + """ + self._channel = None + if self._closing: + self._connection.ioloop.stop() + else: + LOGGER.warning('Connection closed, reconnect necessary: %s', reason) + self.reconnect() + + def reconnect(self): + """Will be invoked if the connection can't be opened or is + closed. Indicates that a reconnect is necessary then stops the + ioloop. + """ + self.should_reconnect = True + self.stop() + + def open_channel(self): + """Open a new channel with RabbitMQ by issuing the Channel.Open RPC + command. When RabbitMQ responds that the channel is open, the + on_channel_open callback will be invoked by pika. + """ + LOGGER.info('Creating a new channel') + self._connection.channel(on_open_callback=self.on_channel_open) + + def on_channel_open(self, channel): + """This method is invoked by pika when the channel has been opened. + The channel object is passed in so we can make use of it. + Since the channel is now open, we'll declare the exchange to use. 
+ :param pika.channel.Channel channel: The channel object + """ + LOGGER.info('Channel opened') + self._channel = channel + self.add_on_channel_close_callback() + self.setup_exchange(self.EXCHANGE) + + def add_on_channel_close_callback(self): + """This method tells pika to call the on_channel_closed method if + RabbitMQ unexpectedly closes the channel. + """ + LOGGER.info('Adding channel close callback') + self._channel.add_on_close_callback(self.on_channel_closed) + + def on_channel_closed(self, channel, reason): + """Invoked by pika when RabbitMQ unexpectedly closes the channel. + Channels are usually closed if you attempt to do something that + violates the protocol, such as re-declare an exchange or queue with + different parameters. In this case, we'll close the connection + to shutdown the object. + :param pika.channel.Channel: The closed channel + :param Exception reason: why the channel was closed + """ + LOGGER.warning('Channel %i was closed: %s', channel, reason) + self.close_connection() + + def setup_exchange(self, exchange_name): + """Setup the exchange on RabbitMQ by invoking the Exchange.Declare RPC + command. When it is complete, the on_exchange_declareok method will + be invoked by pika. + :param str|unicode exchange_name: The name of the exchange to declare + """ + LOGGER.info('Declaring exchange: %s', exchange_name) + # Note: using functools.partial is not required, it is demonstrating + # how arbitrary data can be passed to the callback when it is called + cb = functools.partial( + self.on_exchange_declareok, userdata=exchange_name) + self._channel.exchange_declare( + exchange=exchange_name, + exchange_type=self.EXCHANGE_TYPE, + callback=cb) + + def on_exchange_declareok(self, _unused_frame, userdata): + """Invoked by pika when RabbitMQ has finished the Exchange.Declare RPC + command. + :param pika.Frame.Method unused_frame: Exchange.DeclareOk response frame + :param str|unicode userdata: Extra user data (exchange name) + """ + LOGGER.info('Exchange declared: %s', userdata) + self.setup_queue(self.QUEUE) + + def setup_queue(self, queue_name): + """Setup the queue on RabbitMQ by invoking the Queue.Declare RPC + command. When it is complete, the on_queue_declareok method will + be invoked by pika. + :param str|unicode queue_name: The name of the queue to declare. + """ + LOGGER.info('Declaring queue %s', queue_name) + cb = functools.partial(self.on_queue_declareok, userdata=queue_name) + self._channel.queue_declare(queue=queue_name, callback=cb) + + def on_queue_declareok(self, _unused_frame, userdata): + """Method invoked by pika when the Queue.Declare RPC call made in + setup_queue has completed. In this method we will bind the queue + and exchange together with the routing key by issuing the Queue.Bind + RPC command. When this command is complete, the on_bindok method will + be invoked by pika. + :param pika.frame.Method _unused_frame: The Queue.DeclareOk frame + :param str|unicode userdata: Extra user data (queue name) + """ + queue_name = userdata + LOGGER.info('Binding %s to %s with %s', self.EXCHANGE, queue_name, + self.ROUTING_KEY) + cb = functools.partial(self.on_bindok, userdata=queue_name) + self._channel.queue_bind( + queue_name, + self.EXCHANGE, + routing_key=self.ROUTING_KEY, + callback=cb) + + def on_bindok(self, _unused_frame, userdata): + """Invoked by pika when the Queue.Bind method has completed. At this + point we will set the prefetch count for the channel. 
+ :param pika.frame.Method _unused_frame: The Queue.BindOk response frame + :param str|unicode userdata: Extra user data (queue name) + """ + LOGGER.info('Queue bound: %s', userdata) + self.set_qos() + + def set_qos(self): + """This method sets up the consumer prefetch to only be delivered + one message at a time. The consumer must acknowledge this message + before RabbitMQ will deliver another one. You should experiment + with different prefetch values to achieve desired performance. + """ + self._channel.basic_qos( + prefetch_count=self._prefetch_count, callback=self.on_basic_qos_ok) + + def on_basic_qos_ok(self, _unused_frame): + """Invoked by pika when the Basic.QoS method has completed. At this + point we will start consuming messages by calling start_consuming + which will invoke the needed RPC commands to start the process. + :param pika.frame.Method _unused_frame: The Basic.QosOk response frame + """ + LOGGER.info('QOS set to: %d', self._prefetch_count) + self.start_consuming() + + def start_consuming(self): + """This method sets up the consumer by first calling + add_on_cancel_callback so that the object is notified if RabbitMQ + cancels the consumer. It then issues the Basic.Consume RPC command + which returns the consumer tag that is used to uniquely identify the + consumer with RabbitMQ. We keep the value to use it when we want to + cancel consuming. The on_message method is passed in as a callback pika + will invoke when a message is fully received. + """ + LOGGER.info('Issuing consumer related RPC commands') + self.add_on_cancel_callback() + self._consumer_tag = self._channel.basic_consume( + self.QUEUE, self.on_message) + self.was_consuming = True + self._consuming = True + + def add_on_cancel_callback(self): + """Add a callback that will be invoked if RabbitMQ cancels the consumer + for some reason. If RabbitMQ does cancel the consumer, + on_consumer_cancelled will be invoked by pika. + """ + LOGGER.info('Adding consumer cancellation callback') + self._channel.add_on_cancel_callback(self.on_consumer_cancelled) + + def on_consumer_cancelled(self, method_frame): + """Invoked by pika when RabbitMQ sends a Basic.Cancel for a consumer + receiving messages. + :param pika.frame.Method method_frame: The Basic.Cancel frame + """ + LOGGER.info('Consumer was cancelled remotely, shutting down: %r', + method_frame) + if self._channel: + self._channel.close() + + def acknowledge_message(self, delivery_tag): + """Acknowledge the message delivery from RabbitMQ by sending a + Basic.Ack RPC method for the delivery tag. + :param int delivery_tag: The delivery tag from the Basic.Deliver frame + """ + LOGGER.info('Acknowledging message %s', delivery_tag) + self._channel.basic_ack(delivery_tag) + + def stop_consuming(self): + """Tell RabbitMQ that you would like to stop consuming by sending the + Basic.Cancel RPC command. + """ + if self._channel: + LOGGER.info('Sending a Basic.Cancel RPC command to RabbitMQ') + cb = functools.partial( + self.on_cancelok, userdata=self._consumer_tag) + self._channel.basic_cancel(self._consumer_tag, cb) + + def on_cancelok(self, _unused_frame, userdata): + """This method is invoked by pika when RabbitMQ acknowledges the + cancellation of a consumer. At this point we will close the channel. + This will invoke the on_channel_closed method once the channel has been + closed, which will in-turn close the connection. 
+ :param pika.frame.Method _unused_frame: The Basic.CancelOk frame + :param str|unicode userdata: Extra user data (consumer tag) + """ + self._consuming = False + LOGGER.info( + 'RabbitMQ acknowledged the cancellation of the consumer: %s', + userdata) + self.close_channel() + + def close_channel(self): + """Call to close the channel with RabbitMQ cleanly by issuing the + Channel.Close RPC command. + """ + LOGGER.info('Closing the channel') + self._channel.close() + + def run(self): + """Run the example consumer by connecting to RabbitMQ and then + starting the IOLoop to block and allow the SelectConnection to operate. + """ + self._connection = self.connect() + self._connection.ioloop.start() + + def stop(self): + """Cleanly shutdown the connection to RabbitMQ by stopping the consumer + with RabbitMQ. When RabbitMQ confirms the cancellation, on_cancelok + will be invoked by pika, which will then closing the channel and + connection. The IOLoop is started again because this method is invoked + when CTRL-C is pressed raising a KeyboardInterrupt exception. This + exception stops the IOLoop which needs to be running for pika to + communicate with RabbitMQ. All of the commands issued prior to starting + the IOLoop will be buffered but not processed. + """ + if not self._closing: + self._closing = True + LOGGER.info('Stopping') + if self._consuming: + self.stop_consuming() + self._connection.ioloop.start() + else: + self._connection.ioloop.stop() + LOGGER.info('Stopped') + + +class ReconnectingResultsConsumer(object): + """This is an example consumer that will reconnect if the nested + ResultsConsumer indicates that a reconnect is necessary. + """ + + def __init__(self, amqp_url, on_message_callback): + self._reconnect_delay = 0 + self._amqp_url = amqp_url + self._on_message_callback = on_message_callback + self._consumer = ResultsConsumer(self._amqp_url, self._on_message_callback) + + def run(self): + while True: + try: + self._consumer.run() + except KeyboardInterrupt: + self._consumer.stop() + break + self._maybe_reconnect() + + def _maybe_reconnect(self): + if self._consumer.should_reconnect: + self._consumer.stop() + reconnect_delay = self._get_reconnect_delay() + LOGGER.info('Reconnecting after %d seconds', reconnect_delay) + time.sleep(reconnect_delay) + self._consumer = ResultsConsumer(self._amqp_url, self._on_message_callback) + + def _get_reconnect_delay(self): + if self._consumer.was_consuming: + self._reconnect_delay = 0 + else: + self._reconnect_delay += 1 + if self._reconnect_delay > 30: + self._reconnect_delay = 30 + return self._reconnect_delay + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/components/job-orchestration/job_orchestration/scheduler/scheduler.py b/components/job-orchestration/job_orchestration/scheduler/scheduler.py new file mode 100644 index 000000000..a6d0c92bd --- /dev/null +++ b/components/job-orchestration/job_orchestration/scheduler/scheduler.py @@ -0,0 +1,343 @@ +import argparse +import datetime +import logging +import os +import pathlib +import sys +import threading +import time +import typing +from contextlib import closing + +import zstandard +from pydantic import ValidationError + +from clp_py_utils.clp_config import CLPConfig, Database +from clp_py_utils.sql_adapter import SQL_Adapter +from job_orchestration.executor.compression.task import compress +from job_orchestration.scheduler.results_consumer import ReconnectingResultsConsumer +from job_orchestration.scheduler.scheduler_data \ + import Job, Task, TaskUpdate, 
TaskCompletionUpdate, TaskFailureUpdate + +# Setup logging +# Create logger +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_handler.setFormatter( + logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s')) +log = logging.getLogger('scheduler') +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + +scheduled_jobs = {} +jobs_lock = threading.Lock() + +from clp_py_utils.core import read_yaml_config_file + + +def fetch_new_task_metadata(db_cursor) -> list: + db_cursor.execute( + """ + SELECT compression_jobs.job_id, + compression_jobs.job_status, + compression_jobs.num_tasks, + compression_jobs.num_tasks_completed, + compression_jobs.clp_config, + compression_tasks.task_id, + compression_tasks.task_status, + compression_tasks.clp_paths_to_compress + FROM compression_jobs INNER JOIN compression_tasks + ON compression_jobs.job_id=compression_tasks.job_id + WHERE compression_tasks.task_status='SUBMITTED'; + """ + ) + return db_cursor.fetchall() + + +def update_task_metadata(db_cursor, task_id, kv: typing.Dict[str, typing.Any]): + if not len(kv): + log.error("Must specify at least one field to update") + raise ValueError + + field_set_expressions = [f'{k}="{v}"' for k, v in kv.items()] + query = f'UPDATE compression_tasks SET {", ".join(field_set_expressions)} ' \ + f'WHERE task_id={task_id};' + db_cursor.execute(query) + + +def update_job_metadata(db_cursor, job_id, kv): + if not len(kv): + log.error("Must specify at least one field to update") + raise ValueError + + field_set_expressions = [f'{k}="{v}"' for k, v in kv.items()] + query = f'UPDATE compression_jobs SET {", ".join(field_set_expressions)} ' \ + f'WHERE job_id={job_id};' + db_cursor.execute(query) + + +def increment_job_metadata(db_cursor, job_id, kv): + if not len(kv): + log.error("Must specify at least one field to increment") + raise ValueError + + field_set_expressions = [f'{k}={k}+{v}' for k, v in kv.items()] + query = f'UPDATE compression_jobs SET {", ".join(field_set_expressions)} ' \ + f'WHERE job_id={job_id};' + db_cursor.execute(query) + + +def schedule_task(job: Job, task: Task, database_config: Database, dctx: zstandard.ZstdDecompressor = None): + return compress.apply_async( + (job.job_id, task.task_id, + job.get_clp_config_json(dctx), + task.get_clp_paths_to_compress_json(dctx), + database_config.get_clp_connection_params_and_type()), + task_id=str(task.task_id), queue='compression', priority=task.priority) + + +def search_and_schedule_new_tasks(db_conn, db_cursor, database_config: Database): + """ + For all task with SUBMITTED status, push them to task queue to be processed, if finished, update them + """ + global scheduled_jobs + global jobs_lock + + log.debug('Search and schedule new tasks') + + dctx = zstandard.ZstdDecompressor() + + # Fetch new task + for task_row in fetch_new_task_metadata(db_cursor): + log.debug(f"Found task with job_id={task_row['job_id']} task_id={task_row['task_id']}") + + # Only Add database credentials to ephemeral task specification passed to workers + task = Task.parse_obj(task_row) + job_id: int = task_row['job_id'] + + with jobs_lock: + now = datetime.datetime.utcnow() + + try: + job = scheduled_jobs[job_id] + except KeyError: + # Identified a new job identified + job = Job(job_start_time=now, **task_row) + update_job_metadata(db_cursor, job_id, dict( + job_start_time=now.strftime('%Y-%m-%d %H:%M:%S') + )) + + # Schedule task, update ephemeral metadata in scheduler and commit to database + celery_task_instance = 
schedule_task(job, task, database_config, dctx) + + update_task_metadata(db_cursor, task.task_id, dict( + task_status='SCHEDULED', + task_scheduled_time=now.strftime('%Y-%m-%d %H:%M:%S') + )) + db_conn.commit() + + # After database commit is successful, update internal metadata + task.instance = celery_task_instance + task.task_status = 'SCHEDULED' + job.tasks[task.task_id] = task + + # Optimization: if job has finished scheduling while we are scheduling task, + # Then we'll update the job's status and num_tasks count + try: + if 'SCHEDULED' == task_row['job_status']: + job.num_tasks = task_row['num_tasks'] + job.job_status = task_row['job_status'] + except KeyError: + pass + + scheduled_jobs[job_id] = job + db_conn.commit() + + +def update_completed_jobs(db_conn, db_cursor): + # Update completed jobs if there are any + db_cursor.execute( + """ + UPDATE compression_jobs + SET job_status="COMPLETED", job_duration=TIMESTAMPDIFF(SECOND,job_start_time, CURRENT_TIMESTAMP()) + WHERE job_status="SCHEDULED" AND num_tasks=num_tasks_completed; + """ + ) + db_conn.commit() + + +def task_results_consumer(sql_adapter: SQL_Adapter, celery_broker_url): + global scheduled_jobs + global jobs_lock + + def callback(ch, method, properties, body): + global scheduled_jobs + global jobs_lock + global log + + try: + # Validate message body + task_update = TaskUpdate.parse_raw(body) + if 'COMPLETED' == task_update.status: + task_update = TaskCompletionUpdate.parse_raw(body) + elif 'FAILED' == task_update.status: + task_update = TaskFailureUpdate.parse_raw(body) + except ValidationError as err: + log.error(err) + exit(-1) + + with closing(sql_adapter.create_connection()) as db_conn, \ + closing(db_conn.cursor(dictionary=True)) as db_cursor, jobs_lock: + log.debug(f'Task update received: ' + f'job_id={task_update.job_id} ' + f'task_id={task_update.task_id} ' + f'status={task_update.status}') + + # Retrieve scheduler state + try: + job = scheduled_jobs[task_update.job_id] + task = job.tasks[task_update.task_id] + except KeyError: + # Scheduler detected response from task which it does not keep track of + # It could be that previous scheduler crashed. 
+ # The only thing we can do is to log, and discard the message + # to prevent infinite loop + log.warning(f'Discarding untracked task update: {task_update.json()}') + ch.basic_ack(method.delivery_tag) + return + + # Process task update and update database + try: + # Scheduler is aware of the task + now = datetime.datetime.utcnow() + + if 'COMPRESSING' == task_update.status: + # Update sent by worker when task began in the database + update_task_metadata(db_cursor, task_update.task_id, dict( + task_status=task_update.status, + task_start_time=now.strftime('%Y-%m-%d %H:%M:%S') + )) + elif 'COMPLETED' == task_update.status: + # Update sent by worker when task finishes + if 'COMPRESSING' != task.task_status: + log.warning(f'Discarding untracked task update: {task_update.json()}') + ch.basic_ack(method.delivery_tag) + raise NotImplementedError + + task_duration = max(int((now - task.task_start_time).total_seconds()), 1) + + log.info(f'Task job-{task_update.job_id}-task-{task_update.task_id} ' + f'completed in {task_duration} second.') + + update_task_metadata(db_cursor, task_update.task_id, dict( + task_status=task_update.status, + partition_uncompressed_size=task_update.total_uncompressed_size, + partition_compressed_size=task_update.total_compressed_size, + task_duration=int(task_duration) + )) + increment_job_metadata(db_cursor, task_update.job_id, dict( + job_uncompressed_size=task_update.total_uncompressed_size, + job_compressed_size=task_update.total_compressed_size, + num_tasks_completed=1 + )) + elif 'FAILED' == task_update.status: + log.warning(f'Marking job_id={task_update.job_id} as failed.') + log.warning(str(task_update.error_message)) + update_task_metadata(db_cursor, task_update.task_id, dict( + task_status=task_update.status, + task_duration=int((now - task.task_start_time).total_seconds()) + )) + update_job_metadata(db_cursor, job.job_id, dict( + job_status=task_update.status, + job_status_msg=task_update.error_message + )) + else: + raise NotImplementedError + + db_conn.commit() + + # Only update scheduler metadata only after transaction finishes + # If update fails, rollback and avoid updating scheduler state + job.tasks[task_update.task_id].task_status = task_update.status + if 'COMPRESSING' == task_update.status: + job.tasks[task_update.task_id].task_start_time = now + elif 'COMPLETED' == task_update.status: + job.num_tasks_completed += 1 + elif 'FAILED' == task_update.status: + # TODO: how to handle failure scheduler state update besides simply recording acknowledgement? 
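                    # A failed task fails the whole job: the job row was already marked FAILED
                    # in the database above, and the in-memory copy is kept in sync here; no
                    # retry of the task is attempted.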
+ job.job_status = task_update.status + pass + else: + raise NotImplementedError + + # Only send out the ACK if data successfully persisted to the database + ch.basic_ack(method.delivery_tag) + + except Exception as error: + # Transaction failure, rollback, don't send ACK and simply reprocess the msg again + log.error(f'Database update failed: {error}.') + db_conn.rollback() + + consumer = ReconnectingResultsConsumer(celery_broker_url, callback) + consumer_thread = threading.Thread(target=consumer.run) + consumer_thread.start() + return consumer + + +def main(argv): + global scheduled_jobs + args_parser = argparse.ArgumentParser() + args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') + args = args_parser.parse_args(argv[1:]) + + celery_broker_url = os.getenv('BROKER_URL') + + # Load configuration + config_path = pathlib.Path(args.config) + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(config_path)) + except ValidationError as err: + log.error(err) + except Exception as ex: + log.error(ex) + # read_yaml_config_file already logs the parsing error inside + pass + else: + # Collect new jobs from the database + log.info('Starting CLP job scheduler') + sql_adapter = SQL_Adapter(clp_config.database) + + results_consumer = task_results_consumer(sql_adapter, celery_broker_url) + + while True: + try: + # Start Job Processing Loop + with closing(sql_adapter.create_connection()) as db_conn, \ + closing(db_conn.cursor(dictionary=True)) as db_cursor: + search_and_schedule_new_tasks(db_conn, db_cursor, sql_adapter.database_config) + update_completed_jobs(db_conn, db_cursor) + except Exception as ex: + log.error('Error in scheduling: ') + log.error(ex) + finally: + try: + time.sleep(clp_config.scheduler.jobs_poll_delay) + except KeyboardInterrupt: + log.info('Gracefully shutting down') + break + + if results_consumer: + try: + results_consumer._consumer.stop() + except RuntimeError as err: + if 'IOLoop is not reentrant and is already running' != str(err): + log.error(err) + raise RuntimeError + else: + # Normal graceful shutdown path + pass + log.info('Scheduler stopped') + + +if '__main__' == __name__: + main(sys.argv) diff --git a/components/job-orchestration/job_orchestration/scheduler/scheduler_data.py b/components/job-orchestration/job_orchestration/scheduler/scheduler_data.py new file mode 100644 index 000000000..8c8b411a4 --- /dev/null +++ b/components/job-orchestration/job_orchestration/scheduler/scheduler_data.py @@ -0,0 +1,63 @@ +import datetime +import json +import typing +from typing import Dict + +import msgpack +import zstandard +from celery.result import AsyncResult +from pydantic import BaseModel, validator + + +class TaskUpdate(BaseModel): + job_id: int + task_id: int + status: str + + @validator('status') + def valid_status(cls, field): + supported_status = ['COMPRESSING', 'COMPLETED', 'FAILED'] + if field not in supported_status: + raise ValueError(f'must be one of the following {"|".join(supported_status)}') + return field + + +class TaskCompletionUpdate(TaskUpdate): + total_uncompressed_size: int + total_compressed_size: int + + +class TaskFailureUpdate(TaskUpdate): + error_message: str + + +class Task(BaseModel): + task_id: int + task_status: str + priority: int = 1 + clp_paths_to_compress: bytes + task_start_time: datetime.datetime = None + instance: AsyncResult = None + + class Config: + arbitrary_types_allowed = True + + def get_clp_paths_to_compress_json(self, dctx: zstandard.ZstdDecompressor = None): + if dctx is None: + dctx 
= zstandard.ZstdDecompressor() + return json.dumps(msgpack.unpackb(dctx.decompress(self.clp_paths_to_compress))) + + +class Job(BaseModel): + job_id: int + job_status: str + job_start_time: datetime.datetime + clp_config: bytes + num_tasks: typing.Optional[int] + num_tasks_completed: int + tasks: Dict[int, Task] = {} + + def get_clp_config_json(self, dctx: zstandard.ZstdDecompressor = None): + if not dctx: + dctx = zstandard.ZstdDecompressor() + return json.dumps(msgpack.unpackb(dctx.decompress(self.clp_config))) diff --git a/components/job-orchestration/requirements.txt b/components/job-orchestration/requirements.txt new file mode 100644 index 000000000..0e8b2eb39 --- /dev/null +++ b/components/job-orchestration/requirements.txt @@ -0,0 +1,8 @@ +python-Levenshtein +pika==1.2.0 +celery==5.1.2 +msgpack~=1.0.2 +zstandard~=0.15.2 +mysql-connector-python==8.0.26 +pydantic==1.8.2 +PyYAML==5.4 diff --git a/components/package-template/README.md b/components/package-template/README.md new file mode 100644 index 000000000..86f70b507 --- /dev/null +++ b/components/package-template/README.md @@ -0,0 +1,5 @@ +# Package Template + +This component contains the base directory structure and files of the CLP package. + +*NOTE: This is only a small part of the complete CLP package and cannot be run alone.* diff --git a/components/package-template/src/.gitignore b/components/package-template/src/.gitignore new file mode 100644 index 000000000..3283d6cd6 --- /dev/null +++ b/components/package-template/src/.gitignore @@ -0,0 +1 @@ +etc/clp-config.yaml diff --git a/components/package-template/src/LICENSE b/components/package-template/src/LICENSE new file mode 100644 index 000000000..3340c889f --- /dev/null +++ b/components/package-template/src/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 YScope Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/components/package-template/src/README.md b/components/package-template/src/README.md new file mode 100644 index 000000000..96883fde3 --- /dev/null +++ b/components/package-template/src/README.md @@ -0,0 +1,116 @@ +# CLP + +Compressed Log Processor (CLP) is a tool that compresses text logs and allows users to search the compressed data +without decompression. CLP's compression ratio is significantly higher than gzip. 
+
+## Getting started
+
+CLP can be run in Docker containers, in one of two modes:
+* On a single node (typically for development and testing)
+* Across multiple nodes
+
+## Single-node deployment
+
+### Requirements
+
+* [Docker](https://docs.docker.com/engine/install/)
+  * `docker` should be in the user's path, and
+  * [runnable without superuser privileges](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user)
+    (without sudo)
+* Plenty of disk space
+* Python3
+  * For systems with a Python version < 3.7, run `pip3 install -r requirements-pre-3.7.txt`
+
+### Starting CLP
+
+```bash
+./sbin/start-clp --uncompressed-logs-dir <uncompressed logs directory>
+```
+
+Note that running CLP in containers means that the `uncompressed-logs-dir` must be mounted inside the container.
+Therefore:
+* The `uncompressed-logs-dir` must not include symbolic links to items **outside** of the directory
+* Changing `uncompressed-logs-dir` requires restarting CLP.
+
+### Stopping CLP
+
+```bash
+./sbin/stop-clp
+```
+
+## Multi-node deployment
+
+### Requirements
+
+* The single-node deployment requirements
+* On the scheduler node, ports 3306 and 5672 must be available and accessible from all compute nodes
+* A distributed file system mounted at the same path on all nodes
+
+### Starting the scheduler
+
+```bash
+sbin/start-clp --start-scheduler-only --publish-ports \
+  --uncompressed-logs-dir <uncompressed logs directory>
+```
+
+### Starting the worker(s)
+
+```bash
+sbin/start-clp --start-worker-only --publish-ports \
+  --uncompressed-logs-dir <uncompressed logs directory>
+```
+
+### Stopping components
+
+Every component can be stopped with:
+```bash
+./sbin/stop-clp
+```
+
+## Usage
+
+Once CLP is started, you can use it as follows.
+
+### Compressing logs
+
+```bash
+./sbin/compress <path> [<path> ...]
+```
+
+Note:
+* The uncompressed logs must be within `uncompressed-logs-dir`
+* CLP is designed to compress text logs
+
+For more options, run the script with the `--help` option.
+
+### Decompressing logs
+
+To decompress all compressed logs:
+```bash
+./sbin/decompress -d <extraction directory>
+```
+For more options, run the script with the `--help` option.
+
+### Searching logs
+
+To search all logs for a given wildcard query:
+```bash
+./sbin/search <wildcard query>
+```
+
+CLP supports two wildcard characters:
+* `*` which matches 0 or more characters
+* `?` which matches any single character
+
+For more options, run the script with the `--help` option.
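+
+For example, the following hypothetical query (the log message format is only illustrative) matches any line
+containing "ERROR" followed by a timeout message with a single-character retry count:
+
+```bash
+# Quote the query so the shell does not expand the wildcards itself
+./sbin/search "*ERROR*timed out after ? retries*"
+```
+
+To restrict the search to a single original file, pass its path with `--file-path`.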
+ +## Troubleshooting + +### ModuleNotFoundError + +**Error message**: ```ModuleNotFoundError: No module named 'dataclasses'``` + +**Cause**: When starting the package on some older platforms like Ubuntu 18.04, some required Python modules are not in +the standard library + +**Solution**: `pip install -r requirements-pre-3.7.txt` diff --git a/components/package-template/src/etc/clp-config.yaml.template b/components/package-template/src/etc/clp-config.yaml.template new file mode 100644 index 000000000..e10728715 --- /dev/null +++ b/components/package-template/src/etc/clp-config.yaml.template @@ -0,0 +1,14 @@ +clp_cluster_name: clp-mini-cluster + +archive_output: + # How much data CLP should try to compress into each archive + target_archive_size: 268435456 # 256MB + + # How large the dictionaries should be allowed to get before the archive is closed and a new one is created + target_dictionaries_size: 33554432 # 32MB + + # How large each encoded file should be before being split into a new encoded file + target_encoded_file_size: 268435456 # 256MB + + # How much data CLP should try to fit into each segment within an archive + target_segment_size: 268435456 # 256MB diff --git a/components/package-template/src/lib/python3/site-packages/clp/__init__.py b/components/package-template/src/lib/python3/site-packages/clp/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/components/package-template/src/lib/python3/site-packages/clp/package_utils.py b/components/package-template/src/lib/python3/site-packages/clp/package_utils.py new file mode 100644 index 000000000..854fe7576 --- /dev/null +++ b/components/package-template/src/lib/python3/site-packages/clp/package_utils.py @@ -0,0 +1,70 @@ +import json +import pathlib +import subprocess + +from clp_py_utils.clp_config import CLPConfig + + +def check_dependencies(): + try: + subprocess.run('command -v git', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + except subprocess.CalledProcessError: + raise EnvironmentError('git is not installed on the path.') + + try: + subprocess.run('command -v docker', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + subprocess.run(['docker', 'ps'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + except subprocess.CalledProcessError: + raise EnvironmentError('docker is not installed on the path or cannot run without superuser privileges (sudo).') + + +def check_env(cluster_name: str): + check_docker_network_bridge_cmd = ['docker', 'network', 'inspect', cluster_name] + proc = subprocess.run(check_docker_network_bridge_cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if 0 != proc.returncode: + raise EnvironmentError(f'Failed to inspect docker network bridge {cluster_name}') + + bridge_bridge_specification = json.loads(proc.stdout.decode('utf-8'))[0] + required_containers = {cluster_name} + for container_id, container in bridge_bridge_specification['Containers'].items(): + try: + required_containers.remove(container['Name']) + except KeyError: + pass + + if required_containers: + raise EnvironmentError(f'The required container is not started: {",".join(required_containers)}') + + +def prepare_package_and_config(clp_config: CLPConfig, clp_home: pathlib.Path, docker_clp_home: pathlib.Path): + host_data_directory = pathlib.Path(clp_config.data_directory) + if '' == host_data_directory.anchor: + # In the config file, we assume prefix is clp_home inside the docker (/root/clp) + host_data_directory = clp_home / 
clp_config.data_directory + clp_config.data_directory = str(docker_clp_home / clp_config.data_directory) + host_data_directory.mkdir(parents=True, exist_ok=True) + + host_log_directory = pathlib.Path(clp_config.logs_directory) + if '' == host_log_directory.anchor: + # In the config file, we assume prefix is clp_home, inside the docker (/root/clp) + host_log_directory = clp_home / clp_config.logs_directory + clp_config.logs_directory = str(docker_clp_home / clp_config.logs_directory) + host_log_directory.mkdir(parents=True, exist_ok=True) + + host_archive_output_directory = pathlib.Path(clp_config.archive_output.directory) + if '' == host_archive_output_directory.anchor: + # In the config file, we assume prefix is clp_home, inside the docker (/root/clp) + host_archive_output_directory = clp_home / clp_config.archive_output.directory + clp_config.archive_output.directory = \ + str(docker_clp_home / clp_config.archive_output.directory) + host_archive_output_directory.mkdir(parents=True, exist_ok=True) + + return host_data_directory, host_log_directory, host_archive_output_directory, clp_config + + +def make_config_path_absolute(clp_home: pathlib.Path, config_path: pathlib.Path): + if config_path.is_absolute(): + return config_path + else: + return clp_home / config_path diff --git a/components/package-template/src/requirements-pre-3.7.txt b/components/package-template/src/requirements-pre-3.7.txt new file mode 100644 index 000000000..60f564425 --- /dev/null +++ b/components/package-template/src/requirements-pre-3.7.txt @@ -0,0 +1 @@ +dataclasses==0.8 \ No newline at end of file diff --git a/components/package-template/src/sbin/compress b/components/package-template/src/sbin/compress new file mode 100755 index 000000000..7808ccb9c --- /dev/null +++ b/components/package-template/src/sbin/compress @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +import argparse +import logging +import os +import pathlib +import subprocess +import sys + +# Setup logging +# Create logger +log = logging.getLogger('clp') +log.setLevel(logging.DEBUG) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +log.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + log.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + log.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + log.error('Failed to load python3 packages bundled with CLP.') + return -1 + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp.package_utils import check_env +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.clp_package_config import CLPPackageConfig +from pydantic import ValidationError + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Startup script for CLP') + 
args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to compress.') + args_parser.add_argument('-f', '--input-list', dest='input_list', help='A file listing all paths to compress.') + parsed_args = args_parser.parse_args(argv[1:]) + + # Infer config file path + try: + if not parsed_args.config: + # Did not provide a config file + default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' + if not default_clp_package_config_file.exists(): + raise FileNotFoundError + log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') + package_config_file_path = default_clp_package_config_file + else: + # Provided a config file + package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) + except FileNotFoundError: + log.error('Did not provide a clp package config file or the specified config file does not exist.') + return + + try: + clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) + except ValidationError as err: + log.error(err) + return + except Exception as ex: + # read_yaml_config_file already logs the parsing error inside + return + + clp_cluster_name = clp_package_config.cluster_name + try: + check_env(clp_cluster_name) + except EnvironmentError as ex: + logging.error(ex) + return -1 + + # TODO: check path and perform path conversion + docker_exec_cmd = [ + 'docker', 'exec', + '--workdir', '/root/clp', + clp_package_config.cluster_name, + 'sbin/native/compress', '--config', f'/root/.{clp_package_config.cluster_name}.yaml' + ] + for path in parsed_args.paths: + docker_exec_cmd.append(path) + if parsed_args.input_list is not None: + docker_exec_cmd.append('--input-list') + docker_exec_cmd.append(parsed_args.input_list) + logging.info(docker_exec_cmd) + subprocess.run(docker_exec_cmd) + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/decompress b/components/package-template/src/sbin/decompress new file mode 100755 index 000000000..acca6e3d4 --- /dev/null +++ b/components/package-template/src/sbin/decompress @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +import argparse +import logging +import os +import pathlib +import shutil +import subprocess +import sys +import uuid + +# Setup logging +# Create logger +log = logging.getLogger('clp') +log.setLevel(logging.DEBUG) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +log.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + log.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + log.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + log.error('Failed to load python3 packages bundled with CLP.') + return -1 + # Add packages to the front of the path + 
sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp.package_utils import check_env +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.clp_package_config import CLPPackageConfig +from clp_py_utils.clp_config import CLPConfig +from pydantic import ValidationError + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Script to decompress logs') + args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to decompress.') + args_parser.add_argument('-f', '--files-from', help='A file listing all files to decompress.') + args_parser.add_argument('-d', '--extraction-dir', metavar='DIR', default='.', help='Decompress files into DIR') + parsed_args = args_parser.parse_args(argv[1:]) + + # Infer config file path + try: + if not parsed_args.config: + # Did not provide a config file + default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' + if not default_clp_package_config_file.exists(): + raise FileNotFoundError + log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') + package_config_file_path = default_clp_package_config_file + else: + # Provided a config file + package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) + except FileNotFoundError: + log.error('Did not provide a clp package config file or the specified config file does not exist.') + return + + try: + clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) + except ValidationError as err: + log.error(err) + return + except Exception as ex: + # read_yaml_config_file already logs the parsing error inside + return + + # Validate paths were specified using only one method + if len(parsed_args.paths) > 0 and parsed_args.files_from is not None: + args_parser.error( + "Paths cannot be specified both on the command line and through a file.") + return -1 + + files_to_decompress_path = None + if parsed_args.files_from: + files_to_decompress_path = pathlib.Path(parsed_args.files_from).resolve(strict=True) + + # Validate extraction directory + extraction_dir = pathlib.Path(parsed_args.extraction_dir).resolve() + if extraction_dir.exists() and not extraction_dir.is_dir(): + log.error(f'extraction-dir ({extraction_dir}) is not a valid directory.') + return -1 + extraction_dir.mkdir(exist_ok=True) + + clp_cluster_name = clp_package_config.cluster_name + try: + check_env(clp_cluster_name) + except EnvironmentError as ex: + log.error(ex) + return -1 + + # Parse and validate config file + container_clp_config_file_name = f'.{clp_package_config.cluster_name}.yaml' + host_config_file_path = clp_home / container_clp_config_file_name + container_config_file_path = f'/root/{container_clp_config_file_name}' + + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(host_config_file_path)) + except ValidationError as err: + log.error(err) + return -1 + except Exception as ex: + log.error(ex) + return -1 + + docker_clp_home = pathlib.Path('/') / 'root' / 'clp' + docker_extraction_dir = pathlib.Path('/') / 'mnt' / '_extraction_dir_' + + host_data_directory = clp_home / pathlib.Path(clp_config.data_directory).relative_to(docker_clp_home) + host_log_directory = clp_home / pathlib.Path(clp_config.logs_directory).relative_to(docker_clp_home) + 
host_archive_out_directory = \ + clp_home / pathlib.Path(clp_config.archive_output.directory).relative_to(docker_clp_home) + + # Start execution environment + clp_execution_env_container = 'whywhywhywhywhywhy/clp-execution-env:x86-ubuntu-focal-20210919' + container_name = f'{clp_cluster_name}-decompressor-{uuid.uuid4()}'[:62] # max docker hostname = 63 chars + clp_execution_env_startup_cmd = [ + 'docker', 'run', '-di', + '--rm', + '--network', clp_cluster_name, + '--hostname', container_name, + '--name', container_name, + '-v', f'{clp_home}:{docker_clp_home}', + '-v', f'{extraction_dir}:{docker_extraction_dir}' + ] + if not clp_config.data_directory.startswith('/root/clp'): + clp_execution_env_startup_cmd.append('-v') + clp_execution_env_startup_cmd.append(f'{host_data_directory}:{clp_config.data_directory}') + if not clp_config.logs_directory.startswith('/root/clp'): + clp_execution_env_startup_cmd.append('-v') + clp_execution_env_startup_cmd.append(f'{host_log_directory}:{clp_config.logs_directory}') + if not clp_config.archive_output.directory.startswith('/root/clp'): + clp_execution_env_startup_cmd.append('-v') + clp_execution_env_startup_cmd.append( + f'{host_archive_out_directory}:{clp_config.archive_output.directory}') + clp_execution_env_startup_cmd.append(clp_execution_env_container) + subprocess.run(clp_execution_env_startup_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + try: + # Copy config file into container + copy_cmd = ['docker', 'cp', host_config_file_path, f'{container_name}:{container_config_file_path}'] + subprocess.run(copy_cmd) + + docker_exec_cmd = [ + 'docker', 'exec', + '--workdir', str(docker_clp_home), + container_name, + 'sbin/native/decompress', '--config', container_config_file_path, + '-d', str(docker_extraction_dir) + ] + for path in parsed_args.paths: + docker_exec_cmd.append(path) + temporary_files_to_decompress_path = None + if files_to_decompress_path: + # Copy list to logs directory + temp_list_name = f'{uuid.uuid4()}-decompress-paths.txt' + temporary_files_to_decompress_path = host_log_directory / temp_list_name + shutil.copyfile(files_to_decompress_path, temporary_files_to_decompress_path) + + docker_exec_cmd.append('--files-from') + docker_exec_cmd.append(pathlib.Path(clp_config.logs_directory) / temp_list_name) + logging.info(docker_exec_cmd) + subprocess.run(docker_exec_cmd) + if files_to_decompress_path: + temporary_files_to_decompress_path.unlink() + finally: + docker_stop_cmd = ['docker', 'stop', container_name] + subprocess.run(docker_stop_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/native/compress b/components/package-template/src/sbin/native/compress new file mode 100755 index 000000000..48aabd0d4 --- /dev/null +++ b/components/package-template/src/sbin/native/compress @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +import argparse +import logging +import os +import pathlib +import shutil +import sys +import uuid + +# Setup logging +# Create logger +logger = logging.getLogger('compress') +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = 
pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + logging.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + logger.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + logger.error('Failed to load python3 packages bundled with CLP.') + return -1 + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from pydantic import ValidationError +from clp.package_utils import make_config_path_absolute +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.clp_io_config import InputConfig, OutputConfig, ClpIoConfig +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.sql_adapter import SQL_Adapter +from compression_job_handler.compression_job_handler import handle_jobs + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Compress log files.') + args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') + args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to compress.') + args_parser.add_argument('-f', '--input-list', dest='input_list', help='A file listing all paths to compress.') + args_parser.add_argument('--remove-path-prefix', metavar='DIR', + help='Remove the given path prefix from each compressed file/dir.') + args_parser.add_argument('--no-progress-reporting', action='store_true', help='Disables progress reporting.') + parsed_args = args_parser.parse_args(argv[1:]) + + # Validate some input paths were specified + if parsed_args.input_list is None and len(parsed_args.paths) == 0: + args_parser.error('No paths specified.') + + # Validate paths were specified using only one method + if len(parsed_args.paths) > 0 and parsed_args.input_list is not None: + args_parser.error('Paths cannot be specified on the command line AND through a file.') + + # Load configuration + clp_config_file_path = pathlib.Path(parsed_args.config) + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(clp_config_file_path)) + except ValidationError as err: + logger.error(err) + except FileNotFoundError as err: + logger.error(f'CLP config file not found at "{str(clp_config_file_path)}"') + except Exception as ex: + logger.error(ex) + else: + + logs_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.logs_directory)) + + comp_jobs_dir = logs_dir / 'comp-jobs' + comp_jobs_dir.mkdir(parents=True, exist_ok=True) + + if parsed_args.input_list is None: + # Write paths to file + log_list_path = comp_jobs_dir / f'{str(uuid.uuid4())}.txt' + with open(log_list_path, 'w') as f: + for path in parsed_args.paths: + stripped_path = path.strip() + if '' == stripped_path: + # Skip empty paths + continue + resolved_path = pathlib.Path(stripped_path).resolve() + + f.write(str(resolved_path) + '\n') + else: + # Copy to jobs directory + log_list_path = pathlib.Path(parsed_args.input_list).resolve() + shutil.copy(log_list_path, comp_jobs_dir / log_list_path.name) + + logger.info(f'Compression job submitted to compression-job-handler.') + + mysql_adapter = 
SQL_Adapter(clp_config.database) + clp_io_config = ClpIoConfig( + input=InputConfig(type='fs', list_path=str(log_list_path)), + output=OutputConfig.parse_obj(clp_config.archive_output) + ) + + # Execute compression-job-handler.handle_jobs + logs_directory_abs = str(pathlib.Path(clp_config.logs_directory).resolve()) + handle_jobs(sql_adapter=mysql_adapter, clp_io_config=clp_io_config, logs_dir_abs=logs_directory_abs, + fs_logs_required_parent_dir=pathlib.Path(clp_config.input_logs_dfs_path), + no_progress_reporting=parsed_args.no_progress_reporting) + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/native/decompress b/components/package-template/src/sbin/native/decompress new file mode 100755 index 000000000..02129a728 --- /dev/null +++ b/components/package-template/src/sbin/native/decompress @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +import argparse +import logging +import os +import pathlib +import subprocess +import sys +import uuid + +# Setup logging +# Create logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + logger.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + logger.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + logger.error('Failed to load python3 packages bundled with CLP.') + sys.exit(-1) + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp.package_utils import make_config_path_absolute +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.core import read_yaml_config_file +import yaml + + +def decompress_paths(paths, list_path: pathlib.Path, clp_config: CLPConfig, + archives_dir: pathlib.Path, logs_dir: pathlib.Path, + extraction_dir: pathlib.Path): + # Generate database config file for clp + db_config_file_path = logs_dir / f'decompress-db-config-{uuid.uuid4()}.yml' + db_config_file = open(db_config_file_path, 'w') + yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(), db_config_file) + db_config_file.close() + + decompression_cmd = [ + str(clp_home / 'bin' / 'clp'), + 'x', str(archives_dir), str(extraction_dir), + '--db-config-file', str(db_config_file_path), + ] + files_to_decompress_list_path = None + if list_path is not None: + decompression_cmd.append("-f") + decompression_cmd.append(str(list_path)) + elif len(paths) > 0: + # Write paths to file + files_to_decompress_list_path = logs_dir / f'paths-to-decompress-{uuid.uuid4()}.txt' + with open(files_to_decompress_list_path, 'w') as stream: + for path in paths: + stream.write(path + '\n') + + decompression_cmd.append('-f') + 
decompression_cmd.append(str(files_to_decompress_list_path)) + + proc = subprocess.run(decompression_cmd, close_fds=True) + return_code = proc.returncode + if 0 != return_code: + logger.error(f'Decompression failed, return_code={return_code}') + return return_code + + # Remove generated files + if files_to_decompress_list_path is not None: + # Remove path list + files_to_decompress_list_path.unlink() + db_config_file_path.unlink() + + return 0 + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Decompresses logs') + args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') + args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to decompress.') + args_parser.add_argument('-f', '--files-from', help='Decompress all paths in the given list.') + args_parser.add_argument('-d', '--extraction-dir', metavar='DIR', help='Decompress files into DIR', default='.') + parsed_args = args_parser.parse_args(argv[1:]) + + # Validate paths were specified using only one method + if len(parsed_args.paths) > 0 and parsed_args.files_from is not None: + args_parser.error('Paths cannot be specified both on the command line and through a file.') + return -1 + + # Validate extraction directory + extraction_dir = pathlib.Path(parsed_args.extraction_dir) + if not extraction_dir.is_dir(): + logger.error(f'extraction-dir ({extraction_dir}) is not a valid directory.') + return -1 + + # Load configuration + clp_config_file_path = pathlib.Path(parsed_args.config) + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(clp_config_file_path)) + except FileNotFoundError: + logger.error(f'CLP config file not found at "{clp_config_file_path}"') + return -1 + except Exception as ex: + logger.error(ex) + return -1 + + logs_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.logs_directory)) + archives_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.archive_output.directory)) + + return_code = \ + decompress_paths(parsed_args.paths, parsed_args.files_from, clp_config, archives_dir, logs_dir, extraction_dir) + return return_code + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/native/search b/components/package-template/src/sbin/native/search new file mode 100755 index 000000000..8b1c49bd4 --- /dev/null +++ b/components/package-template/src/sbin/native/search @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +import argparse +import logging +import os +import pathlib +import subprocess +import sys +import uuid + +# Setup logging +# Create logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + logger.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + logger.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 
'site-packages' + if not python_site_packages_path.is_dir(): + logger.error('Failed to load python3 packages bundled with CLP.') + sys.exit(-1) + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp.package_utils import make_config_path_absolute +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.core import read_yaml_config_file +import yaml + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Searches the compressed logs.') + args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') + args_parser.add_argument('wildcard_query', help='Wildcard query.') + args_parser.add_argument("--file-path", help="File to search.") + parsed_args = args_parser.parse_args(argv[1:]) + + # Load configuration + clp_config_file_path = pathlib.Path(parsed_args.config) + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(clp_config_file_path)) + except FileNotFoundError: + logger.error(f'CLP config file not found at "{clp_config_file_path}"') + return -1 + except Exception as ex: + logger.error(ex) + return -1 + + logs_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.logs_directory)) + archives_dir = make_config_path_absolute(clp_home, + pathlib.Path(clp_config.archive_output.directory)) + + # Generate database config file for clp + db_config_file_path = logs_dir / f'decompress-db-config-{uuid.uuid4()}.yml' + db_config_file = open(db_config_file_path, 'w') + yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(), db_config_file) + db_config_file.close() + + search_cmd = [ + str(clp_home / 'bin' / 'clg'), + str(archives_dir), parsed_args.wildcard_query, + '--db-config-file', str(db_config_file_path), + ] + if parsed_args.file_path is not None: + search_cmd.append(parsed_args.file_path) + + proc = subprocess.run(search_cmd, close_fds=True) + return_code = proc.returncode + if 0 != return_code: + logger.error(f'Search failed, return_code={return_code}') + return return_code + + # Remove generated files + db_config_file_path.unlink() + + return return_code + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/search b/components/package-template/src/sbin/search new file mode 100755 index 000000000..51bd3110c --- /dev/null +++ b/components/package-template/src/sbin/search @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +import argparse +import logging +import os +import pathlib +import subprocess +import sys + +# Setup logging +# Create logger +log = logging.getLogger('clp') +log.setLevel(logging.DEBUG) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +log.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + log.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + log.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + 
python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + log.error('Failed to load python3 packages bundled with CLP.') + return -1 + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp.package_utils import check_env +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.clp_package_config import CLPPackageConfig +from pydantic import ValidationError + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Searches the compressed logs.') + args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + args_parser.add_argument('wildcard_query', help="Wildcard query.") + args_parser.add_argument('--file-path', help="File to search.") + parsed_args = args_parser.parse_args(argv[1:]) + + # Infer config file path + try: + if not parsed_args.config: + # Did not provide a config file + default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' + if not default_clp_package_config_file.exists(): + raise FileNotFoundError + log.debug(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') + package_config_file_path = default_clp_package_config_file + else: + # Provided a config file + package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) + except FileNotFoundError: + log.error('Did not provide a clp package config file or the specified config file does not exist.') + return + + try: + clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) + except ValidationError as err: + log.error(err) + return + except Exception as ex: + # read_yaml_config_file already logs the parsing error inside + return + + clp_cluster_name = clp_package_config.cluster_name + try: + check_env(clp_cluster_name) + except EnvironmentError as ex: + logging.error(ex) + return -1 + + docker_exec_cmd = [ + 'docker', 'exec', + '--workdir', '/root/clp', + clp_cluster_name, + 'sbin/native/search', '--config', f'/root/.{clp_cluster_name}.yaml', + parsed_args.wildcard_query + ] + if parsed_args.file_path: + docker_exec_cmd.append('--file-path') + docker_exec_cmd.append(parsed_args.file_path) + subprocess.run(docker_exec_cmd) + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/start-clp b/components/package-template/src/sbin/start-clp new file mode 100755 index 000000000..2ee3da202 --- /dev/null +++ b/components/package-template/src/sbin/start-clp @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +import argparse +import logging +import multiprocessing +import os +import pathlib +import secrets +import socket +import subprocess +import sys +import time + +# Setup logging +# Create logger +log = logging.getLogger('clp') +log.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +log.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is 
None: + log.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + log.error('CLP_HOME does not exist.') + return None + + return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + log.error('Failed to load python3 packages bundled with CLP.') + return -1 + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp.package_utils import prepare_package_and_config, check_dependencies +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.clp_package_config import CLPPackageConfig, ArchiveOutput as PackageArchiveOutput +from clp_py_utils.clp_config import Database, ArchiveOutput, CLPConfig, Scheduler + + +def provision_docker_network_bridge(clp_cluster_name: str): + cmd = ['docker', 'network', 'create', '--driver', 'bridge', clp_cluster_name] + log.info('Provision docker network bridge') + log.debug(' '.join(cmd)) + try: + subprocess.run(cmd, stdout=subprocess.PIPE, check=True) + except subprocess.CalledProcessError: + log.error(f'Cluster "{clp_cluster_name}" has already been provisioned.') + raise EnvironmentError + + +def start_sql_db(cluster_name: str, clp_config: CLPConfig, host_data_directory: pathlib.Path, publish_ports: bool): + log.info(f'Starting scheduler {clp_config.database.type} database') + + persistent_storage_path = host_data_directory / 'db' + persistent_storage_path.mkdir(exist_ok=True, parents=True) + + database_startup_cmd = [ + 'docker', 'run', '-d', + '--network', cluster_name, + '--hostname', f'{clp_config.database.host}', + '--name', f'{clp_config.database.host}', + '-v', f'{str(persistent_storage_path)}:/var/lib/mysql', + '-e', f'MYSQL_ROOT_PASSWORD={clp_config.database.password}', + '-e', f'MYSQL_USER={clp_config.database.username}', + '-e', f'MYSQL_PASSWORD={clp_config.database.password}', + '-e', f'MYSQL_DATABASE=initial_database' + ] + if publish_ports: + database_startup_cmd.append('-p') + database_startup_cmd.append(f'{str(clp_config.database.port)}:{str(clp_config.database.port)}') + if 'mysql' == clp_config.database.type: + database_startup_cmd.append('mysql:8.0.23') + elif 'mariadb' == clp_config.database.type: + database_startup_cmd.append('mariadb:10.6.4-focal') + log.debug(' '.join(database_startup_cmd)) + try: + subprocess.run(database_startup_cmd, stdout=subprocess.PIPE, check=True) + except subprocess.CalledProcessError: + log.error(f'Unable to start "{clp_config.database.type}" inside docker') + raise EnvironmentError + + +def create_sql_db_tables(cluster_name: str, container_config_file_path: str): + # Initialize database tables + log.info('Initializing scheduler database tables') + database_table_creation_commands = [ + ['python3', '/root/clp/lib/python3/site-packages/clp_py_utils/initialize-clp-metadata-db.py', + '--config', container_config_file_path], + ['python3', '/root/clp/lib/python3/site-packages/clp_py_utils/initialize-orchestration-db.py', + '--config', container_config_file_path] + ] + for command in database_table_creation_commands: + docker_exec_cmd = ['docker', 'exec', '-it', + '-e', 'PYTHONPATH=/root/clp/lib/python3/site-packages', cluster_name] + docker_exec_cmd += command + log.debug(' '.join(docker_exec_cmd)) + max_attempts = 20 + for attempt in range(max_attempts 
+ 1): + if attempt == max_attempts: + log.error('Unable to connect to the database with the provided credentials') + raise EnvironmentError + try: + subprocess.run(docker_exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + except subprocess.CalledProcessError: + log.debug('Waiting for database to be ready') + time.sleep(1) # database not ready + else: + break + log.debug('Scheduler database tables initialization completed') + + +def provision_rabbitmq(cluster_name: str, clp_config: CLPConfig): + log.info('Starting scheduler queue') + + # Start rabbitmq + docker_exec_cmd = ['docker', 'exec', '-d', '-e', 'RABBITMQ_PID_FILE=/tmp/rabbitmq.pid', cluster_name, + 'rabbitmq-server'] + log.debug(' '.join(docker_exec_cmd)) + try: + subprocess.run(docker_exec_cmd, stdout=subprocess.PIPE, check=True) + except subprocess.CalledProcessError: + log.error(f'Unable to start rabbitmq inside docker') + raise EnvironmentError + + # Wait for rabbitmq to be available + docker_exec_cmd = ['docker', 'exec', '-e', 'RABBITMQ_PID_FILE=/tmp/rabbitmq.pid', cluster_name] + \ + 'rabbitmqctl wait ${RABBITMQ_PID_FILE}'.split() + log.debug(' '.join(docker_exec_cmd)) + subprocess.run(docker_exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Initialize rabbitmq + log.info('Initializing scheduler queue') + rabbitmq_provisioning_commands = [ + f'rabbitmqctl add_user {clp_config.scheduler.username} {clp_config.scheduler.password}', + f'rabbitmqctl set_user_tags {clp_config.scheduler.username} administrator', + f'rabbitmqctl set_permissions -p / {clp_config.scheduler.username} .* .* .*' + ] + for command in rabbitmq_provisioning_commands: + docker_exec_cmd = ['docker', 'exec', '-e', 'RABBITMQ_PID_FILE=/tmp/rabbitmq.pid', + cluster_name] + command.split() + log.debug(' '.join(docker_exec_cmd)) + proc = subprocess.run(docker_exec_cmd, stdout=subprocess.PIPE) + log.debug(proc.stdout.decode('utf-8')) + + +def start_scheduler(cluster_name: str, clp_config: CLPConfig, container_config_file_path: str): + scheduler_startup_cmd = ['python3', '-u', '-m', 'job_orchestration.scheduler.scheduler', + '--config', container_config_file_path] + log.info('Starting scheduler service') + docker_exec_cmd = [ + 'docker', 'exec', '--detach', '--workdir', '/root/clp', + '-e', 'PYTHONPATH=/root/clp/lib/python3/site-packages', + '-e', f'BROKER_URL=amqp://{clp_config.scheduler.username}:{clp_config.scheduler.password}@localhost:5672', + # rabbitmq runs on scheduler node + cluster_name + ] + docker_exec_cmd += scheduler_startup_cmd + log.debug(docker_exec_cmd) + try: + subprocess.run(docker_exec_cmd) + except subprocess.CalledProcessError: + log.error('Failed to start clp scheduler service') + raise EnvironmentError + + +def start_worker(cluster_name: str, clp_config: CLPConfig, num_cpus: int): + worker_startup_cmd = ['/root/clp/bin/celery', '-A', 'job_orchestration.executor', 'worker', + '--concurrency', str(num_cpus), + '--loglevel', 'WARNING', + '-Q', 'compression'] + log.info("Starting CLP worker") + docker_exec_cmd = [ + 'docker', 'exec', '--detach', + '--workdir', '/root/clp', + '-e', 'CLP_HOME=/root/clp', + '-e', f'CLP_DATA_DIR={clp_config.data_directory}', + '-e', f'CLP_LOGS_DIR={clp_config.logs_directory}', + '-e', 'PYTHONPATH=/root/clp/lib/python3/site-packages', + '-e', f'BROKER_URL=amqp://{clp_config.scheduler.username}:{clp_config.scheduler.password}' + f'@{clp_config.scheduler.host}:5672', + '-e', f'RESULT_BACKEND=rpc://{clp_config.scheduler.username}:{clp_config.scheduler.password}' + 
f'@{clp_config.scheduler.host}:5672', + cluster_name + ] + docker_exec_cmd += worker_startup_cmd + log.debug(docker_exec_cmd) + try: + subprocess.run(docker_exec_cmd) + except subprocess.CalledProcessError: + log.error('Failed to start CLP worker') + raise EnvironmentError + + +def generate_default_package_config(package_config_file_path: pathlib.Path): + clp_package_config = CLPPackageConfig( + cluster_name='clp-mini-cluster', + archive_output=PackageArchiveOutput( + target_archive_size=268435456, # 256MB + target_dictionaries_size=33554432, # 32MB + target_encoded_file_size=268435456, # 256MB + target_segment_size=268435456 # 256MB + ) + ) + with open(package_config_file_path, 'w') as config_file: + config_file.write(clp_package_config.generate_package_config_file_content_with_comments()) + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Startup script for CLP') + args_parser.add_argument('--uncompressed-logs-dir', type=str, required=True, + help='The directory containing uncompressed logs.') + args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + args_parser.add_argument('--num-cpus', type=int, default=0, + help='Number of logical CPU cores to use for compression') + args_parser.add_argument('--publish-ports', action='store_true', help='Publish container ports to the host port') + args_parser.add_argument('--start-scheduler-only', action='store_true', help='Start only scheduler service') + args_parser.add_argument('--start-worker-only', action='store_true', help='Start only worker service') + + parsed_args = args_parser.parse_args(argv[1:]) + + # Check required system dependencies + try: + check_dependencies() + except EnvironmentError as ex: + log.error(ex) + return + + # Infer components to enable + startup_component_count = parsed_args.start_scheduler_only + parsed_args.start_worker_only + if startup_component_count > 1: + log.error('--start-scheduler-only and --start-worker-only are mutually exclusive') + return + if not parsed_args.start_scheduler_only and not parsed_args.start_worker_only: + need_to_start_scheduler = True + need_to_start_worker = True + else: + need_to_start_scheduler = parsed_args.start_scheduler_only + need_to_start_worker = parsed_args.start_worker_only + + + # Infer number of CPU cores used for compression + num_cpus = parsed_args.num_cpus + if 0 == num_cpus: + num_cpus = multiprocessing.cpu_count() + + # Validate uncompressed-log-dir + uncompressed_log_dir = pathlib.Path(parsed_args.uncompressed_logs_dir).resolve() + if not (uncompressed_log_dir.exists() and uncompressed_log_dir.is_dir()): + log.error(f'The specified uncompressed log directory path is invalid: {uncompressed_log_dir}') + return + + # Infer config file path + try: + if not parsed_args.config: + # Did not provide a config file + default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' + if not default_clp_package_config_file.exists(): + log.info('Generating a default config file.') + generate_default_package_config(default_clp_package_config_file) + log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') + package_config_file_path = default_clp_package_config_file + else: + # Provided a config file + package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) + except FileNotFoundError: + log.error('Did not provide a clp package config file or the specified config file does not exist.') + return + + # Parse and validate config file path + try: + 
clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) + + if need_to_start_scheduler: + # Generate a clp config from a clp package config (a reduced set of clp config) + # This config file will be used to start CLP + clp_config = CLPConfig( + input_logs_dfs_path=str(uncompressed_log_dir), + database=Database( + type='mariadb', + host=f'{clp_package_config.cluster_name}-db', + port=3306, + username='clp-user', + password=f'clp-{secrets.token_urlsafe(8)}', + name='initial_database' + ), + scheduler=Scheduler( + host=f'{clp_package_config.cluster_name}', + username='clp-user', + password=f'clp-{secrets.token_urlsafe(8)}', + jobs_poll_delay=1 + ), + archive_output=ArchiveOutput( + type='fs', + directory=f'var/data/{clp_package_config.cluster_name}/archives', + storage_is_node_specific=True, + target_archive_size=clp_package_config.archive_output.target_archive_size, + target_dictionaries_size=clp_package_config.archive_output.target_dictionaries_size, + target_encoded_file_size=clp_package_config.archive_output.target_encoded_file_size, + target_segment_size=clp_package_config.archive_output.target_segment_size + ), + data_directory=f'var/data/{clp_package_config.cluster_name}', + logs_directory=f'var/log/{clp_package_config.cluster_name}' + ) + + # If ports are published, user wants to run CLP in distributed mode + # Host parameter will be the "host"'s hostname instead of docker network hostname + if parsed_args.publish_ports: + host_hostname = socket.gethostname() + clp_config.database.host = host_hostname + clp_config.scheduler.host = host_hostname + except Exception as ex: + log.error(ex) + return + + try: + # Create temporary clp config file which we mount into the container + # Prepare package and initialize all required directories if necessary + # Note: config file is also updated with absolute path + docker_clp_home = pathlib.Path('/') / 'root' / 'clp' + container_clp_config_file_name = f'.{clp_package_config.cluster_name}.yaml' + host_config_file_path = clp_home / container_clp_config_file_name + container_config_file_path = f'/root/{container_clp_config_file_name}' + + # Persist config file used for container + if not host_config_file_path.exists() and need_to_start_scheduler: + host_data_directory, host_log_directory, host_archive_out_directory, clp_config = \ + prepare_package_and_config(clp_config, clp_home, docker_clp_home) + with open(host_config_file_path, 'w') as config_file: + config_file.write(clp_config.generate_config_file_content_with_comments()) + else: + try: + clp_config = CLPConfig.parse_obj(read_yaml_config_file(host_config_file_path)) + host_data_directory = clp_home / pathlib.Path(clp_config.data_directory).relative_to(docker_clp_home) + host_log_directory = clp_home / pathlib.Path(clp_config.logs_directory).relative_to(docker_clp_home) + host_archive_out_directory = \ + clp_home / pathlib.Path(clp_config.archive_output.directory).relative_to(docker_clp_home) + except Exception as ex: + log.error(ex) + return + + # Setup basic networking infrastructure + provision_docker_network_bridge(clp_package_config.cluster_name) + + if need_to_start_scheduler: + # Optimize, start database as early as possible (slow process) + log.info('Starting CLP scheduler') + log.debug('Starting CLP scheduler database service') + start_sql_db(clp_package_config.cluster_name, clp_config, host_data_directory, parsed_args.publish_ports) + + # Start execution environment + clp_execution_env_container = 
'whywhywhywhywhywhy/clp-execution-env:x86-ubuntu-focal-20210919' + clp_execution_env_startup_cmd = [ + 'docker', 'run', '-di', + '--network', clp_package_config.cluster_name, + '--hostname', f'{clp_package_config.cluster_name}', + '--name', f'{clp_package_config.cluster_name}', + '-v', f'{clp_home}:/root/clp', + '-v', f'{uncompressed_log_dir}:{uncompressed_log_dir}' + ] + if parsed_args.publish_ports: + ports_to_publish = [ + '-p', '5672:5672' # Rabbitmq + ] + clp_execution_env_startup_cmd += ports_to_publish + + # Mount data, logs, archive output directory if it is outside of the package + if not clp_config.data_directory.startswith('/root/clp'): + clp_execution_env_startup_cmd.append('-v') + clp_execution_env_startup_cmd.append(f'{host_data_directory}:{clp_config.data_directory}') + if not clp_config.logs_directory.startswith('/root/clp'): + clp_execution_env_startup_cmd.append('-v') + clp_execution_env_startup_cmd.append(f'{host_log_directory}:{clp_config.logs_directory}') + if not clp_config.archive_output.directory.startswith('/root/clp'): + clp_execution_env_startup_cmd.append('-v') + clp_execution_env_startup_cmd.append(f'{host_archive_out_directory}:{clp_config.archive_output.directory}') + clp_execution_env_startup_cmd.append(clp_execution_env_container) + log.debug(' '.join(clp_execution_env_startup_cmd)) + subprocess.run(clp_execution_env_startup_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + + # Copy config file into container + copy_cmd = ['docker', 'cp', str(host_config_file_path), + f'{clp_package_config.cluster_name}:{container_config_file_path}'] + log.debug(' '.join(copy_cmd)) + subprocess.run(copy_cmd) + + if need_to_start_scheduler: + provision_rabbitmq(clp_package_config.cluster_name, clp_config) + create_sql_db_tables(clp_package_config.cluster_name, container_config_file_path) + start_scheduler(clp_package_config.cluster_name, clp_config, container_config_file_path) + if need_to_start_worker: + start_worker(clp_package_config.cluster_name, clp_config, num_cpus) + except subprocess.CalledProcessError as ex: + log.error(ex.stdout.decode('utf-8')) + log.error(f'Failed to provision "{clp_package_config.cluster_name}"') + except EnvironmentError as ex: + log.error(ex) + log.error(f'Failed to provision "{clp_package_config.cluster_name}"') + + +if '__main__' == __name__: + main(sys.argv) diff --git a/components/package-template/src/sbin/stop-clp b/components/package-template/src/sbin/stop-clp new file mode 100755 index 000000000..0abbf493c --- /dev/null +++ b/components/package-template/src/sbin/stop-clp @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 + +import argparse +import json +import logging +import os +import pathlib +import subprocess +import sys + +# Setup logging +# Create logger +log = logging.getLogger('clp') +log.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +log.addHandler(logging_console_handler) + + +def get_clp_home(): + clp_home = None + if 'CLP_HOME' in os.environ: + clp_home = pathlib.Path(os.environ['CLP_HOME']) + else: + for path in pathlib.Path(__file__).resolve().parents: + if 'sbin' == path.name: + clp_home = path.parent + break + + if clp_home is None: + log.error('CLP_HOME is not set and could not be determined automatically.') + return None + elif not clp_home.exists(): + log.error('CLP_HOME does not exist.') + return None + + 
return clp_home.resolve() + + +def load_bundled_python_lib_path(clp_home): + python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' + if not python_site_packages_path.is_dir(): + log.error('Failed to load python3 packages bundled with CLP.') + return -1 + # Add packages to the front of the path + sys.path.insert(0, str(python_site_packages_path)) + + +clp_home = get_clp_home() +if clp_home is None: + sys.exit(-1) +load_bundled_python_lib_path(clp_home) + +from clp_py_utils.core import read_yaml_config_file +from clp_py_utils.clp_package_config import CLPPackageConfig +from pydantic import ValidationError + + +def inspect_docker_network_bridge(clp_cluster_name: str): + cmd = ['docker', 'network', 'inspect', clp_cluster_name] + log.info('Inspecting docker network bridge') + log.debug(' '.join(cmd)) + proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if 0 != proc.returncode: + log.error(f'Cluster "{clp_cluster_name}" has not been provisioned.') + raise EnvironmentError + + bridge_bridge_specification = json.loads(proc.stdout.decode('utf-8'))[0] + + return bridge_bridge_specification + + +def main(argv): + args_parser = argparse.ArgumentParser(description='Startup script for CLP') + args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + parsed_args = args_parser.parse_args(argv[1:]) + + # Infer config file path + try: + if not parsed_args.config: + # Did not provide a config file + default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' + if not default_clp_package_config_file.exists(): + raise FileNotFoundError + log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') + package_config_file_path = default_clp_package_config_file + else: + # Provided a config file + package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) + except FileNotFoundError: + log.error('Did not provide a clp package config file or the specified config file does not exist.') + return + + try: + clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) + except ValidationError as err: + log.error(err) + return + except Exception as ex: + # read_yaml_config_file already logs the parsing error inside + return + + clp_cluster_name = clp_package_config.cluster_name + try: + bridge_bridge_specification = inspect_docker_network_bridge(clp_cluster_name) + for container_id in bridge_bridge_specification['Containers']: + # Stop and remove container + log.debug(f'Removing container {container_id}') + subprocess.run(['docker', 'stop', container_id], stdout=subprocess.DEVNULL) + subprocess.run(['docker', 'rm', container_id], stdout=subprocess.DEVNULL) + log.debug(f'Removing docker network bridge {clp_cluster_name}') + subprocess.run(['docker', 'network', 'rm', clp_cluster_name], stdout=subprocess.DEVNULL) + except EnvironmentError: + log.error(f'Failed to decommission "{clp_cluster_name}"') + else: + log.info(f'Successfully decommissioned "{clp_cluster_name}"') + + +if '__main__' == __name__: + main(sys.argv) diff --git a/config/build-clp-package.yaml b/config/build-clp-package.yaml new file mode 100644 index 000000000..52b11a5f5 --- /dev/null +++ b/config/build-clp-package.yaml @@ -0,0 +1,16 @@ +working_dir: out +version: 0.0.0 +artifact_name: clp-package-ubuntu-focal +build_parallelism: 0 # 0 -> attempt to compile with available cores on the machine +builder_dockerhub_image: 
whywhywhywhywhywhy/builders:x86-ubuntu-focal-20210915 +components: + - name: package-template + type: local + - name: compression-job-handler + type: local + - name: job-orchestration + type: local + - name: clp-py-utils + type: local + - name: core + type: local \ No newline at end of file diff --git a/tools/packager/.gitignore b/tools/packager/.gitignore new file mode 100644 index 000000000..9fb18b426 --- /dev/null +++ b/tools/packager/.gitignore @@ -0,0 +1,2 @@ +.idea +out diff --git a/tools/packager/LICENSE b/tools/packager/LICENSE new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/tools/packager/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/tools/packager/README.md b/tools/packager/README.md new file mode 100644 index 000000000..ab8a99603 --- /dev/null +++ b/tools/packager/README.md @@ -0,0 +1,38 @@ +# Packager + +Packager is a tool for generating a runnable CLP package by automatically downloading CLP's source, +compiling, and bundling it. + +## Requirements + +* 10GB of disk space +* At least 2GB of RAM +* An active internet connection +* [Docker](https://docs.docker.com/engine/install/) + * `docker` should be in the user's path, and + * [runnable without superuser privileges](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user) + (without sudo) +* `python3`, `pip`, and `git` pre-installed and available on the user's path + * For systems with a Python version < 3.7, run `pip3 install -r requirements-pre-3.7.txt` + +## Building the package + +```bash +pip3 install -r requirements.txt +python3 build-clp-package.py --config ../../config/build-clp-package.yaml +``` + +* After a successful build, the package will be available at `out/.tar.gz`. +* The package README.md is copied from [package-template](../../components/package-template). 
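+
+The exact tarball name depends on the `artifact_name`, `version`, and host architecture used in the build
+config, and the startup script is assumed to be installed as `sbin/start-clp` alongside `sbin/stop-clp`; as a
+rough sketch, the built package can be extracted and run as follows:
+
+```bash
+# Hypothetical archive name; substitute the tarball produced under out/ by your build config
+tar -xzf out/clp-package-ubuntu-focal-x86_64-v0.0.0.tar.gz
+cd clp-package-ubuntu-focal-x86_64-v0.0.0
+
+# Provision the Docker network bridge, database, queue, scheduler, and workers
+sbin/start-clp --uncompressed-logs-dir /path/to/uncompressed/logs
+
+# ...compress and search logs...
+
+# Stop and remove the containers and the Docker network bridge
+sbin/stop-clp
+```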
+ + +## Troubleshooting + +### ModuleNotFoundError + +**Error message**: ```ModuleNotFoundError: No module named 'dataclasses'``` + +**Cause**: When starting the package on some older platforms like Ubuntu 18.04, some required Python modules are not in +the standard library + +**Solution**: `pip install -r requirements-pre-3.7.txt` diff --git a/tools/packager/build-clp-package.py b/tools/packager/build-clp-package.py new file mode 100644 index 000000000..99dbbc9ac --- /dev/null +++ b/tools/packager/build-clp-package.py @@ -0,0 +1,266 @@ +import argparse +import logging +import os +import pathlib +import platform +import shutil +import subprocess +import sys +import typing +import uuid +from concurrent.futures import ProcessPoolExecutor + +import psutil +import yaml +from pydantic import BaseModel, validator + +# Setup logging +# Create logger +log = logging.getLogger('build-clp-package') +log.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_console_handler.setFormatter(logging_formatter) +log.addHandler(logging_console_handler) + + +class ClpComponent(BaseModel): + name: str + type: str + url: str = None + branch: str = None + commit: str = None + + @validator('name', always=True) + def component_name_validation(cls, v): + currently_supported_component_names = [ + 'package-template', + 'compression-job-handler', + 'job-orchestration', + 'clp-py-utils', + 'core', + ] + if v not in currently_supported_component_names: + raise ValueError(f'The specified clp component name "{v}" not supported') + return v + + @validator('type', always=True) + def component_type_validation(cls, v, values, **kwargs): + if 'git' == v: + if not values['url']: + raise ValueError('git url must be specified') + parameter_count = int(values['branch']) + int(values['commit']) + if 0 == parameter_count: + raise ValueError('git branch or commit must be specified') + elif 2 == parameter_count: + raise ValueError('can only specify either git branch or commit') + elif 'local' == v: + pass # Nothing needs to be validated + else: + raise ValueError(f'The specified clp component type "{v}" not supported') + return v + + +class PackagingConfig(BaseModel): + working_dir: str + version: str + arch: str = platform.machine() + artifact_name: str + build_parallelism: int + builder_dockerhub_image: str + components: typing.List[ClpComponent] + + +def check_dependencies(): + try: + subprocess.run('command -v git', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + except subprocess.CalledProcessError: + log.error('git is not installed on the path.') + raise EnvironmentError + + try: + subprocess.run('command -v docker', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + subprocess.run(['docker', 'ps'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + except subprocess.CalledProcessError: + log.error('docker is not installed on the path or cannot run without superuser privileges (sudo).') + raise EnvironmentError + + +def replace_clp_core_version(project_dir: pathlib.Path, version: str): + target_replacement_line = 'constexpr char cVersion[] = ' + target_replacement_file_path = project_dir / 'src' / 'version.hpp' + log.info(f'Updating clp core\'s version to {version} in {target_replacement_file_path}') + with open(target_replacement_file_path, 'r') as version_file: + version_file_lines = version_file.readlines() + for idx, 
line in enumerate(version_file_lines): + if line.startswith(target_replacement_line): + version_file_lines[idx] = f'{target_replacement_line}"{version}";' + break + with open(target_replacement_file_path, 'w') as version_file: + version_file.write('\n'.join(version_file_lines)) + + +def clone_and_checkout(component: ClpComponent, working_dir: pathlib.Path): + if component.branch: + subprocess.run(['git', 'clone', '-b', component.branch, '--depth', '1', component.url, component.name], + cwd=working_dir, check=True) + elif component.commit: + subprocess.run(['git', 'clone', component.url, component.name], cwd=working_dir, check=True) + subprocess.run(['git', 'checkout', component.commit], cwd=working_dir / component.name, check=True) + + +def clone_and_checkout_clp_core(component: ClpComponent, working_dir: pathlib.Path, version: str): + clone_and_checkout(component, working_dir) + + log.info('Downloading clp core\'s submodules...') + subprocess.run(['./download-all.sh'], cwd=working_dir / 'core' / 'tools' / 'scripts' / 'deps-download') + + replace_clp_core_version(working_dir / 'core', version) + + +def main(argv): + args_parser = argparse.ArgumentParser(description='CLP package builder') + args_parser.add_argument('--config', '-c', required=True, help='Build configuration file.') + parsed_args = args_parser.parse_args(argv[1:]) + + try: + check_dependencies() + except EnvironmentError: + log.error('Unmet dependency') + return -1 + + # Parse config file + with open(parsed_args.config, 'r') as config_file: + try: + packaging_config = PackagingConfig.parse_obj(yaml.safe_load(config_file)) + except: + log.exception('Failed to parse config file.') + return -1 + + # Limit maximum build parallelization degree to minimize chance of running out of RAM + # Minimum 2GB per core to ensure successful compilation + if packaging_config.build_parallelism == 0: + build_parallelization = min(int(psutil.virtual_memory().total / (2 * 1024 * 1024 * 1024)), psutil.cpu_count()) + elif packaging_config.build_parallelism > 0: + build_parallelization = int(packaging_config.build_parallelism) + else: + log.error(f'Unsupported build_parallelism: {packaging_config.build_parallelism}') + return -1 + + # Infer install scripts directory + script_dir = pathlib.Path(__file__).parent.resolve() + host_install_scripts_dir = script_dir / 'install-scripts' + container_install_scripts_dir = pathlib.PurePath('/tmp/install-scripts') + + # Remove existing out directory to ensure clean state prior to cloning directories + host_working_dir = pathlib.Path(packaging_config.working_dir).resolve() + try: + shutil.rmtree(host_working_dir) + except FileNotFoundError: + pass + except: + log.exception(f'Failed to clean up working directory: {host_working_dir}') + return -1 + + host_working_dir.mkdir(parents=True, exist_ok=True) + container_working_directory = pathlib.PurePath('/tmp/out') + versioned_artifact_name = f'{packaging_config.artifact_name}-{packaging_config.arch}-v{packaging_config.version}' + artifact_dir = (host_working_dir / versioned_artifact_name).resolve() + + # Download or copy source code to build working directory + project_root = script_dir.parent.parent + with ProcessPoolExecutor() as executor: + for component in packaging_config.components: + if 'git' == component.type: + # For "git" type components, clone and checkout + if 'core' == component.name: + executor.submit(clone_and_checkout_clp_core, component, host_working_dir, packaging_config.version) + else: + executor.submit(clone_and_checkout, component, 
host_working_dir) + elif 'local' == component.type: + if 'core' == component.name: + log.info('Downloading clp core\'s submodules...') + cwd = project_root / 'components' / 'core' / 'tools' / 'scripts' / 'deps-download' + subprocess.run(['./download-all.sh'], cwd=cwd) + + # For "local" type components, copy + shutil.copytree(project_root / 'components' / component.name, host_working_dir / component.name) + + if 'core' == component.name: + replace_clp_core_version(host_working_dir / component.name, packaging_config.version) + + # Make a copy of package-template/src directory and name it as the {artifact_name}-{version} + shutil.copytree(host_working_dir / 'package-template' / 'src', artifact_dir) + + # Start build environment container + build_environment_container_name = f'clp-builder-{uuid.uuid4()}' + log.info(f'Starting build environment container {build_environment_container_name}') + try: + build_environment_startup_cmd = [ + 'docker', 'run', '-di', + '--name', build_environment_container_name, + '-v', f'{host_working_dir}:{container_working_directory}', + '-v', f'{host_install_scripts_dir}:{container_install_scripts_dir}', + packaging_config.builder_dockerhub_image + ] + subprocess.run(build_environment_startup_cmd, check=True) + + container_exec_prefix = [ + 'docker', 'exec', '-it', + '-e', f'WORKING_DIR={container_working_directory}', + '-e', f'ARTIFACT_NAME={versioned_artifact_name}', + '-e', f'BUILD_PARALLELISM={build_parallelization}', + '-w', str(container_working_directory), + build_environment_container_name + ] + + # Run the component installation scripts + install_cmds = [ + [str(container_install_scripts_dir / 'install-celery.sh')], + [str(container_install_scripts_dir / 'install-python-component.sh'), 'clp-py-utils'], + [str(container_install_scripts_dir / 'install-python-component.sh'), 'compression-job-handler'], + [str(container_install_scripts_dir / 'install-python-component.sh'), 'job-orchestration'], + [str(container_install_scripts_dir / 'install-core.sh')], + ] + for cmd in install_cmds: + container_exec_cmd = container_exec_prefix + cmd + log.info(' '.join(container_exec_cmd)) + subprocess.run(container_exec_cmd, check=True) + + # Set current user as owner of built files and build tar + cmds = [ + f'chown -R {os.getuid()}:{os.getgid()} {container_working_directory}', + f'tar -czf {versioned_artifact_name}.tar.gz {versioned_artifact_name}', + f'chown -R {os.getuid()}:{os.getgid()} {versioned_artifact_name}.tar.gz' + ] + for cmd in cmds: + container_exec_cmd = container_exec_prefix + cmd.split() + subprocess.run(container_exec_cmd, check=True) + except subprocess.CalledProcessError as ex: + print(ex.stdout) + log.error('Failed to build CLP') + except: + log.exception('Failed to build CLP') + finally: + # Cleanup + log.info('Cleaning up') + try: + subprocess.run(['docker', 'rm', '-f', build_environment_container_name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except: + pass + + # Verify whether artifact is generated + artifact_tarball_path = host_working_dir / f'{versioned_artifact_name}.tar.gz' + if artifact_tarball_path.exists(): + log.info(f'Artifact built successfully: {artifact_tarball_path}') + else: + log.error('Artifact build failure') + return -1 + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/tools/packager/install-scripts/install-celery.sh b/tools/packager/install-scripts/install-celery.sh new file mode 100755 index 000000000..fdf3dd9ef --- /dev/null +++ 
b/tools/packager/install-scripts/install-celery.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "Installing celery" + +pip3 install celery==5.1.2 + +bin_dir=${WORKING_DIR}/${ARTIFACT_NAME}/bin/ +mkdir -p ${bin_dir} +cp /usr/local/bin/celery ${bin_dir} diff --git a/tools/packager/install-scripts/install-core.sh b/tools/packager/install-scripts/install-core.sh new file mode 100755 index 000000000..2c8256cce --- /dev/null +++ b/tools/packager/install-scripts/install-core.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +echo "Installing CLP core" + +build_dir=/tmp/core-build + +mkdir ${build_dir} +cd ${build_dir} + +exes="clp clg" + +cmake ${WORKING_DIR}/core +make -j${BUILD_PARALLELISM} ${exes} + +bin_dir=${WORKING_DIR}/${ARTIFACT_NAME}/bin/ +mkdir -p ${bin_dir} +cp ${exes} ${bin_dir} diff --git a/tools/packager/install-scripts/install-python-component.sh b/tools/packager/install-scripts/install-python-component.sh new file mode 100755 index 000000000..950417a10 --- /dev/null +++ b/tools/packager/install-scripts/install-python-component.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +cUsage="Usage: ${BASH_SOURCE[0]} " +if [ "$#" -lt 1 ] ; then + echo $cUsage + exit +fi +component_name=$1 +python_package_name=${component_name//-/_} + +echo "Installing ${component_name}" + +cd ${WORKING_DIR}/${component_name} + +xargs --max-args=1 --max-procs=16 \ + pip install --target ${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages < requirements.txt + +cp -R ${python_package_name} ${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages diff --git a/tools/packager/requirements-pre-3.7.txt b/tools/packager/requirements-pre-3.7.txt new file mode 100644 index 000000000..60f564425 --- /dev/null +++ b/tools/packager/requirements-pre-3.7.txt @@ -0,0 +1 @@ +dataclasses==0.8 \ No newline at end of file diff --git a/tools/packager/requirements.txt b/tools/packager/requirements.txt new file mode 100644 index 000000000..b86590bcc --- /dev/null +++ b/tools/packager/requirements.txt @@ -0,0 +1,3 @@ +PyYAML==5.4 +psutil==5.8.0 +pydantic==1.8.2 \ No newline at end of file