diff --git a/.github/workflows/surrealml_core_deployment.yml b/.github/workflows/surrealml_core_deployment.yml index a22702c..5b1bf63 100644 --- a/.github/workflows/surrealml_core_deployment.yml +++ b/.github/workflows/surrealml_core_deployment.yml @@ -4,6 +4,10 @@ on: push: branches: - main + paths: + - 'src/**' + - 'build.rs' + - 'Cargo.toml' jobs: post_merge_job: @@ -15,6 +19,6 @@ jobs: - uses: katyo/publish-crates@v2 with: - path: './modules/utils' + path: './modules/core' args: --no-verify registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.github/workflows/surrealml_core_test.yml b/.github/workflows/surrealml_core_test.yml index 41acdd9..178bcf8 100644 --- a/.github/workflows/surrealml_core_test.yml +++ b/.github/workflows/surrealml_core_test.yml @@ -1,4 +1,4 @@ -name: Rust Test for surrealml-core on Pull Request +name: Run tests on Pull Request on: pull_request: @@ -20,5 +20,38 @@ jobs: toolchain: stable override: true - - name: Run Unit Tests - run: cd modules/utils && cargo test + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.11' + + - name: Pre-test Setup + run: | + python3 -m venv venv + source venv/bin/activate + pip install --upgrade pip + pip install -r requirements.txt + + # build the local version of the core module to be loaded into python + echo "Building local version of core module" + + pip install . 
+ + python ./tests/scripts/ci_local_build.py + echo "Local build complete" + + # train the models for the tests + python ./tests/scripts/build_assets.py + deactivate + + - name: Run Python Unit Tests + run: | + source venv/bin/activate + python -m unittest discover + deactivate + + - name: Run Core Unit Tests + run: cd modules/core && cargo test + + - name: Run HTTP Transfer Tests + run: cargo test diff --git a/.github/workflows/surrealml_test.yml b/.github/workflows/surrealml_test.yml deleted file mode 100644 index e7e7b5a..0000000 --- a/.github/workflows/surrealml_test.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Rust Test for surrealml on Pull Request - -on: - pull_request: - types: [opened, reopened, synchronize] - -jobs: - test_transport: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Set up Rust - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - - name: Run Unit Tests - run: cargo test diff --git a/.gitignore b/.gitignore index 3859ddc..5fdb7e2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,13 +6,18 @@ Cargo.lock build/ dist/ +__pycache__/ venv/ .idea/ surrealml.egg-info/ .vscode/ ./modules/utils/target/ -modules/utils/target/ +modules/core/target/ ./modules/onnx_driver/target/ modules/onnx_driver/target/ surrealdb_build/ -modules/utils/onnx_driver/ +modules/core/onnx_driver/ +*.so +surrealml/rust_surrealml.cpython-310-darwin.so +.surmlcache +modules/core/model_stash/ diff --git a/Cargo.toml b/Cargo.toml index 1ed07cd..f99bdbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ ndarray = "0.15.6" hyper = { version = "0.14.27", features = ["full"] } tokio = { version = "1.34.0", features = ["full"] } base64 = "0.13" -surrealml-core = { path = "./modules/utils" } +surrealml-core = { path = "./modules/core" } [dev-dependencies] axum = "0.6.20" diff --git a/README.md b/README.md index e5bd76f..879d58a 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,24 
@@ SurrealML is a feature that allows you to store trained machine learning models ## Installation -To install SurrealML, make sure you have Python installed. Then, install the SurrealML library and either PyTorch or SKLearn, based on your model choice. You can install these using pip: +To install SurrealML, make sure you have Python installed. Then, install the `SurrealML` library and either `PyTorch` or +`SKLearn`, based on your model choice. You can install the package with both `PyTorch` and `SKLearn` with the command +below: ``` -pip install surrealml -pip install torch # If using PyTorch -pip install scikit-learn # If using SKLearn +pip install "git+https://github.com/surrealdb/surrealml#egg=surrealml[sklearn,torch]" +``` + +If you want to use `SurrealML` with `sklearn` you will need the following installation: + +```bash +pip install "git+https://github.com/surrealdb/surrealml#egg=surrealml[sklearn]" +``` + +For `PyTorch`: + +```bash +pip install "git+https://github.com/surrealdb/surrealml#egg=surrealml[torch]" ``` After that, you can train your model and save it in the SurrealML format. 
@@ -47,32 +59,38 @@ will also be able to load your sk-learn models in Rust and run them meaning you Saving a model is as simple as the following: ```python -import numpy as np -from sklearn.ensemble import RandomForestClassifier -from surrealml import SurMlFile - -num_classes = 2 -X = np.random.rand(100, 28) -y = np.random.randint(num_classes, size=100) - -skl_model = RandomForestClassifier(n_estimators=10, max_depth=10) -skl_model.fit(X, y) -test_file = SurMlFile(model=skl_model, name="random forrest classifier", inputs=X, sklearn=True) -test_file.save("./test_forrest.surml") - -# load model and execute a calculation -random_floats = list(np.random.rand(28)) -test_load = SurMlFile.load("./test_forrest.surml") -print(test_load.raw_compute(random_floats, [1, -1])) -``` +from sklearn.linear_model import LinearRegression +from surrealml import SurMlFile, Engine +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR # click on this HOUSE_LINEAR to see the data -## Python tutorial using Pytorch +# train the model +model = LinearRegression() +model.fit(HOUSE_LINEAR["inputs"], HOUSE_LINEAR["outputs"]) + +# package and save the model +file = SurMlFile(model=model, name="linear", inputs=HOUSE_LINEAR["inputs"], engine=Engine.SKLEARN) + +# add columns in the order of the inputs to map dictionaries passed in to the model +file.add_column("squarefoot") +file.add_column("num_floors") + +# add normalisers for the columns +file.add_normaliser("squarefoot", "z_score", HOUSE_LINEAR["squarefoot"].mean(), HOUSE_LINEAR["squarefoot"].std()) +file.add_normaliser("num_floors", "z_score", HOUSE_LINEAR["num_floors"].mean(), HOUSE_LINEAR["num_floors"].std()) +file.add_output("house_price", "z_score", HOUSE_LINEAR["outputs"].mean(), HOUSE_LINEAR["outputs"].std()) -To carry out this example we need the following: +# save the file +file.save(path="./linear.surml") -- pytorch (pip installed for python) -- numpy -- surrealml +# load the file +new_file = 
SurMlFile.load(path="./linear.surml", engine=Engine.SKLEARN) + +# Make a prediction (both should be the same due to the perfectly correlated example data) +print(new_file.buffered_compute(value_map={"squarefoot": 5, "num_floors": 6})) +print(new_file.raw_compute(input_vector=[5, 6])) +``` + +## Python tutorial using Pytorch First we need to have one script where we create and store the model. In this example we will merely do a linear regression model to predict the house price using the number of floors and the square feet. @@ -181,9 +199,9 @@ test_inputs = torch.stack([test_squarefoot, test_num_floors], dim=1) We can now wrap our model in the `SurMlFile` object with the following code: ```python -from surrealml import SurMlFile +from surrealml import SurMlFile, Engine -file = SurMlFile(model=model, name="House Price Prediction", inputs=test_inputs) +file = SurMlFile(model=model, name="linear", inputs=inputs[:1], engine=Engine.PYTORCH) ``` The name is optional but the inputs and model are essential. We can now add some meta data to the file such as our inputs and outputs with the following code, however meta data is not essential, it just helps with some types of computation: @@ -215,29 +233,13 @@ file.save("./test.surml") If you have followed the previous steps you should have a `.surml` file saved with all our meta data. We load it with the following code: ```python -from surrealml import SurMlFile +from surrealml import SurMlFile, Engine -new_file = SurMlFile.load("./test.surml") +new_file = SurMlFile.load("./test.surml", engine=Engine.PYTORCH) ``` Our model is now loaded. We can now perform computations. -### Raw computation in Python - -If you haven't put any meta data into the file then don't worry, we can just perform a raw computation with the following command: - -```python -print(new_file.raw_compute([1.0, 2.0])) -``` - -This will just give you the outcome from the model. If you have put in the metadata then we can perform a buffered computation. 
-We can also input dimensions for the raw compute which will perform a batch computation. This can be done with the -following code: - -```python -print(new_file.raw_compute([1.0, 2.0, 3.0, 4.0]), dims=[2, 2]) -``` - ### Buffered computation in Python This is where the computation utilises the data in the header. We can do this by merely passing in a dictionary as seen below: diff --git a/build.rs b/build.rs index 8b0011b..b7a38fd 100644 --- a/build.rs +++ b/build.rs @@ -22,16 +22,16 @@ fn main() { // remove ./modules/utils/target folder if there let _ = - std::fs::remove_dir_all(Path::new("modules").join("utils").join("target")).unwrap_or(()); + std::fs::remove_dir_all(Path::new("modules").join("core").join("target")).unwrap_or(()); // create the target module folder for the utils module - let _ = std::fs::create_dir(Path::new("modules").join("utils").join("target")); - let _ = std::fs::create_dir(Path::new("modules").join("utils").join("target").join(profile)); + let _ = std::fs::create_dir(Path::new("modules").join("core").join("target")); + let _ = std::fs::create_dir(Path::new("modules").join("core").join("target").join(profile)); // copy target folder to modules/utils/target profile for the utils modules std::fs::copy( Path::new("target").join(profile).join(target_lib), - Path::new("modules").join("utils").join("target").join(profile).join(target_lib), + Path::new("modules").join("core").join("target").join(profile).join(target_lib), ) .unwrap(); } diff --git a/modules/utils/.dockerignore b/modules/core/.dockerignore similarity index 100% rename from modules/utils/.dockerignore rename to modules/core/.dockerignore diff --git a/modules/utils/.gitignore b/modules/core/.gitignore similarity index 100% rename from modules/utils/.gitignore rename to modules/core/.gitignore diff --git a/modules/utils/Cargo.toml b/modules/core/Cargo.toml similarity index 97% rename from modules/utils/Cargo.toml rename to modules/core/Cargo.toml index a6a7294..9416342 100644 --- 
a/modules/utils/Cargo.toml +++ b/modules/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "surrealml-core" -version = "0.0.7" +version = "0.0.8" edition = "2021" build = "./build.rs" description = "The core machine learning library for SurrealML that enables SurrealDB to store and load ML models" diff --git a/modules/utils/LICENSE b/modules/core/LICENSE similarity index 100% rename from modules/utils/LICENSE rename to modules/core/LICENSE diff --git a/modules/utils/README.md b/modules/core/README.md similarity index 100% rename from modules/utils/README.md rename to modules/core/README.md diff --git a/modules/utils/build.rs b/modules/core/build.rs similarity index 100% rename from modules/utils/build.rs rename to modules/core/build.rs diff --git a/modules/utils/builds/Dockerfile.linux b/modules/core/builds/Dockerfile.linux similarity index 100% rename from modules/utils/builds/Dockerfile.linux rename to modules/core/builds/Dockerfile.linux diff --git a/modules/utils/builds/Dockerfile.macos b/modules/core/builds/Dockerfile.macos similarity index 100% rename from modules/utils/builds/Dockerfile.macos rename to modules/core/builds/Dockerfile.macos diff --git a/modules/utils/builds/Dockerfile.nix b/modules/core/builds/Dockerfile.nix similarity index 100% rename from modules/utils/builds/Dockerfile.nix rename to modules/core/builds/Dockerfile.nix diff --git a/modules/utils/builds/Dockerfile.windows b/modules/core/builds/Dockerfile.windows similarity index 100% rename from modules/utils/builds/Dockerfile.windows rename to modules/core/builds/Dockerfile.windows diff --git a/modules/utils/builds/docker_configs/linux.yml b/modules/core/builds/docker_configs/linux.yml similarity index 100% rename from modules/utils/builds/docker_configs/linux.yml rename to modules/core/builds/docker_configs/linux.yml diff --git a/modules/utils/builds/docker_configs/macos.yml b/modules/core/builds/docker_configs/macos.yml similarity index 100% rename from 
modules/utils/builds/docker_configs/macos.yml rename to modules/core/builds/docker_configs/macos.yml diff --git a/modules/utils/builds/docker_configs/nix.yml b/modules/core/builds/docker_configs/nix.yml similarity index 100% rename from modules/utils/builds/docker_configs/nix.yml rename to modules/core/builds/docker_configs/nix.yml diff --git a/modules/utils/builds/docker_configs/windows.yml b/modules/core/builds/docker_configs/windows.yml similarity index 100% rename from modules/utils/builds/docker_configs/windows.yml rename to modules/core/builds/docker_configs/windows.yml diff --git a/modules/utils/docker-compose.yml b/modules/core/docker-compose.yml similarity index 100% rename from modules/utils/docker-compose.yml rename to modules/core/docker-compose.yml diff --git a/modules/utils/scripts/linux_compose.sh b/modules/core/scripts/linux_compose.sh similarity index 100% rename from modules/utils/scripts/linux_compose.sh rename to modules/core/scripts/linux_compose.sh diff --git a/modules/utils/scripts/nix_compose.sh b/modules/core/scripts/nix_compose.sh similarity index 100% rename from modules/utils/scripts/nix_compose.sh rename to modules/core/scripts/nix_compose.sh diff --git a/modules/utils/scripts/windows_compose.sh b/modules/core/scripts/windows_compose.sh similarity index 100% rename from modules/utils/scripts/windows_compose.sh rename to modules/core/scripts/windows_compose.sh diff --git a/modules/utils/src/error.rs b/modules/core/src/error.rs similarity index 100% rename from modules/utils/src/error.rs rename to modules/core/src/error.rs diff --git a/modules/utils/src/execution/compute.rs b/modules/core/src/execution/compute.rs similarity index 64% rename from modules/utils/src/execution/compute.rs rename to modules/core/src/execution/compute.rs index 5f2cbd4..de5b34e 100644 --- a/modules/utils/src/execution/compute.rs +++ b/modules/core/src/execution/compute.rs @@ -2,7 +2,7 @@ use crate::storage::surml_file::SurMlFile; use std::collections::HashMap; 
use ndarray::{ArrayD, CowArray}; -use ort::{SessionBuilder, Value}; +use ort::{SessionBuilder, Value, session::Input}; use super::onnx_environment::ENVIRONMENT; @@ -29,6 +29,24 @@ impl <'a>ModelComputation<'a> { ndarray::arr1::(&buffer).into_dyn() } + /// Creates a vector of dimensions for the input tensor from the loaded model. + /// + /// # Arguments + /// * `input_dims` - The input dimensions from the loaded model. + /// + /// # Returns + /// A vector of dimensions for the input tensor to be reshaped into from the loaded model. + fn process_input_dims(input_dims: &Input) -> Vec { + let mut buffer = Vec::new(); + for dim in input_dims.dimensions() { + match dim { + Some(dim) => buffer.push(dim as usize), + None => buffer.push(1) + } + } + buffer + } + /// Creates a Vector that can be used manipulated with other operations such as normalisation from a hashmap of keys and values. /// /// # Arguments @@ -39,7 +57,10 @@ impl <'a>ModelComputation<'a> { pub fn input_vector_from_key_bindings(&self, mut input_values: HashMap) -> Vec { let mut buffer = Vec::with_capacity(self.surml_file.header.keys.store.len()); for key in &self.surml_file.header.keys.store { - let value = input_values.get_mut(key).unwrap(); + let value = match input_values.get_mut(key) { + Some(value) => value, + None => panic!("Key {} not found in input values", key) + }; buffer.push(std::mem::take(value)); } buffer @@ -52,28 +73,14 @@ impl <'a>ModelComputation<'a> { /// /// # Returns /// The computed output tensor from the loaded model. 
- pub fn raw_compute(&self, tensor: ArrayD, dims: Option<(i32, i32)>) -> Result, String> { - - let tensor_placeholder: ArrayD; - if dims.is_some() { - let dims = dims.unwrap(); - let tensor = tensor.into_shape((dims.0 as usize, dims.1 as usize)).unwrap(); - tensor_placeholder = tensor.into_dyn(); - } - else { - tensor_placeholder = tensor; - } - - // let environment = Arc::new( - // Environment::builder() - // .with_execution_providers([ExecutionProvider::CPU(Default::default())]) - // .build() - // .map_err(|e| e.to_string())? - // ); + pub fn raw_compute(&self, tensor: ArrayD, _dims: Option<(i32, i32)>) -> Result, String> { let session = SessionBuilder::new(&ENVIRONMENT).map_err(|e| e.to_string())? .with_model_from_memory(&self.surml_file.model) .map_err(|e| e.to_string())?; - let x = CowArray::from(tensor_placeholder); + let unwrapped_dims = ModelComputation::process_input_dims(&session.inputs[0]); + let tensor = tensor.into_shape(unwrapped_dims).map_err(|e| e.to_string())?; + + let x = CowArray::from(tensor).into_dyn(); let outputs = session.run(vec![Value::from_array(session.allocator(), &x).unwrap()]).map_err(|e| e.to_string())?; let mut buffer: Vec = Vec::new(); @@ -130,7 +137,10 @@ impl <'a>ModelComputation<'a> { } // apply the normaliser to the output - let output_normaliser = self.surml_file.header.output.normaliser.as_ref().unwrap(); + let output_normaliser = match self.surml_file.header.output.normaliser.as_ref() { + Some(normaliser) => normaliser, + None => return Err(String::from("No normaliser present for output which shouldn't happen as passed initial check for")) + }; let mut buffer = Vec::with_capacity(output.len()); for value in output { @@ -148,9 +158,8 @@ mod tests { use super::*; #[test] - fn test_raw_compute() { - - let mut file = SurMlFile::from_file("./stash/test.surml").unwrap(); + fn test_raw_compute_linear_sklearn() { + let mut file = SurMlFile::from_file("./model_stash/sklearn/surml/linear.surml").unwrap(); let model_computation = 
ModelComputation { surml_file: &mut file, }; @@ -159,15 +168,17 @@ mod tests { input_values.insert(String::from("squarefoot"), 1000.0); input_values.insert(String::from("num_floors"), 2.0); - let output = model_computation.raw_compute(model_computation.input_tensor_from_key_bindings(input_values), None).unwrap(); + let raw_input = model_computation.input_tensor_from_key_bindings(input_values); + + let output = model_computation.raw_compute(raw_input, Some((1, 2))).unwrap(); assert_eq!(output.len(), 1); - assert_eq!(output[0], 725.42053); + assert_eq!(output[0], 985.57745); } #[test] - fn test_buffered_compute() { - let mut file = SurMlFile::from_file("./stash/test.surml").unwrap(); + fn test_buffered_compute_linear_sklearn() { + let mut file = SurMlFile::from_file("./model_stash/sklearn/surml/linear.surml").unwrap(); let model_computation = ModelComputation { surml_file: &mut file, }; @@ -178,22 +189,37 @@ mod tests { let output = model_computation.buffered_compute(&mut input_values).unwrap(); assert_eq!(output.len(), 1); - assert_eq!(output[0], 725.42053); } #[test] - fn test_raw_compute_trees() { - let mut file = SurMlFile::from_file("./stash/forrest.surml").unwrap(); - let model_computation = ModelComputation { - surml_file: &mut file, - }; + fn test_raw_compute_linear_torch() { + let mut file = SurMlFile::from_file("./model_stash/torch/surml/linear.surml").unwrap(); + let model_computation = ModelComputation { + surml_file: &mut file, + }; + + let mut input_values = HashMap::new(); + input_values.insert(String::from("squarefoot"), 1000.0); + input_values.insert(String::from("num_floors"), 2.0); + + let raw_input = model_computation.input_tensor_from_key_bindings(input_values); + + let output = model_computation.raw_compute(raw_input, None).unwrap(); + assert_eq!(output.len(), 1); + } + + #[test] + fn test_buffered_compute_linear_torch() { + let mut file = SurMlFile::from_file("./model_stash/torch/surml/linear.surml").unwrap(); + let model_computation = 
ModelComputation { + surml_file: &mut file, + }; + + let mut input_values = HashMap::new(); + input_values.insert(String::from("squarefoot"), 1000.0); + input_values.insert(String::from("num_floors"), 2.0); - let x = vec![0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]; - let data: ArrayD = ndarray::arr1(&x).into_dyn(); - let data: ArrayD = data.into_shape((1, 28)).unwrap().into_dyn(); - - let output = model_computation.raw_compute(data, None).unwrap(); - assert_eq!(output.len(), 1); - assert_eq!(output[0], 0.0); + let output = model_computation.buffered_compute(&mut input_values).unwrap(); + assert_eq!(output.len(), 1); } } diff --git a/modules/utils/src/execution/mod.rs b/modules/core/src/execution/mod.rs similarity index 100% rename from modules/utils/src/execution/mod.rs rename to modules/core/src/execution/mod.rs diff --git a/modules/utils/src/execution/onnx_environment.rs b/modules/core/src/execution/onnx_environment.rs similarity index 100% rename from modules/utils/src/execution/onnx_environment.rs rename to modules/core/src/execution/onnx_environment.rs diff --git a/modules/utils/src/lib.rs b/modules/core/src/lib.rs similarity index 100% rename from modules/utils/src/lib.rs rename to modules/core/src/lib.rs diff --git a/modules/utils/src/storage/header/engine.rs b/modules/core/src/storage/header/engine.rs similarity index 100% rename from modules/utils/src/storage/header/engine.rs rename to modules/core/src/storage/header/engine.rs diff --git a/modules/utils/src/storage/header/input_dims.rs b/modules/core/src/storage/header/input_dims.rs similarity index 100% rename from modules/utils/src/storage/header/input_dims.rs rename to modules/core/src/storage/header/input_dims.rs diff --git a/modules/utils/src/storage/header/keys.rs b/modules/core/src/storage/header/keys.rs similarity index 100% rename from modules/utils/src/storage/header/keys.rs rename to 
modules/core/src/storage/header/keys.rs diff --git a/modules/utils/src/storage/header/mod.rs b/modules/core/src/storage/header/mod.rs similarity index 100% rename from modules/utils/src/storage/header/mod.rs rename to modules/core/src/storage/header/mod.rs diff --git a/modules/utils/src/storage/header/normalisers/clipping.rs b/modules/core/src/storage/header/normalisers/clipping.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/clipping.rs rename to modules/core/src/storage/header/normalisers/clipping.rs diff --git a/modules/utils/src/storage/header/normalisers/linear_scaling.rs b/modules/core/src/storage/header/normalisers/linear_scaling.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/linear_scaling.rs rename to modules/core/src/storage/header/normalisers/linear_scaling.rs diff --git a/modules/utils/src/storage/header/normalisers/log_scale.rs b/modules/core/src/storage/header/normalisers/log_scale.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/log_scale.rs rename to modules/core/src/storage/header/normalisers/log_scale.rs diff --git a/modules/utils/src/storage/header/normalisers/mod.rs b/modules/core/src/storage/header/normalisers/mod.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/mod.rs rename to modules/core/src/storage/header/normalisers/mod.rs diff --git a/modules/utils/src/storage/header/normalisers/traits.rs b/modules/core/src/storage/header/normalisers/traits.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/traits.rs rename to modules/core/src/storage/header/normalisers/traits.rs diff --git a/modules/utils/src/storage/header/normalisers/utils.rs b/modules/core/src/storage/header/normalisers/utils.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/utils.rs rename to modules/core/src/storage/header/normalisers/utils.rs diff --git 
a/modules/utils/src/storage/header/normalisers/wrapper.rs b/modules/core/src/storage/header/normalisers/wrapper.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/wrapper.rs rename to modules/core/src/storage/header/normalisers/wrapper.rs diff --git a/modules/utils/src/storage/header/normalisers/z_score.rs b/modules/core/src/storage/header/normalisers/z_score.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/z_score.rs rename to modules/core/src/storage/header/normalisers/z_score.rs diff --git a/modules/utils/src/storage/header/origin.rs b/modules/core/src/storage/header/origin.rs similarity index 100% rename from modules/utils/src/storage/header/origin.rs rename to modules/core/src/storage/header/origin.rs diff --git a/modules/utils/src/storage/header/output.rs b/modules/core/src/storage/header/output.rs similarity index 100% rename from modules/utils/src/storage/header/output.rs rename to modules/core/src/storage/header/output.rs diff --git a/modules/utils/src/storage/header/string_value.rs b/modules/core/src/storage/header/string_value.rs similarity index 100% rename from modules/utils/src/storage/header/string_value.rs rename to modules/core/src/storage/header/string_value.rs diff --git a/modules/utils/src/storage/header/version.rs b/modules/core/src/storage/header/version.rs similarity index 100% rename from modules/utils/src/storage/header/version.rs rename to modules/core/src/storage/header/version.rs diff --git a/modules/utils/src/storage/mod.rs b/modules/core/src/storage/mod.rs similarity index 100% rename from modules/utils/src/storage/mod.rs rename to modules/core/src/storage/mod.rs diff --git a/modules/utils/src/storage/stream_adapter.rs b/modules/core/src/storage/stream_adapter.rs similarity index 100% rename from modules/utils/src/storage/stream_adapter.rs rename to modules/core/src/storage/stream_adapter.rs diff --git a/modules/utils/src/storage/surml_file.rs 
b/modules/core/src/storage/surml_file.rs similarity index 100% rename from modules/utils/src/storage/surml_file.rs rename to modules/core/src/storage/surml_file.rs diff --git a/modules/utils/stash/forrest.surml b/modules/core/stash/forrest.surml similarity index 100% rename from modules/utils/stash/forrest.surml rename to modules/core/stash/forrest.surml diff --git a/modules/utils/stash/forrest_test.onnx b/modules/core/stash/forrest_test.onnx similarity index 100% rename from modules/utils/stash/forrest_test.onnx rename to modules/core/stash/forrest_test.onnx diff --git a/modules/utils/stash/linear_test.onnx b/modules/core/stash/linear_test.onnx similarity index 100% rename from modules/utils/stash/linear_test.onnx rename to modules/core/stash/linear_test.onnx diff --git a/modules/utils/stash/test.surml b/modules/core/stash/test.surml similarity index 100% rename from modules/utils/stash/test.surml rename to modules/core/stash/test.surml diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..924b1f0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +numpy==1.26.3 +skl2onnx==1.16.0 +scikit-learn==1.4.0 +torch==2.1.2 +onnx==1.15.0 +onnxruntime==1.16.3 \ No newline at end of file diff --git a/scripts/local_build.sh b/scripts/local_build.sh new file mode 100644 index 0000000..bf1c130 --- /dev/null +++ b/scripts/local_build.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# navigate to directory +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +cd $SCRIPTPATH + + +delete_directory() { + dir_path="$1" + + if [ -d "$dir_path" ]; then + rm -rf "$dir_path" + echo "Directory '$dir_path' has been deleted." + else + echo "Directory '$dir_path' does not exist." + fi +} + +delete_file() { + file_path="$1" + + if [ -f "$file_path" ]; then + rm "$file_path" + echo "File '$file_path' has been deleted." + else + echo "File '$file_path' does not exist." + fi +} + + +cd .. 
+ +delete_directory ./build +delete_directory ./tests/venv +cd tests +python3 -m venv venv +source venv/bin/activate +cd .. +pip install --no-cache-dir . diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh new file mode 100644 index 0000000..cf05c54 --- /dev/null +++ b/scripts/run_tests.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# navigate to directory +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +cd $SCRIPTPATH + +cd .. + + +python -m unittest discover diff --git a/setup.py b/setup.py index 95ffc0a..6123ebc 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,11 @@ rust_extensions=[RustExtension("surrealml.rust_surrealml", binding=Binding.PyO3)], packages=[ "surrealml", - # "surrealdb.execution_mixins" + "surrealml.engine", + "surrealml.model_templates", + "surrealml.model_templates.datasets", + "surrealml.model_templates.sklearn", + "surrealml.model_templates.torch", ], package_data={ "surrealml": ["binaries/*"], @@ -27,9 +31,15 @@ zip_safe=False, include_package_data=True, requirements=[ - "pyyaml>=3.13", - "numpy", - "torch==2.0.0", - "hummingbird-ml==0.4.9" - ] + "numpy==1.26.3", + ], + extras_require={ + "sklearn": [ + "skl2onnx==1.16.0", + "scikit-learn==1.4.0" + ], + "torch": [ + "torch==2.1.2" + ] + } ) diff --git a/src/python_apis/execution.rs b/src/python_apis/execution.rs index 1f17173..ddc55fd 100644 --- a/src/python_apis/execution.rs +++ b/src/python_apis/execution.rs @@ -16,8 +16,14 @@ use crate::python_state::PYTHON_STATE; /// The computed output vector from the loaded model. 
#[pyfunction] pub fn raw_compute(file_id: String, input_vector: Vec, dims: Option<(i32, i32)>) -> Vec { - let mut python_state = PYTHON_STATE.lock().unwrap(); - let mut file = python_state.get_mut(&file_id).unwrap(); + let mut python_state = match PYTHON_STATE.lock() { + Ok(state) => state, + Err(error) => panic!("{}", format!("Error getting python state: {}", error)) + }; + let mut file = match python_state.get_mut(&file_id) { + Some(file) => file, + None => panic!("File not found for id: {}, here is the state: {:?}", file_id, python_state.keys()) + }; let tensor = ndarray::arr1(&input_vector).into_dyn(); let compute_unit = ModelComputation { surml_file: &mut file @@ -36,8 +42,14 @@ pub fn raw_compute(file_id: String, input_vector: Vec, dims: Option<(i32, i /// The computed output vector from the loaded model. #[pyfunction] pub fn buffered_compute(file_id: String, mut input_values_map: HashMap) -> Vec { - let mut python_state = PYTHON_STATE.lock().unwrap(); - let mut file = python_state.get_mut(&file_id).unwrap(); + let mut python_state = match PYTHON_STATE.lock() { + Ok(state) => state, + Err(error) => panic!("{}", format!("Error getting python state: {}", error)) + }; + let mut file = match python_state.get_mut(&file_id) { + Some(file) => file, + None => panic!("File not found for id: {}, here is the state: {:?}", file_id, python_state.keys()) + }; let compute_unit = ModelComputation { surml_file: &mut file diff --git a/src/python_apis/storage.rs b/src/python_apis/storage.rs index 46f6784..5a9e579 100644 --- a/src/python_apis/storage.rs +++ b/src/python_apis/storage.rs @@ -17,7 +17,7 @@ use hyper::header::CONTENT_TYPE; use hyper::{Client, Uri}; use hyper::header::AUTHORIZATION; use hyper::header::HeaderValue; -use base64::{encode}; +use base64::encode; use crate::python_state::{PYTHON_STATE, generate_unique_id}; use surrealml_core::storage::stream_adapter::StreamAdapter; @@ -40,7 +40,7 @@ pub fn load_model(file_path: String) -> String { } -/// Saves a model to a 
file. +/// Saves a model to a file, deleting the file from the `PYTHON_STATE` in the process. /// /// # Arguments /// * `file_path` - The path to the file to save to. diff --git a/src/transport.rs b/src/transport.rs index 8602309..b764fbc 100644 --- a/src/transport.rs +++ b/src/transport.rs @@ -38,7 +38,7 @@ async fn root(mut stream: BodyStream) -> &'static str { surml_file: &mut file }; let result = computert_unit.buffered_compute(&mut input_values).unwrap(); - assert_eq!(result[0], 1.2747419); + println!("Result: {:?}", result); return "Hello root" } @@ -54,10 +54,10 @@ async fn run_server() { } -async fn send_request() { +async fn send_request(path: &str) { let client = Client::new(); let uri: Uri = "http://0.0.0.0:4000".parse().unwrap(); - let generator = StreamAdapter::new(5, "./test.surml".to_string()); + let generator = StreamAdapter::new(5, path.to_string()); let body = Body::wrap_stream(generator); let req = Request::post(uri).body(body).unwrap(); let response = client.request(req).await.unwrap(); @@ -71,7 +71,7 @@ mod tests { use std::thread; #[test] - fn test_server() { + fn test_server_sklearn() { let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); let _server_task = tokio_runtime.spawn( async { run_server().await; @@ -79,7 +79,22 @@ mod tests { let sleep_time = std::time::Duration::from_secs(1); tokio_runtime.block_on( async { - send_request().await; + send_request("./modules/core/model_stash/sklearn/surml/linear.surml").await; + }); + + thread::sleep(sleep_time); + } + + #[test] + fn test_server_torch() { + let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); + let _server_task = tokio_runtime.spawn( async { + run_server().await; + }); + + let sleep_time = std::time::Duration::from_secs(1); + tokio_runtime.block_on( async { + send_request("./modules/core/model_stash/torch/surml/linear.surml").await; }); thread::sleep(sleep_time); diff --git a/surrealml/__init__.py b/surrealml/__init__.py index 38722ac..fcd35cc 100644 --- 
a/surrealml/__init__.py +++ b/surrealml/__init__.py @@ -1 +1,2 @@ -from surrealml.surml_file import SurMlFile \ No newline at end of file +from surrealml.surml_file import SurMlFile +from surrealml.engine import Engine diff --git a/surrealml/engine/__init__.py b/surrealml/engine/__init__.py new file mode 100644 index 0000000..7dfa704 --- /dev/null +++ b/surrealml/engine/__init__.py @@ -0,0 +1,18 @@ +from enum import Enum + +from surrealml.engine.sklearn import SklearnOnnxAdapter +from surrealml.engine.torch import TorchOnnxAdapter + + +class Engine(Enum): + """ + The Engine enum is used to specify the engine to use for a given model. + + Attributes: + PYTORCH: The PyTorch engine which will be PyTorch and ONNX. + NATIVE: The native engine which will be native rust and linfa. + SKLEARN: The sklearn engine which will be sklearn and ONNX + """ + PYTORCH = "pytorch" + NATIVE = "native" + SKLEARN = "sklearn" diff --git a/surrealml/engine/sklearn.py b/surrealml/engine/sklearn.py new file mode 100644 index 0000000..ed5671b --- /dev/null +++ b/surrealml/engine/sklearn.py @@ -0,0 +1,38 @@ +try: + import skl2onnx +except ImportError: + skl2onnx = None + +from surrealml.engine.utils import create_file_cache_path + + +class SklearnOnnxAdapter: + + @staticmethod + def check_dependency() -> None: + """ + Checks if the sklearn dependency is installed raising an error if not. + Please call this function when performing any sklearn related operations. + """ + if skl2onnx is None: + raise ImportError("sklearn feature needs to be installed to use sklearn features") + + @staticmethod + def save_model_to_onnx(model, inputs) -> str: + """ + Saves a sklearn model to an onnx file. + + :param model: the sklearn model to convert. + :param inputs: the inputs to the model needed to trace the model + :return: the path to the cache created with a unique id to prevent collisions. 
+ """ + SklearnOnnxAdapter.check_dependency() + file_path = create_file_cache_path() + # the below check is to satisfy type checkers + if skl2onnx is not None: + onnx = skl2onnx.to_onnx(model, inputs) + + with open(file_path, "wb") as f: + f.write(onnx.SerializeToString()) + + return file_path diff --git a/surrealml/engine/torch.py b/surrealml/engine/torch.py new file mode 100644 index 0000000..38e097f --- /dev/null +++ b/surrealml/engine/torch.py @@ -0,0 +1,35 @@ +try: + import torch +except ImportError: + torch = None + +from surrealml.engine.utils import create_file_cache_path + + +class TorchOnnxAdapter: + + @staticmethod + def check_dependency() -> None: + """ + Checks if the sklearn dependency is installed raising an error if not. + Please call this function when performing any sklearn related operations. + """ + if torch is None: + raise ImportError("torch feature needs to be installed to use torch features") + + @staticmethod + def save_model_to_onnx(model, inputs) -> str: + """ + Saves a torch model to an onnx file. + + :param model: the torch model to convert. + :param inputs: the inputs to the model needed to trace the model + :return: the path to the cache created with a unique id to prevent collisions. + """ + # the dynamic import it to prevent the torch dependency from being required for the whole package. + file_path = create_file_cache_path() + # below is to satisfy type checkers + if torch is not None: + traced_script_module = torch.jit.trace(model, inputs) + torch.onnx.export(traced_script_module, inputs, file_path) + return file_path diff --git a/surrealml/engine/utils.py b/surrealml/engine/utils.py new file mode 100644 index 0000000..ff69a36 --- /dev/null +++ b/surrealml/engine/utils.py @@ -0,0 +1,17 @@ +import os +import uuid + + +def create_file_cache_path(): + """ + Creates a file cache path for the model (creating the file cache if not there). + + :return: the path to the cache created with a unique id to prevent collisions. 
+ """ + cache_folder = '.surmlcache' + + if not os.path.exists(cache_folder): + os.makedirs(cache_folder) + unique_id = str(uuid.uuid4()) + file_name = f"{unique_id}.surml" + return os.path.join(cache_folder, file_name) diff --git a/surrealml/engine_enum.py b/surrealml/engine_enum.py deleted file mode 100644 index 629944b..0000000 --- a/surrealml/engine_enum.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -This file contains the Engine enum, which is used to specify the engine to use for a given model. -""" -from enum import Enum - - -class Engine(Enum): - """ - The Engine enum is used to specify the engine to use for a given model. - - Attributes: - PYTORCH: The PyTorch engine which will be PyTorch and tch-rs. - NATIVE: The native engine which will be native rust and linfa. - UNDEFINED: The undefined engine which will be used when the engine is not defined. - """ - PYTORCH = "pytorch" - NATIVE = "native" - UNDEFINED = "" diff --git a/surrealml/model_cache.py b/surrealml/model_cache.py deleted file mode 100644 index 5aa1135..0000000 --- a/surrealml/model_cache.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -Defines the functionality of caching and processing a sklearn model. -""" -import os -import shutil -import uuid -import zipfile - -import torch -from hummingbird.ml import convert - - -class SkLearnModelCache: - """ - This class is responsible for caching and converting an sklearn model to a torchscript model. - """ - - @staticmethod - def create_file_cache(): - """ - Creates a file cache for the model. - - :return: the path to the cache created with a unique id to prevent collisions. - """ - cache_folder = '.surmlcache' - - if not os.path.exists(cache_folder): - os.makedirs(cache_folder) - unique_id = str(uuid.uuid4()) - file_name = f"{unique_id}.surml" - return os.path.join(cache_folder, file_name) - - # @staticmethod - # def cache_model(model, inputs, name=None): - # """ - # Caches a model and returns the file id. 
- # - # :param model: - # :param inputs: - # :param name: - # :return: - # """ - # file_path = SkLearnModelCache.create_file_cache() - # - # traced_script_module = torch.jit.trace(model, inputs) - # traced_script_module.save(file_path) - # file_id = load_cached_raw_model(str(file_path)) - # os.remove(file_path) - # if name is not None: - # add_name(file_id, name) - # return file_id - - @staticmethod - def convert_sklearn_model(model, inputs): - """ - Converts the sklearn model to a torchscript model. - - :param model: the sklearn model to convert. - :param inputs: the inputs to the model needed to trace the model - :return: the converted model. - """ - file_path = SkLearnModelCache.create_file_cache() - model = convert(model, 'torch.jit', inputs) - file_path = str(file_path).replace(".surml", "") - model.save(file_path) - zip_path = str(file_path) + ".zip" - - # Open the zip archive - with zipfile.ZipFile(zip_path, 'r') as zip_ref: - # Extract all the contents to the specified directory - zip_ref.extractall(file_path) - - model = torch.jit.load(os.path.join(file_path, "deploy_model.zip")) - shutil.rmtree(file_path) - os.remove(zip_path) - return model diff --git a/surrealml/model_templates/__init__.py b/surrealml/model_templates/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/model_templates/datasets/__init__.py b/surrealml/model_templates/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/model_templates/datasets/house_linear.py b/surrealml/model_templates/datasets/house_linear.py new file mode 100644 index 0000000..e592c28 --- /dev/null +++ b/surrealml/model_templates/datasets/house_linear.py @@ -0,0 +1,41 @@ +import numpy as np + + +raw_squarefoot = np.array([1000, 1200, 1500, 1800, 2000, 2200, 2500, 2800, 3000, 3200], dtype=np.float32) +raw_num_floors = np.array([1, 1, 1.5, 1.5, 2, 2, 2.5, 2.5, 3, 3], dtype=np.float32) +raw_house_price = np.array([200000, 230000, 280000, 320000, 350000, 380000, 
420000, 470000, 500000, 520000], + dtype=np.float32) +squarefoot = (raw_squarefoot - raw_squarefoot.mean()) / raw_squarefoot.std() +num_floors = (raw_num_floors - raw_num_floors.mean()) / raw_num_floors.std() +house_price = (raw_house_price - raw_house_price.mean()) / raw_house_price.std() +inputs = np.column_stack((squarefoot, num_floors)) + + +HOUSE_LINEAR = { + "inputs": inputs, + "outputs": house_price, + + "squarefoot": squarefoot, + "num_floors": num_floors, + "input order": ["squarefoot", "num_floors"], + "raw_inputs": { + "squarefoot": raw_squarefoot, + "num_floors": raw_num_floors, + }, + "normalised_inputs": { + "squarefoot": squarefoot, + "num_floors": num_floors, + }, + "normalisers": { + "squarefoot": { + "type": "z_score", + "mean": squarefoot.mean(), + "std": squarefoot.std() + }, + "num_floors": { + "type": "z_score", + "mean": num_floors.mean(), + "std": num_floors.std() + } + }, +} diff --git a/surrealml/model_templates/sklearn/__init__.py b/surrealml/model_templates/sklearn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/model_templates/sklearn/sklearn_linear.py b/surrealml/model_templates/sklearn/sklearn_linear.py new file mode 100644 index 0000000..e5f6896 --- /dev/null +++ b/surrealml/model_templates/sklearn/sklearn_linear.py @@ -0,0 +1,43 @@ +""" +Trains a linear regression model using sklearn. This is a basic model that can be used for testing. +""" +from sklearn.linear_model import LinearRegression + +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR + + +def train_model(): + """ + Trains a linear regression model using sklearn. This is a basic model that can be used for testing. + """ + model = LinearRegression() + model.fit(HOUSE_LINEAR["inputs"], HOUSE_LINEAR["outputs"]) + return model + + +def export_model_onnx(model): + """ + Exports the model to ONNX format. + + :param model: the model to export. + :return: the path to the exported model. 
+ """ + import skl2onnx + return skl2onnx.to_onnx(model, HOUSE_LINEAR["inputs"]) + + +def export_model_surml(model): + """ + Exports the model to SURML format. + + :param model: the model to export. + :return: the path to the exported model. + """ + from surrealml import SurMlFile, Engine + file = SurMlFile(model=model, name="linear", inputs=HOUSE_LINEAR["inputs"], engine=Engine.SKLEARN) + file.add_column("squarefoot") + file.add_column("num_floors") + file.add_normaliser("squarefoot", "z_score", HOUSE_LINEAR["squarefoot"].mean(), HOUSE_LINEAR["squarefoot"].std()) + file.add_normaliser("num_floors", "z_score", HOUSE_LINEAR["num_floors"].mean(), HOUSE_LINEAR["num_floors"].std()) + file.add_output("house_price", "z_score", HOUSE_LINEAR["outputs"].mean(), HOUSE_LINEAR["outputs"].std()) + return file diff --git a/surrealml/model_templates/torch/__init__.py b/surrealml/model_templates/torch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/model_templates/torch/torch_linear.py b/surrealml/model_templates/torch/torch_linear.py new file mode 100644 index 0000000..e38c514 --- /dev/null +++ b/surrealml/model_templates/torch/torch_linear.py @@ -0,0 +1,90 @@ +""" +Trains a linear regression model in torch. Should be used for testing certain processes +for linear regression and torch. +""" +import torch +import torch.nn as nn +import torch.optim as optim + +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR + + +class LinearRegressionModel(nn.Module): + def __init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(2, 1) # 2 input features, 1 output + + def forward(self, x): + return self.linear(x) + + +def train_model(): + """ + Trains a linear regression model in torch. Should be used for testing certain processes. 
+ """ + tensor = [ + torch.from_numpy(HOUSE_LINEAR["squarefoot"]), + torch.from_numpy(HOUSE_LINEAR["num_floors"]) + ] + X = torch.stack(tensor, dim=1) + + # Initialize the model + model = LinearRegressionModel() + + # Define the loss function and optimizer + criterion = nn.MSELoss() + optimizer = optim.SGD(model.parameters(), lr=0.01) + + num_epochs = 1000 + for epoch in range(num_epochs): + # Forward pass + y_pred = model(X) + + # Compute the loss + loss = criterion(y_pred.squeeze(), torch.from_numpy(HOUSE_LINEAR["outputs"])) + + # Backward pass and optimization + optimizer.zero_grad() + loss.backward() + optimizer.step() + + test_squarefoot = torch.tensor([2800, 3200], dtype=torch.float32) + test_num_floors = torch.tensor([2.5, 3], dtype=torch.float32) + x = torch.stack([test_squarefoot, test_num_floors], dim=1) + return model, x + + +def export_model_onnx(model): + """ + Exports the model to ONNX format. + """ + tensor = [ + torch.from_numpy(HOUSE_LINEAR["squarefoot"]), + torch.from_numpy(HOUSE_LINEAR["num_floors"]) + ] + inputs = torch.stack(tensor, dim=1) + return torch.jit.trace(model, inputs) + + +def export_model_surml(model): + """ + Exports the model to SURML format. + + :param model: the model to export. + :return: the path to the exported model. 
+ """ + from surrealml import SurMlFile, Engine + + tensor = [ + torch.from_numpy(HOUSE_LINEAR["squarefoot"]), + torch.from_numpy(HOUSE_LINEAR["num_floors"]) + ] + inputs = torch.stack(tensor, dim=1) + + file = SurMlFile(model=model, name="linear", inputs=inputs[:1], engine=Engine.PYTORCH) + file.add_column("squarefoot") + file.add_column("num_floors") + file.add_normaliser("squarefoot", "z_score", HOUSE_LINEAR["squarefoot"].mean(), HOUSE_LINEAR["squarefoot"].std()) + file.add_normaliser("num_floors", "z_score", HOUSE_LINEAR["num_floors"].mean(), HOUSE_LINEAR["num_floors"].std()) + file.add_output("house_price", "z_score", HOUSE_LINEAR["outputs"].mean(), HOUSE_LINEAR["outputs"].std()) + return file diff --git a/surrealml/rust_adapter.py b/surrealml/rust_adapter.py new file mode 100644 index 0000000..7e5582b --- /dev/null +++ b/surrealml/rust_adapter.py @@ -0,0 +1,170 @@ +# try: +from surrealml.rust_surrealml import load_cached_raw_model, add_column, add_output, add_normaliser, save_model, \ + add_name, load_model, add_description, add_version, to_bytes, add_engine, add_author, add_origin +from surrealml.rust_surrealml import raw_compute, buffered_compute, upload_model + +from typing import Optional + +from surrealml.engine import Engine + + +class RustAdapter: + + def __init__(self, file_id: str, engine: Engine) -> None: + self.file_id: str = file_id + self.engine: Engine = engine + + @staticmethod + def pass_raw_model_into_rust(file_path: str) -> str: + """ + Points to a raw ONNX file and passes it into the rust library so it can be loaded + and tagged with a unique id so the Rust library can reference this model again + from within the rust library. + + :param file_path: the path to the raw ONNX file. + + :return: the unique id of the model. + """ + return load_cached_raw_model(file_path) + + def add_column(self, name: str) -> None: + """ + Adds a column to the model to the metadata (this needs to be called in order of the columns). 
+ + :param name: the name of the column. + :return: None + """ + add_column(self.file_id, name) + + def add_output(self, output_name, normaliser_type, one, two): + """ + Adds an output to the model to the metadata. + :param output_name: the name of the output. + :param normaliser_type: the type of normaliser to use. + :param one: the first parameter of the normaliser. + :param two: the second parameter of the normaliser. + :return: None + """ + add_output(self.file_id, output_name, normaliser_type, one, two) + + def add_description(self, description): + """ + Adds a description to the model to the metadata. + + :param description: the description of the model. + :return: None + """ + add_description(self.file_id, description) + + def add_version(self, version): + """ + Adds a version to the model to the metadata. + + :param version: the version of the model. + :return: None + """ + add_version(self.file_id, version) + + def add_normaliser(self, column_name, normaliser_type, one, two): + """ + Adds a normaliser to the model to the metadata for a column. + + :param column_name: the name of the column (column already needs to be in the metadata to create mapping) + :param normaliser_type: the type of normaliser to use. + :param one: the first parameter of the normaliser. + :param two: the second parameter of the normaliser. + :return: None + """ + add_normaliser(self.file_id, column_name, normaliser_type, one, two) + + def add_author(self, author): + """ + Adds an author to the model to the metadata. + + :param author: the author of the model. + :return: None + """ + add_author(self.file_id, author) + + def save(self, path): + """ + Saves the model to a file. + + :param path: the path to save the model to. 
+ :return: None + """ + # right now the only engine is pytorch so we can hardcode it but when we add more engines we will need to + # add a parameter to the save function to specify the engine + add_engine(self.file_id, self.engine.value) + add_origin(self.file_id, "local") + save_model(path, self.file_id) + + def to_bytes(self): + """ + Converts the model to bytes. + + :return: the model as bytes. + """ + return to_bytes(self.file_id) + + @staticmethod + def load(path) -> str: + """ + Loads a model from a file. + + :param path: the path to load the model from. + :return: the id of the model being loaded. + """ + return load_model(path) + + @staticmethod + def upload( + path: str, + url: str, + chunk_size: int, + namespace: str, + database: str, + username: Optional[str] = None, + password: Optional[str] = None + ) -> None: + """ + Uploads a model to a remote server. + + :param path: the path to load the model from. + :param url: the url of the remote server. + :param chunk_size: the size of each chunk to upload. + :param namespace: the namespace of the remote server. + :param database: the database of the remote server. + :param username: the username of the remote server. + :param password: the password of the remote server. + + :return: None + """ + upload_model( + path, + url, + chunk_size, + namespace, + database, + username, + password + ) + + def raw_compute(self, input_vector, dims=None): + """ + Calculates an output from the model given an input vector. + + :param input_vector: a 1D vector of inputs to the model. + :param dims: the dimensions of the input vector to be sliced into + :return: the output of the model. + """ + return raw_compute(self.file_id, input_vector, dims) + + def buffered_compute(self, value_map): + """ + Calculates an output from the model given a value map. + + :param value_map: a dictionary of inputs to the model with the column names as keys and floats as values. + :return: the output of the model. 
+ """ + return buffered_compute(self.file_id, value_map) diff --git a/surrealml/surml_file.py b/surrealml/surml_file.py index 08c620e..5cf5c94 100644 --- a/surrealml/surml_file.py +++ b/surrealml/surml_file.py @@ -1,22 +1,15 @@ """ Defines the SurMlFile class which is used to save/load models and perform computations based on those models. """ -import os -import uuid from typing import Optional -import torch -from surrealml.rust_surrealml import load_cached_raw_model, add_column, add_output, add_normaliser, save_model, \ - add_name, load_model, add_description, add_version, to_bytes, add_engine, add_author, add_origin -from surrealml.rust_surrealml import raw_compute, buffered_compute, upload_model - -from surrealml.model_cache import SkLearnModelCache -from surrealml.engine_enum import Engine +from surrealml.engine import Engine, SklearnOnnxAdapter, TorchOnnxAdapter +from surrealml.rust_adapter import RustAdapter class SurMlFile: - def __init__(self, model=None, name=None, inputs=None, sklearn=False): + def __init__(self, model=None, name=None, inputs=None, engine=None): """ The constructor for the SurMlFile class. @@ -28,40 +21,33 @@ def __init__(self, model=None, name=None, inputs=None, sklearn=False): self.model = model self.name = name self.inputs = inputs - self.sklearn = sklearn - if self.model is not None: - if sklearn is True: - self.model = SkLearnModelCache.convert_sklearn_model(model=self.model, inputs=self.inputs) - self.file_id = self._cache_model() - else: - self.file_id = None + self.engine = engine + self.file_id = self._cache_model() + self.rust_adapter = RustAdapter(self.file_id, self.engine) - def _cache_model(self): + def _cache_model(self) -> Optional[str]: """ Caches a model, so it can be loaded as raw bytes to be fused with the header. :return: the file id of the model so it can be retrieved from the cache. 
""" - cache_folder = '.surmlcache' - - if not os.path.exists(cache_folder): - os.makedirs(cache_folder) - - unique_id = str(uuid.uuid4()) - file_name = f"{unique_id}.surml" - file_path = os.path.join(cache_folder, file_name) - - if self.sklearn is True: - traced_script_module = self.model + # This is triggered when the model is loaded from a file as we are not passing in a model + if self.model is None and self.name is None and self.inputs is None and self.engine is None: + return None + + if self.engine == Engine.SKLEARN: + raw_file_path: str = SklearnOnnxAdapter.save_model_to_onnx( + model=self.model, + inputs=self.inputs + ) + elif self.engine == Engine.PYTORCH: + raw_file_path: str = TorchOnnxAdapter.save_model_to_onnx( + model=self.model, + inputs=self.inputs + ) else: - traced_script_module = torch.jit.trace(self.model, self.inputs) - - torch.onnx.export(traced_script_module, self.inputs, file_path) - file_id = load_cached_raw_model(str(file_path)) - os.remove(file_path) - if self.name is not None: - add_name(file_id, self.name) - return file_id + raise ValueError(f"Engine {self.engine} not supported") + return RustAdapter.pass_raw_model_into_rust(raw_file_path) def add_column(self, name): """ @@ -70,7 +56,7 @@ def add_column(self, name): :param name: the name of the column. :return: None """ - add_column(self.file_id, name) + self.rust_adapter.add_column(name=name) def add_output(self, output_name, normaliser_type, one, two): """ @@ -81,7 +67,7 @@ def add_output(self, output_name, normaliser_type, one, two): :param two: the second parameter of the normaliser. :return: None """ - add_output(self.file_id, output_name, normaliser_type, one, two) + self.rust_adapter.add_output(output_name, normaliser_type, one, two) def add_description(self, description): """ @@ -90,7 +76,7 @@ def add_description(self, description): :param description: the description of the model. 
:return: None """ - add_description(self.file_id, description) + self.rust_adapter.add_description(description) def add_version(self, version): """ @@ -99,7 +85,7 @@ def add_version(self, version): :param version: the version of the model. :return: None """ - add_version(self.file_id, version) + self.rust_adapter.add_version(self.file_id) def add_normaliser(self, column_name, normaliser_type, one, two): """ @@ -111,7 +97,7 @@ def add_normaliser(self, column_name, normaliser_type, one, two): :param two: the second parameter of the normaliser. :return: None """ - add_normaliser(self.file_id, column_name, normaliser_type, one, two) + self.rust_adapter.add_normaliser(column_name, normaliser_type, one, two) def add_author(self, author): """ @@ -120,7 +106,7 @@ def add_author(self, author): :param author: the author of the model. :return: None """ - add_author(self.file_id, author) + self.rust_adapter.add_author(author) def save(self, path): """ @@ -131,9 +117,7 @@ def save(self, path): """ # right now the only engine is pytorch so we can hardcode it but when we add more engines we will need to # add a parameter to the save function to specify the engine - add_engine(self.file_id, Engine.PYTORCH.value) - add_origin(self.file_id, "local") - save_model(path, self.file_id) + self.rust_adapter.save(path=path) def to_bytes(self): """ @@ -141,18 +125,22 @@ def to_bytes(self): :return: the model as bytes. """ - return to_bytes(self.file_id) + return self.rust_adapter.to_bytes() @staticmethod - def load(path): + def load(path, engine: Engine): """ - Loads a model from a file. + Loads a model from a file so compute operations can be done. :param path: the path to load the model from. - :return: + :param engine: the engine to use to load the model. 
+ + :return: The SurMlFile with loaded model and engine definition """ self = SurMlFile() - self.file_id = load_model(path) + self.file_id = self.rust_adapter.load(path) + self.engine = engine + self.rust_adapter = RustAdapter(self.file_id, self.engine) return self @staticmethod @@ -178,7 +166,7 @@ def upload( :return: None """ - upload_model( + RustAdapter.upload( path, url, chunk_size, @@ -196,7 +184,7 @@ def raw_compute(self, input_vector, dims=None): :param dims: the dimensions of the input vector to be sliced into :return: the output of the model. """ - return raw_compute(self.file_id, input_vector, dims) + return self.rust_adapter.raw_compute(input_vector, dims) def buffered_compute(self, value_map): """ @@ -205,4 +193,4 @@ def buffered_compute(self, value_map): :param value_map: a dictionary of inputs to the model with the column names as keys and floats as values. :return: the output of the model. """ - return buffered_compute(self.file_id, value_map) + return self.rust_adapter.buffered_compute(value_map) diff --git a/test.surml b/test.surml deleted file mode 100644 index 5afc217..0000000 Binary files a/test.surml and /dev/null differ diff --git a/test_forrest.surml b/test_forrest.surml deleted file mode 100644 index 13c2ec6..0000000 Binary files a/test_forrest.surml and /dev/null differ diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..abe4b7b --- /dev/null +++ b/tests/README.md @@ -0,0 +1,34 @@ +# Tests +This section houses the functionality of testing the repo in terms of unit tests and integration tests. + +## Library Setup + +There has to be a little bit of setup to run unit tests for this repo. This is because a large part of the code +is written in Rust. Therefore, the Rust binary has to be compiled and put into the correct place for the rest of the +python repo to reference it. If the Rust binary is not compiled, then the unit tests will fail as they are trying to +reference a binary that does not exist. 
Storage and execution of machine learning models is done in Rust so we can +ensure that if the package runs locally in Python, it will run in production in Rust in the same way in the database. +There is a script that will compile the Rust binary and put it in the correct place. To run this script, run the +following command, ensuring that you are in the root directory of the repo and that you have not activated a virtual +environment as the script will build a temporary virtual environment for the build and then delete the virtual +environment after the build is complete: + +```bash +python tests/scripts/local_build.py +``` + +## Model Setup + +Surml aims to support a range of different machine learning models as long as we can convert those models to ONNX. +To keep the feedback loop tight and to ensure that the models are working as expected, we have a set of tests that +run against trained models in the core library and the surrealml library. These tests are run against models that are +freshly trained using the approaches that we advocate for. We can train our models and deploy them in the testing +environment using the following command: + +```bash +python tests/scripts/build_assets.py +``` + +The trained models will be stored in the `modules/core/model_stash/` directory. This directory is ignored by git +so if you have recently cloned the repo or you are adding a GitHub action that involves the models, you will need +to ensure that the `build_assets.py` file is run at some point before you rely on those models. 
diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/scripts/build_assets.py b/tests/scripts/build_assets.py new file mode 100644 index 0000000..1c525e4 --- /dev/null +++ b/tests/scripts/build_assets.py @@ -0,0 +1,126 @@ +""" +Trains and exports models to be used for testing. +""" +import os +import sys + +import onnx + +script_path = os.path.abspath(__file__) +script_directory = os.path.dirname(script_path) + +tests_directory = os.path.join(script_directory, "..") +main_directory = os.path.join(script_directory, "..", "..") + +# Add a directory to the PYTHONPATH +sys.path.append(main_directory) + + +import shutil +from surrealml.model_templates.sklearn.sklearn_linear import train_model as linear_sklearn_train_model +from surrealml.model_templates.sklearn.sklearn_linear import export_model_onnx as linear_sklearn_export_model_onnx +from surrealml.model_templates.sklearn.sklearn_linear import export_model_surml as linear_sklearn_export_model_surml + +from surrealml.model_templates.torch.torch_linear import train_model as linear_torch_train_model +from surrealml.model_templates.torch.torch_linear import export_model_onnx as linear_torch_export_model_onnx +from surrealml.model_templates.torch.torch_linear import export_model_surml as linear_torch_export_model_surml + + +def delete_directory(dir_path: os.path) -> None: + """ + Checks to see if a directory exists and deletes it if it does. + + :param dir_path: the path to the directory. + """ + if os.path.exists(dir_path): + shutil.rmtree(dir_path) + print(f"Directory '{dir_path}' has been deleted.") + else: + print(f"Directory '{dir_path}' does not exist.") + + +def delete_file(file_path: os.path) -> None: + """ + Checks to see if a file exists and deletes it if it does. + + :param file_path: the path to the file. 
+ """ + if os.path.isfile(file_path): + os.remove(file_path) + print(f"File '{file_path}' has been deleted.") + else: + print(f"File '{file_path}' does not exist.") + + +def write_file(file_path: os.path, model, file_name) -> None: + """ + Writes a file to the specified path. + + :param file_path: the path to write the file to. + :param model: the model to write to the file. + :param file_name: the name of the file to write. + """ + with open(os.path.join(file_path, file_name), "wb") as f: + f.write(model) + +core_directory = os.path.join(main_directory, "modules", "core") + +model_stash_directory = os.path.join(core_directory, "model_stash") +sklearn_stash_directory = os.path.join(model_stash_directory, "sklearn") +sklearn_surml_stash_directory = os.path.join(sklearn_stash_directory, "surml") +sklearn_onnx_stash_directory = os.path.join(sklearn_stash_directory, "onnx") +torch_stash_directory = os.path.join(model_stash_directory, "torch") +torch_surml_stash_directory = os.path.join(torch_stash_directory, "surml") +torch_onnx_stash_directory = os.path.join(torch_stash_directory, "onnx") + + +target_directory = os.path.join(main_directory, "target") +egg_info_dir = os.path.join(main_directory, "surrealml.egg-info") + + +def main(): + print("main running") + # wipe and create directories for model stashes + delete_directory(model_stash_directory) + + os.mkdir(model_stash_directory) + os.mkdir(sklearn_stash_directory) + os.mkdir(sklearn_surml_stash_directory) + os.mkdir(sklearn_onnx_stash_directory) + os.mkdir(torch_stash_directory) + os.mkdir(torch_surml_stash_directory) + os.mkdir(torch_onnx_stash_directory) + + # train and stash sklearn models + sklearn_linear_model = linear_sklearn_train_model() + sklearn_linear_surml_file = linear_sklearn_export_model_surml(sklearn_linear_model) + sklearn_linear_onnx_file = linear_sklearn_export_model_onnx(sklearn_linear_model) + + sklearn_linear_surml_file.save( + path=str(os.path.join(sklearn_surml_stash_directory, 
"linear.surml")) + ) + onnx.save( + sklearn_linear_onnx_file, + os.path.join(sklearn_onnx_stash_directory, "linear.onnx") + ) + + # train and stash torch models + torch_linear_model, x = linear_torch_train_model() + torch_linear_surml_file = linear_torch_export_model_surml(torch_linear_model) + torch_linear_onnx_file = linear_torch_export_model_onnx(torch_linear_model) + + torch_linear_surml_file.save( + path=str(os.path.join(torch_surml_stash_directory, "linear.surml")) + ) + # onnx.save( + # torch_linear_onnx_file, + # os.path.join(torch_onnx_stash_directory, "linear.onnx") + # ) + + os.system(f"cd {model_stash_directory} && tree") + + shutil.rmtree(".surmlcache") + + +if __name__ == '__main__': + main() diff --git a/tests/scripts/ci_local_build.py b/tests/scripts/ci_local_build.py new file mode 100644 index 0000000..13533c2 --- /dev/null +++ b/tests/scripts/ci_local_build.py @@ -0,0 +1,45 @@ +""" +this script simply moves the rust_surrealml.so file from the build directory to the surrealml directory. +This script should be run in the github actions, if you are looking to run the tests locally please run +the local_build.py script. +""" +import fnmatch +import os +import shutil + + +def find_and_move_rust_surrealml_file(start_path: os.path, destination_path: os.path, new_name: str) -> None: + """ + Finds the rust_surrealml.so file and moves it to the surrealml directory. + + :param start_path: the path to start the search from for the built .so rust lib. + :param destination_path: the path to move the rust lib to. + :param new_name: the new name of the rust lib .so file. 
+ """ + for root, dirs, files in os.walk(start_path): + if 'lib' in root: + for filename in fnmatch.filter(files, 'rust_surrealml*.so'): + source_file = os.path.join(root, filename) + destination_file = os.path.join(destination_path, new_name) + shutil.move(source_file, destination_file) + return destination_file + return None + + +script_path = os.path.abspath(__file__) +script_directory = os.path.dirname(script_path) +main_directory = os.path.join(script_directory, "..", "..") +build_dir = os.path.join(main_directory, "build") +surrealml_dir = os.path.join(main_directory, "surrealml") + + +def main(): + find_and_move_rust_surrealml_file( + start_path=build_dir, + destination_path=surrealml_dir, + new_name="rust_surrealml.so" + ) + + +if __name__ == '__main__': + main() diff --git a/tests/scripts/local_build.py b/tests/scripts/local_build.py new file mode 100644 index 0000000..0ee24e0 --- /dev/null +++ b/tests/scripts/local_build.py @@ -0,0 +1,102 @@ +""" +This script compiles the Rust library and injects the .so rust python lib into the surrealml +directory so we can run python unit tests against the Rust library. +""" +import fnmatch +import os +import shutil + + +def delete_directory(dir_path: os.path) -> None: + """ + Checks to see if a directory exists and deletes it if it does. + + :param dir_path: the path to the directory. + """ + if os.path.exists(dir_path): + shutil.rmtree(dir_path) + print(f"Directory '{dir_path}' has been deleted.") + else: + print(f"Directory '{dir_path}' does not exist.") + + +def delete_file(file_path: os.path) -> None: + """ + Checks to see if a file exists and deletes it if it does. + + :param file_path: the path to the file. 
+ """ + if os.path.isfile(file_path): + os.remove(file_path) + print(f"File '{file_path}' has been deleted.") + else: + print(f"File '{file_path}' does not exist.") + + +def find_and_move_rust_surrealml_file(start_path: os.path, destination_path: os.path, new_name: str) -> None: + """ + Finds the rust_surrealml.so file and moves it to the surrealml directory. + + :param start_path: the path to start the search from for the built .so rust lib. + :param destination_path: the path to move the rust lib to. + :param new_name: the new name of the rust lib .so file. + """ + for root, dirs, files in os.walk(start_path): + if 'lib' in root: + for filename in fnmatch.filter(files, 'rust_surrealml*.so'): + source_file = os.path.join(root, filename) + destination_file = os.path.join(destination_path, new_name) + shutil.move(source_file, destination_file) + return destination_file + return None + + +script_path = os.path.abspath(__file__) +script_directory = os.path.dirname(script_path) + +tests_directory = os.path.join(script_directory, "..") +main_directory = os.path.join(script_directory, "..", "..") +target_directory = os.path.join(main_directory, "target") +egg_info_dir = os.path.join(main_directory, "surrealml.egg-info") +build_dir = os.path.join(main_directory, "build") +surrealml_dir = os.path.join(main_directory, "surrealml") +embedded_rust_lib_dir = os.path.join(main_directory, "surrealml", "rust_surrealml.so") +test_venv_dir = os.path.join(tests_directory, "venv") +source_venv = os.path.join(test_venv_dir, "bin", "activate") + + +def main(): + # delete the old dirs and embedded rust lib if present + print("local build: cleaning up old files") + delete_directory(dir_path=test_venv_dir) + delete_directory(dir_path=build_dir) + delete_directory(dir_path=egg_info_dir) + delete_directory(dir_path=target_directory) + delete_file(file_path=embedded_rust_lib_dir) + print("local build: old files cleaned up") + + # setup venv and build the rust lib + print("local build: 
setting up venv and building rust lib") + os.system(f"python3 -m venv {test_venv_dir}") + print("local build: venv setup") + print("local build: building rust lib") + os.system(f"source {source_venv} && pip install --no-cache-dir {main_directory}") + print("local build: rust lib built") + + # move the rust lib into the surrealml directory + print("local build: moving rust lib into surrealml directory") + find_and_move_rust_surrealml_file( + start_path=build_dir, + destination_path=surrealml_dir, + new_name="rust_surrealml.so" + ) + print("local build: rust lib moved into surrealml directory") + + # cleanup + # delete_directory(dir_path=test_venv_dir) + # delete_directory(dir_path=build_dir) + # delete_directory(dir_path=egg_info_dir) + + +if __name__ == '__main__': + main() diff --git a/tests/test.surml b/tests/test.surml deleted file mode 100644 index 8d8b851..0000000 Binary files a/tests/test.surml and /dev/null differ diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/engine/__init__.py b/tests/unit_tests/engine/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/engine/test_sklearn.py b/tests/unit_tests/engine/test_sklearn.py new file mode 100644 index 0000000..5468eaf --- /dev/null +++ b/tests/unit_tests/engine/test_sklearn.py @@ -0,0 +1,52 @@ +""" +This test purely tests the storage of sklearn models in ONNX, we will test indiviudal sklearn models +in the integrations tests +""" +import shutil +from unittest import main, TestCase + +import numpy as np +import onnxruntime as ort + +from surrealml.engine.sklearn import SklearnOnnxAdapter +from surrealml.model_templates.sklearn.sklearn_linear import train_model +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR + + +class TestSklearn(TestCase): + + def setUp(self): + self.model = train_model() + + def tearDown(self): + try: + shutil.rmtree(".surmlcache") + 
except OSError as e: + print(f"Error: surmlcache : {e.strerror}") + + def test_store_and_run(self): + file_path = SklearnOnnxAdapter.save_model_to_onnx(self.model, HOUSE_LINEAR["inputs"][:1]) + + # Load the ONNX model + session = ort.InferenceSession(file_path) + + # Prepare input data (adjust the shape according to your model's requirements) + # For a linear regression model, it usually expects a single feature vector. + # Example: Predicting for a single value + input_data = np.array([[5, 6]], dtype=np.float32) # Replace with your input data + + # Get the name of the input node + input_name = session.get_inputs()[0].name + + # Run the model (make a prediction) + result = session.run(None, {input_name: input_data}) + + # The result is a list of outputs (since a model can have multiple outputs) + # For a simple linear regression model, it typically has a single output. + predicted_value = result[0][0][0] + + self.assertEqual(5.013289451599121, float(predicted_value)) + + +if __name__ == '__main__': + main() diff --git a/tests/unit_tests/engine/test_torch.py b/tests/unit_tests/engine/test_torch.py new file mode 100644 index 0000000..136b68d --- /dev/null +++ b/tests/unit_tests/engine/test_torch.py @@ -0,0 +1,46 @@ +""" +This test is just testing the storage of the model in ONNX, we will test indiviudal torch models +in the integration tests. 
+""" +import shutil +from unittest import main, TestCase + +import numpy as np +import onnxruntime as ort + +from surrealml.engine.torch import TorchOnnxAdapter +from surrealml.model_templates.torch.torch_linear import train_model + + +class TestTorch(TestCase): + + def setUp(self): + self.model, self.x = train_model() + + def tearDown(self): + try: + shutil.rmtree(".surmlcache") + except OSError as e: + print(f"Error: surmlcache : {e.strerror}") + + def test_store_and_run(self): + file_path = TorchOnnxAdapter.save_model_to_onnx(self.model, self.x[:1]) + + # Load the ONNX model + session = ort.InferenceSession(file_path) + + # Prepare input data (adjust the shape according to your model's requirements) + # For a linear regression model, it usually expects a single feature vector. + # Example: Predicting for a single value + input_data = np.array([[2800, 3200]], dtype=np.float32) # Replace with your input data + + # Get the name of the input node + input_name = session.get_inputs()[0].name + + # Run the model (make a prediction) + result = session.run(None, {input_name: input_data})[0][0][0] + self.assertEqual(np.float32, type(result)) + + +if __name__ == '__main__': + main() diff --git a/tests/unit_tests/test_rust_adapter.py b/tests/unit_tests/test_rust_adapter.py new file mode 100644 index 0000000..b2af7aa --- /dev/null +++ b/tests/unit_tests/test_rust_adapter.py @@ -0,0 +1,25 @@ +from unittest import TestCase, main +from surrealml.model_templates.torch.torch_linear import train_model +from surrealml.rust_adapter import RustAdapter +from surrealml.surml_file import SurMlFile +from surrealml.engine import Engine +import shutil + + +class TestRustAdapter(TestCase): + + def setUp(self): + self.model, self.x = train_model() + self.file = SurMlFile(model=self.model, name="linear", inputs=self.x, engine=Engine.PYTORCH) + + def tearDown(self): + shutil.rmtree(".surmlcache") + + def test_basic_store(self): + # pass + self.file.add_column(name="x") + # 
self.file.save(path="./unit_test.surml") + + +if __name__ == '__main__': + main() diff --git a/tests/unit_tests/test_surml_file.py b/tests/unit_tests/test_surml_file.py new file mode 100644 index 0000000..f5bb615 --- /dev/null +++ b/tests/unit_tests/test_surml_file.py @@ -0,0 +1,56 @@ +import os +import shutil +from unittest import TestCase + +import numpy as np + +from surrealml import Engine, SurMlFile +from surrealml.model_templates.torch.torch_linear import train_model + + +class TestSurMlFile(TestCase): + + def setUp(self): + self.squarefoot = np.array([1000, 1200, 1500, 1800, 2000, 2200, 2500, 2800, 3000, 3200], dtype=np.float32) + self.num_floors = np.array([1, 1, 1.5, 1.5, 2, 2, 2.5, 2.5, 3, 3], dtype=np.float32) + self.house_price = np.array([200000, 230000, 280000, 320000, 350000, 380000, 420000, 470000, 500000, 520000], + dtype=np.float32) + self.model, self.x = train_model() + self.file = SurMlFile(model=self.model, name="House Price Prediction", inputs=self.x[:1], engine=Engine.PYTORCH) + + def tearDown(self): + try: + shutil.rmtree(".surmlcache") + except OSError as e: + print(f"Error: surmlcache : {e.strerror}") + os.remove("./test.surml") + + def test_full_torch_run(self): + self.file.add_column("squarefoot") + self.file.add_column("num_floors") + + self.file.add_output( + "house_price", + "z_score", + self.house_price.mean(), + self.house_price.std() + ) + self.file.add_normaliser( + "squarefoot", + "z_score", + self.squarefoot.mean(), + self.squarefoot.std() + ) + self.file.add_normaliser( + "num_floors", + "z_score", + self.num_floors.mean(), + self.num_floors.std() + ) + + self.file.save("./test.surml") + + new_file = SurMlFile.load("./test.surml", Engine.PYTORCH) + + self.assertEqual(float, type(new_file.raw_compute([1.0, 2.0])[0])) + self.assertEqual(float, type(new_file.buffered_compute({"squarefoot": 1.0, "num_floors": 2.0})[0]))