Skip to content

Commit

Permalink
adding tensorflow support (#45)
Browse files Browse the repository at this point in the history
* adding glue module to enable error handling (#27)

* added batching runner (#28)

* Kings college london integration (#30)

* adding build using binary downloads (#8)

* adding build using binary downloads

* sorting out the build.rs

* updating build.rs for surrealml package

* prepping version for release

* now has target tracking (#10)

* adding check in build.rs for docs.rs

* removing build.rs for main surrealml to ensure that libraries using the core do not need to do anything in their build.rs

* adding machine learning pipelines for bioengineering projects at Kings College London

* Remove integrated_training_runner/run_env/ from tracking

* adding machine learning pipelines for bioengineering projects at Kings College London

* Update FFmpeg data access module and README (#29)

* adding run_env to the gitignore

---------

Co-authored-by: Yang Li <oliverlee2018@163.com>

* bumping the version

* updating the README and module

* updating the surrealml-core deployment workflow

* updating the surrealml-core deployment workflow

* updating cargo

* Error modules (#36)

* Develop (#35)

* adding glue module to enable error handling (#27)

* added batching runner (#28)

* Kings college london integration (#30)

* adding build using binary downloads (#8)

* adding build using binary downloads

* sorting out the build.rs

* updating build.rs for surrealml package

* prepping version for release

* now has target tracking (#10)

* adding check in build.rs for docs.rs

* removing build.rs for main surrealml to ensure that libraries using the core do not need to do anything in their build.rs

* adding machine learning pipelines for bioengineering projects at Kings College London

* Remove integrated_training_runner/run_env/ from tracking

* adding machine learning pipelines for bioengineering projects at Kings College London

* Update FFmpeg data access module and README (#29)

* adding run_env to the gitignore

---------

Co-authored-by: Yang Li <oliverlee2018@163.com>

* bumping the version

* updating the README and module

* updating the surrealml-core deployment workflow

* updating the surrealml-core deployment workflow

* updating cargo

---------

Co-authored-by: Sam Hillman <116303632+SHillman836@users.noreply.github.com>
Co-authored-by: Yang Li <oliverlee2018@163.com>

* merging error modules into the core

* merging error modules into the core

* merging error modules into the core

---------

Co-authored-by: Sam Hillman <116303632+SHillman836@users.noreply.github.com>
Co-authored-by: Yang Li <oliverlee2018@163.com>

* Index overflow (#40)

* adding buffer out of index check

* adding buffer out of index check

* updating testing around meta data (#42)

* updating the naming and increasing tests around the meta data of the stored ML models

* updating the naming and increasing tests around the meta data of the stored ML models

* updating the naming and increasing tests around the meta data of the stored ML models

* Tensorflow support (#44)

* stashing for branch switch

* adding tests for tensorflow

* adding tests for tensorflow

* fixing requirement conflicts

* fixing requirement conflicts

* fixing requirement conflicts

* fixing requirement conflicts

* fixing requirement conflicts

---------

Co-authored-by: Sam Hillman <116303632+SHillman836@users.noreply.github.com>
Co-authored-by: Yang Li <oliverlee2018@163.com>
  • Loading branch information
3 people authored Apr 2, 2024
1 parent 7d129de commit 9f2a7bf
Show file tree
Hide file tree
Showing 17 changed files with 253 additions and 18 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ surrealml/rust_surrealml.cpython-310-darwin.so
./modules/pipelines/runners/batch_training_runner/run_env/
./modules/pipelines/data_access/target/
./modules/pipelines/runners/integrated_training_runner/run_env/
modules/pipelines/runners/integrated_training_runner/run_env/
modules/pipelines/runners/integrated_training_runner/run_env/
modules/pipelines/data_access/target/
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ For `PyTorch`:
pip install "git+https://github.com/surrealdb/surrealml#egg=surrealml[torch]"
```

For `Tensorflow`:

```bash
pip install "git+https://github.com/surrealdb/surrealml#egg=surrealml[tensorflow]"
```

After that, you can train your model and save it in the SurrealML format.

## Compilation config
Expand Down
2 changes: 1 addition & 1 deletion modules/core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "surrealml-core"
version = "0.1.1"
version = "0.1.2"
edition = "2021"
build = "./build.rs"
description = "The core machine learning library for SurrealML that enables SurrealDB to store and load ML models"
Expand Down
Binary file modified modules/core/model_stash/sklearn/surml/linear.surml
Binary file not shown.
Binary file not shown.
Binary file modified modules/core/model_stash/torch/surml/linear.surml
Binary file not shown.
32 changes: 32 additions & 0 deletions modules/core/src/execution/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,36 @@ mod tests {
let output = model_computation.buffered_compute(&mut input_values).unwrap();
assert_eq!(output.len(), 1);
}

/// Loads the stashed TensorFlow linear model, builds the input tensor from named
/// key bindings and checks that the raw computation yields a single output value.
#[test]
fn test_raw_compute_linear_tensorflow() {
    let mut file = SurMlFile::from_file("./model_stash/tensorflow/surml/linear.surml").unwrap();
    let model_computation = ModelComputation { surml_file: &mut file };

    // Column name -> raw value for each model input.
    let input_values = HashMap::from([
        (String::from("squarefoot"), 1000.0),
        (String::from("num_floors"), 2.0),
    ]);

    let raw_input = model_computation
        .input_tensor_from_key_bindings(input_values)
        .unwrap();
    let output = model_computation.raw_compute(raw_input, None).unwrap();

    assert_eq!(output.len(), 1);
}

/// Loads the stashed TensorFlow linear model and checks that the buffered
/// computation, fed with named input values, yields a single output value.
#[test]
fn test_buffered_compute_linear_tensorflow() {
    let mut file = SurMlFile::from_file("./model_stash/tensorflow/surml/linear.surml").unwrap();
    let model_computation = ModelComputation { surml_file: &mut file };

    // Column name -> raw value for each model input; passed mutably to the compute call.
    let mut input_values = HashMap::from([
        (String::from("squarefoot"), 1000.0),
        (String::from("num_floors"), 2.0),
    ]);

    let output = model_computation
        .buffered_compute(&mut input_values)
        .unwrap();

    assert_eq!(output.len(), 1);
}
}
13 changes: 7 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
numpy==1.26.3
skl2onnx==1.16.0
scikit-learn==1.4.0
torch==2.1.2
onnx==1.15.0
onnxruntime==1.16.3
numpy
skl2onnx
scikit-learn
torch
tf2onnx
tensorflow
onnxruntime
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"surrealml.model_templates.datasets",
"surrealml.model_templates.sklearn",
"surrealml.model_templates.torch",
"surrealml.model_templates.tensorflow",
],
package_data={
"surrealml": ["binaries/*"],
Expand All @@ -40,6 +41,10 @@
],
"torch": [
"torch==2.1.2"
],
"tensorflow": [
"tf2onnx==1.16.1",
"tensorflow==2.16.1"
]
}
)
3 changes: 3 additions & 0 deletions surrealml/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from surrealml.engine.sklearn import SklearnOnnxAdapter
from surrealml.engine.torch import TorchOnnxAdapter
from surrealml.engine.tensorflow import TensorflowOnnxAdapter


class Engine(Enum):
Expand All @@ -12,7 +13,9 @@ class Engine(Enum):
PYTORCH: The PyTorch engine which will be PyTorch and ONNX.
NATIVE: The native engine which will be native rust and linfa.
SKLEARN: The sklearn engine which will be sklearn and ONNX
TENSORFLOW: The TensorFlow engine which will be TensorFlow and ONNX
"""
PYTORCH = "pytorch"
NATIVE = "native"
SKLEARN = "sklearn"
TENSORFLOW = "tensorflow"
10 changes: 4 additions & 6 deletions surrealml/engine/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,9 @@ def save_model_to_onnx(model, inputs) -> str:
"""
SklearnOnnxAdapter.check_dependency()
file_path = create_file_cache_path()
# the below check is to satisfy type checkers
if skl2onnx is not None:
onnx = skl2onnx.to_onnx(model, inputs)
onnx = skl2onnx.to_onnx(model, inputs)

with open(file_path, "wb") as f:
f.write(onnx.SerializeToString())
with open(file_path, "wb") as f:
f.write(onnx.SerializeToString())

return file_path
return file_path
45 changes: 45 additions & 0 deletions surrealml/engine/tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import shutil
try:
import tf2onnx
import tensorflow as tf
except ImportError:
tf2onnx = None
tf = None

from surrealml.engine.utils import TensorflowCache


class TensorflowOnnxAdapter:
    """
    Adapter that converts a TensorFlow model into an ONNX file by saving it as a
    SavedModel and running the ``tf2onnx.convert`` command line converter on it.
    """

    @staticmethod
    def check_dependency() -> None:
        """
        Checks if the tensorflow dependency is installed raising an error if not.
        Please call this function when performing any tensorflow related operations.

        :raises ImportError: if ``tensorflow`` or ``tf2onnx`` could not be imported.
        """
        if tf2onnx is None or tf is None:
            raise ImportError("tensorflow feature needs to be installed to use tensorflow features")

    @staticmethod
    def save_model_to_onnx(model, inputs) -> str:
        """
        Saves a tensorflow model to an onnx file.

        :param model: the tensorflow model to convert.
        :param inputs: accepted so the signature matches the other engine adapters;
                       it is not read by this conversion.
        :return: the path of the ONNX file written inside the tensorflow cache
                 (named with a unique id to prevent collisions).
        :raises ImportError: if the tensorflow dependencies are not installed.
        :raises subprocess.CalledProcessError: if the tf2onnx converter exits with a non-zero status.
        """
        # Imported locally so this block does not rely on new file-level imports.
        import subprocess
        import sys

        TensorflowOnnxAdapter.check_dependency()
        cache = TensorflowCache()

        model_file_path = cache.new_cache_path
        onnx_file_path = cache.new_cache_path

        try:
            tf.saved_model.save(model, model_file_path)
            # An argument list (no shell) keeps paths with spaces or shell metacharacters
            # intact, sys.executable guarantees the converter runs in the interpreter that
            # has tf2onnx installed, and check=True surfaces a failed conversion instead of
            # returning the path of a file that was never written.
            subprocess.run(
                [
                    sys.executable, "-m", "tf2onnx.convert",
                    "--saved-model", model_file_path,
                    "--output", onnx_file_path,
                ],
                check=True,
            )
        finally:
            # The intermediate SavedModel directory is only needed for the conversion;
            # remove it even when saving or converting fails.
            shutil.rmtree(model_file_path, ignore_errors=True)
        return onnx_file_path
24 changes: 21 additions & 3 deletions surrealml/engine/utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
"""
This file contains utility functions for the engine.
"""
import os
import uuid


def create_file_cache_path():
def create_file_cache_path(cache_folder: str = ".surmlcache") -> os.path:
"""
Creates a file cache path for the model (creating the file cache if not there).
:return: the path to the cache created with a unique id to prevent collisions.
"""
cache_folder = '.surmlcache'

if not os.path.exists(cache_folder):
os.makedirs(cache_folder)
unique_id = str(uuid.uuid4())
file_name = f"{unique_id}.surml"
return os.path.join(cache_folder, file_name)


class TensorflowCache:
    """
    A class to create a cache for tensorflow models.

    Attributes:
        cache_path: The directory (``.surmlcache/tensorflow``, relative to the current
                    working directory) in which new cache entries are placed.
    """
    def __init__(self) -> None:
        """
        Ensures the tensorflow cache directory (and its ``.surmlcache`` parent) exists.
        """
        self.cache_path = os.path.join(".surmlcache", "tensorflow")
        # makedirs creates the parent cache folder as well; exist_ok avoids the
        # check-then-create race and does not generate throwaway unique file names.
        os.makedirs(self.cache_path, exist_ok=True)

    @property
    def new_cache_path(self) -> str:
        """
        Builds a fresh path inside the cache directory on every access.

        :return: ``cache_path`` joined with a newly generated UUID4 string; nothing is
                 created on disk.
        """
        return os.path.join(self.cache_path, str(uuid.uuid4()))
Empty file.
96 changes: 96 additions & 0 deletions surrealml/model_templates/tensorflow/tensorflow_linear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Trains a linear regression model in TensorFlow. Should be used for testing certain processes
for linear regression and TensorFlow.
"""
import os
import shutil

import tensorflow as tf

from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR


class LinearModel(tf.Module):
    """
    Linear model computing ``x @ W + b`` with its parameters held as float32 variables.
    """
    def __init__(self, W, b):
        """
        :param W: initial value for the weight variable.
        :param b: initial value for the bias variable.
        """
        super().__init__()
        self.W = tf.Variable(W, dtype=tf.float32)
        self.b = tf.Variable(b, dtype=tf.float32)

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, 2], dtype=tf.float32)])
    def predict(self, x):
        """
        :param x: float32 tensor with two features per row (see the input signature).
        :return: the matrix product of ``x`` and the weights, plus the bias.
        """
        weighted = tf.matmul(x, self.W)
        return weighted + self.b


def train_model():
    """
    Fits a two-feature linear regression on the HOUSE_LINEAR dataset using a manual
    gradient-descent loop on the mean squared error.

    :return: a LinearModel built from the trained weights and bias.
    """
    # Training parameters
    learning_rate = 0.01
    epochs = 100

    # Dataset as float32 tensors
    features = tf.constant(HOUSE_LINEAR["inputs"], dtype=tf.float32)
    targets = tf.constant(HOUSE_LINEAR["outputs"], dtype=tf.float32)

    # Trainable parameters: a weight per input feature and a single bias
    W = tf.Variable(tf.random.normal([2, 1]), name='weights')
    b = tf.Variable(tf.zeros([1]), name='bias')

    for epoch in range(epochs):
        with tf.GradientTape() as tape:
            prediction = tf.matmul(features, W) + b
            loss = tf.reduce_mean(tf.square(prediction - targets))

        # One plain gradient-descent step on both parameters
        grad_W, grad_b = tape.gradient(loss, [W, b])
        W.assign_sub(learning_rate * grad_W)
        b.assign_sub(learning_rate * grad_b)

        # Report progress every 10 epochs
        if epoch % 10 == 0:
            print(f"Epoch {epoch}: Loss = {loss.numpy()}")

    # Snapshot the trained parameters as numpy arrays
    final_W = W.numpy()
    final_b = b.numpy()

    print(f"Trained W: {final_W}, Trained b: {final_b}")
    return LinearModel(final_W, final_b)


def export_model_tf(model):
    """
    Saves the model in TensorFlow SavedModel format under a fixed directory name.

    :param model: the model to save.
    :return: the name of the directory the SavedModel was written to.
    """
    export_dir = "linear_regression_model_tf"
    tf.saved_model.save(model, export_dir)
    return export_dir


def export_model_onnx(model):
    """
    Exports the model to ONNX format.

    The model is first written as a SavedModel, converted with the ``tf2onnx.convert``
    command line tool, read back into memory, and both temporary artefacts are removed.

    :param model: the model to export.
    :return: the raw bytes of the exported ONNX model.
    :raises subprocess.CalledProcessError: if the tf2onnx converter exits with a non-zero status.
    """
    # Imported locally so this block does not rely on new file-level imports.
    import subprocess
    import sys

    saved_model_dir = export_model_tf(model)
    onnx_path = "model.onnx"

    try:
        # sys.executable runs the converter in the interpreter that has tf2onnx installed
        # (a bare "python" may resolve to a different one), and check=True reports a failed
        # conversion directly instead of a later missing-file error.
        subprocess.run(
            [
                sys.executable, "-m", "tf2onnx.convert",
                "--saved-model", saved_model_dir,
                "--output", onnx_path,
            ],
            check=True,
        )
        with open(onnx_path, "rb") as f:
            return f.read()
    finally:
        # Remove the temporary artefacts whether or not the conversion succeeded.
        shutil.rmtree(saved_model_dir, ignore_errors=True)
        if os.path.exists(onnx_path):
            os.remove(onnx_path)


def export_model_surml(model):
    """
    Wraps the model in a SurMlFile configured for the HOUSE_LINEAR dataset.

    :param model: the model to export.
    :return: the SurMlFile with its columns, z_score normalisers and output registered.
    """
    from surrealml import SurMlFile, Engine

    surml_file = SurMlFile(model=model, name="linear", inputs=HOUSE_LINEAR["inputs"], engine=Engine.TENSORFLOW)

    feature_columns = ("squarefoot", "num_floors")

    # Register every input column first, then attach the normalisers in the same order.
    for column in feature_columns:
        surml_file.add_column(column)
    for column in feature_columns:
        surml_file.add_normaliser(column, "z_score", HOUSE_LINEAR[column].mean(), HOUSE_LINEAR[column].std())

    surml_file.add_output("house_price", "z_score", HOUSE_LINEAR["outputs"].mean(), HOUSE_LINEAR["outputs"].std())
    return surml_file
7 changes: 6 additions & 1 deletion surrealml/surml_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from typing import Optional

from surrealml.engine import Engine, SklearnOnnxAdapter, TorchOnnxAdapter
from surrealml.engine import Engine, SklearnOnnxAdapter, TorchOnnxAdapter, TensorflowOnnxAdapter
from surrealml.rust_adapter import RustAdapter


Expand Down Expand Up @@ -49,6 +49,11 @@ def _cache_model(self) -> Optional[str]:
model=self.model,
inputs=self.inputs
)
elif self.engine == Engine.TENSORFLOW:
raw_file_path: str = TensorflowOnnxAdapter.save_model_to_onnx(
model=self.model,
inputs=self.inputs
)
else:
raise ValueError(f"Engine {self.engine} not supported")
return RustAdapter.pass_raw_model_into_rust(raw_file_path)
Expand Down
Loading

0 comments on commit 9f2a7bf

Please sign in to comment.