Skip to content

Commit

Permalink
Slight Resizing Optimizations for preprocess_imagesπŸƒβ€β™‚οΈ (#190)
Browse files Browse the repository at this point in the history
* Removing ndarray from DB

* Attempting to reduce number of clones during preprocessing

* Adding tracing for more viz

* Attempting speedup with fast_resize

* Adding notes for providers
  • Loading branch information
deven96 authored Jan 29, 2025
1 parent c288b19 commit 7f7987b
Show file tree
Hide file tree
Showing 14 changed files with 168 additions and 134 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ services:
```

### Execution Providers (Ahnlich AI)

`CUDA`: Only supports CUDA >= v12; you may need to run `sudo apt install libcudnn9-dev-cuda-12` to install cuDNN.
`CoreML (Apple)`: Not advised for NLP models, as their typically large dimensionality performs poorly on CoreML.

### Contributing

View [contribution guide](CONTRIBUTING.md)
31 changes: 31 additions & 0 deletions ahnlich/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion ahnlich/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async-trait = "0.1"
serde = { version = "1.0.*", features = ["derive", "rc"] }
bincode = "1.3.3"
ndarray = { version = "0.16.1", features = ["serde", "rayon"] }
image = "0.25.2"
image = "0.25.5"
serde_json = "1.0.116"
itertools = "0.10.0"
clap = { version = "4.5.4", features = ["derive"] }
Expand Down
1 change: 1 addition & 0 deletions ahnlich/ai/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dirs = "5.0.1"
ort = { version = "=2.0.0-rc.5", features = [
"ndarray",
] }
fast_image_resize = { version = "5.1.1", features = ["rayon"]}
ort-sys = "=2.0.0-rc.8"
moka = { version = "0.12.8", features = ["future", "sync"] }
tracing-opentelemetry.workspace = true
Expand Down
225 changes: 109 additions & 116 deletions ahnlich/ai/src/engine/ai/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,30 @@ use crate::engine::ai::providers::ProviderTrait;
use crate::error::AIProxyError;
use ahnlich_types::ai::ExecutionProvider;
use ahnlich_types::{ai::AIStoreInputType, keyval::StoreKey};
use image::{DynamicImage, GenericImageView, ImageFormat, ImageReader};
use fast_image_resize::images::Image;
use fast_image_resize::images::ImageRef;
use fast_image_resize::FilterType;
use fast_image_resize::PixelType;
use fast_image_resize::ResizeAlg;
use fast_image_resize::ResizeOptions;
use fast_image_resize::Resizer;
use image::imageops;
use image::ImageReader;
use image::RgbImage;
use ndarray::{Array, Ix3};
use ndarray::{ArrayView, Ix4};
use nonzero_ext::nonzero;
use serde::de::Error as DeError;
use serde::ser::Error as SerError;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::io::Cursor;
use std::num::NonZeroUsize;
use std::path::PathBuf;
use strum::Display;
use tokenizers::Encoding;

static CHANNELS: Lazy<u8> = Lazy::new(|| image::ColorType::Rgb8.channel_count());

#[derive(Display, Debug, Serialize, Deserialize)]
pub enum ModelType {
Text {
Expand Down Expand Up @@ -246,31 +256,71 @@ pub enum ModelInput {
Images(Array<f32, Ix4>),
}

#[derive(Debug, Clone)]
pub struct ImageArray {
#[derive(Debug)]
pub struct OnnxTransformResult {
array: Array<f32, Ix3>,
image: DynamicImage,
image_format: ImageFormat,
onnx_transformed: bool,
}

impl ImageArray {
pub fn try_new(bytes: Vec<u8>) -> Result<Self, AIProxyError> {
let img_reader = ImageReader::new(Cursor::new(&bytes))
.with_guessed_format()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?;
impl OnnxTransformResult {
pub fn view(&self) -> ArrayView<f32, Ix3> {
self.array.view()
}

let image_format = &img_reader
.format()
.ok_or(AIProxyError::ImageBytesDecodeError)?;
pub fn image_dim(&self) -> (NonZeroUsize, NonZeroUsize) {
let shape = self.array.shape();
(
NonZeroUsize::new(shape[2]).expect("Array columns should be non zero"),
NonZeroUsize::new(shape[1]).expect("Array channels should be non zero"),
)
}
}

let image = img_reader
.decode()
impl TryFrom<ImageArray> for OnnxTransformResult {
    type Error = AIProxyError;

    /// Converts a decoded RGB image into the axis order ONNX models expect,
    /// i.e. from [rows, columns, channels] to [channels, rows, columns].
    #[tracing::instrument(skip_all)]
    fn try_from(value: ImageArray) -> Result<Self, Self::Error> {
        let rgb = value.image;
        let dims = (
            rgb.height() as usize,
            rgb.width() as usize,
            *CHANNELS as usize,
        );
        // Reinterpret the flat pixel buffer as an (H, W, C) array of f32.
        let mut pixels = Array::from_shape_vec(dims, rgb.into_raw())
            .map_err(|e| AIProxyError::ImageArrayToNdArrayError {
                message: format!("Error running onnx transform {e}"),
            })?
            .mapv(f32::from);
        // (H, W, C) -> (H, C, W) -> (C, H, W); only strides change, no copy.
        pixels.swap_axes(1, 2);
        pixels.swap_axes(0, 1);
        Ok(Self { array: pixels })
    }
}

/// A decoded image held as interleaved RGB8 pixels, ready for resizing,
/// cropping, and conversion into an ONNX input tensor.
#[derive(Debug)]
pub struct ImageArray {
    // Always RGB8: construction converts any decoded format via into_rgb8().
    image: RgbImage,
}

impl TryFrom<&[u8]> for ImageArray {
type Error = AIProxyError;

#[tracing::instrument(skip_all)]
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
let img_reader = ImageReader::new(Cursor::new(value))
.with_guessed_format()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?;

// Always convert to RGB8 format
// https://github.com/Anush008/fastembed-rs/blob/cea92b6c8b877efda762393848d1c449a4eea126/src/image_embedding/utils.rs#L198
let image: DynamicImage = image.to_owned().into_rgb8().into();
let image = img_reader
.decode()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?
.into_rgb8();

let (width, height) = image.dimensions();

if width == 0 || height == 0 {
Expand All @@ -279,116 +329,59 @@ impl ImageArray {
height: height as usize,
});
}

let channels = &image.color().channel_count();
let shape = (height as usize, width as usize, *channels as usize);
let array = Array::from_shape_vec(shape, image.clone().into_bytes())
.map_err(|_| AIProxyError::ImageBytesDecodeError)?
.mapv(f32::from);

Ok(ImageArray {
array,
image,
image_format: image_format.to_owned(),
onnx_transformed: false,
})
}

// Swapping axes from [rows, columns, channels] to [channels, rows, columns] for ONNX
pub fn onnx_transform(&mut self) {
if self.onnx_transformed {
return;
}
self.array.swap_axes(1, 2);
self.array.swap_axes(0, 1);
self.onnx_transformed = true;
}

pub fn view(&self) -> ArrayView<f32, Ix3> {
self.array.view()
Ok(Self { image })
}
}

pub fn get_bytes(&self) -> Result<Vec<u8>, AIProxyError> {
let mut buffer = Cursor::new(Vec::new());
let _ = &self
.image
.write_to(&mut buffer, self.image_format)
.map_err(|_| AIProxyError::ImageBytesEncodeError)?;
let bytes = buffer.into_inner();
Ok(bytes)
impl ImageArray {
fn array_view(&self) -> ArrayView<u8, Ix3> {
let shape = (
self.image.height() as usize,
self.image.width() as usize,
*CHANNELS as usize,
);
let raw_bytes = self.image.as_raw();
ArrayView::from_shape(shape, raw_bytes).expect("Image bytes decode error")
}

#[tracing::instrument(skip(self))]
pub fn resize(
&self,
&mut self,
width: u32,
height: u32,
filter: Option<image::imageops::FilterType>,
) -> Result<Self, AIProxyError> {
let filter_type = filter.unwrap_or(image::imageops::FilterType::CatmullRom);
let resized_img = self.image.resize_exact(width, height, filter_type);
let channels = resized_img.color().channel_count();
let shape = (height as usize, width as usize, channels as usize);

let flattened_pixels = resized_img.clone().into_bytes();
let array = Array::from_shape_vec(shape, flattened_pixels)
.map_err(|_| AIProxyError::ImageResizeError)?
.mapv(f32::from);
Ok(ImageArray {
array,
image: resized_img,
image_format: self.image_format,
onnx_transformed: false,
})
// Create container for data of destination image
let (width, height) = self.image.dimensions();
let mut dest_image = Image::new(width, height, PixelType::U8x3);
let mut resizer = Resizer::new();
resizer
.resize(
&ImageRef::new(width, height, self.image.as_raw(), PixelType::U8x3)
.map_err(|e| AIProxyError::ImageResizeError(e.to_string()))?,
&mut dest_image,
&ResizeOptions::new().resize_alg(ResizeAlg::Convolution(FilterType::CatmullRom)),
)
.map_err(|e| AIProxyError::ImageResizeError(e.to_string()))?;
let resized_img = RgbImage::from_raw(width, height, dest_image.into_vec())
.expect("Could not get image after resizing");
Ok(ImageArray { image: resized_img })
}

pub fn crop(&self, x: u32, y: u32, width: u32, height: u32) -> Result<Self, AIProxyError> {
let cropped_img = self.image.crop_imm(x, y, width, height);
let channels = cropped_img.color().channel_count();
let shape = (height as usize, width as usize, channels as usize);

let flattened_pixels = cropped_img.clone().into_bytes();
let array = Array::from_shape_vec(shape, flattened_pixels)
.map_err(|_| AIProxyError::ImageCropError)?
.mapv(f32::from);
Ok(ImageArray {
array,
image: cropped_img,
image_format: self.image_format,
onnx_transformed: false,
})
}

pub fn image_dim(&self) -> (NonZeroUsize, NonZeroUsize) {
let shape = self.array.shape();
match self.onnx_transformed {
true => (
NonZeroUsize::new(shape[2]).expect("Array columns should be non-zero"),
NonZeroUsize::new(shape[1]).expect("Array channels should be non-zero"),
), // (width, channels)
false => (
NonZeroUsize::new(shape[1]).expect("Array columns should be non-zero"),
NonZeroUsize::new(shape[0]).expect("Array rows should be non-zero"),
), // (width, height)
}
}
}
#[tracing::instrument(skip(self))]
pub fn crop(&mut self, x: u32, y: u32, width: u32, height: u32) -> Result<Self, AIProxyError> {
let cropped_img = imageops::crop(&mut self.image, x, y, width, height).to_image();

impl Serialize for ImageArray {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_bytes(&self.get_bytes().map_err(S::Error::custom)?)
Ok(ImageArray { image: cropped_img })
}
}

impl<'de> Deserialize<'de> for ImageArray {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let bytes: Vec<u8> = Deserialize::deserialize(deserializer)?;
ImageArray::try_new(bytes).map_err(D::Error::custom)
/// Returns the (width, height) of the image as non-zero sizes.
///
/// Reads the dimensions directly from the decoded image instead of
/// materialising an ndarray view first — the view carried no extra
/// information here and its construction was pure overhead.
pub fn image_dim(&self) -> (NonZeroUsize, NonZeroUsize) {
    let (width, height) = self.image.dimensions();
    (
        NonZeroUsize::new(width as usize).expect("Array columns should be non-zero"),
        NonZeroUsize::new(height as usize).expect("Array rows should be non-zero"),
    )
}
}

Expand Down
8 changes: 4 additions & 4 deletions ahnlich/ai/src/engine/ai/providers/processors/center_crop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::engine::ai::providers::processors::{
Preprocessor, PreprocessorData, CONV_NEXT_FEATURE_EXTRACTOR_CENTER_CROP_THRESHOLD,
};
use crate::error::AIProxyError;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use rayon::iter::{IntoParallelIterator, ParallelIterator};

pub struct CenterCrop {
crop_size: (u32, u32), // (width, height)
Expand Down Expand Up @@ -98,18 +98,18 @@ impl CenterCrop {
}

impl Preprocessor for CenterCrop {
#[tracing::instrument(skip_all)]
fn process(&self, data: PreprocessorData) -> Result<PreprocessorData, AIProxyError> {
match data {
PreprocessorData::ImageArray(image_array) => {
let processed = image_array
.par_iter()
.map(|image| {
.into_par_iter()
.map(|mut image| {
let (width, height) = image.image_dim();
let width = width.get() as u32;
let height = height.get() as u32;
let (crop_width, crop_height) = self.crop_size;
if crop_width == width && crop_height == height {
let image = image.to_owned();
Ok(image)
} else if crop_width <= width || crop_height <= height {
let x = (width - crop_width) / 2;
Expand Down
Loading

0 comments on commit 7f7987b

Please sign in to comment.