From c739e3ef7a00d2b43d67eb0c166543de3d439ddb Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Sat, 16 Mar 2024 23:29:30 -0700 Subject: [PATCH] Initial commit for adding micro benchmarks in K-NN Signed-off-by: Navneet Verma --- build.gradle | 2 +- jni/CMakeLists.txt | 2 +- .../org_opensearch_knn_jni_FaissService.h | 3 + jni/src/faiss_wrapper.cpp | 2 +- .../org_opensearch_knn_jni_FaissService.cpp | 18 +++ micro-benchmarks/README.md | 91 ++++++++++++++ micro-benchmarks/build.gradle | 66 ++++++++++ .../knn/TransferVectorsBenchmarks.java | 85 +++++++++++++ .../src/main/resources/log4j2.properties | 19 +++ settings.gradle | 1 + .../org/opensearch/knn/jni/FaissService.java | 9 +- .../org/opensearch/knn/jni/JNIService.java | 4 + .../knn/jni/TransferVectorsBenchmarksIT.java | 113 ++++++++++++++++++ 13 files changed, 411 insertions(+), 4 deletions(-) create mode 100644 micro-benchmarks/README.md create mode 100644 micro-benchmarks/build.gradle create mode 100644 micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java create mode 100644 micro-benchmarks/src/main/resources/log4j2.properties create mode 100644 src/test/java/org/opensearch/knn/jni/TransferVectorsBenchmarksIT.java diff --git a/build.gradle b/build.gradle index e1b191ec2..bccbae33e 100644 --- a/build.gradle +++ b/build.gradle @@ -306,7 +306,7 @@ task cmakeJniLib(type:Exec) { commandLine 'cmake', '.', "-G", "Unix Makefiles", "-DKNN_PLUGIN_VERSION=${opensearch_version}", "-DBLAS_LIBRARIES=$rootDir\\src\\main\\resources\\windowsDependencies\\libopenblas.dll", "-DLAPACK_LIBRARIES=$rootDir\\src\\main\\resources\\windowsDependencies\\libopenblas.dll", "-DSIMD_ENABLED=${simd_enabled}" } else { - commandLine 'cmake', '.', "-DKNN_PLUGIN_VERSION=${opensearch_version}", "-DSIMD_ENABLED=${simd_enabled}" + commandLine '/usr/local/bin/cmake', '.', "-DKNN_PLUGIN_VERSION=${opensearch_version}", "-DSIMD_ENABLED=${simd_enabled}" } } diff --git a/jni/CMakeLists.txt b/jni/CMakeLists.txt index 
60321ed1b..6123cf1c3 100644 --- a/jni/CMakeLists.txt +++ b/jni/CMakeLists.txt @@ -155,7 +155,7 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S find_package(OpenMP REQUIRED) find_package(ZLIB REQUIRED) find_package(BLAS REQUIRED) - enable_language(Fortran) + #enable_language(Fortran) find_package(LAPACK REQUIRED) # Check if faiss exists diff --git a/jni/include/org_opensearch_knn_jni_FaissService.h b/jni/include/org_opensearch_knn_jni_FaissService.h index 64a858f84..cfaea8708 100644 --- a/jni/include/org_opensearch_knn_jni_FaissService.h +++ b/jni/include/org_opensearch_knn_jni_FaissService.h @@ -122,6 +122,9 @@ JNIEXPORT jbyteArray JNICALL Java_org_opensearch_knn_jni_FaissService_trainIndex JNIEXPORT jlong JNICALL Java_org_opensearch_knn_jni_FaissService_transferVectors (JNIEnv *, jclass, jlong, jobjectArray); +JNIEXPORT jlong JNICALL Java_org_opensearch_knn_jni_FaissService_transferVectorsV2 + (JNIEnv *, jclass, jlong, jobjectArray); + /* * Class: org_opensearch_knn_jni_FaissService * Method: freeVectors diff --git a/jni/src/faiss_wrapper.cpp b/jni/src/faiss_wrapper.cpp index a7075740e..b961d611f 100644 --- a/jni/src/faiss_wrapper.cpp +++ b/jni/src/faiss_wrapper.cpp @@ -122,7 +122,7 @@ void knn_jni::faiss_wrapper::CreateIndex(knn_jni::JNIUtilInterface * jniUtil, JN // Create faiss index jobject indexDescriptionJ = knn_jni::GetJObjectFromMapOrThrow(parametersCpp, knn_jni::INDEX_DESCRIPTION); std::string indexDescriptionCpp(jniUtil->ConvertJavaObjectToCppString(env, indexDescriptionJ)); - + std::cout<<"Navneet "< indexWriter; indexWriter.reset(faiss::index_factory(dim, indexDescriptionCpp.c_str(), metric)); diff --git a/jni/src/org_opensearch_knn_jni_FaissService.cpp b/jni/src/org_opensearch_knn_jni_FaissService.cpp index c81f23a62..3d9624c25 100644 --- a/jni/src/org_opensearch_knn_jni_FaissService.cpp +++ b/jni/src/org_opensearch_knn_jni_FaissService.cpp @@ -191,6 +191,24 @@ JNIEXPORT jlong JNICALL 
Java_org_opensearch_knn_jni_FaissService_transferVectors return (jlong) vect; } +JNIEXPORT jlong JNICALL Java_org_opensearch_knn_jni_FaissService_transferVectorsV2(JNIEnv * env, jclass cls, +jlong vectorsPointerJ, + jobjectArray vectorsJ) +{ + std::vector *vect; + if ((long) vectorsPointerJ == 0) { + vect = new std::vector; + } else { + vect = reinterpret_cast*>(vectorsPointerJ); + } + + int dim = jniUtil.GetInnerDimensionOf2dJavaFloatArray(env, vectorsJ); + auto dataset = jniUtil.Convert2dJavaObjectArrayToCppFloatVector(env, vectorsJ, dim); + vect->insert(vect->end(), dataset.begin(), dataset.end()); + + return (jlong) vect; +} + JNIEXPORT void JNICALL Java_org_opensearch_knn_jni_FaissService_freeVectors(JNIEnv * env, jclass cls, jlong vectorsPointerJ) { diff --git a/micro-benchmarks/README.md b/micro-benchmarks/README.md new file mode 100644 index 000000000..c3306837e --- /dev/null +++ b/micro-benchmarks/README.md @@ -0,0 +1,91 @@ +# OpenSearch K-NN Microbenchmark Suite + +This directory contains the microbenchmark suite of Opensearch K-NN Plugin. It relies on [JMH](http://openjdk.java.net/projects/code-tools/jmh/). + +This module draws a lot of inspiration from [Opensearch benchmarks](https://github.com/opensearch-project/OpenSearch/tree/main/benchmarks). + +## Purpose + +Micro benchmarks are intended to spot performance regressions in performance-critical components. + +The microbenchmark suite is also handy for ad-hoc micro benchmarks but please remove them again before merging your PR. + +## Getting Started + +Just run `gradlew -p micro-benchmarks run` from the project root +directory. It will build all microbenchmarks, execute them and print +the result. + +## Running Microbenchmarks + +Running via an IDE is not supported as the results are meaningless +because we have no control over the JVM running the benchmarks. 
+ +If you want to run a specific benchmark class like, say, +`TransferVectorsBenchmarks`, you can use `--args`: + +``` +gradlew -p micro-benchmarks run --args ' TransferVectorsBenchmarks' +``` + +Everything in the `'` gets sent on the command line to JMH. The leading ` ` +inside the `'`s is important. Without it, parameters are sometimes sent to +gradle. + +## Adding Microbenchmarks + +Before adding a new microbenchmark, make yourself familiar with the JMH API. You can check our existing microbenchmarks and also the +[JMH samples](http://hg.openjdk.java.net/code-tools/jmh/file/tip/jmh-samples/src/main/java/org/openjdk/jmh/samples/). + +In contrast to tests, the actual name of the benchmark class is not relevant to JMH. However, stick to the naming convention and +end the class name of a benchmark with `Benchmark`. To have JMH execute a benchmark, annotate the respective methods with `@Benchmark`. + +## Tips and Best Practices + +To get realistic results, you should exercise care when running benchmarks. Here are a few tips: + +### Do + +* Ensure that the system executing your microbenchmarks has as little load as possible. Shut down every process that can cause unnecessary + runtime jitter. Watch the `Error` column in the benchmark results to see the run-to-run variance. +* Make sure to run enough warmup iterations to get the benchmark into a stable state. If you are unsure, don't change the defaults. +* Avoid CPU migrations by pinning your benchmarks to specific CPU cores. On Linux you can use `taskset`. +* Fix the CPU frequency to prevent Turbo Boost from kicking in and skewing your results. On Linux you can use `cpufreq-set` and the + `performance` CPU governor. +* Vary the problem input size with `@Param`. +* Use the integrated profilers in JMH to dig deeper if benchmark results do not match your hypotheses: + * Add `-prof gc` to the options to check whether the garbage collector runs during a microbenchmark and skews + your results.
If so, try to force a GC between runs (`-gc true`) but watch out for the caveats. + * Add `-prof perf` or `-prof perfasm` (both only available on Linux) to see hotspots. +* Have your benchmarks peer-reviewed. + +### Don't + +* Blindly believe the numbers that your microbenchmark produces. Instead, verify them by measuring, e.g. with `-prof perfasm`. +* Run more threads than your number of CPU cores (in case you run a multi-threaded microbenchmark). +* Look only at the `Score` column and ignore `Error`. Instead, take countermeasures to keep `Error` low / variance explainable. + +## Disassembling + +Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and FCML's `hsdis`. +`perf` is generally available via `apt-get install perf` or `pacman -S perf`. FCML is a little more involved. This worked +on 2020-08-01: + +``` +wget https://github.com/swojtasiak/fcml-lib/releases/download/v1.2.2/fcml-1.2.2.tar.gz +tar xf fcml* +cd fcml* +./configure +make +cd example/hsdis +make +sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so +``` + +If you want to disassemble a single method, do something like this: + +``` +gradlew -p micro-benchmarks run --args ' TransferVectorsBenchmarks -jvmArgs "-XX:+UnlockDiagnosticVMOptions -XX:CompileCommand=print,*.yourMethodName -XX:PrintAssemblyOptions=intel"' +``` + +If you want `perf` to find the hot methods for you, then add `-prof perfasm`.
diff --git a/micro-benchmarks/build.gradle b/micro-benchmarks/build.gradle new file mode 100644 index 000000000..67dd17ba5 --- /dev/null +++ b/micro-benchmarks/build.gradle @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +import org.opensearch.gradle.info.BuildParams + +apply plugin: 'opensearch.build' +apply plugin: 'application' +apply plugin: 'java' +apply plugin: 'io.freefair.lombok' + +assemble.enabled = false + +application { + mainClass = 'org.openjdk.jmh.Main' +} + +test.enabled = false + +repositories { + mavenLocal() + maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" } + mavenCentral() + maven { url "https://plugins.gradle.org/m2/" } +} + +dependencies { + // This will take root project as the dependency + api(project(':')) + api "org.openjdk.jmh:jmh-core:$versions.jmh" + annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh" + // Dependencies of JMH + runtimeOnly 'net.sf.jopt-simple:jopt-simple:5.0.4' + runtimeOnly 'org.apache.commons:commons-math3:3.6.1' +} + +// enable the JMH's BenchmarkProcessor to generate the final benchmark classes +// needs to be added separately otherwise Gradle will quote it and javac will fail +compileJava.options.compilerArgs.addAll(["-processor", "org.openjdk.jmh.generators.BenchmarkProcessor"]) + + +run { + // This is required for C++ code + systemProperty "java.library.path", "$rootDir/jni/release" + executable = "${BuildParams.runtimeJavaHome}/bin/java" +} + + +// No licenses for our benchmark deps (we don't ship benchmarks) +tasks.named("dependencyLicenses").configure { it.enabled = false } +dependenciesInfo.enabled = false + +thirdPartyAudit.ignoreViolations( + // these classes intentionally use JDK internal API (and this is ok since the project is maintained by Oracle employees) + 'org.openjdk.jmh.util.Utils' +) + +spotless { + java { + // IDEs can sometimes run annotation processors that leave files in + // here, 
causing Spotless to complain. Even though this path ought not + // to exist, exclude it anyway in order to avoid spurious failures. + targetExclude 'src/main/generated/**/*.java' + } +} + diff --git a/micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java b/micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java new file mode 100644 index 000000000..604b2c6d6 --- /dev/null +++ b/micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.opensearch.knn.jni.JNIService; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 1, timeUnit = TimeUnit.SECONDS, time = 20) +@Measurement(iterations = 3, timeUnit = TimeUnit.SECONDS, time = 20) +@Fork(1) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.SECONDS) +@State(Scope.Benchmark) +public class TransferVectorsBenchmarks { + private static final Random random = new Random(1212121212); + private static final int TOTAL_NUMBER_OF_VECTOR_TO_BE_TRANSFERRED = 1000000; + 
+ @Param({ "128", "256", "384", "512" }) + private int dimension; + + //@Param({ "1000000", "500000", "100000" }) + @Param({ "500000"}) + private int vectorsPerTransfer; + + private List vectorList; + + @Setup(Level.Invocation) + public void setup() { + vectorList = new ArrayList<>(); + for (int i = 0; i < TOTAL_NUMBER_OF_VECTOR_TO_BE_TRANSFERRED; i++) { + vectorList.add(generateRandomVector(dimension)); + } + } + + @Benchmark + public void transferVectors() { + long vectorsAddress = 0; + List vectorToTransfer = new ArrayList<>(); + for (float[] floats : vectorList) { + if (vectorToTransfer.size() == vectorsPerTransfer) { + vectorsAddress = JNIService.transferVectorsV2(vectorsAddress, vectorToTransfer.toArray(new float[][]{})); + vectorToTransfer = new ArrayList<>(); + } + vectorToTransfer.add(floats); + } + if(!vectorToTransfer.isEmpty()) { + vectorsAddress = JNIService.transferVectorsV2(vectorsAddress, vectorToTransfer.toArray(new float[][]{})); + } + + JNIService.freeVectors(vectorsAddress); + } + + private float[] generateRandomVector(int dimensions) { + float[] vector = new float[dimensions]; + for (int i = 0; i < dimensions; i++) { + vector[i] = -500 + (float) random.nextGaussian() * (1000); + } + return vector; + } +} diff --git a/micro-benchmarks/src/main/resources/log4j2.properties b/micro-benchmarks/src/main/resources/log4j2.properties new file mode 100644 index 000000000..2cd74124e --- /dev/null +++ b/micro-benchmarks/src/main/resources/log4j2.properties @@ -0,0 +1,19 @@ +# +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+# + +appender.console.type = Console +appender.console.name = console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %marker %m%n + +# Do not log at all if it is not really critical - we're in a benchmark +rootLogger.level = error +rootLogger.appenderRef.console.ref = console diff --git a/settings.gradle b/settings.gradle index 9056e382e..fd4369d4a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -8,4 +8,5 @@ rootProject.name = 'opensearch-knn' include ":qa" include ":qa:rolling-upgrade" include ":qa:restart-upgrade" +include ":micro-benchmarks" diff --git a/src/main/java/org/opensearch/knn/jni/FaissService.java b/src/main/java/org/opensearch/knn/jni/FaissService.java index 0da6f54ef..00d9385e0 100644 --- a/src/main/java/org/opensearch/knn/jni/FaissService.java +++ b/src/main/java/org/opensearch/knn/jni/FaissService.java @@ -34,10 +34,15 @@ class FaissService { static { AccessController.doPrivileged((PrivilegedAction) () -> { + boolean isFaissAVX2Disabled = true; + try { + isFaissAVX2Disabled = isFaissAVX2Disabled(); + } catch (Exception e) { + } // Even if the underlying system supports AVX2, users can override and disable it by using the // 'knn.faiss.avx2.disabled' setting by setting it to true in the opensearch.yml configuration - if (!isFaissAVX2Disabled() && isAVX2SupportedBySystem()) { + if (!isFaissAVX2Disabled && isAVX2SupportedBySystem()) { System.loadLibrary(KNNConstants.FAISS_AVX2_JNI_LIBRARY_NAME); } else { System.loadLibrary(KNNConstants.FAISS_JNI_LIBRARY_NAME); @@ -181,6 +186,8 @@ public static native KNNQueryResult[] queryIndexWithFilter( */ public static native long transferVectors(long vectorsPointer, float[][] trainingData); + public static native long transferVectorsV2(long vectorsPointer, float[][] trainingData); + /** * Free vectors from memory * diff --git a/src/main/java/org/opensearch/knn/jni/JNIService.java b/src/main/java/org/opensearch/knn/jni/JNIService.java index 
555c2d6a6..081eb9d31 100644 --- a/src/main/java/org/opensearch/knn/jni/JNIService.java +++ b/src/main/java/org/opensearch/knn/jni/JNIService.java @@ -253,4 +253,8 @@ public static long transferVectors(long vectorsPointer, float[][] trainingData) public static void freeVectors(long vectorsPointer) { FaissService.freeVectors(vectorsPointer); } + + public static long transferVectorsV2(long vectorsPointer, float[][] trainingData) { + return FaissService.transferVectorsV2(vectorsPointer, trainingData); + } } diff --git a/src/test/java/org/opensearch/knn/jni/TransferVectorsBenchmarksIT.java b/src/test/java/org/opensearch/knn/jni/TransferVectorsBenchmarksIT.java new file mode 100644 index 000000000..d8b49b342 --- /dev/null +++ b/src/test/java/org/opensearch/knn/jni/TransferVectorsBenchmarksIT.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.knn.jni; + +import org.junit.Assert; +import org.opensearch.core.common.util.CollectionUtils; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +public class TransferVectorsBenchmarksIT extends OpenSearchTestCase { + + private static final Random random = new Random(1212121212); + + private static final int MAX_DATA_SIZE_TO_BE_TRANSFERRED_IN_MB = 100; + private static final int CONVERSION_CONSTANT = 1024; + + private static final List DIMENSIONS_LIST = List.of(1024, 1536, 968, 768, 512, 256, 128); + //List.of(128, 256, 512, 768, 968, 1024, 1536); + + public void test_transferVectorV2Speed() { + final List elapsedTime = new ArrayList<>(); + System.out.println("Detail for Vector transfer based on specific MB: " + MAX_DATA_SIZE_TO_BE_TRANSFERRED_IN_MB); + System.out.println("Dimension, Elapsed Time"); + for (int i = 0; i < DIMENSIONS_LIST.size(); i++) { + System.gc(); + int vectorsPerTransfer = (MAX_DATA_SIZE_TO_BE_TRANSFERRED_IN_MB * CONVERSION_CONSTANT * CONVERSION_CONSTANT) / (DIMENSIONS_LIST.get(i) * Float.BYTES); + elapsedTime.add(transferVectorUtil(DIMENSIONS_LIST.get(i), 1000000, vectorsPerTransfer)); + System.out.println(DIMENSIONS_LIST.get(i) + " , " + elapsedTime.get(i)); + } + Assert.assertTrue(true); + } + + + public void test_transferVectorOfSpecificCount() { + final List elapsedTime = new ArrayList<>(); + int vectorsPerTransfer = 10 * 1000; + System.out.println("Detail for Vector transfer based on specific size: " + vectorsPerTransfer); + System.out.println("Dimension, Elapsed Time"); + for (int i = 0; i < DIMENSIONS_LIST.size(); i++) { + System.gc(); + elapsedTime.add(transferVectorUtil(DIMENSIONS_LIST.get(i), 1000000, vectorsPerTransfer)); + System.out.println(DIMENSIONS_LIST.get(i) + " , " + elapsedTime.get(i)); + } + Assert.assertTrue(true); + } + + public void test_transferVectorSendAllAtOnce() { + final List 
elapsedTime = new ArrayList<>(); + + Collections.reverse(DIMENSIONS_LIST); + + int vectorsPerTransfer = 1000000; + System.out.println("Detail for Vector transfer When all vectors sent at once"); + System.out.println("Dimension, Elapsed Time"); + for (int i = 0; i < DIMENSIONS_LIST.size(); i++) { + System.gc(); + elapsedTime.add(transferVectorUtil(DIMENSIONS_LIST.get(i), 1000000, vectorsPerTransfer)); + System.out.println(DIMENSIONS_LIST.get(i) + " , " + elapsedTime.get(i)); + } + Assert.assertTrue(true); + } + + + private static float[] generateRandomVector(int dimensions) { + float[] vector = new float[dimensions]; + for (int i = 0; i < dimensions; i++) { + vector[i] = -500 + (float) random.nextGaussian() * (1000); + } + return vector; + } + + private double transferVectorUtil(int dimension, int totalNumberOfVectorsToBeTransferred, int vectorsPerTransfer) { + List vectorList = new ArrayList<>(); + long vectorsAddress = 0; + long timeInNano = 0; + + + for (int i = 0; i < totalNumberOfVectorsToBeTransferred; i++) { + vectorList.add(generateRandomVector(dimension)); + if (vectorList.size() == vectorsPerTransfer) { + long startTime = System.nanoTime(); + vectorsAddress = FaissService.transferVectorsV2(vectorsAddress, vectorList.toArray(new float[][]{})); + long endTime = System.nanoTime(); + timeInNano = timeInNano + (endTime - startTime); + vectorList = new ArrayList<>(); + } + } + + if (!CollectionUtils.isEmpty(vectorList)) { + long startTime = System.nanoTime(); + vectorsAddress = FaissService.transferVectorsV2(vectorsAddress, vectorList.toArray(new float[][]{})); + long endTime = System.nanoTime(); + timeInNano = timeInNano + (endTime - startTime); + vectorList = new ArrayList<>(); + } + FaissService.freeVectors(vectorsAddress); + return timeInNano / 1000000000d; + } + +}