diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..f087b429e2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.tar.gz filter=lfs diff=lfs merge=lfs -text diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index ca5e7cc523..7149f5403c 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -430,3 +430,35 @@ original PR with an appropriate label `backport ` is merge run successfully on the PR. For example, if a PR on main needs to be backported to `1.x` branch, add a label `backport 1.x` to the PR and make sure the backport workflow runs on the PR along with other checks. Once this PR is merged to main, the workflow will create a backport PR to the `1.x` branch. + + +## Creating graphs once +1. Run this command to set up the cluster (the required OpenSearch changes are at https://github.com/navneet1v/OpenSearch/tree/build-time): +``` +./gradlew run -PcustomDistributionUrl="/path/to/repo/k-NN/temp/cluster.tar.gz" +``` +2. Disable graph creation: +``` +PUT _cluster/settings +{ + "persistent": { + "knn.create_graphs": false + } +} +``` + +3. Index your data, then force merge the segments down to 1. +4. Enable graph creation: +``` +PUT _cluster/settings +{ + "persistent": { + "knn.create_graphs": true + } +} +``` +5. Call the force merge API with `one_merge=true`: +``` +POST /_forcemerge?one_merge=true +``` +This call creates a new segment, and the graph is generated for that segment.
diff --git a/src/main/java/org/opensearch/knn/index/KNNSettings.java b/src/main/java/org/opensearch/knn/index/KNNSettings.java index 04e50ed9b8..9d87eebebc 100644 --- a/src/main/java/org/opensearch/knn/index/KNNSettings.java +++ b/src/main/java/org/opensearch/knn/index/KNNSettings.java @@ -108,6 +108,16 @@ public class KNNSettings { Setting.Property.Deprecated ); + /** + * Dynamic, node-scoped boolean setting registered under the key {@code knn.create_graphs}; its value is exposed through {@link #canCreateGraphs()}. + * NOTE(review): the default is {@code false} - confirm that graph creation is meant to be off until the setting is enabled. + */ + public static final Setting<Boolean> CREATE_GRAPHS = Setting.boolSetting( + "knn.create_graphs", + false, + NodeScope, Dynamic + ); + /** * M - the number of bi-directional links created for every new element during construction. * Reasonable range for M is 2-100. Higher M work better on datasets with high intrinsic @@ -354,6 +364,10 @@ private Setting getSetting(String key) { return KNN_FAISS_AVX2_DISABLED_SETTING; } + if (CREATE_GRAPHS.getKey().equals(key)) { + return CREATE_GRAPHS; + } + throw new IllegalArgumentException("Cannot find setting by key [" + key + "]"); } @@ -371,7 +385,7 @@ public List> getSettings() { MODEL_INDEX_NUMBER_OF_REPLICAS_SETTING, MODEL_CACHE_SIZE_LIMIT_SETTING, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING, - KNN_FAISS_AVX2_DISABLED_SETTING + KNN_FAISS_AVX2_DISABLED_SETTING, CREATE_GRAPHS ); return Stream.concat(settings.stream(), dynamicCacheSettings.values().stream()).collect(Collectors.toList()); } @@ -513,6 +527,15 @@ public void onIndexModule(IndexModule module) { }); } + /** + * Looks up the current value of {@link #CREATE_GRAPHS} through {@link #state()}. + * + * @return the boolean value currently held for the {@code knn.create_graphs} setting + */ + public static boolean canCreateGraphs() { + return KNNSettings.state().getSettingValue(CREATE_GRAPHS.getKey()); + } + private static String percentageAsString(Integer percentage) { return percentage + "%"; } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumer.java b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumer.java index 57192becac..4cbfa936e6 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumer.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumer.java @@ -108,6 
+108,13 @@ private KNNEngine getKNNEngine(@NonNull FieldInfo field) { public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer, boolean isMerge, boolean isRefresh) throws IOException { + // Read the dynamic setting once so the logged value always matches the decision taken. + final boolean createGraphs = KNNSettings.canCreateGraphs(); + if (createGraphs == false) { + log.info("Not creating graphs as value is : {}", createGraphs); + return; + } + log.info("Creating graphs as value is : {}", createGraphs); // Get values to be indexed BinaryDocValues values = valuesProducer.getBinary(field); KNNCodecUtil.Pair pair = KNNCodecUtil.getFloats(values); diff --git a/temp/cluster.tar.gz b/temp/cluster.tar.gz new file mode 100644 index 0000000000..3b8b060a20 --- /dev/null +++ b/temp/cluster.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4b8dec5130474c314c46413075075ff1141716cb8a4970cb8bad0490c7d026b +size 239468188