From ca43f3d0f1c76834c9ec02c3817fbc81fce544ce Mon Sep 17 00:00:00 2001
From: Thomas Farr
Date: Thu, 20 Feb 2025 10:50:59 +1300
Subject: [PATCH] Add simple pattern tokenizers

Signed-off-by: Thomas Farr
---
Notes (ignored by `git am`; not part of the commit message):
  * Adds two tokenizer schemas, `SimplePatternTokenizer` (type
    `simple_pattern`) and `SimplePatternSplitTokenizer` (type
    `simple_pattern_split`), and references both from the existing
    tokenizer `$ref` list. Each requires only `type`; `pattern` is an
    optional string.
  * NOTE(review): line breaks and YAML indentation in this patch were
    reconstructed from a whitespace-collapsed copy. Run
    `git apply --check` against the target tree before applying.

 CHANGELOG.md                       |  1 +
 spec/schemas/_common.analysis.yaml | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e72765caf..c577d3b3e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -59,6 +59,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Added `POST _plugins/_security/api/internalusers/{username}` response `201` ([#810](https://github.com/opensearch-project/opensearch-api-specification/pull/810))
 - Added `POST /_plugins/_ml/_execute/{algorithm_name}` ([#811](https://github.com/opensearch-project/opensearch-api-specification/pull/811))
 - Added search suggester types ([#817](https://github.com/opensearch-project/opensearch-api-specification/pull/817))
+- Added `SimplePatternTokenizer` and `SimplePatternSplitTokenizer` ([#820](https://github.com/opensearch-project/opensearch-api-specification/pull/820))
 
 ### Removed
 - Removed unsupported `_common.mapping:SourceField`'s `mode` field and associated `_common.mapping:SourceFieldMode` enum ([#652](https://github.com/opensearch-project/opensearch-api-specification/pull/652))
diff --git a/spec/schemas/_common.analysis.yaml b/spec/schemas/_common.analysis.yaml
index 73b2ec4b8..9b71cd090 100644
--- a/spec/schemas/_common.analysis.yaml
+++ b/spec/schemas/_common.analysis.yaml
@@ -1588,6 +1588,8 @@ components:
         - $ref: '#/components/schemas/WhitespaceTokenizer'
         - $ref: '#/components/schemas/KuromojiTokenizer'
         - $ref: '#/components/schemas/PatternTokenizer'
+        - $ref: '#/components/schemas/SimplePatternTokenizer'
+        - $ref: '#/components/schemas/SimplePatternSplitTokenizer'
         - $ref: '#/components/schemas/IcuTokenizer'
         - $ref: '#/components/schemas/SmartcnTokenizer'
     CharGroupTokenizer:
@@ -1831,6 +1833,32 @@ components:
               type: string
           required:
             - type
+    SimplePatternTokenizer:
+      allOf:
+        - $ref: '#/components/schemas/TokenizerBase'
+        - type: object
+          properties:
+            type:
+              type: string
+              enum:
+                - simple_pattern
+            pattern:
+              type: string
+          required:
+            - type
+    SimplePatternSplitTokenizer:
+      allOf:
+        - $ref: '#/components/schemas/TokenizerBase'
+        - type: object
+          properties:
+            type:
+              type: string
+              enum:
+                - simple_pattern_split
+            pattern:
+              type: string
+          required:
+            - type
     SmartcnTokenizer:
       allOf:
         - $ref: '#/components/schemas/TokenizerBase'