diff --git a/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/PatternsWindowFunctionBenchmark.java b/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/PatternsWindowFunctionBenchmark.java new file mode 100644 index 0000000000..a4ec4e1c55 --- /dev/null +++ b/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/PatternsWindowFunctionBenchmark.java @@ -0,0 +1,98 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.operator.predicate; + +import static org.opensearch.sql.data.model.ExprTupleValue.fromExprValueMap; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.opensearch.sql.common.patterns.BrainLogParser; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.expression.window.frame.BufferPatternRowsWindowFrame; +import org.opensearch.sql.expression.window.frame.CurrentRowWindowFrame; +import org.opensearch.sql.expression.window.frame.WindowFrame; + +@Warmup(iterations = 1) +@Measurement(iterations = 3) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Thread) +@Fork(value = 1) +public class PatternsWindowFunctionBenchmark { + + private static final String TEST_MESSAGE_1 = + "12.132.31.17 - - [2018-07-22T05:36:25.812Z] \\\"GET /opensearch HTTP/1.1\\\" 200 9797" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_2 = + "129.138.185.193 - - [2018-07-22T05:39:39.668Z] \\\"GET /opensearch HTTP/1.1\\\" 404 9920" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_3 = + "240.58.187.246 - - [2018-07-22T06:02:46.006Z] \\\"GET /opensearch HTTP/1.1\\\" 500 6936" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko)" + + " Chrome/11.0.696.50 Safari/534.24\\\""; + + private PeekingIterator tuples; + private final BufferPatternRowsWindowFrame bufferWindowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()), + new BrainLogParser(), + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + @Benchmark + public void testSimplePattern() { + CurrentRowWindowFrame windowFrame = + new CurrentRowWindowFrame(new WindowDefinition(ImmutableList.of(), ImmutableList.of())); + + run(windowFrame, 
DSL.simple_pattern(DSL.ref("message", STRING))); + } + + @Benchmark + public void testBrain() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()), + new BrainLogParser(), + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + run(windowFrame, DSL.brain(DSL.ref("message", STRING))); + } + + private void run(WindowFrame windowFrame, Expression windowFunction) { + tuples = + Iterators.peekingIterator( + Iterators.forArray( + tuple(TEST_MESSAGE_1), tuple(TEST_MESSAGE_2), tuple(TEST_MESSAGE_3))); + while (tuples.hasNext() || windowFrame.hasNext()) { + windowFrame.load(tuples); + windowFunction.valueOf(windowFrame); + } + } + + private ExprValue tuple(String message) { + return fromExprValueMap(ImmutableMap.of("message", new ExprStringValue(message))); + } +} diff --git a/common/src/main/java/org/opensearch/sql/common/patterns/BrainLogParser.java b/common/src/main/java/org/opensearch/sql/common/patterns/BrainLogParser.java new file mode 100644 index 0000000000..4f79d0a47f --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/patterns/BrainLogParser.java @@ -0,0 +1,443 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.patterns; + +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.OptionalLong; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** Log parser Brain algorithm implementation. See: https://ieeexplore.ieee.org/document/10109145 */ +public class BrainLogParser { + + private static final String VARIABLE_DENOTER = "<*>"; + private static final Map DEFAULT_FILTER_PATTERN_VARIABLE_MAP = + new LinkedHashMap<>(); + + static { + // IP + DEFAULT_FILTER_PATTERN_VARIABLE_MAP.put( + Pattern.compile("(/|)([0-9]+\\.){3}[0-9]+(:[0-9]+|)(:|)"), "<*IP*>"); + // Simple ISO date and time + DEFAULT_FILTER_PATTERN_VARIABLE_MAP.put( + Pattern.compile( + "(\\d{4}-\\d{2}-\\d{2})[T" + + " ]?(\\d{2}:\\d{2}:\\d{2})(\\.\\d{3})?(Z|([+-]\\d{2}:?\\d{2}))?"), + "<*DATETIME*>"); + // Hex Decimal, letters followed by digits, float numbers + DEFAULT_FILTER_PATTERN_VARIABLE_MAP.put( + Pattern.compile( + "((0x|0X)[0-9a-fA-F]+)|[a-zA-Z]+\\d+|([+-]?(?!\\d{3}$)\\d{4,}(\\.\\d*)?|\\.\\d+)"), + VARIABLE_DENOTER); + // generic number surrounded by non-alphanumeric + DEFAULT_FILTER_PATTERN_VARIABLE_MAP.put( + Pattern.compile("(?<=[^A-Za-z0-9 ])(-?\\+?\\d+)(?=[^A-Za-z0-9])"), VARIABLE_DENOTER); + } + + private static final List DEFAULT_DELIMITERS = List.of(",", "+"); + // counting frequency will be grouped by composite of position and token string + private static final String POSITIONED_TOKEN_KEY_FORMAT = "%d-%s"; + // Token set will be grouped by composite of tokens length per log message, word combination + // candidate and token position. 
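+  // Illustrative key values (example data, not from this change): the token "blk_<*>" at
+  // position 2 of a message with 9 content tokens and group candidate "8,3" would be keyed as
+  //   String.format(Locale.ROOT, POSITIONED_TOKEN_KEY_FORMAT, 2, "blk_<*>")  ->  "2-blk_<*>"
+  //   String.format(Locale.ROOT, GROUP_TOKEN_SET_KEY_FORMAT, 9, "8,3", 2)    ->  "9-8,3-2"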
+  private static final String GROUP_TOKEN_SET_KEY_FORMAT = "%d-%s-%d";
+  // By default, the algorithm treats a position whose group has at least this many distinct
+  // tokens as a variable token
+  private static final int DEFAULT_VARIABLE_COUNT_THRESHOLD = 5;
+  /*
+   * If the threshold percentage is 0, the algorithm treats the longest word combination as the
+   * group root, no matter what its frequency is. Otherwise, the longest word combination whose
+   * frequency exceeds (highest token frequency of the log) * (threshold percentage) is selected.
+   */
+  private static final float DEFAULT_FREQUENCY_THRESHOLD_PERCENTAGE = 0.3f;
+
+  private final Map<String, Long> tokenFreqMap;
+  private final Map<String, Set<String>> groupTokenSetMap;
+  private final Map<String, String> logIdGroupCandidateMap;
+  private final int variableCountThreshold;
+  private final float thresholdPercentage;
+  private final Map<Pattern, String> filterPatternVariableMap;
+  private final List<String> delimiters;
+
+  /** Creates a new Brain log parser with default parameters. */
+  public BrainLogParser() {
+    this(
+        DEFAULT_VARIABLE_COUNT_THRESHOLD,
+        DEFAULT_FREQUENCY_THRESHOLD_PERCENTAGE,
+        DEFAULT_FILTER_PATTERN_VARIABLE_MAP,
+        DEFAULT_DELIMITERS);
+  }
+
+  /**
+   * Creates a new Brain log parser with overridden variableCountThreshold and thresholdPercentage.
+   *
+   * @param variableCountThreshold the threshold used to decide whether a low-frequency token is a
+   *     variable
+   * @param thresholdPercentage the threshold percentage used to decide which frequency is the
+   *     representative frequency per log message
+   */
+  public BrainLogParser(int variableCountThreshold, float thresholdPercentage) {
+    this(
+        variableCountThreshold,
+        thresholdPercentage,
+        DEFAULT_FILTER_PATTERN_VARIABLE_MAP,
+        DEFAULT_DELIMITERS);
+  }
+
+  /**
+   * Creates a new Brain log parser with overridden filter patterns and delimiters.
+   *
+   * @param filterPatternVariableMap a map from regex pattern to the variable denoter that replaces
+   *     matches of that pattern; a LinkedHashMap is recommended so that patterns are applied in
+   *     order
+   * @param delimiters a list of delimiters to be replaced with a space after the regex replacement
+   */
+  public BrainLogParser(Map<Pattern, String> filterPatternVariableMap, List<String> delimiters) {
+    this(
+        DEFAULT_VARIABLE_COUNT_THRESHOLD,
+        DEFAULT_FREQUENCY_THRESHOLD_PERCENTAGE,
+        filterPatternVariableMap,
+        delimiters);
+  }
+
+  /**
+   * Creates a new Brain log parser with overridden variableCountThreshold and thresholdPercentage
+   * as well as overridden filter patterns and delimiters.
+   *
+   * @param variableCountThreshold the threshold used to decide whether a low-frequency token is a
+   *     variable
+   * @param thresholdPercentage the threshold percentage used to decide which frequency is the
+   *     representative frequency per log message
+   * @param filterPatternVariableMap a map from regex pattern to the variable denoter that replaces
+   *     matches of that pattern; a LinkedHashMap is recommended so that patterns are applied in
+   *     order
+   * @param delimiters a list of delimiters to be replaced with a space after the regex replacement
+   */
+  public BrainLogParser(
+      int variableCountThreshold,
+      float thresholdPercentage,
+      Map<Pattern, String> filterPatternVariableMap,
+      List<String> delimiters) {
+    if (thresholdPercentage < 0.0f || thresholdPercentage > 1.0f) {
+      throw new IllegalArgumentException("Threshold percentage must be between 0.0 and 1.0");
+    }
+    this.tokenFreqMap = new HashMap<>();
+    this.groupTokenSetMap = new HashMap<>();
+    this.logIdGroupCandidateMap = new HashMap<>();
+    this.variableCountThreshold = variableCountThreshold;
+    this.thresholdPercentage = thresholdPercentage;
+    this.filterPatternVariableMap = filterPatternVariableMap;
+    this.delimiters = delimiters;
+  }
+
+  /**
+   * Preprocesses a single log message with its logId.
+   *
+   * @param logMessage log message body of one log
+   * @param logId logId of the log
+   * @return list of tokens obtained by splitting the preprocessed log message
+   */
+  public List<String> preprocess(String logMessage, String logId) {
+    if (logMessage == null || logId == null) {
+      throw new IllegalArgumentException("log message or logId must not be null");
+    }
+    // Match each filter regex and replace it with its variable denoter, in order
+    for (Map.Entry<Pattern, String> patternVariablePair : filterPatternVariableMap.entrySet()) {
+      logMessage =
+          patternVariablePair
+              .getKey()
+              .matcher(logMessage)
+              .replaceAll(patternVariablePair.getValue());
+    }
+
+    for (String delimiter : delimiters) {
+      logMessage = logMessage.replace(delimiter, " ");
+    }
+
+    // Append logId/docId to the end of the split tokens
+    logMessage = logMessage.trim() + " " + logId;
+
+    return Arrays.asList(logMessage.split("\\s+"));
+  }
+
+  /**
+   * Counts token frequency per position/index in the token list.
+   *
+   * @param tokens list of tokens from a preprocessed log message
+   */
+  public void processTokenHistogram(List<String> tokens) {
+    // Ignore the last element since it is the appended logId
+    for (int i = 0; i < tokens.size() - 1; i++) {
+      String tokenKey = String.format(Locale.ROOT, POSITIONED_TOKEN_KEY_FORMAT, i, tokens.get(i));
+      tokenFreqMap.compute(tokenKey, (k, v) -> v == null ? 1 : v + 1);
+    }
+  }
+
+  /**
+   * Preprocesses all lines of log messages. The index of each message within the list is used as
+   * its logId.
+   *
+   * @param logMessages list of log messages
+   * @return list of token lists
+   */
+  public List<List<String>> preprocessAllLogs(List<String> logMessages) {
+    List<List<String>> preprocessedLogs = new ArrayList<>();
+
+    for (int i = 0; i < logMessages.size(); i++) {
+      String logId = String.valueOf(i);
+      List<String> tokens = this.preprocess(logMessages.get(i), logId);
+      preprocessedLogs.add(tokens);
+      this.processTokenHistogram(tokens);
+    }
+
+    this.calculateGroupTokenFreq(preprocessedLogs);
+
+    return preprocessedLogs;
+  }
+
+  /**
+   * The second processing step: calculate the initial token groups based on the previous token
+   * histogram. Each group is represented by the representative word combination of its log
+   * message, which is usually the longest word combination whose frequency is above the designed
+   * threshold.
+   *
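+   * <p>Worked example (numbers are illustrative): for a message with four content tokens whose
+   * positional frequencies are {3, 1, 1, 1}, the word occurrences map is {3 -> 1, 1 -> 3}. The
+   * combinations sort to [(freq=1, count=3), (freq=3, count=1)] because sorting is by
+   * same-frequency count descending, and with the default threshold percentage of 0.3 the
+   * candidate is (freq=1, count=3) since 1 > 3 * 0.3, so the group candidate string is "1,3".
+   *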

Within initial group, new group level token set per position is counted for final log + * pattern calculation + * + * @param preprocessedLogs preprocessed list of log messages + */ + void calculateGroupTokenFreq(List> preprocessedLogs) { + for (List tokens : preprocessedLogs) { + Map wordOccurrences = this.getWordOccurrences(tokens); + List sortedWordCombinations = + wordOccurrences.entrySet().stream() + .map(entry -> new WordCombination(entry.getKey(), entry.getValue())) + .sorted() + .collect(Collectors.toList()); + WordCombination candidate = this.findCandidate(sortedWordCombinations); + String groupCandidateStr = + String.format(Locale.ROOT, "%d,%d", candidate.wordFreq(), candidate.sameFreqCount()); + this.logIdGroupCandidateMap.put(tokens.get(tokens.size() - 1), groupCandidateStr); + this.updateGroupTokenFreqMap(tokens, groupCandidateStr); + } + } + + /** + * Parse single line of log pattern after preprocess - processTokenHistogram - + * calculateGroupTokenFreq + * + * @param tokens list of tokens for a specific log message + * @return parsed log pattern that is a list of string + */ + public List parseLogPattern(List tokens) { + String logId = tokens.get(tokens.size() - 1); + String groupCandidateStr = this.logIdGroupCandidateMap.get(logId); + String[] groupCandidate = groupCandidateStr.split(","); + Long repFreq = Long.parseLong(groupCandidate[0]); // representative frequency of the group + return IntStream.range(0, tokens.size() - 1) + .mapToObj(i -> new AbstractMap.SimpleEntry<>(i, tokens.get(i))) + .map( + entry -> { + int index = entry.getKey(); + String token = entry.getValue(); + String tokenKey = + String.format(Locale.ROOT, POSITIONED_TOKEN_KEY_FORMAT, index, token); + assert this.tokenFreqMap.get(tokenKey) != null + : String.format(Locale.ROOT, "Not found token: %s on position %d", token, index); + + boolean isHigherFrequency = this.tokenFreqMap.get(tokenKey) > repFreq; + boolean isLowerFrequency = this.tokenFreqMap.get(tokenKey) < repFreq; + String groupTokenKey = + String.format( + Locale.ROOT, + GROUP_TOKEN_SET_KEY_FORMAT, + tokens.size() - 1, + groupCandidateStr, + index); + assert this.groupTokenSetMap.get(groupTokenKey) != null + : String.format(Locale.ROOT, "Not found any token in group: %s", groupTokenKey); + + if (isHigherFrequency) { + // For higher frequency token that doesn't belong to word combination, it's likely + // to be constant token only if + // it's unique token on that position within the group + boolean isUniqueToken = this.groupTokenSetMap.get(groupTokenKey).size() == 1; + if (!isUniqueToken) { + return VARIABLE_DENOTER; + } + } else if (isLowerFrequency) { + // For lower frequency token that doesn't belong to word combination, it's likely to + // be constant token only if + // it doesn't exceed the preset variable count threshold. For example, some variable + // are limited number of enums, + // and sometimes they could be treated as constant tokens. + if (this.groupTokenSetMap.get(groupTokenKey).size() >= variableCountThreshold) { + return VARIABLE_DENOTER; + } + } + return token; + }) + .collect(Collectors.toList()); + } + + /** + * Parse all lines of log messages to generate the log pattern map. 
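+   * <p>Usage sketch (messages adapted from the unit tests in this change):
+   *
+   *   BrainLogParser parser = new BrainLogParser();
+   *   Map<String, List<String>> patterns =
+   *       parser.parseAllLogPatterns(
+   *           List.of(
+   *               "Verification succeeded for blk_6996194389878584395",
+   *               "Verification succeeded for blk_-1547954353065580372"));
+   *   // patterns: {"Verification succeeded for blk_<*>" -> ["0", "1"]}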
+ * + * @param logMessages all lines of log messages + * @return log pattern map with log pattern string as key, grouped logIds as value + */ + public Map> parseAllLogPatterns(List logMessages) { + List> processedMessages = this.preprocessAllLogs(logMessages); + + Map> logPatternMap = new HashMap<>(); + for (List processedMessage : processedMessages) { + String logId = processedMessage.get(processedMessage.size() - 1); + List logPattern = this.parseLogPattern(processedMessage); + String patternKey = String.join(" ", logPattern); + logPatternMap.computeIfAbsent(patternKey, k -> new ArrayList<>()).add(logId); + } + return logPatternMap; + } + + /** + * Get token histogram + * + * @return map of token per position key and its frequency + */ + public Map getTokenFreqMap() { + return this.tokenFreqMap; + } + + /** + * Get group per length per position to its token set map + * + * @return map of pattern group per length per position key and its token set + */ + public Map> getGroupTokenSetMap() { + return this.groupTokenSetMap; + } + + /** + * Get logId to its group candidate map + * + * @return map of logId and group candidate + */ + public Map getLogIdGroupCandidateMap() { + return this.logIdGroupCandidateMap; + } + + private Map getWordOccurrences(List tokens) { + Map occurrences = new HashMap<>(); + for (int i = 0; i < tokens.size() - 1; i++) { + String tokenKey = String.format(Locale.ROOT, POSITIONED_TOKEN_KEY_FORMAT, i, tokens.get(i)); + Long tokenFreq = tokenFreqMap.get(tokenKey); + occurrences.compute(tokenFreq, (k, v) -> v == null ? 1 : v + 1); + } + return occurrences; + } + + private WordCombination findCandidate(List sortedWordCombinations) { + if (sortedWordCombinations.isEmpty()) { + throw new IllegalArgumentException("Sorted word combinations must be non empty"); + } + OptionalLong maxFreqOptional = + sortedWordCombinations.stream().mapToLong(WordCombination::wordFreq).max(); + long maxFreq = maxFreqOptional.getAsLong(); + float threshold = maxFreq * this.thresholdPercentage; + for (WordCombination wordCombination : sortedWordCombinations) { + if (wordCombination.wordFreq() > threshold) { + return wordCombination; + } + } + return sortedWordCombinations.get(0); + } + + private void updateGroupTokenFreqMap(List tokens, String groupCandidateStr) { + int tokensLen = tokens.size() - 1; + for (int i = 0; i < tokensLen; i++) { + String groupTokenFreqKey = + String.format(Locale.ROOT, GROUP_TOKEN_SET_KEY_FORMAT, tokensLen, groupCandidateStr, i); + this.groupTokenSetMap + .computeIfAbsent(groupTokenFreqKey, k -> new HashSet<>()) + .add(tokens.get(i)); + } + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + BrainLogParser other = (BrainLogParser) obj; + return Objects.equals(variableCountThreshold, other.variableCountThreshold) + && Objects.equals(thresholdPercentage, other.thresholdPercentage) + && Objects.equals(filterPatternVariableMap, other.filterPatternVariableMap) + && Objects.equals(delimiters, other.delimiters); + } + + @Override + public int hashCode() { + return Objects.hash( + variableCountThreshold, thresholdPercentage, filterPatternVariableMap, delimiters); + } + + private static final class WordCombination implements Comparable { + private final Long wordFreq; + private final Integer sameFreqCount; + + public WordCombination(Long wordFreq, Integer sameFreqCount) { + this.wordFreq = wordFreq; + this.sameFreqCount = sameFreqCount; + } + + public Long 
wordFreq() { + return this.wordFreq; + } + + public Integer sameFreqCount() { + return this.sameFreqCount; + } + + @Override + public int compareTo(WordCombination other) { + // Compare by same frequency count in descending order + int wordFreqComparison = other.sameFreqCount.compareTo(this.sameFreqCount); + if (wordFreqComparison != 0) { + return wordFreqComparison; + } + // If sameFreqCount are the same, compare by wordFreq in descending order + return other.wordFreq.compareTo(this.wordFreq); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + WordCombination other = (WordCombination) obj; + return Objects.equals(wordFreq, other.wordFreq) + && Objects.equals(sameFreqCount, other.sameFreqCount); + } + + @Override + public int hashCode() { + return Objects.hash(wordFreq, sameFreqCount); + } + } +} diff --git a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java index a9fa693a22..cbf0e19276 100644 --- a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java +++ b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java @@ -27,6 +27,7 @@ public enum Key { /** PPL Settings. */ PPL_ENABLED("plugins.ppl.enabled"), + DEFAULT_PATTERN_METHOD("plugins.ppl.default.pattern.method"), /** Query Settings. */ FIELD_TYPE_TOLERANCE("plugins.query.field_type_tolerance"), diff --git a/common/src/test/java/org/opensearch/sql/common/patterns/BrainLogParserTest.java b/common/src/test/java/org/opensearch/sql/common/patterns/BrainLogParserTest.java new file mode 100644 index 0000000000..78a69269c2 --- /dev/null +++ b/common/src/test/java/org/opensearch/sql/common/patterns/BrainLogParserTest.java @@ -0,0 +1,273 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.patterns; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.junit.Before; +import org.junit.FixMethodOrder; +import org.junit.Test; +import org.junit.runners.MethodSorters; + +@FixMethodOrder(MethodSorters.NAME_ASCENDING) +public class BrainLogParserTest { + + private static final List TEST_HDFS_LOGS = + Arrays.asList( + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to" + + " blk_-7017553867379051457 size 67108864", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000296_0/part-00296." + + " blk_-6620182933895093708", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.250.7.244:50010 is added to" + + " blk_-6956067134432991406 size 67108864", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000230_0/part-00230." + + " blk_559204981722276126", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000169_0/part-00169." 
+ + " blk_-7105305952901940477", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to" + + " blk_-3249711809227781266 size 67108864", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000318_0/part-00318." + + " blk_-207775976836691685", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.250.6.4:50010 is added to" + + " blk_5114010683183383297 size 67108864", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000318_0/part-00318." + + " blk_2096692261399680562", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.250.15.240:50010 is added to" + + " blk_-1055254430948037872 size 67108864", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.250.7.146:50010 is added to" + + " blk_278357163850888 size 67108864", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000138_0/part-00138." + + " blk_-210021574616486609", + "Verification succeeded for blk_-1547954353065580372", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.39.242:50010 is added to" + + " blk_-4110733372292809607 size 67108864", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/randtxt/_temporary/_task_200811092030_0003_m_000382_0/part-00382." + + " blk_8935202950442998446", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/randtxt/_temporary/_task_200811092030_0003_m_000392_0/part-00392." + + " blk_-3010126661650043258", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.25.237:50010 is added to" + + " blk_541463031152673662 size 67108864", + "Verification succeeded for blk_6996194389878584395", + "PacketResponder failed for blk_6996194389878584395", + "PacketResponder failed for blk_-1547954353065580372"); + private BrainLogParser parser; + + @Before + public void setUp() throws Exception { + parser = new BrainLogParser(); + } + + @Test + public void testNewParserWithIllegalArgument() { + String exceptionMessage = "Threshold percentage must be between 0.0 and 1.0"; + Throwable throwable = + assertThrows(IllegalArgumentException.class, () -> new BrainLogParser(2, -1.0f)); + assertEquals(exceptionMessage, throwable.getMessage()); + throwable = assertThrows(IllegalArgumentException.class, () -> new BrainLogParser(2, 1.1f)); + assertEquals(exceptionMessage, throwable.getMessage()); + } + + @Test + public void testPreprocess() { + String logMessage = "127.0.0.1 - 1234 something"; + String logId = "log1"; + List expectedResult = Arrays.asList("<*IP*>", "-", "<*>", "something", "log1"); + List result = parser.preprocess(logMessage, logId); + assertEquals(expectedResult, result); + // Test with different delimiter + logMessage = "127.0.0.1=1234 something"; + logId = "log2"; + expectedResult = Arrays.asList("<*IP*>=<*>", "something", "log2"); + result = parser.preprocess(logMessage, logId); + assertEquals(expectedResult, result); + } + + @Test + public void testPreprocessWithIllegalInput() { + String logMessage = "127.0.0.1 - 1234 something"; + String logId = "log1"; + String exceptionMessage = "log message or logId must not be null"; + Throwable throwable = + assertThrows(IllegalArgumentException.class, () -> parser.preprocess(null, logId)); + assertEquals(exceptionMessage, throwable.getMessage()); + throwable = + assertThrows(IllegalArgumentException.class, () -> parser.preprocess(logMessage, null)); + assertEquals(exceptionMessage, throwable.getMessage()); + throwable = 
assertThrows(IllegalArgumentException.class, () -> parser.preprocess(null, null)); + assertEquals(exceptionMessage, throwable.getMessage()); + } + + @Test + public void testPreprocessAllLogs() { + List logMessages = + Arrays.asList("127.0.0.1 - 1234 something", "192.168.0.1 - 5678 something_else"); + List> result = parser.preprocessAllLogs(logMessages); + assertEquals(2, result.size()); + assertEquals(Arrays.asList("<*IP*>", "-", "<*>", "something", "0"), result.get(0)); + assertEquals(Arrays.asList("<*IP*>", "-", "<*>", "something_else", "1"), result.get(1)); + } + + @Test + public void testProcessTokenHistogram() { + String something = String.format(Locale.ROOT, "%d-%s", 0, "something"); + String up = String.format(Locale.ROOT, "%d-%s", 1, "up"); + List firstTokens = Arrays.asList("something", "up", "0"); + parser.processTokenHistogram(firstTokens); + assertEquals(1L, parser.getTokenFreqMap().get(something).longValue()); + assertEquals(1L, parser.getTokenFreqMap().get(up).longValue()); + List secondTokens = Arrays.asList("something", "down", "1"); + parser.processTokenHistogram(secondTokens); + assertEquals(2L, parser.getTokenFreqMap().get(something).longValue()); + assertEquals(1L, parser.getTokenFreqMap().get(up).longValue()); + } + + @Test + public void testCalculateGroupTokenFreq() { + List logMessages = + Arrays.asList( + "127.0.0.1 - 1234 something", + "192.168.0.1:5678 something_else", + "0.0.0.0:42 something_else"); + List logIds = Arrays.asList("0", "1", "2"); + List> preprocessedLogs = parser.preprocessAllLogs(logMessages); + for (String logId : logIds) { + String groupCandidate = parser.getLogIdGroupCandidateMap().get(logId); + assertNotNull(groupCandidate); + } + assertTrue(parser.getGroupTokenSetMap().containsValue(Set.of("something"))); + assertTrue(parser.getGroupTokenSetMap().containsValue(Set.of("something_else"))); + String sampleGroupTokenKey = + String.format(Locale.ROOT, "%d-%s-%d", 4, parser.getLogIdGroupCandidateMap().get("0"), 3); + assertTrue(parser.getGroupTokenSetMap().get(sampleGroupTokenKey).contains("something")); + } + + @Test + public void testCalculateGroupTokenFreqWithIllegalInput() { + List> preprocessedLogs = Arrays.asList(List.of()); + String exceptionMessage = "Sorted word combinations must be non empty"; + Throwable throwable = + assertThrows( + IllegalArgumentException.class, () -> parser.calculateGroupTokenFreq(preprocessedLogs)); + assertEquals(exceptionMessage, throwable.getMessage()); + } + + @Test + public void testParseLogPattern() { + List> preprocessedLogs = parser.preprocessAllLogs(TEST_HDFS_LOGS); + List expectedLogPattern = + Arrays.asList( + "BLOCK*", + "NameSystem.addStoredBlock:", + "blockMap", + "updated:", + "<*IP*>", + "is", + "added", + "to", + "blk_<*>", + "size", + "<*>"); + List logPattern = parser.parseLogPattern(preprocessedLogs.get(0)); + assertEquals(expectedLogPattern, logPattern); + } + + @Test + public void testParseAllLogPatterns() { + Map> logPatternMap = parser.parseAllLogPatterns(TEST_HDFS_LOGS); + Map expectedResult = + Map.of( + "PacketResponder failed for blk_<*>", + 2, + "Verification succeeded for blk_<*>", + 2, + "BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size" + + " <*>", + 8, + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_<*>_<*>_r_<*>_<*>/part<*> blk_<*>", + 6, + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/randtxt/_temporary/_task_<*>_<*>_m_<*>_<*>/part<*> blk_<*>", + 2); + Map logPatternByCountMap = + 
logPatternMap.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().size())); + assertEquals(expectedResult, logPatternByCountMap); + } + + @Test + public void testParseLogPatternWhenLowerFrequencyTokenIsVariable() { + int testVariableCountThreshold = 3; + parser = new BrainLogParser(testVariableCountThreshold, 0.0f); + List logMessages = + Arrays.asList( + "Verification succeeded a blk_-1547954353065580372", + "Verification succeeded b blk_6996194389878584395", + "Verification succeeded c blk_6996194389878584395", + "Verification succeeded d blk_6996194389878584395"); + Map> expectedResult = + Map.of("Verification succeeded <*> blk_<*>", Arrays.asList("0", "1", "2", "3")); + Map> logPatternMap = parser.parseAllLogPatterns(logMessages); + assertEquals(expectedResult, logPatternMap); + /* + * 'a', 'b', 'c' and 'd' token is on the 3rd position in the group 2,3, their frequency is lower than + * representative frequency. Since that position's distinct token number exceeds the variable count threshold, + * the third position in this log group is treated as variable + */ + assertTrue( + parser.getTokenFreqMap().get("2-a") < parser.getTokenFreqMap().get("0-Verification")); + assertTrue( + parser.getTokenFreqMap().get("2-b") < parser.getTokenFreqMap().get("0-Verification")); + assertTrue(testVariableCountThreshold <= parser.getGroupTokenSetMap().get("4-4,3-2").size()); + } + + @Test + public void testParseLogPatternWhenHigherFrequencyTokenIsVariable() { + List logMessages = + Arrays.asList( + "Verification succeeded for blk_-1547954353065580372", + "Verification succeeded for blk_6996194389878584395", + "Test succeeded for blk_6996194389878584395", + "Verification", + "Verification"); + Map> expectedResult = + Map.of( + "<*> succeeded for blk_<*>", + Arrays.asList("0", "1"), + "Test succeeded for blk_<*>", + Arrays.asList("2"), + "Verification", + Arrays.asList("3", "4")); + Map> logPatternMap = parser.parseAllLogPatterns(logMessages); + assertEquals(expectedResult, logPatternMap); + /* + * 'Verification' and 'Test' token is on the 1st position in the group 3,3, 'Verification' frequency is higher than + * representative frequency because there are other groups which have 'Verification' token on the 1st position as well. + * Since first position's distinct token number is not unique, 'Verification' is treated as variable eventually. 
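+     * Concretely, from the inputs above: "0-Verification" has frequency 4 while the representative
+     * frequency of group "3,3" is 3, and the group token set at key "4-3,3-0" is
+     * {"Verification", "Test"}, so the token is not unique at that position and becomes <*>.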
+ */ + assertTrue( + parser.getTokenFreqMap().get("0-Verification") + > parser.getTokenFreqMap().get("1-succeeded")); + assertTrue(parser.getGroupTokenSetMap().get("4-3,3-0").size() > 1); + } +} diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d5e8b93b13..80c8da7c77 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -63,6 +63,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; +import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; @@ -100,6 +101,7 @@ import org.opensearch.sql.planner.logical.LogicalRename; import org.opensearch.sql.planner.logical.LogicalSort; import org.opensearch.sql.planner.logical.LogicalValues; +import org.opensearch.sql.planner.logical.LogicalWindow; import org.opensearch.sql.planner.physical.datasource.DataSourceTable; import org.opensearch.sql.storage.Table; import org.opensearch.sql.utils.ParseUtils; @@ -464,6 +466,22 @@ public LogicalPlan visitParse(Parse node, AnalysisContext context) { return child; } + @Override + public LogicalPlan visitWindow(Window node, AnalysisContext context) { + LogicalPlan child = node.getChild().get(0).accept(this, context); + WindowExpressionAnalyzer windowAnalyzer = + new WindowExpressionAnalyzer(expressionAnalyzer, child); + child = windowAnalyzer.analyze(node.getWindowFunction(), context); + + TypeEnvironment curEnv = context.peek(); + LogicalWindow window = (LogicalWindow) child; + curEnv.define( + new Symbol(Namespace.FIELD_NAME, window.getWindowFunction().getNameOrAlias()), + window.getWindowFunction().getDelegated().type()); + + return child; + } + /** Build {@link LogicalSort}. */ @Override public LogicalPlan visitSort(Sort node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index eab0eff03c..fa2478d53a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -24,6 +24,7 @@ import org.opensearch.sql.ast.expression.AggregateFunction; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.And; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.Between; import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Cast; @@ -400,6 +401,11 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); } + @Override + public Expression visitArgument(Argument node, AnalysisContext context) { + return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); + } + /** * If QualifiedName is actually a reserved metadata field, return the expr type associated with * the metadata field. 
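To make the new analysis path concrete, here is a hedged sketch of the AST that Analyzer.visitWindow consumes, mirroring AstDSL.window() and the AnalyzerTest cases later in this diff (the relation and field names, and the omitted imports, are illustrative assumptions):

    // Build an unresolved plan carrying a pattern window function aliased as "patterns_field".
    UnresolvedPlan unresolved =
        AstDSL.project(
            AstDSL.window(
                AstDSL.relation("logs"),          // illustrative source relation
                PatternMethod.SIMPLE_PATTERN,     // or PatternMethod.BRAIN
                AstDSL.field("message"),          // field whose values are parsed into patterns
                "patterns_field",                 // alias for the window function output
                ImmutableList.of()),              // optional named arguments
            AstDSL.field("message"));
    // visitWindow analyzes the window function via WindowExpressionAnalyzer, yielding a
    // LogicalWindow and defining "patterns_field" with the window function's type (STRING).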
diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 973b10310b..e1484a2098 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -60,6 +60,7 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Values; +import org.opensearch.sql.ast.tree.Window; /** AST nodes visitor Defines the traverse path. */ public abstract class AbstractNodeVisitor { @@ -312,4 +313,8 @@ public T visitFetchCursor(FetchCursor cursor, C context) { public T visitCloseCursor(CloseCursor closeCursor, C context) { return visitChildren(closeCursor, context); } + + public T visitWindow(Window window, C context) { + return visitChildren(window, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 830dbf0a65..fc09e0cfd2 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -5,8 +5,10 @@ package org.opensearch.sql.ast.dsl; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.stream.Collectors; import lombok.experimental.UtilityClass; import org.apache.commons.lang3.tuple.Pair; @@ -33,6 +35,7 @@ import org.opensearch.sql.ast.expression.Not; import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.ParseMethod; +import org.opensearch.sql.ast.expression.PatternMethod; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.Span; @@ -61,6 +64,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; +import org.opensearch.sql.ast.tree.Window; /** Class of static methods to create specific node instances. 
*/ @UtilityClass @@ -471,4 +475,24 @@ public static Parse parse( java.util.Map arguments) { return new Parse(parseMethod, sourceField, pattern, arguments, input); } + + public static Window window( + UnresolvedPlan input, + PatternMethod patternMethod, + UnresolvedExpression sourceField, + String alias, + List arguments) { + List funArgs = new ArrayList<>(); + funArgs.add(sourceField); + funArgs.addAll(arguments); + return new Window( + new Alias( + alias, + new WindowFunction( + new Function(patternMethod.name().toLowerCase(Locale.ROOT), funArgs), + List.of(), + List.of()), + alias), + input); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ParseMethod.java b/core/src/main/java/org/opensearch/sql/ast/expression/ParseMethod.java index 7a2587c5f0..aad093554f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ParseMethod.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ParseMethod.java @@ -11,8 +11,7 @@ @RequiredArgsConstructor public enum ParseMethod { REGEX("regex"), - GROK("grok"), - PATTERNS("patterns"); + GROK("grok"); @Getter private final String name; } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/PatternMethod.java b/core/src/main/java/org/opensearch/sql/ast/expression/PatternMethod.java new file mode 100644 index 0000000000..e75210f9fa --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/PatternMethod.java @@ -0,0 +1,17 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public enum PatternMethod { + SIMPLE_PATTERN("simple_pattern"), + BRAIN("brain"); + + @Getter final String name; +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Window.java b/core/src/main/java/org/opensearch/sql/ast/tree/Window.java new file mode 100644 index 0000000000..d2c8fe5a8e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Window.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +@AllArgsConstructor +public class Window extends UnresolvedPlan { + + private final UnresolvedExpression windowFunction; + + private UnresolvedPlan child; + + @Override + public Window attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitWindow(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 49a1197957..4a989b9303 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -965,6 +965,14 @@ public static FunctionExpression utc_timestamp( return compile(functionProperties, 
BuiltinFunctionName.UTC_TIMESTAMP, args); } + public static FunctionExpression brain(Expression... args) { + return compile(FunctionProperties.None, BuiltinFunctionName.BRAIN, args); + } + + public static FunctionExpression simple_pattern(Expression... args) { + return compile(FunctionProperties.None, BuiltinFunctionName.SIMPLE_PATTERN, args); + } + @SuppressWarnings("unchecked") private static T compile( FunctionProperties functionProperties, BuiltinFunctionName bfn, Expression... args) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index fd5ea14a2e..dcc5348276 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -213,6 +213,9 @@ public enum BuiltinFunctionName { RANK(FunctionName.of("rank")), DENSE_RANK(FunctionName.of("dense_rank")), + SIMPLE_PATTERN(FunctionName.of("simple_pattern")), + BRAIN(FunctionName.of("brain")), + INTERVAL(FunctionName.of("interval")), /** Data Type Convert Function. */ diff --git a/core/src/main/java/org/opensearch/sql/expression/parse/PatternsExpression.java b/core/src/main/java/org/opensearch/sql/expression/parse/PatternsExpression.java index 5b92779c35..db2d8b1b2d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/parse/PatternsExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/parse/PatternsExpression.java @@ -50,8 +50,8 @@ public PatternsExpression(Expression sourceField, Expression pattern, Expression } @Override - ExprValue parseValue(ExprValue value) throws ExpressionEvaluationException { - String rawString = value.stringValue(); + public ExprValue parseValue(ExprValue value) throws ExpressionEvaluationException { + String rawString = value.isNull() || value.isMissing() ? 
"" : value.stringValue(); if (useCustomPattern) { return new ExprStringValue(pattern.matcher(rawString).replaceAll("")); } diff --git a/core/src/main/java/org/opensearch/sql/expression/window/WindowFunctions.java b/core/src/main/java/org/opensearch/sql/expression/window/WindowFunctions.java index 3df59c52c0..d2de8302a9 100644 --- a/core/src/main/java/org/opensearch/sql/expression/window/WindowFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/window/WindowFunctions.java @@ -6,7 +6,11 @@ package org.opensearch.sql.expression.window; import static java.util.Collections.emptyList; +import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.function.Supplier; import lombok.experimental.UtilityClass; @@ -16,6 +20,8 @@ import org.opensearch.sql.expression.function.FunctionBuilder; import org.opensearch.sql.expression.function.FunctionName; import org.opensearch.sql.expression.function.FunctionSignature; +import org.opensearch.sql.expression.window.patterns.BufferPatternWindowFunction; +import org.opensearch.sql.expression.window.patterns.StreamPatternWindowFunction; import org.opensearch.sql.expression.window.ranking.DenseRankFunction; import org.opensearch.sql.expression.window.ranking.RankFunction; import org.opensearch.sql.expression.window.ranking.RankingWindowFunction; @@ -34,6 +40,8 @@ public void register(BuiltinFunctionRepository repository) { repository.register(rowNumber()); repository.register(rank()); repository.register(denseRank()); + repository.register(brain()); + repository.register(simplePattern()); } private DefaultFunctionResolver rowNumber() { @@ -48,6 +56,40 @@ private DefaultFunctionResolver denseRank() { return rankingFunction(BuiltinFunctionName.DENSE_RANK.getName(), DenseRankFunction::new); } + private DefaultFunctionResolver brain() { + FunctionName functionName = BuiltinFunctionName.BRAIN.getName(); + FunctionBuilder functionBuilder = + (functionProperties, arguments) -> new BufferPatternWindowFunction(arguments); + return new DefaultFunctionResolver( + functionName, + ImmutableMap.of( + new FunctionSignature(functionName, ImmutableList.of(STRING)), functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, STRING)), functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, INTEGER)), functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, DOUBLE)), functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, STRING, INTEGER)), + functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, STRING, DOUBLE)), + functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, INTEGER, DOUBLE)), + functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, STRING, INTEGER, DOUBLE)), + functionBuilder)); + } + + private DefaultFunctionResolver simplePattern() { + FunctionName functionName = BuiltinFunctionName.SIMPLE_PATTERN.getName(); + FunctionBuilder functionBuilder = + (functionProperties, arguments) -> new StreamPatternWindowFunction(arguments); + return new DefaultFunctionResolver( + functionName, + ImmutableMap.of( + new FunctionSignature(functionName, ImmutableList.of(STRING)), functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, 
STRING)), functionBuilder, + new FunctionSignature(functionName, ImmutableList.of(STRING, STRING, STRING)), + functionBuilder)); + } + private DefaultFunctionResolver rankingFunction( FunctionName functionName, Supplier constructor) { FunctionSignature functionSignature = new FunctionSignature(functionName, emptyList()); diff --git a/core/src/main/java/org/opensearch/sql/expression/window/frame/BufferPatternRowsWindowFrame.java b/core/src/main/java/org/opensearch/sql/expression/window/frame/BufferPatternRowsWindowFrame.java new file mode 100644 index 0000000000..7a0ad37c47 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/window/frame/BufferPatternRowsWindowFrame.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.window.frame; + +import com.google.common.collect.PeekingIterator; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.common.patterns.BrainLogParser; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.window.WindowDefinition; + +@EqualsAndHashCode(callSuper = true) +@ToString +public class BufferPatternRowsWindowFrame extends PeerRowsWindowFrame { + + private final Expression sourceField; + + @Getter private final BrainLogParser logParser; + + @EqualsAndHashCode.Exclude private final List> preprocessedMessages; + + public BufferPatternRowsWindowFrame( + WindowDefinition windowDefinition, BrainLogParser logParser, Expression sourceField) { + super(windowDefinition); + this.logParser = logParser; + this.sourceField = sourceField; + this.preprocessedMessages = new ArrayList<>(); + } + + @Override + public void load(PeekingIterator it) { + if (hasNext()) { + return; + } + + this.preprocessedMessages.clear(); + loadAllRows(it); + + List logMessages = + peers.stream() + .map( + exprValue -> { + ExprValue value = sourceField.valueOf(exprValue.bindingTuples()); + return value.isNull() || value.isMissing() ? "" : value.stringValue(); + }) + .collect(Collectors.toList()); + this.preprocessedMessages.addAll(logParser.preprocessAllLogs(logMessages)); + } + + public List currentPreprocessedMessage() { + return this.preprocessedMessages.get(position); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/window/frame/PeerRowsWindowFrame.java b/core/src/main/java/org/opensearch/sql/expression/window/frame/PeerRowsWindowFrame.java index a98826d333..5eeb8769bc 100644 --- a/core/src/main/java/org/opensearch/sql/expression/window/frame/PeerRowsWindowFrame.java +++ b/core/src/main/java/org/opensearch/sql/expression/window/frame/PeerRowsWindowFrame.java @@ -10,6 +10,7 @@ import java.util.Collections; import java.util.List; import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.data.model.ExprValue; @@ -22,6 +23,7 @@ * window definition). See PeerWindowFrameTest for details about how this window frame interacts * with window operator and window function. 
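 * Note on the buffered subclass added in this change (see BufferPatternRowsWindowFrame above):
 * its load(it) buffers all peer rows and preprocesses their messages once, and the pattern window
 * function then reads currentPreprocessedMessage() and advances the frame with next() per row.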
*/ +@EqualsAndHashCode @RequiredArgsConstructor public class PeerRowsWindowFrame implements WindowFrame { @@ -31,10 +33,10 @@ public class PeerRowsWindowFrame implements WindowFrame { * All peer rows (peer means rows in a partition that share same sort key based on sort list in * window definition. */ - private final List peers = new ArrayList<>(); + protected final List peers = new ArrayList<>(); /** Which row in the peer is currently being enriched by window function. */ - private int position; + protected int position; /** Does row at current position represents a new partition. */ private boolean isNewPartition = true; @@ -92,6 +94,10 @@ public void load(PeekingIterator it) { return; } + loadAllRows(it); + } + + protected void loadAllRows(PeekingIterator it) { // Reset state: reset new partition before clearing peers isNewPartition = !isSamePartition(it.peek()); position = 0; diff --git a/core/src/main/java/org/opensearch/sql/expression/window/patterns/BufferPatternWindowFunction.java b/core/src/main/java/org/opensearch/sql/expression/window/patterns/BufferPatternWindowFunction.java new file mode 100644 index 0000000000..aaa9e4d29e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/window/patterns/BufferPatternWindowFunction.java @@ -0,0 +1,70 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.window.patterns; + +import static org.opensearch.sql.utils.ExpressionUtils.format; + +import java.util.List; +import java.util.Locale; +import lombok.EqualsAndHashCode; +import org.opensearch.sql.common.patterns.BrainLogParser; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.expression.window.WindowFunctionExpression; +import org.opensearch.sql.expression.window.frame.BufferPatternRowsWindowFrame; +import org.opensearch.sql.expression.window.frame.WindowFrame; +import org.opensearch.sql.utils.FunctionUtils; + +@EqualsAndHashCode(callSuper = true) +public class BufferPatternWindowFunction extends FunctionExpression + implements WindowFunctionExpression { + + public BufferPatternWindowFunction(List arguments) { + super(BuiltinFunctionName.BRAIN.getName(), arguments); + } + + @Override + public WindowFrame createWindowFrame(WindowDefinition definition) { + int variableCountThreshold = + FunctionUtils.getNamedArgumentValue(getArguments(), "variable_count_threshold") + .map(ExprValue::integerValue) + .orElse(5); + float thresholdPercentage = + FunctionUtils.getNamedArgumentValue(getArguments(), "frequency_threshold_percentage") + .map(ExprValue::floatValue) + .orElse(0.3f); + return new BufferPatternRowsWindowFrame( + definition, + new BrainLogParser(variableCountThreshold, thresholdPercentage), + getArguments().get(0)); // actually only first argument is meaningful + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + BufferPatternRowsWindowFrame frame = (BufferPatternRowsWindowFrame) valueEnv; + List preprocessedMessage = frame.currentPreprocessedMessage(); + frame.next(); + List logPattern = 
frame.getLogParser().parseLogPattern(preprocessedMessage); + return new ExprStringValue(String.join(" ", logPattern)); + } + + @Override + public ExprType type() { + return ExprCoreType.STRING; + } + + @Override + public String toString() { + return String.format(Locale.ROOT, "%s(%s)", getFunctionName(), format(getArguments())); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/window/patterns/StreamPatternWindowFunction.java b/core/src/main/java/org/opensearch/sql/expression/window/patterns/StreamPatternWindowFunction.java new file mode 100644 index 0000000000..6cca0d6813 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/window/patterns/StreamPatternWindowFunction.java @@ -0,0 +1,70 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.window.patterns; + +import static org.opensearch.sql.utils.ExpressionUtils.format; + +import java.util.List; +import java.util.Locale; +import lombok.EqualsAndHashCode; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.parse.PatternsExpression; +import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.expression.window.WindowFunctionExpression; +import org.opensearch.sql.expression.window.frame.CurrentRowWindowFrame; +import org.opensearch.sql.expression.window.frame.WindowFrame; +import org.opensearch.sql.utils.FunctionUtils; + +@EqualsAndHashCode(callSuper = true) +public class StreamPatternWindowFunction extends FunctionExpression + implements WindowFunctionExpression { + + private final PatternsExpression patternsExpression; + + public StreamPatternWindowFunction(List arguments) { + super(BuiltinFunctionName.SIMPLE_PATTERN.getName(), arguments); + String pattern = + FunctionUtils.getNamedArgumentValue(getArguments(), "pattern") + .map(ExprValue::stringValue) + .orElse(""); + this.patternsExpression = + new PatternsExpression( + getArguments().get(0), + new LiteralExpression(new ExprStringValue(pattern)), + new LiteralExpression(new ExprStringValue(""))); + } + + @Override + public WindowFrame createWindowFrame(WindowDefinition definition) { + return new CurrentRowWindowFrame(definition); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + CurrentRowWindowFrame frame = (CurrentRowWindowFrame) valueEnv; + ExprValue sourceFieldValue = + patternsExpression.getSourceField().valueOf(frame.current().bindingTuples()); + return patternsExpression.parseValue(sourceFieldValue); + } + + @Override + public ExprType type() { + return ExprCoreType.STRING; + } + + @Override + public String toString() { + return String.format(Locale.ROOT, "%s(%s)", getFunctionName(), format(getArguments())); + } +} diff --git a/core/src/main/java/org/opensearch/sql/utils/FunctionUtils.java b/core/src/main/java/org/opensearch/sql/utils/FunctionUtils.java new file mode 100644 index 0000000000..5a7e3048d4 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/FunctionUtils.java @@ -0,0 +1,30 @@ +/* + * Copyright OpenSearch Contributors + * 
SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import java.util.List; +import java.util.Optional; +import lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.NamedArgumentExpression; + +@UtilityClass +public final class FunctionUtils { + + public static Optional getNamedArgumentValue( + List arguments, String argName) { + return arguments.stream() + .filter( + expression -> + expression instanceof NamedArgumentExpression + && ((NamedArgumentExpression) expression) + .getArgName() + .equalsIgnoreCase(argName)) + .map(expression -> ((NamedArgumentExpression) expression).getValue().valueOf()) + .findFirst(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java b/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java index e659cfdf50..55756b8cec 100644 --- a/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java @@ -14,7 +14,6 @@ import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.parse.GrokExpression; import org.opensearch.sql.expression.parse.ParseExpression; -import org.opensearch.sql.expression.parse.PatternsExpression; import org.opensearch.sql.expression.parse.RegexExpression; /** Utils for {@link ParseExpression}. */ @@ -24,8 +23,7 @@ public class ParseUtils { private static final Map FACTORY_MAP = ImmutableMap.of( ParseMethod.REGEX, RegexExpression::new, - ParseMethod.GROK, GrokExpression::new, - ParseMethod.PATTERNS, PatternsExpression::new); + ParseMethod.GROK, GrokExpression::new); /** * Construct corresponding ParseExpression by {@link ParseMethod}. @@ -52,13 +50,8 @@ public static List getNamedGroupCandidates( switch (parseMethod) { case REGEX: return RegexExpression.getNamedGroupCandidates(pattern); - case GROK: - return GrokExpression.getNamedGroupCandidates(pattern); default: - return PatternsExpression.getNamedGroupCandidates( - arguments.containsKey(NEW_FIELD_KEY) - ? 
(String) arguments.get(NEW_FIELD_KEY).getValue() - : null); + return GrokExpression.getNamedGroupCandidates(pattern); } } diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 2f4d6e8ada..78fe42b227 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -76,6 +76,7 @@ import org.opensearch.sql.ast.expression.HighlightFunction; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; +import org.opensearch.sql.ast.expression.PatternMethod; import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; @@ -1370,58 +1371,6 @@ public void parse_relation_with_regex_expression() { AstDSL.alias("string_value", qualifiedName("string_value")))); } - @Test - public void parse_relation_with_patterns_expression() { - Map arguments = - ImmutableMap.builder() - .put("new_field", AstDSL.stringLiteral("custom_field")) - .put("pattern", AstDSL.stringLiteral("custom_pattern")) - .build(); - - assertAnalyzeEqual( - LogicalPlanDSL.project( - LogicalPlanDSL.relation("schema", table), - ImmutableList.of(DSL.named("string_value", DSL.ref("string_value", STRING))), - ImmutableList.of( - DSL.named( - "custom_field", - DSL.patterns( - DSL.ref("string_value", STRING), - DSL.literal("custom_pattern"), - DSL.literal("custom_field"))))), - AstDSL.project( - AstDSL.parse( - AstDSL.relation("schema"), - ParseMethod.PATTERNS, - AstDSL.field("string_value"), - AstDSL.stringLiteral("custom_pattern"), - arguments), - AstDSL.alias("string_value", qualifiedName("string_value")))); - } - - @Test - public void parse_relation_with_patterns_expression_no_args() { - assertAnalyzeEqual( - LogicalPlanDSL.project( - LogicalPlanDSL.relation("schema", table), - ImmutableList.of(DSL.named("string_value", DSL.ref("string_value", STRING))), - ImmutableList.of( - DSL.named( - "patterns_field", - DSL.patterns( - DSL.ref("string_value", STRING), - DSL.literal(""), - DSL.literal("patterns_field"))))), - AstDSL.project( - AstDSL.parse( - AstDSL.relation("schema"), - ParseMethod.PATTERNS, - AstDSL.field("string_value"), - AstDSL.stringLiteral(""), - ImmutableMap.of()), - AstDSL.alias("string_value", qualifiedName("string_value")))); - } - @Test public void kmeanns_relation() { Map argumentMap = @@ -1767,4 +1716,120 @@ public void visit_close_cursor() { () -> assertEquals("pewpew", ((LogicalFetchCursor) analyzed.getChild().get(0)).getCursor())); } + + @Test + public void simple_pattern_window_function_with_no_additional_args() { + UnresolvedPlan unresolvedWindow = + AstDSL.project( + AstDSL.window( + AstDSL.relation("schema"), + PatternMethod.SIMPLE_PATTERN, + AstDSL.field("string_value"), + "patterns_field", + ImmutableList.of()), + AstDSL.field("string_value")); + LogicalPlan expectedPlan = + LogicalPlanDSL.project( + LogicalPlanDSL.window( + LogicalPlanDSL.relation("schema", table), + DSL.named( + "patterns_field", + DSL.simple_pattern(DSL.ref("string_value", STRING)), + "patterns_field"), + new WindowDefinition(ImmutableList.of(), ImmutableList.of())), + DSL.named("string_value", DSL.ref("string_value", STRING))); + + assertAnalyzeEqual(expectedPlan, unresolvedWindow); + } + + @Test + public void simple_pattern_window_function() { + UnresolvedPlan unresolvedWindow = + AstDSL.project( + AstDSL.window( + 
AstDSL.relation("schema"), + PatternMethod.SIMPLE_PATTERN, + AstDSL.field("string_value"), + "custom_field", + ImmutableList.of( + new Argument( + "pattern", AstDSL.stringLiteral("[0-9]")))), // with pattern argument + AstDSL.field("string_value")); + LogicalPlan expectedPlan = + LogicalPlanDSL.project( + LogicalPlanDSL.window( + LogicalPlanDSL.relation("schema", table), + DSL.named( + "custom_field", + DSL.simple_pattern( + DSL.ref("string_value", STRING), + DSL.namedArgument( + "pattern", DSL.literal("[0-9]"))), // with additional pattern argument + "custom_field"), + new WindowDefinition(ImmutableList.of(), ImmutableList.of())), + DSL.named("string_value", DSL.ref("string_value", STRING))); + + assertAnalyzeEqual(expectedPlan, unresolvedWindow); + } + + @Test + public void brain_window_function_with_no_additional_args() { + UnresolvedPlan unresolvedWindow = + AstDSL.project( + AstDSL.window( + AstDSL.relation("schema"), + PatternMethod.BRAIN, + AstDSL.field("string_value"), + "patterns_field", + ImmutableList.of()), + AstDSL.field("string_value")); + LogicalPlan expectedPlan = + LogicalPlanDSL.project( + LogicalPlanDSL.window( + LogicalPlanDSL.relation("schema", table), + DSL.named( + "patterns_field", DSL.brain(DSL.ref("string_value", STRING)), "patterns_field"), + new WindowDefinition(ImmutableList.of(), ImmutableList.of())), + DSL.named("string_value", DSL.ref("string_value", STRING))); + + assertAnalyzeEqual(expectedPlan, unresolvedWindow); + } + + @Test + public void brain_window_function() { + UnresolvedPlan unresolvedWindow = + AstDSL.project( + AstDSL.window( + AstDSL.relation("schema"), + PatternMethod.BRAIN, + AstDSL.field("string_value"), + "custom_field", + ImmutableList.of( + new Argument( + "variable_count_threshold", AstDSL.intLiteral(10)), // with integer argument + new Argument( + "frequency_threshold_percentage", + AstDSL.doubleLiteral(0.1)) // with double argument + )), + AstDSL.field("string_value")); + LogicalPlan expectedPlan = + LogicalPlanDSL.project( + LogicalPlanDSL.window( + LogicalPlanDSL.relation("schema", table), + DSL.named( + "custom_field", + DSL.brain( + DSL.ref("string_value", STRING), + DSL.namedArgument( + "variable_count_threshold", + DSL.literal(10)), // with additional integer argument + DSL.namedArgument( + "frequency_threshold_percentage", + DSL.literal(0.1))), // with additional double argument + "custom_field"), + new WindowDefinition(ImmutableList.of(), ImmutableList.of())), + DSL.named("string_value", DSL.ref("string_value", STRING))); + + assertAnalyzeEqual(expectedPlan, unresolvedWindow); + } } diff --git a/core/src/test/java/org/opensearch/sql/expression/parse/PatternsExpressionTest.java b/core/src/test/java/org/opensearch/sql/expression/parse/PatternsExpressionTest.java index 7237f0673b..f23e4510aa 100644 --- a/core/src/test/java/org/opensearch/sql/expression/parse/PatternsExpressionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/parse/PatternsExpressionTest.java @@ -15,12 +15,16 @@ import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import com.google.common.collect.ImmutableList; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprMissingValue; +import 
org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -86,4 +90,26 @@ public void throws_semantic_exception_if_value_type_is_not_string() { DSL.ref("boolean_value", BOOLEAN), DSL.literal("pattern"), DSL.literal("group")) .valueOf(valueEnv())); } + + @Test + public void parse_null_or_missing_expr_value() { + PatternsExpression patternsExpression = + DSL.patterns(DSL.ref("string_value", STRING), DSL.literal("pattern"), DSL.literal("group")); + assertEquals(new ExprStringValue(""), patternsExpression.parseValue(ExprNullValue.of())); + assertEquals(new ExprStringValue(""), patternsExpression.parseValue(ExprMissingValue.of())); + } + + @Test + public void get_named_group_candidates_with_default_field() { + assertEquals( + ImmutableList.of(PatternsExpression.DEFAULT_NEW_FIELD), + PatternsExpression.getNamedGroupCandidates(null)); + } + + @Test + public void get_named_group_candidates_with_specified_field() { + assertEquals( + ImmutableList.of("specified_field"), + PatternsExpression.getNamedGroupCandidates("specified_field")); + } } diff --git a/core/src/test/java/org/opensearch/sql/expression/window/frame/BufferPatternRowsWindowFrameTest.java b/core/src/test/java/org/opensearch/sql/expression/window/frame/BufferPatternRowsWindowFrameTest.java new file mode 100644 index 0000000000..4f9e554c5a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/window/frame/BufferPatternRowsWindowFrameTest.java @@ -0,0 +1,326 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.window.frame; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; +import static org.opensearch.sql.data.model.ExprTupleValue.fromExprValueMap; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.patterns.BrainLogParser; +import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprMissingValue; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.window.WindowDefinition; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +@ExtendWith(MockitoExtension.class) +public class BufferPatternRowsWindowFrameTest { + + // Single partition for all rows + @Test + void test_single_partition_with_no_order_by() { + 
BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator(Iterators.forArray(TEST_TUPLE_1, TEST_TUPLE_2, TEST_TUPLE_3)); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_1, TEST_TUPLE_2, TEST_TUPLE_3), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_2, "1"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_3, "2"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + @Test + void test_single_partition_with_different_order_by_values() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition( + ImmutableList.of(DSL.ref("level", STRING)), + ImmutableList.of(Pair.of(DEFAULT_ASC, DSL.ref("timestamp", INTEGER)))), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator(Iterators.forArray(TEST_TUPLE_1, TEST_TUPLE_2, TEST_TUPLE_3)); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_1, TEST_TUPLE_2), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_2, "1"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + + // Similar to PeerRowsWindowFrame concept, patterns will be grouped on peer rows level if + // specified order by + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_3, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_3), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + @Test + void test_two_partitions_with_partition_by_and_order_by() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition( + ImmutableList.of(DSL.ref("level", STRING)), + ImmutableList.of(Pair.of(DEFAULT_ASC, DSL.ref("timestamp", INTEGER)))), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator(Iterators.forArray(TEST_TUPLE_1, TEST_TUPLE_3, TEST_TUPLE_4)); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_1), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + + // Similar to 
PeerRowsWindowFrame concept, patterns will be grouped on peer rows level if + // specified order by + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_3, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_3), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_ERROR_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_4), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + @Test + void test_two_partitions_with_no_order_by() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(DSL.ref("level", STRING)), ImmutableList.of()), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator(Iterators.forArray(TEST_TUPLE_1, TEST_TUPLE_3, TEST_TUPLE_5)); + + // it just cares about partitions regardless of different order by values with only specified + // partition by + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_1, TEST_TUPLE_3), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_3, "1"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_ERROR_2, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_5), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + @Test + void test_two_partitions_with_no_partition_by() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition( + ImmutableList.of(), + ImmutableList.of(Pair.of(DEFAULT_ASC, DSL.ref("timestamp", INTEGER)))), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator(Iterators.forArray(TEST_TUPLE_1, TEST_TUPLE_4, TEST_TUPLE_5)); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_1), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + + // Though the same partition, peer rows are still grouped by order by level + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_ERROR_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_4, TEST_TUPLE_5), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_ERROR_2, "1"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + // Will have the same effect with 
single partition for all rows + @Test + void test_two_partitions_with_no_partition_by_and_order_by() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator(Iterators.forArray(TEST_TUPLE_1, TEST_TUPLE_4, TEST_TUPLE_5)); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(TEST_TUPLE_1, TEST_TUPLE_4, TEST_TUPLE_5), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_ERROR_1, "1"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_ERROR_2, "2"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + @Test + public void test_load_mixed_expr_null_value_and_string_value() { + BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + PeekingIterator tuples = + Iterators.peekingIterator( + Iterators.forArray(TEST_TUPLE_1, TEST_NULL_TUPLE, TEST_MISSING_TUPLE)); + + windowFrame.load(tuples); + assertTrue(windowFrame.isNewPartition()); + assertEquals( + LOG_PARSER.preprocess(TEST_MESSAGE_1, "0"), windowFrame.currentPreprocessedMessage()); + assertEquals( + ImmutableList.of(TEST_TUPLE_1, TEST_NULL_TUPLE, TEST_MISSING_TUPLE), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals(LOG_PARSER.preprocess("", "1"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertTrue(windowFrame.hasNext()); + + windowFrame.load(tuples); + assertFalse(windowFrame.isNewPartition()); + assertEquals(LOG_PARSER.preprocess("", "2"), windowFrame.currentPreprocessedMessage()); + assertEquals(ImmutableList.of(), windowFrame.next()); + assertFalse(windowFrame.hasNext()); + } + + private static ExprValue tuple(String level, int timestamp, ExprValue message) { + return fromExprValueMap( + ImmutableMap.of( + "level", + new ExprStringValue(level), + "timestamp", + new ExprIntegerValue(timestamp), + "message", + message)); + } + + private static final String TEST_MESSAGE_1 = + "12.132.31.17 - - [2018-07-22T05:36:25.812Z] \\\"GET /opensearch HTTP/1.1\\\" 200 9797" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_2 = + "129.138.185.193 - - [2018-07-22T05:39:39.668Z] \\\"GET /opensearch HTTP/1.1\\\" 404 9920" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_3 = + "240.58.187.246 - - [2018-07-22T06:02:46.006Z] \\\"GET /opensearch HTTP/1.1\\\" 500 6936" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux 
i686) AppleWebKit/534.24 (KHTML, like Gecko)" + + " Chrome/11.0.696.50 Safari/534.24\\\""; + private static final String TEST_ERROR_1 = + "Unexpected exception causing shutdown while sock still open"; + private static final String TEST_ERROR_2 = "ERROR in contacting RM"; + private static final ExprValue TEST_TUPLE_1 = + tuple("INFO", 10, new ExprStringValue(TEST_MESSAGE_1)); + private static final ExprValue TEST_TUPLE_2 = + tuple("INFO", 10, new ExprStringValue(TEST_MESSAGE_2)); + private static final ExprValue TEST_TUPLE_3 = + tuple("INFO", 15, new ExprStringValue(TEST_MESSAGE_3)); + private static final ExprValue TEST_TUPLE_4 = + tuple("ERROR", 20, new ExprStringValue(TEST_ERROR_1)); + private static final ExprValue TEST_TUPLE_5 = + tuple("ERROR", 20, new ExprStringValue(TEST_ERROR_2)); + private static final ExprValue TEST_NULL_TUPLE = tuple("INFO", 10, ExprNullValue.of()); + private static final ExprValue TEST_MISSING_TUPLE = tuple("INFO", 10, ExprMissingValue.of()); + private static final BrainLogParser LOG_PARSER = new BrainLogParser(); +} diff --git a/core/src/test/java/org/opensearch/sql/expression/window/patterns/BufferPatternRowsWindowFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/window/patterns/BufferPatternRowsWindowFunctionTest.java new file mode 100644 index 0000000000..8898b4e75a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/window/patterns/BufferPatternRowsWindowFunctionTest.java @@ -0,0 +1,95 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.window.patterns; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.opensearch.sql.data.model.ExprTupleValue.fromExprValueMap; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import java.util.Arrays; +import java.util.List; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.patterns.BrainLogParser; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.expression.window.frame.BufferPatternRowsWindowFrame; + +public class BufferPatternRowsWindowFunctionTest { + + private final BufferPatternRowsWindowFrame windowFrame = + new BufferPatternRowsWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()), + LOG_PARSER, + new NamedArgumentExpression("message", new ReferenceExpression("message", STRING))); + + @Test + void test_value_of() { + PeekingIterator tuples = + Iterators.peekingIterator( + Iterators.forArray( + tuple(TEST_MESSAGE_1), tuple(TEST_MESSAGE_2), tuple(TEST_MESSAGE_3))); + + BufferPatternWindowFunction brain = + (BufferPatternWindowFunction) + DSL.brain(DSL.namedArgument("message", DSL.ref("message", STRING))); + List> preprocessedMessages = + LOG_PARSER.preprocessAllLogs(Arrays.asList(TEST_MESSAGE_1, TEST_MESSAGE_2, TEST_MESSAGE_3)); + windowFrame.load(tuples); + + assertEquals( + String.join(" ", LOG_PARSER.parseLogPattern(preprocessedMessages.get(0))), + brain.valueOf(windowFrame).stringValue()); + assertEquals( + 
String.join(" ", LOG_PARSER.parseLogPattern(preprocessedMessages.get(1))), + brain.valueOf(windowFrame).stringValue()); + assertEquals( + String.join(" ", LOG_PARSER.parseLogPattern(preprocessedMessages.get(2))), + brain.valueOf(windowFrame).stringValue()); + } + + @Test + void test_create_window_frame() { + BufferPatternWindowFunction brain = + (BufferPatternWindowFunction) + DSL.brain(DSL.namedArgument("message", DSL.ref("message", STRING))); + assertEquals( + windowFrame, + brain.createWindowFrame(new WindowDefinition(ImmutableList.of(), ImmutableList.of()))); + } + + @Test + void test_to_string() { + BufferPatternWindowFunction brain = + (BufferPatternWindowFunction) DSL.brain(DSL.ref("message", STRING)); + assertEquals("brain(message)", brain.toString()); + } + + private ExprValue tuple(String message) { + return fromExprValueMap(ImmutableMap.of("message", new ExprStringValue(message))); + } + + private static final String TEST_MESSAGE_1 = + "12.132.31.17 - - [2018-07-22T05:36:25.812Z] \\\"GET /opensearch HTTP/1.1\\\" 200 9797" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_2 = + "129.138.185.193 - - [2018-07-22T05:39:39.668Z] \\\"GET /opensearch HTTP/1.1\\\" 404 9920" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_3 = + "240.58.187.246 - - [2018-07-22T06:02:46.006Z] \\\"GET /opensearch HTTP/1.1\\\" 500 6936" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko)" + + " Chrome/11.0.696.50 Safari/534.24\\\""; + private static final BrainLogParser LOG_PARSER = new BrainLogParser(); +} diff --git a/core/src/test/java/org/opensearch/sql/expression/window/patterns/StreamPatternWindowFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/window/patterns/StreamPatternWindowFunctionTest.java new file mode 100644 index 0000000000..e6d6534750 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/window/patterns/StreamPatternWindowFunctionTest.java @@ -0,0 +1,94 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.window.patterns; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.opensearch.sql.data.model.ExprTupleValue.fromExprValueMap; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.patterns.BrainLogParser; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.parse.PatternsExpression; +import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.expression.window.frame.CurrentRowWindowFrame; + +public class StreamPatternWindowFunctionTest { + + private final CurrentRowWindowFrame windowFrame = + new CurrentRowWindowFrame(new WindowDefinition(ImmutableList.of(), ImmutableList.of())); + + @Test + void test_value_of() { + PeekingIterator tuples = + Iterators.peekingIterator( + Iterators.forArray( + tuple(TEST_MESSAGE_1), 
tuple(TEST_MESSAGE_2), tuple(TEST_MESSAGE_3))); + + StreamPatternWindowFunction simplePattern = + (StreamPatternWindowFunction) DSL.simple_pattern(DSL.ref("message", STRING)); + PatternsExpression patternsExpression = + DSL.patterns( + DSL.ref("message", STRING), + new LiteralExpression(new ExprStringValue("")), + new LiteralExpression(new ExprStringValue(""))); + + windowFrame.load(tuples); + assertEquals( + patternsExpression.parseValue(new ExprStringValue(TEST_MESSAGE_1)), + simplePattern.valueOf(windowFrame)); + windowFrame.load(tuples); + assertEquals( + patternsExpression.parseValue(new ExprStringValue(TEST_MESSAGE_2)), + simplePattern.valueOf(windowFrame)); + windowFrame.load(tuples); + assertEquals( + patternsExpression.parseValue(new ExprStringValue(TEST_MESSAGE_3)), + simplePattern.valueOf(windowFrame)); + } + + @Test + void test_create_window_frame() { + StreamPatternWindowFunction simplePattern = + (StreamPatternWindowFunction) DSL.simple_pattern(DSL.ref("message", STRING)); + assertEquals( + windowFrame, + simplePattern.createWindowFrame( + new WindowDefinition(ImmutableList.of(), ImmutableList.of()))); + } + + @Test + void test_to_string() { + StreamPatternWindowFunction simplePattern = + (StreamPatternWindowFunction) DSL.simple_pattern(DSL.ref("message", STRING)); + assertEquals("simple_pattern(message)", simplePattern.toString()); + } + + private ExprValue tuple(String message) { + return fromExprValueMap(ImmutableMap.of("message", new ExprStringValue(message))); + } + + private static final String TEST_MESSAGE_1 = + "12.132.31.17 - - [2018-07-22T05:36:25.812Z] \\\"GET /opensearch HTTP/1.1\\\" 200 9797" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_2 = + "129.138.185.193 - - [2018-07-22T05:39:39.668Z] \\\"GET /opensearch HTTP/1.1\\\" 404 9920" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421" + + " Firefox/6.0a1\\\""; + private static final String TEST_MESSAGE_3 = + "240.58.187.246 - - [2018-07-22T06:02:46.006Z] \\\"GET /opensearch HTTP/1.1\\\" 500 6936" + + " \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko)" + + " Chrome/11.0.696.50 Safari/534.24\\\""; + private static final BrainLogParser LOG_PARSER = new BrainLogParser(); +} diff --git a/core/src/test/java/org/opensearch/sql/utils/FunctionUtilsTest.java b/core/src/test/java/org/opensearch/sql/utils/FunctionUtilsTest.java new file mode 100644 index 0000000000..b90aee37e7 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/utils/FunctionUtilsTest.java @@ -0,0 +1,112 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.NamedArgumentExpression; + +public class FunctionUtilsTest { + @Test + void return_empty_optional_for_empty_arguments_list() { + // Given + List emptyList = Collections.emptyList(); + + // When + Optional result = FunctionUtils.getNamedArgumentValue(emptyList, "anyArg"); + + // Then + 
assertTrue(result.isEmpty()); + } + + @Test + void return_empty_optional_when_no_matching_argument_found() { + // Given + NamedArgumentExpression namedArg = Mockito.mock(NamedArgumentExpression.class); + when(namedArg.getArgName()).thenReturn("differentArg"); + List arguments = Collections.singletonList(namedArg); + + // When + Optional result = FunctionUtils.getNamedArgumentValue(arguments, "searchedArg"); + + // Then + assertTrue(result.isEmpty()); + } + + @Test + void return_value_when_matching_argument_found() { + // Given + ExprValue mockValue = Mockito.mock(ExprValue.class); + Expression mockExpr = Mockito.mock(Expression.class); + when(mockExpr.valueOf()).thenReturn(mockValue); + + NamedArgumentExpression namedArg = Mockito.mock(NamedArgumentExpression.class); + when(namedArg.getArgName()).thenReturn("targetArg"); + when(namedArg.getValue()).thenReturn(mockExpr); + + List arguments = Collections.singletonList(namedArg); + + // When + Optional result = FunctionUtils.getNamedArgumentValue(arguments, "targetArg"); + + // Then + assertTrue(result.isPresent()); + assertEquals(mockValue, result.get()); + } + + @Test + void case_insensitive_when_matching_argument_names() { + // Given + ExprValue mockValue = Mockito.mock(ExprValue.class); + Expression mockExpr = Mockito.mock(Expression.class); + when(mockExpr.valueOf()).thenReturn(mockValue); + + NamedArgumentExpression namedArg = Mockito.mock(NamedArgumentExpression.class); + when(namedArg.getArgName()).thenReturn("TARGETARG"); + when(namedArg.getValue()).thenReturn(mockExpr); + + List arguments = Collections.singletonList(namedArg); + + // When + Optional result = FunctionUtils.getNamedArgumentValue(arguments, "targetArg"); + + // Then + assertTrue(result.isPresent()); + assertEquals(mockValue, result.get()); + } + + @Test + void ignore_non_named_argument_expressions() { + // Given + Expression regularExpression = Mockito.mock(Expression.class); + NamedArgumentExpression namedArg = Mockito.mock(NamedArgumentExpression.class); + when(namedArg.getArgName()).thenReturn("targetArg"); + + ExprValue mockValue = Mockito.mock(ExprValue.class); + Expression mockExpr = Mockito.mock(Expression.class); + when(mockExpr.valueOf()).thenReturn(mockValue); + when(namedArg.getValue()).thenReturn(mockExpr); + + List arguments = Arrays.asList(regularExpression, namedArg); + + // When + Optional result = FunctionUtils.getNamedArgumentValue(arguments, "targetArg"); + + // Then + assertTrue(result.isPresent()); + assertEquals(mockValue, result.get()); + } +} diff --git a/docs/user/ppl/cmd/patterns.rst b/docs/user/ppl/cmd/patterns.rst index 13f08d0aa6..62e34b575c 100644 --- a/docs/user/ppl/cmd/patterns.rst +++ b/docs/user/ppl/cmd/patterns.rst @@ -11,25 +11,58 @@ patterns Description ============ -| The ``patterns`` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. - +* The ``patterns`` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. +* ``patterns`` command now allows users to select different log parsing algorithms to get high log pattern grouping accuracy. Two pattern methods are supported, aka ``simple_pattern`` and ``brain``. 
+* ``simple_pattern`` algorithm is basically a regex parsing method vs ``brain`` algorithm is an automatic log grouping algorithm with high grouping accuracy and keeps semantic meaning. Syntax ============ +patterns [new_field=] (algorithm parameters...) + +Simple Pattern +============ patterns [new_field=] [pattern=] +or + +patterns [new_field=] [pattern=] SIMPLE_PATTERN + * new-field-name: optional string. The name of the new field for extracted patterns, default is ``patterns_field``. If the name already exists, it will replace the original field. * pattern: optional string. The regex pattern of characters that should be filtered out from the text field. If absent, the default pattern is alphanumeric characters (``[a-zA-Z\d]``). * field: mandatory. The field must be a text field. +* SIMPLE_PATTERN: Specify pattern method to be simple_pattern. By default, it's simple_pattern if the setting ``plugins.ppl.default.pattern.method`` is not specified. + +Brain +============ +patterns [new_field=] [variable_count_threshold=] [frequency_threshold_percentage=] BRAIN + +* new-field-name: optional string. The name of the new field for extracted patterns, default is ``patterns_field``. If the name already exists, it will replace the original field. +* variable_count_threshold: optional integer. Number of tokens in the group per position as variable threshold in case of word tokens appear rarely. +* frequency_threshold_percentage: optional double. To select longest word combination frequency, it needs a lower bound of frequency. The representative frequency of longest word combination should be >= highest token frequency of log * threshold percentage +* field: mandatory. The field must be a text field. +* BRAIN: Specify pattern method to be brain. By default, it's simple_pattern if the setting ``plugins.ppl.default.pattern.method`` is not specified. + +Change default pattern method +============ +To override default pattern method, users can run following command + +.. code-block:: + + PUT _cluster/settings + { + "transient": { + "plugins.ppl.default.pattern.method": "BRAIN" + } + } -Example 1: Create the new field +Simple Pattern Example 1: Create the new field =============================== The example shows how to use extract punctuations in ``email`` for each document. Parsing a null field will return an empty string. PPL query:: - os> source=accounts | patterns email | fields email, patterns_field ; + os> source=accounts | patterns email SIMPLE_PATTERN | fields email, patterns_field ; fetched rows / total rows = 4/4 +-----------------------+----------------+ | email | patterns_field | @@ -40,14 +73,14 @@ PPL query:: | daleadams@boink.com | @. | +-----------------------+----------------+ -Example 2: Extract log patterns +Simple Pattern Example 2: Extract log patterns =============================== The example shows how to extract punctuations from a raw log field using the default patterns. PPL query:: - os> source=apache | patterns message | fields message, patterns_field ; + os> source=apache | patterns message SIMPLE_PATTERN | fields message, patterns_field ; fetched rows / total rows = 4/4 +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------+ | message | patterns_field | @@ -58,14 +91,14 @@ PPL query:: | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | ... - - [//::: -] " / /." 
| +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------+ -Example 3: Extract log patterns with custom regex pattern +Simple Pattern Example 3: Extract log patterns with custom regex pattern ========================================================= The example shows how to extract punctuations from a raw log field using user defined patterns. PPL query:: - os> source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | fields message, no_numbers ; + os> source=apache | patterns new_field='no_numbers' pattern='[0-9]' message SIMPLE_PATTERN | fields message, no_numbers ; fetched rows / total rows = 4/4 +-----------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ | message | no_numbers | @@ -76,7 +109,44 @@ PPL query:: | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | ... - - [/Sep/::: -] "POST /users HTTP/." | +-----------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ -Limitation +Brain Example 1: Extract log patterns +=============================== + +The example shows how to extract semantic meaningful log patterns from a raw log field using the brain algorithm. The default variable count threshold is 5. + +PPL query:: + + os> source=apache | patterns message BRAIN | fields message, patterns_field ; + fetched rows / total rows = 4/4 + +-----------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ + | message | patterns_field | + |-----------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------| + | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] "HEAD /e-business/mindshare HTTP/<*><*>" 404 <*> | + | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] "GET /architectures/convergence/niches/mindshare HTTP/<*><*>" 100 <*> | + | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*IP*> - - [<*>/Sep/<*>:<*>:<*>:<*> <*>] "PATCH /strategize/out-of-the-box HTTP/<*><*>" 401 <*> | + | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*IP*> - - [<*>/Sep/<*>:<*>:<*>:<*> <*>] "POST /users HTTP/<*><*>" 301 <*> | + +-----------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ + +Brain Example 2: Extract log patterns with custom parameters +=============================== + +The example shows how to extract semantic meaningful log patterns from a raw log field using defined parameter of brain algorithm. 
+ +PPL query:: + + os> source=apache | patterns variable_count_threshold=2 message BRAIN | fields message, patterns_field ; + fetched rows / total rows = 4/4 + +-----------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------+ + | message | patterns_field | + |-----------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------| + | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*><*>" <*> <*> | + | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*><*>" <*> <*> | + | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*><*>" <*> <*> | + | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*><*>" <*> <*> | + +-----------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------+ + + +Limitations ========== -The patterns command has the same limitations as the parse command, see `parse limitations <./parse.rst#Limitations>`_ for details. +- Patterns command is not pushed down to OpenSearch data node for now. It will only group log patterns on log messages returned to coordinator node. 
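As a quick illustration of the two ``brain`` parameters documented above (``variable_count_threshold`` and ``frequency_threshold_percentage``), the sketch below drives ``BrainLogParser`` the same way the new unit tests in this change do: ``preprocessAllLogs`` followed by ``parseLogPattern``. It is only a sketch; the thresholds mirror the defaults resolved in ``BufferPatternWindowFunction.createWindowFrame()`` (5 and 0.3f), and the class name and sample log lines are made up.

.. code-block:: java

    import java.util.Arrays;
    import java.util.List;
    import org.opensearch.sql.common.patterns.BrainLogParser;

    public class BrainPatternsExample {
      public static void main(String[] args) {
        // Illustration only: these thresholds mirror the defaults resolved in
        // BufferPatternWindowFunction.createWindowFrame() when the named arguments
        // variable_count_threshold / frequency_threshold_percentage are absent.
        BrainLogParser parser = new BrainLogParser(5, 0.3f);

        // Made-up log lines; any text field values work.
        List<String> logs =
            Arrays.asList(
                "127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] \"GET /a HTTP/1.0\" 100 28722",
                "118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] \"PATCH /b HTTP/1.0\" 401 27439");

        // Same call sequence as BufferPatternRowsWindowFunctionTest: preprocess every
        // message first, then map each preprocessed message to its pattern tokens.
        List<List<String>> preprocessed = parser.preprocessAllLogs(logs);
        for (List<String> tokens : preprocessed) {
          System.out.println(String.join(" ", parser.parseLogPattern(tokens)));
        }
      }
    }

In the window-function path the same two steps happen inside ``BufferPatternRowsWindowFrame`` (which buffers and preprocesses the rows of the partition) and ``BufferPatternWindowFunction.valueOf()`` (which parses the pattern for the current row).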
diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java index d484f3c4d0..74f6c7d11b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java @@ -224,7 +224,7 @@ public QueryManager queryManager() { @Provides public PPLService pplService(QueryManager queryManager, QueryPlanFactory queryPlanFactory) { - return new PPLService(new PPLSyntaxParser(), queryManager, queryPlanFactory); + return new PPLService(new PPLSyntaxParser(), queryManager, queryPlanFactory, settings); } @Provides diff --git a/integ-test/src/test/java/org/opensearch/sql/util/StandaloneModule.java b/integ-test/src/test/java/org/opensearch/sql/util/StandaloneModule.java index 5d6f0b5a55..bdf9d6c228 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/StandaloneModule.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/StandaloneModule.java @@ -89,7 +89,7 @@ public QueryManager queryManager() { @Provides public PPLService pplService(QueryManager queryManager, QueryPlanFactory queryPlanFactory) { - return new PPLService(new PPLSyntaxParser(), queryManager, queryPlanFactory); + return new PPLService(new PPLSyntaxParser(), queryManager, queryPlanFactory, settings); } @Provides diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java index 612771eea4..78b0948762 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java @@ -85,6 +85,13 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting DEFAULT_PATTERN_METHOD_SETTING = + Setting.simpleString( + Key.DEFAULT_PATTERN_METHOD.getKeyValue(), + "SIMPLE_PATTERN", + Setting.Property.NodeScope, + Setting.Property.Dynamic); + public static final Setting QUERY_MEMORY_LIMIT_SETTING = new Setting<>( Key.QUERY_MEMORY_LIMIT.getKeyValue(), @@ -276,6 +283,12 @@ public OpenSearchSettings(ClusterSettings clusterSettings) { Key.PPL_ENABLED, PPL_ENABLED_SETTING, new Updater(Key.PPL_ENABLED)); + register( + settingBuilder, + clusterSettings, + Key.DEFAULT_PATTERN_METHOD, + DEFAULT_PATTERN_METHOD_SETTING, + new Updater(Key.DEFAULT_PATTERN_METHOD)); register( settingBuilder, clusterSettings, @@ -450,6 +463,7 @@ public static List> pluginSettings() { .add(SQL_DELETE_ENABLED_SETTING) .add(SQL_PAGINATION_API_SEARCH_AFTER_SETTING) .add(PPL_ENABLED_SETTING) + .add(DEFAULT_PATTERN_METHOD_SETTING) .add(QUERY_MEMORY_LIMIT_SETTING) .add(QUERY_SIZE_LIMIT_SETTING) .add(METRICS_ROLLING_WINDOW_SETTING) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java index 33a785c498..5c2acdc628 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java @@ -85,8 +85,9 @@ public QueryManager queryManager(NodeClient nodeClient) { } @Provides - public PPLService pplService(QueryManager queryManager, QueryPlanFactory queryPlanFactory) { - return new PPLService(new PPLSyntaxParser(), queryManager, queryPlanFactory); + public PPLService pplService( 
+ QueryManager queryManager, QueryPlanFactory queryPlanFactory, Settings settings) { + return new PPLService(new PPLSyntaxParser(), queryManager, queryPlanFactory, settings); } @Provides diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index a3e7b5ff35..5158931657 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -35,6 +35,11 @@ NEW_FIELD: 'NEW_FIELD'; KMEANS: 'KMEANS'; AD: 'AD'; ML: 'ML'; +PATTERN_METHOD: 'PATTERN_METHOD'; +SIMPLE_PATTERN: 'SIMPLE_PATTERN'; +BRAIN: 'BRAIN'; +VARIABLE_COUNT_THRESHOLD: 'VARIABLE_COUNT_THRESHOLD'; +FREQUENCY_THRESHOLD_PERCENTAGE: 'FREQUENCY_THRESHOLD_PERCENTAGE'; // COMMAND ASSIST KEYWORDS AS: 'AS'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 4dc223b028..cc882b6b1a 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -45,10 +45,10 @@ commands | rareCommand | grokCommand | parseCommand - | patternsCommand | kmeansCommand | adCommand | mlCommand + | patternsCommand ; searchCommand @@ -113,18 +113,25 @@ parseCommand : PARSE (source_field = expression) (pattern = stringLiteral) ; +patternsMethod + : PUNCT + | REGEX + ; + patternsCommand - : PATTERNS (patternsParameter)* (source_field = expression) + : PATTERNS (patternsParameter)* (source_field = expression) (pattern_method = patternMethod)* ; patternsParameter : (NEW_FIELD EQUAL new_field = stringLiteral) | (PATTERN EQUAL pattern = stringLiteral) + | (VARIABLE_COUNT_THRESHOLD EQUAL variable_count_threshold = integerLiteral) + | (FREQUENCY_THRESHOLD_PERCENTAGE EQUAL frequency_threshold_percentage = decimalLiteral) ; -patternsMethod - : PUNCT - | REGEX +patternMethod + : SIMPLE_PATTERN + | BRAIN ; kmeansCommand @@ -925,4 +932,6 @@ keywordsCanBeId | SPARKLINE | C | DC + | patternMethod + | patternsMethod ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java index 7769f5dfae..14839aa644 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java @@ -14,6 +14,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.QueryContext; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; import org.opensearch.sql.executor.QueryManager; @@ -35,6 +36,8 @@ public class PPLService { private final QueryPlanFactory queryExecutionFactory; + private final Settings settings; + private final PPLQueryDataAnonymizer anonymizer = new PPLQueryDataAnonymizer(); private static final Logger LOG = LogManager.getLogger(); @@ -77,7 +80,7 @@ private AbstractPlan plan( Statement statement = cst.accept( new AstStatementBuilder( - new AstBuilder(new AstExpressionBuilder(), request.getRequest()), + new AstBuilder(new AstExpressionBuilder(), settings, request.getRequest()), AstStatementBuilder.StatementBuilderContext.builder() .isExplain(request.isExplainRequest()) .build())); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 1bef820399..1c9534c054 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ 
b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -31,15 +31,18 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.ParseTree; -import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Alias; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.Map; @@ -47,6 +50,7 @@ import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; @@ -64,6 +68,9 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.ast.tree.Window; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.AdCommandContext; @@ -79,6 +86,8 @@ public class AstBuilder extends OpenSearchPPLParserBaseVisitor { private final AstExpressionBuilder expressionBuilder; + private final Settings settings; + /** * PPL query to get original token text. This is necessary because token.getText() returns text * without whitespaces or other characters discarded by lexer. @@ -280,18 +289,35 @@ public UnresolvedPlan visitParseCommand(OpenSearchPPLParser.ParseCommandContext @Override public UnresolvedPlan visitPatternsCommand(OpenSearchPPLParser.PatternsCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); - ImmutableMap.Builder builder = ImmutableMap.builder(); + List unresolvedArguments = new ArrayList<>(); + unresolvedArguments.add(sourceField); + AtomicReference alias = new AtomicReference<>("patterns_field"); ctx.patternsParameter() .forEach( x -> { - builder.put( - x.children.get(0).toString(), - (Literal) internalVisitExpression(x.children.get(2))); + String argName = x.children.get(0).toString(); + Literal value = (Literal) internalVisitExpression(x.children.get(2)); + if ("new_field".equalsIgnoreCase(argName)) { + alias.set((String) value.getValue()); + } + unresolvedArguments.add(new Argument(argName, value)); }); - java.util.Map arguments = builder.build(); - Literal pattern = arguments.getOrDefault("pattern", AstDSL.stringLiteral("")); - - return new Parse(ParseMethod.PATTERNS, sourceField, pattern, arguments); + return new Window( + new Alias( + alias.get(), + new WindowFunction( + new Function( + ctx.pattern_method != null + ? 
StringUtils.unquoteIdentifier(ctx.pattern_method.getText()) + .toLowerCase(Locale.ROOT) + : settings + .getSettingValue(Key.DEFAULT_PATTERN_METHOD) + .toString() + .toLowerCase(Locale.ROOT), + unresolvedArguments), + List.of(), // ignore partition by list for now as we haven't seen such requirement + List.of()), // ignore sort by list for now as we haven't seen such requirement + alias.get())); } /** Top command. */ diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java index 598f6691cb..9314b66e88 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java @@ -18,6 +18,7 @@ import org.mockito.Mock; import org.mockito.junit.MockitoJUnitRunner; import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.executor.DefaultQueryManager; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; @@ -44,13 +45,16 @@ public class PPLServiceTest { @Mock private ExecutionEngine.Schema schema; + @Mock private Settings settings; + /** Setup the test context. */ @Before public void setUp() { queryManager = DefaultQueryManager.defaultQueryManager(); pplService = - new PPLService(new PPLSyntaxParser(), queryManager, new QueryPlanFactory(queryService)); + new PPLService( + new PPLSyntaxParser(), queryManager, new QueryPlanFactory(queryService), settings); } @After diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index ced266ed78..0fd2b84a39 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -7,6 +7,7 @@ import static java.util.Collections.emptyList; import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; import static org.opensearch.sql.ast.dsl.AstDSL.agg; import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; import static org.opensearch.sql.ast.dsl.AstDSL.alias; @@ -39,6 +40,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.tableFunction; import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; +import static org.opensearch.sql.ast.dsl.AstDSL.window; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import static org.opensearch.sql.utils.SystemIndexUtils.mappingTable; @@ -48,15 +50,20 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.mockito.Mockito; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; +import org.opensearch.sql.ast.expression.PatternMethod; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; public class AstBuilderTest { @@ -65,6 +72,8 @@ public class AstBuilderTest { private 
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java
index ced266ed78..0fd2b84a39 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java
@@ -7,6 +7,7 @@ import static java.util.Collections.emptyList;
 import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.when;
 import static org.opensearch.sql.ast.dsl.AstDSL.agg;
 import static org.opensearch.sql.ast.dsl.AstDSL.aggregate;
 import static org.opensearch.sql.ast.dsl.AstDSL.alias;
@@ -39,6 +40,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral;
 import static org.opensearch.sql.ast.dsl.AstDSL.tableFunction;
 import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg;
+import static org.opensearch.sql.ast.dsl.AstDSL.window;
 import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME;
 import static org.opensearch.sql.utils.SystemIndexUtils.mappingTable;

@@ -48,15 +50,20 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
+import org.mockito.Mockito;
 import org.opensearch.sql.ast.Node;
+import org.opensearch.sql.ast.expression.Argument;
 import org.opensearch.sql.ast.expression.DataType;
 import org.opensearch.sql.ast.expression.Literal;
 import org.opensearch.sql.ast.expression.ParseMethod;
+import org.opensearch.sql.ast.expression.PatternMethod;
 import org.opensearch.sql.ast.expression.SpanUnit;
 import org.opensearch.sql.ast.tree.AD;
 import org.opensearch.sql.ast.tree.Kmeans;
 import org.opensearch.sql.ast.tree.ML;
 import org.opensearch.sql.ast.tree.RareTopN.CommandType;
+import org.opensearch.sql.common.setting.Settings;
+import org.opensearch.sql.common.setting.Settings.Key;
 import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;

 public class AstBuilderTest {
@@ -65,6 +72,8 @@ public class AstBuilderTest {
   private PPLSyntaxParser parser = new PPLSyntaxParser();

+  private final Settings settings = Mockito.mock(Settings.class);
+
   @Test
   public void testSearchCommand() {
     assertEqual(
@@ -599,33 +608,6 @@ public void testParseCommand() {
             ImmutableMap.of()));
   }

-  @Test
-  public void testPatternsCommand() {
-    assertEqual(
-        "source=t | patterns new_field=\"custom_field\" " + "pattern=\"custom_pattern\" raw",
-        parse(
-            relation("t"),
-            ParseMethod.PATTERNS,
-            field("raw"),
-            stringLiteral("custom_pattern"),
-            ImmutableMap.<String, Literal>builder()
-                .put("new_field", stringLiteral("custom_field"))
-                .put("pattern", stringLiteral("custom_pattern"))
-                .build()));
-  }
-
-  @Test
-  public void testPatternsCommandWithoutArguments() {
-    assertEqual(
-        "source=t | patterns raw",
-        parse(
-            relation("t"),
-            ParseMethod.PATTERNS,
-            field("raw"),
-            stringLiteral(""),
-            ImmutableMap.of()));
-  }
-
   @Test
   public void testKmeansCommand() {
     assertEqual(
@@ -742,6 +724,47 @@ public void testShowDataSourcesCommand() {
     assertEqual("show datasources", relation(DATASOURCES_TABLE_NAME));
   }

+  @Test
+  public void testPatternsCommand() {
+    when(settings.getSettingValue(Key.DEFAULT_PATTERN_METHOD)).thenReturn("SIMPLE_PATTERN");
+    assertEqual(
+        "source=t | patterns new_field=\"custom_field\" pattern=\"custom_pattern\" raw",
+        window(
+            relation("t"),
+            PatternMethod.SIMPLE_PATTERN,
+            field("raw"),
+            "custom_field",
+            Arrays.asList(
+                new Argument("new_field", new Literal("custom_field", DataType.STRING)),
+                new Argument("pattern", new Literal("custom_pattern", DataType.STRING)))));
+
+    assertEqual(
+        "source=t | patterns variable_count_threshold=2 frequency_threshold_percentage=0.1 raw"
+            + " BRAIN",
+        window(
+            relation("t"),
+            PatternMethod.BRAIN,
+            field("raw"),
+            "patterns_field",
+            Arrays.asList(
+                new Argument("variable_count_threshold", new Literal(2, DataType.INTEGER)),
+                new Argument(
+                    "frequency_threshold_percentage", new Literal(0.1, DataType.DOUBLE)))));
+  }
+
+  @Test
+  public void testPatternsWithoutArguments() {
+    when(settings.getSettingValue(Key.DEFAULT_PATTERN_METHOD)).thenReturn("SIMPLE_PATTERN");
+    assertEqual(
+        "source=t | patterns raw",
+        window(
+            relation("t"),
+            PatternMethod.SIMPLE_PATTERN,
+            field("raw"),
+            "patterns_field",
+            Arrays.asList()));
+  }
+
   protected void assertEqual(String query, Node expectedPlan) {
     Node actualPlan = plan(query);
     assertEquals(expectedPlan, actualPlan);
@@ -753,7 +776,7 @@ protected void assertEqual(String query, String expected) {
   }

   private Node plan(String query) {
-    AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), query);
+    AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), settings, query);
     return astBuilder.visit(parser.parse(query));
   }
 }
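One further case worth spelling out, written as a sketch in the style of the tests above (it is not part of the patch): an explicit method keyword bypasses the Settings lookup entirely, so no stubbing of DEFAULT_PATTERN_METHOD is needed when the query names the method itself. The window(...) helper is assumed to have the signature used in the tests above.

@Test
public void testPatternsCommandWithExplicitMethodIgnoresDefault() {
  // No when(settings...) stubbing: ctx.pattern_method is non-null, so the default is never read.
  assertEqual(
      "source=t | patterns raw BRAIN",
      window(
          relation("t"),
          PatternMethod.BRAIN,
          field("raw"),
          "patterns_field",
          Arrays.asList()));
}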
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstNowLikeFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstNowLikeFunctionTest.java
index 16aa0752e6..e13e8cb5b4 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstNowLikeFunctionTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstNowLikeFunctionTest.java
@@ -20,7 +20,9 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
+import org.mockito.Mock;
 import org.opensearch.sql.ast.Node;
+import org.opensearch.sql.common.setting.Settings;
 import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;

 @RunWith(Parameterized.class)
@@ -28,6 +30,8 @@ public class AstNowLikeFunctionTest {
   private final PPLSyntaxParser parser = new PPLSyntaxParser();

+  @Mock private Settings settings;
+
   /**
    * Set parameterized values used in test.
    *
@@ -110,7 +114,7 @@ protected void assertEqual(Node expectedPlan, String query) {
   }

   private Node plan(String query) {
-    AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), query);
+    AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), settings, query);
     return astBuilder.visit(parser.parse(query));
   }
 }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java
index 7d7b31e822..ef73eb8e62 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java
@@ -19,17 +19,21 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
+import org.mockito.Mock;
 import org.opensearch.sql.ast.Node;
 import org.opensearch.sql.ast.expression.AllFields;
 import org.opensearch.sql.ast.statement.Explain;
 import org.opensearch.sql.ast.statement.Query;
 import org.opensearch.sql.ast.statement.Statement;
+import org.opensearch.sql.common.setting.Settings;
 import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;

 public class AstStatementBuilderTest {

   @Rule public ExpectedException exceptionRule = ExpectedException.none();

+  @Mock private Settings settings;
+
   private PPLSyntaxParser parser = new PPLSyntaxParser();

   @Test
@@ -65,7 +69,7 @@ private void assertExplainEqual(String query, Statement expectedStatement) {
   private Node plan(String query, boolean isExplain) {
     final AstStatementBuilder builder =
         new AstStatementBuilder(
-            new AstBuilder(new AstExpressionBuilder(), query),
+            new AstBuilder(new AstExpressionBuilder(), settings, query),
             AstStatementBuilder.StatementBuilderContext.builder().isExplain(isExplain).build());
     return builder.visit(parser.parse(query));
   }
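A small caveat on the two suites above: without MockitoJUnitRunner (or an explicit MockitoAnnotations init elsewhere in the class), an @Mock field is never populated and stays null. That is harmless here only because none of these tests parse a patterns command; a suite that does exercise that path should build the stub eagerly, as AstBuilderTest does:

// Initialized eagerly, so plan(...) always hands a non-null Settings to AstBuilder.
private final Settings settings = Mockito.mock(Settings.class);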
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
index cd51ea07df..252f366b9d 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
@@ -13,9 +13,11 @@ import java.util.Collections;
 import org.junit.Test;
 import org.junit.runner.RunWith;
+import org.mockito.Mock;
 import org.mockito.junit.MockitoJUnitRunner;
 import org.opensearch.sql.ast.statement.Statement;
 import org.opensearch.sql.ast.tree.UnresolvedPlan;
+import org.opensearch.sql.common.setting.Settings;
 import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;
 import org.opensearch.sql.ppl.parser.AstBuilder;
 import org.opensearch.sql.ppl.parser.AstExpressionBuilder;
@@ -26,6 +28,8 @@ public class PPLQueryDataAnonymizerTest {
   private final PPLSyntaxParser parser = new PPLSyntaxParser();

+  @Mock private Settings settings;
+
   @Test
   public void testSearchCommand() {
     assertEquals("source=t | where a = ***", anonymize("search source=t a=1"));
@@ -165,7 +169,7 @@ public void anonymizeFieldsNoArg() {
   }

   private String anonymize(String query) {
-    AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), query);
+    AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), settings, query);
     return anonymize(astBuilder.visit(parser.parse(query)));
   }

@@ -177,7 +181,7 @@ private String anonymize(UnresolvedPlan plan) {
   private String anonymizeStatement(String query, boolean isExplain) {
     AstStatementBuilder builder =
         new AstStatementBuilder(
-            new AstBuilder(new AstExpressionBuilder(), query),
+            new AstBuilder(new AstExpressionBuilder(), settings, query),
             AstStatementBuilder.StatementBuilderContext.builder().isExplain(isExplain).build());
     Statement statement = builder.visit(parser.parse(query));
     PPLQueryDataAnonymizer anonymize = new PPLQueryDataAnonymizer();