Skip to content

Commit

Permalink
Merge pull request #39238 from sanjana/regexp-improvements
Browse files Browse the repository at this point in the history
Throw Runtime exceptions for invalid find indexes in Regexps
  • Loading branch information
pcnfernando authored Feb 15, 2023
2 parents 7526336 + f24608a commit ae723d0
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static io.ballerina.runtime.api.constants.RuntimeConstants.DECIMAL_LANG_LIB;
import static io.ballerina.runtime.api.constants.RuntimeConstants.FUTURE_LANG_LIB;
import static io.ballerina.runtime.api.constants.RuntimeConstants.MAP_LANG_LIB;
import static io.ballerina.runtime.api.constants.RuntimeConstants.REGEXP_LANG_LIB;
import static io.ballerina.runtime.api.constants.RuntimeConstants.STRING_LANG_LIB;
import static io.ballerina.runtime.api.constants.RuntimeConstants.TABLE_LANG_LIB;
import static io.ballerina.runtime.api.constants.RuntimeConstants.VALUE_LANG_LIB;
Expand Down Expand Up @@ -124,6 +125,9 @@ public class BallerinaErrorReasons {
public static final BString REG_EXP_PARSING_ERROR =
StringUtils.fromString(BALLERINA_PREFIX.concat(REG_EXP_PARSING_ERROR_IDENTIFIER));

/**
 * Error reason used for failures raised by regexp lang-lib operations.
 * Built through {@code getModulePrefixedReason}, i.e. the org prefix, the {@code REGEXP_LANG_LIB} module name,
 * a closing curly brace and the identifier {@code RegularExpressionOperationError}.
 */
public static final BString REGEXP_OPERATION_ERROR = getModulePrefixedReason(REGEXP_LANG_LIB,
"RegularExpressionOperationError");

public static BString getModulePrefixedReason(String moduleName, String identifier) {
return StringUtils.fromString(BALLERINA_ORG_PREFIX.concat(moduleName)
.concat(CLOSING_CURLY_BRACE).concat(identifier));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ public enum RuntimeErrors implements DiagnosticCode {
INVALID_FRACTION_DIGITS("invalid.fraction.digits", "RUNTIME_0099"),
INVALID_UTF_8_BYTE_ARRAY_VALUE("invalid.utf8.byte.array.value", "RUNTIME_0100"),
INCOMPATIBLE_ARGUMENTS("incompatible.arguments", "RUNTIME_0101"),
DECIMAL_VALUE_OUT_OF_RANGE("decimal.value.out.of.range", "RUNTIME_0102");
DECIMAL_VALUE_OUT_OF_RANGE("decimal.value.out.of.range", "RUNTIME_0102"),
NEGATIVE_REGEXP_FIND_INDEX("regexp.match.string.negative.index", "RUNTIME_0103"),
INVALID_REGEXP_FIND_INDEX("regexp.match.string.index.out.of.range", "RUNTIME_0104");

private String errorMsgKey;
private String errorCode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,5 @@ decimal.value.out.of.range = decimal range overflow
config.size.mismatch = [{0}] the size for configurable variable ''{1}'' is expected to be ''{2}'', \
but found ''{3}''
invalid.utf8.byte.array.value = array contains invalid UTF-8 byte value
regexp.match.string.negative.index = start index cannot be less than 0
regexp.match.string.index.out.of.range = start index ''{0}'' cannot be greater than input string length ''{1}''
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import io.ballerina.runtime.api.values.BArray;
import io.ballerina.runtime.api.values.BRegexpValue;
import io.ballerina.runtime.api.values.BString;
import io.ballerina.runtime.internal.util.exceptions.BLangExceptionHelper;
import io.ballerina.runtime.internal.util.exceptions.BallerinaErrorReasons;
import io.ballerina.runtime.internal.util.exceptions.RuntimeErrors;

import java.util.regex.Matcher;

Expand All @@ -34,18 +37,20 @@
*/
public class Find {

/**
 * Looks for the first match of {@code regExp} in {@code str}, searching from {@code startIndex}.
 *
 * NOTE(review): this span is a rendered diff, so the previous {@code int}-based lines and the new
 * {@code long}-based lines appear side by side. In the {@code long} variant the index is validated by
 * {@code checkIndexWithinRange} first and only then narrowed to {@code int} for {@code Matcher.find(int)}.
 *
 * @param regExp     regular expression value to match with
 * @param str        string to search in
 * @param startIndex index at which the search starts
 * @return the value of {@code RegexUtil.getGroupZeroAsSpan} for the match, or {@code null} when nothing matches
 */
public static BArray find(BRegexpValue regExp, BString str, int startIndex) {
public static BArray find(BRegexpValue regExp, BString str, long startIndex) {
checkIndexWithinRange(str, startIndex);
Matcher matcher = RegexUtil.getMatcher(regExp, str);
if (matcher.find(startIndex)) {
if (matcher.find((int) startIndex)) {
return RegexUtil.getGroupZeroAsSpan(matcher);
}
return null;
}

public static BArray findGroups(BRegexpValue regExp, BString str, int startIndex) {
public static BArray findGroups(BRegexpValue regExp, BString str, long startIndex) {
checkIndexWithinRange(str, startIndex);
Matcher matcher = RegexUtil.getMatcher(regExp, str);
BArray resultArray = ValueCreator.createArrayValue(GROUPS_AS_SPAN_ARRAY_TYPE);
matcher.region(startIndex, str.length());
matcher.region((int) startIndex, str.length());
if (matcher.find()) {
resultArray.append(RegexUtil.getGroupZeroAsSpan(matcher));
if (matcher.groupCount() != 0) {
Expand All @@ -61,10 +66,11 @@ public static BArray findGroups(BRegexpValue regExp, BString str, int startIndex
return resultArray;
}

public static BArray findAll(BRegexpValue regExp, BString str, int startIndex) {
public static BArray findAll(BRegexpValue regExp, BString str, long startIndex) {
checkIndexWithinRange(str, startIndex);
Matcher matcher = RegexUtil.getMatcher(regExp, str);
BArray resultArray = ValueCreator.createArrayValue(GROUPS_AS_SPAN_ARRAY_TYPE);
matcher.region(startIndex, str.length());
matcher.region((int) startIndex, str.length());
while (matcher.find()) {
resultArray.append(RegexUtil.getGroupZeroAsSpan(matcher));
}
Expand All @@ -74,9 +80,10 @@ public static BArray findAll(BRegexpValue regExp, BString str, int startIndex) {
return resultArray;
}

public static BArray findAllGroups(BRegexpValue regExp, BString str, int startIndex) {
public static BArray findAllGroups(BRegexpValue regExp, BString str, long startIndex) {
checkIndexWithinRange(str, startIndex);
Matcher matcher = RegexUtil.getMatcher(regExp, str);
matcher.region(startIndex, str.length());
matcher.region((int) startIndex, str.length());
BArray groupArray = ValueCreator.createArrayValue(RegexUtil.GROUPS_ARRAY_TYPE);
while (matcher.find()) {
BArray group = RegexUtil.getMatcherGroupsAsSpanArr(matcher);
Expand All @@ -89,4 +96,22 @@ public static BArray findAllGroups(BRegexpValue regExp, BString str, int startIn
}
return groupArray;
}

/**
 * Validates the start index handed to the regexp find functions before it is narrowed to {@code int}.
 *
 * <p>Checks run in this order:
 * <ol>
 *   <li>a negative index is rejected with {@code NEGATIVE_REGEXP_FIND_INDEX};</li>
 *   <li>an index that does not fit in an {@code int} is rejected with {@code INDEX_NUMBER_TOO_LARGE};</li>
 *   <li>an index strictly greater than the string length is rejected with
 *       {@code INVALID_REGEXP_FIND_INDEX}.</li>
 * </ol>
 *
 * @param str        string that is going to be searched
 * @param startIndex index at which the search is requested to start
 */
private static void checkIndexWithinRange(BString str, long startIndex) {
    // Sign is checked first so that a very large negative value is reported as negative,
    // not as "index number too large".
    if (startIndex < 0) {
        throw BLangExceptionHelper.getRuntimeException(BallerinaErrorReasons.INDEX_OUT_OF_RANGE_ERROR,
                RuntimeErrors.NEGATIVE_REGEXP_FIND_INDEX);
    }

    // The callers cast the index to int for java.util.regex.Matcher; refuse values that would be truncated.
    if (startIndex != (int) startIndex) {
        throw BLangExceptionHelper.getRuntimeException(BallerinaErrorReasons.REGEXP_OPERATION_ERROR,
                RuntimeErrors.INDEX_NUMBER_TOO_LARGE, startIndex);
    }

    // An index equal to the length is a legal search position for Matcher.find(int) and Matcher.region,
    // and it is what an empty input with start index 0 produces. Only a strictly greater index is an
    // error, which is also what the "cannot be greater than" message states.
    int strLength = str.length();
    if (startIndex > strLength) {
        throw BLangExceptionHelper.getRuntimeException(BallerinaErrorReasons.INDEX_OUT_OF_RANGE_ERROR,
                RuntimeErrors.INVALID_REGEXP_FIND_INDEX, startIndex, strLength);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.ballerinalang.test.BCompileUtil;
import org.ballerinalang.test.BRunUtil;
import org.ballerinalang.test.CompileResult;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
Expand All @@ -33,16 +34,18 @@
*/
public class LangLibRegexpTest {

private CompileResult compileResult;
private CompileResult compileResult, negativeTests;

/**
 * Compiles the two Ballerina sources used by this class: the regular regexp tests and the
 * negative (error-producing) regexp tests.
 */
@BeforeClass
public void setup() {
    this.compileResult = BCompileUtil.compile("test-src/regexp_test.bal");
    this.negativeTests = BCompileUtil.compile("test-src/regexp_negative_test.bal");
}

/**
 * Drops the references to both compile results.
 */
@AfterClass
public void tearDown() {
    this.compileResult = null;
    this.negativeTests = null;
}

@Test(dataProvider = "testRegexLangLibFunctionList")
Expand All @@ -69,4 +72,57 @@ public Object[] testRegexLangLibFunctions() {
"testSplit"
};
}

/**
 * Invokes the given function from the negative test source and checks that the returned value
 * renders as the "start index cannot be less than 0" error.
 *
 * @param functionName name of the Ballerina function to invoke
 */
@Test(dataProvider = "negativeRegexpFindIndexProvider")
public void testNegativeRegexp(String functionName) {
    String expectedError = "error(\"IndexOutOfRange\",message=\"start index cannot be less than 0\")";
    Object result = BRunUtil.invoke(negativeTests, functionName);
    Assert.assertEquals(result.toString(), expectedError);
}

/**
 * Supplies the names of the Ballerina functions that call a find function with a negative start index.
 *
 * @return one row per function name
 */
@DataProvider(name = "negativeRegexpFindIndexProvider")
private Object[][] negativeRegexpFindIndexes() {
    Object[][] functionNames = {
            {"testNegativeIndexFind"},
            {"testNegativeIndexFindAll"},
            {"testNegativeIndexFindGroups"},
            {"testNegativeIndexFindAllGroups"},
    };
    return functionNames;
}

/**
 * Invokes the given function from the negative test source and checks that the returned value
 * renders as the out-of-range error carrying the supplied start index and string length.
 *
 * @param functionName name of the Ballerina function to invoke
 * @param startIndex   start index expected in the error message
 * @param length       input string length expected in the error message
 */
@Test(dataProvider = "invalidRegexpFindIndexProvider")
public void testInvalidRegexp(String functionName, int startIndex, int length) {
    String expectedError = "error(\"IndexOutOfRange\",message=\"start index '" + startIndex
            + "' cannot be greater than input string length '" + length + "'\")";
    Object result = BRunUtil.invoke(negativeTests, functionName);
    Assert.assertEquals(result.toString(), expectedError);
}

/**
 * Supplies, per row, a Ballerina function name together with the start index and input string
 * length that the corresponding error message is expected to contain.
 *
 * @return rows of {function name, start index, string length}
 */
@DataProvider(name = "invalidRegexpFindIndexProvider")
private Object[][] invalidRegexpFindIndexes() {
    Object[][] cases = {
            {"testInvalidIndexFind", 12, 5},
            {"testInvalidIndexFindAll", 112, 63},
            {"testInvalidIndexFindGroups", 97, 52},
            {"testInvalidIndexFindAllGroups", 123, 31},
    };
    return cases;
}

/**
 * Invokes the given function from the negative test source and checks that the returned value
 * renders as the "index number too large" error, with the index formatted using grouping separators.
 *
 * @param functionName name of the Ballerina function to invoke
 * @param startIndex   start index expected in the error message
 */
@Test(dataProvider = "longRegexpFindIndexProvider")
public void testLongIndexRegexp(String functionName, long startIndex) {
    String expectedError = String.format(
            "error(\"{ballerina/lang.regexp}RegularExpressionOperationError\",message=\"index "
                    + "number too large: %,d\")", startIndex);
    Object result = BRunUtil.invoke(negativeTests, functionName);
    Assert.assertEquals(result.toString(), expectedError);
}

/**
 * Supplies, per row, a Ballerina function name and the start index (beyond the {@code int} range)
 * that the corresponding error message is expected to contain.
 *
 * @return rows of {function name, start index}
 */
@DataProvider(name = "longRegexpFindIndexProvider")
private Object[][] longRegexpFindIndexes() {
    Object[][] cases = {
            {"testLongIndexFind", 68719476704L},
            {"testLongIndexFindAll", 137438953408L},
            {"testLongIndexFindGroups", 274877906816L},
            {"testLongIndexFindAllGroups", 549755813632L},
    };
    return cases;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (c) 2023 WSO2 Inc. (http://www.wso2.org) All Rights Reserved.
//
// WSO2 Inc. licenses this file to you under the Apache License,
// Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Calls `find` on "Hello" with start index -3; an error trapped from the call is returned to the caller.
public function testNegativeIndexFind() returns error? {
    string:RegExp pattern = re `e`;
    string text = "Hello";
    int startIndex = -3;
    _ = check (trap pattern.find(text, startIndex));
}

// Calls `find` on "World" with start index 12; an error trapped from the call is returned to the caller.
public function testInvalidIndexFind() returns error? {
    string:RegExp pattern = re `r`;
    string text = "World";
    int startIndex = 12;
    _ = check (trap pattern.find(text, startIndex));
}

// Calls `find` on "World" with start index 68719476704; an error trapped from the call is returned
// to the caller.
public function testLongIndexFind() returns error? {
    string:RegExp pattern = re `r`;
    string text = "World";
    int startIndex = 68719476704;
    _ = check (trap pattern.find(text, startIndex));
}

// Calls `findAll` with start index -8; an error trapped from the call is returned to the caller.
public function testNegativeIndexFindAll() returns error? {
    string sentence = "There once was a king who liked to sing in the rain";
    string:RegExp pattern = re `/(g|ng)/gim`;
    int startIndex = -8;
    _ = check (trap pattern.findAll(sentence, startIndex));
}

// Calls `findAll` with start index 112; an error trapped from the call is returned to the caller.
public function testInvalidIndexFindAll() returns error? {
    string sentence = "Don't stoP going up till you get to the tOp if you want to shop";
    string:RegExp pattern = re `/(op|p)/gim`;
    int startIndex = 112;
    _ = check (trap pattern.findAll(sentence, startIndex));
}

// Calls `findAll` with start index 137438953408; an error trapped from the call is returned to the caller.
public function testLongIndexFindAll() returns error? {
    string sentence = "Don't stoP going up till you get to the tOp if you want to shop";
    string:RegExp pattern = re `/(ab|c)/gim`;
    int startIndex = 137438953408;
    _ = check (trap pattern.findAll(sentence, startIndex));
}

// Calls `findGroups` with start index -3; an error trapped from the call is returned to the caller.
public function testNegativeIndexFindGroups() returns error? {
    string sentence = "Butter was bought by Betty but the butter was bitter";
    string:RegExp pattern = re `([bB].tt[a-z]*)`;
    int startIndex = -3;
    _ = check (trap pattern.findGroups(sentence, startIndex));
}

// Calls `findGroups` with start index 97; an error trapped from the call is returned to the caller.
public function testInvalidIndexFindGroups() returns error? {
    string sentence = "Butter was bought by Betty but the butter was bitter";
    string:RegExp pattern = re `([bB].tt[a-z]*)`;
    int startIndex = 97;
    _ = check (trap pattern.findGroups(sentence, startIndex));
}

// Calls `findGroups` with start index 274877906816; an error trapped from the call is returned
// to the caller.
public function testLongIndexFindGroups() returns error? {
    string sentence = "Butter was bought by Betty but the butter was bitter";
    string:RegExp pattern = re `([aA].ee[A-Z]*)`;
    int startIndex = 274877906816;
    _ = check (trap pattern.findGroups(sentence, startIndex));
}

// Calls `findAllGroups` with start index -4; an error trapped from the call is returned to the caller.
public function testNegativeIndexFindAllGroups() returns error? {
    string sentence = "rubble, trouble, bubble, hubble";
    string:RegExp pattern = re `(([a-z]u)(bble))`;
    int startIndex = -4;
    _ = check (trap pattern.findAllGroups(sentence, startIndex));
}

// Calls `findAllGroups` with start index 123; an error trapped from the call is returned to the caller.
public function testInvalidIndexFindAllGroups() returns error? {
    string sentence = "rubble, trouble, bubble, hubble";
    string:RegExp pattern = re `(([a-z]u)(bble))`;
    int startIndex = 123;
    _ = check (trap pattern.findAllGroups(sentence, startIndex));
}

// Calls `findAllGroups` with start index 549755813632; an error trapped from the call is returned
// to the caller.
public function testLongIndexFindAllGroups() returns error? {
    string sentence = "rubble, trouble, bubble, hubble";
    string:RegExp pattern = re `(([0-9A-Z]u)+(ful))`;
    int startIndex = 549755813632;
    _ = check (trap pattern.findAllGroups(sentence, startIndex));
}

0 comments on commit ae723d0

Please sign in to comment.