Skip to content

Commit

Permalink
Address PR comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jacob Hearst committed Aug 26, 2024
1 parent 25d1e6a commit 2d7cadc
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 38 deletions.
9 changes: 6 additions & 3 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ fileprivate extension Compiler.ByteCodeGen {
}

mutating func emitQuotedLiteral(_ s: String) {
assert(!reverse)
guard options.semanticLevel == .graphemeCluster else {
for char in s {
for scalar in char.unicodeScalars {
Expand Down Expand Up @@ -137,6 +138,7 @@ fileprivate extension Compiler.ByteCodeGen {
}

mutating func emitReverseQuotedLiteral(_ s: String) {
assert(reverse)
guard options.semanticLevel == .graphemeCluster else {
for char in s {
for scalar in char.unicodeScalars.reversed() {
Expand Down Expand Up @@ -408,13 +410,14 @@ fileprivate extension Compiler.ByteCodeGen {
_ kind: (forwards: Bool, positive: Bool),
_ child: DSLTree.Node
) throws {
let previousReverse = reverse
reverse = !kind.forwards
if kind.positive {
try emitPositiveLookaround(child)
} else {
try emitNegativeLookaround(child)
}
reverse = false
reverse = previousReverse
}

mutating func emitAtomicNoncapturingGroup(
Expand Down Expand Up @@ -1116,9 +1119,9 @@ fileprivate extension Compiler.ByteCodeGen {
if let asciiBitset = ccc.asAsciiBitset(options),
optimizationsEnabled {
if options.semanticLevel == .unicodeScalar {
builder.buildScalarMatchAsciiBitset(asciiBitset)
builder.buildScalarMatchAsciiBitset(asciiBitset, reverse: reverse)
} else {
builder.buildMatchAsciiBitset(asciiBitset)
builder.buildMatchAsciiBitset(asciiBitset, reverse: reverse)
}
return
}
Expand Down
12 changes: 8 additions & 4 deletions Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -194,17 +194,21 @@ extension MEProgram.Builder {
}

mutating func buildMatchAsciiBitset(
_ b: DSLTree.CustomCharacterClass.AsciiBitset
_ b: DSLTree.CustomCharacterClass.AsciiBitset,
reverse: Bool
) {
let opcode = reverse ? Instruction.OpCode.reverseMatchBitset : .matchBitset
instructions.append(.init(
.matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: false)))
opcode, .init(bitset: makeAsciiBitset(b), isScalar: false)))
}

mutating func buildScalarMatchAsciiBitset(
_ b: DSLTree.CustomCharacterClass.AsciiBitset
_ b: DSLTree.CustomCharacterClass.AsciiBitset,
reverse: Bool
) {
let opcode = reverse ? Instruction.OpCode.reverseMatchBitset : .matchBitset
instructions.append(.init(
.matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: true)))
opcode, .init(bitset: makeAsciiBitset(b), isScalar: true)))
}

mutating func buildMatchBuiltin(model: _CharacterClassModel, reverse: Bool) {
Expand Down
10 changes: 8 additions & 2 deletions Sources/_StringProcessing/Engine/MEBuiltins.swift
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,10 @@ extension String {
/// - Returns: The character at `pos`, bounded by `end`, if it exists, along
/// with the upper bound of that character. The upper bound is always
/// scalar-aligned.
func characterAndEnd(at pos: String.Index, limitedBy end: String.Index) -> (Character, String.Index)? {
func characterAndEnd(
at pos: String.Index,
limitedBy end: String.Index
) -> (Character, characterEnd: String.Index)? {
// FIXME: Sink into the stdlib to avoid multiple boundary calculations
guard pos < end else { return nil }
let next = index(after: pos)
Expand Down Expand Up @@ -204,7 +207,10 @@ extension String {
/// - Returns: The character at `pos`, bounded by `start`, if it exists, along
/// with the lower bound of that character. The lower bound is always
/// scalar-aligned.
func characterAndStart(at pos: String.Index, limitedBy start: String.Index) -> (Character, String.Index)? {
func characterAndStart(
at pos: String.Index,
limitedBy start: String.Index
) -> (Character, characterStart: String.Index)? {
// FIXME: Sink into the stdlib to avoid multiple boundary calculations
guard pos > start else { return nil }
let previous = index(before: pos)
Expand Down
58 changes: 29 additions & 29 deletions Tests/RegexTests/MatchTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1587,9 +1587,8 @@ extension RegexTests {
#"(*positive_lookbehind:USD)\d+"#,
input: "Price: USD100", match: "100")

// TODO: Why is a match not found when unoptimized?
firstMatchTest(
#"\d{3}(?<=USD\d{3})"#, input: "Price: USD100", match: "100", validateOptimizations: false)
#"\d{3}(?<=USD\d{3})"#, input: "Price: USD100", match: "100")

firstMatchTest(
#"(?<!USD)\d+"#, input: "Price: JYP100", match: "100")
Expand All @@ -1602,33 +1601,34 @@ extension RegexTests {
firstMatchTest(
#"\d{3}(?<!USD\d{3})"#, input: "Price: JYP100", match: "100")

firstMatchTest(#"(?<=abc)def"#, input: "abcdefg", match: "def", validateOptimizations: false)
firstMatchTests(
#"(?<=az|b|c)def"#,
("azdefg", "def"),
("bdefg", "def"),
("cdefg", "def"),
("123defg", nil),
validateOptimizations: false
)

// FIXME: quickMatch and thoroughMatch have different results
firstMatchTest(
#"(?<=\d{1,3}-.{1,3}-\d{1,3})suffix"#,
input: "123-_+/-789suffix",
match: "suffix",
validateOptimizations: false
)

firstMatchTests(
#"(?<=^\d{1,3})abc"#,
("123abc", "abc"),
("12abc", "abc"),
("1abc", "abc"),
("1234abc", nil), // FIXME: Shouldn't match but does because `^` assertions are broken
("z123abc", nil), // FIXME: Same as above
validateOptimizations: false
)
firstMatchTest(#"(?<=abc)def"#, input: "abcdefg", match: "def")
firstMatchTests(
#"(?<=az|b|c)def"#,
("azdefg", "def"),
("bdefg", "def"),
("cdefg", "def"),
("123defg", nil),
validateOptimizations: false
)

firstMatchTest(#"abcd(?<=bc(?=d).)"#, input: "abcdefg", match: "abcd")

// FIXME: Disabled because quickMatch and thoroughMatch produce different results for this pattern.
// firstMatchTest(
// #"(?<=\d{1,3}-.{1,3}-\d{1,3})suffix"#,
// input: "123-_+/-789suffix",
// match: "suffix",
// validateOptimizations: false
// )

firstMatchTests(
#"(?<=^\d{1,3})abc"#,
("123abc", "abc"),
("12abc", "abc"),
("1abc", "abc"),
("1234abc", nil), // FIXME: Shouldn't match but does because `^` assertions are broken
("z123abc", nil) // FIXME: Same as above
)
}

func testMatchAnchors() throws {
Expand Down

0 comments on commit 2d7cadc

Please sign in to comment.