Skip to content

Commit

Permalink
Squash some bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Jacob Hearst committed Dec 27, 2024
1 parent 7ed23cc commit b3f706f
Show file tree
Hide file tree
Showing 5 changed files with 418 additions and 14 deletions.
6 changes: 3 additions & 3 deletions Sources/_StringProcessing/Engine/MEBuiltins.swift
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ extension String {
limitedBy start: String.Index,
isScalarSemantics: Bool
) -> String.Index? {
guard currentPosition >= start else { return nil }
guard currentPosition > start else { return nil }
if case .definite(let result) = _quickReverseMatchAnyNonNewline(
at: currentPosition,
limitedBy: start,
Expand Down Expand Up @@ -297,7 +297,7 @@ extension String {
limitedBy start: String.Index,
isScalarSemantics: Bool
) -> QuickResult<String.Index?> {
assert(currentPosition >= start)
assert(currentPosition > start)
guard let (asciiValue, previous, isCRLF) = _quickReverseASCIICharacter(
at: currentPosition, limitedBy: start
) else {
Expand Down Expand Up @@ -338,7 +338,7 @@ extension String {
isScalarSemantics: Bool
) -> String.Index? {
if isScalarSemantics {
guard currentPosition >= start else { return nil }
guard currentPosition > start else { return nil }
let scalar = unicodeScalars[currentPosition]
guard !scalar.isNewline else { return nil }
return unicodeScalars.index(before: currentPosition)
Expand Down
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ extension Processor {

// Reverse in our input
//
// Returns whether the advance succeeded. On failure, our
// Returns whether the reverse succeeded. On failure, our
// save point was restored
mutating func reverseConsume(_ n: Distance) -> Bool {
// TODO: needs benchmark coverage
Expand All @@ -234,7 +234,7 @@ extension Processor {

// If `start` falls in the middle of a character, and we are trying to reverse
// by one "character", then we should max out at `start` even though the above
// advancement will result in `nil`.
// reversal will result in `nil`.
if n == 1, let idx = input.unicodeScalars.index(
currentPosition, offsetBy: -n.rawValue, limitedBy: start
) {
Expand Down
18 changes: 9 additions & 9 deletions Sources/_StringProcessing/Unicode/ASCII.swift
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ extension String {
let tail = utf8[next]
guard tail._isSub300StartingByte else { return nil }

// Handle CR-LF:
// Handle CR-LF by advancing past the sequence if both characters are present
if base == ._carriageReturn && tail == ._lineFeed {
utf8.formIndex(after: &next)
guard next == end || utf8[next]._isSub300StartingByte else {
Expand All @@ -123,17 +123,17 @@ extension String {
}

/// TODO: better to take isScalarSemantics parameter, we can return more results
/// and we can give the right `previous` index, not requiring the caller to re-adjust it
/// and we can give the right `previous` index, not requiring the caller to re-adjust it
/// TODO: detailed description of nuanced semantics
func _quickReverseASCIICharacter(
at idx: Index,
limitedBy start: Index
) -> (char: UInt8, previous: Index, crLF: Bool)? {
) -> (first: UInt8, previous: Index, crLF: Bool)? {
// TODO: fastUTF8 version
assert(String.Index(idx, within: unicodeScalars) != nil)
assert(idx >= start)

// Exit if we're at our limit
// If we're already at the start, there is no previous character
if idx == start {
return nil
}
Expand All @@ -146,23 +146,23 @@ extension String {

var previous = utf8.index(before: idx)
if previous == start {
return (char: char, previous: previous, crLF: false)
return (first: char, previous: previous, crLF: false)
}

let head = utf8[previous]
guard head._isSub300StartingByte else { return nil }

// Handle CR-LF:
if char == ._carriageReturn && head == ._lineFeed {
// Handle CR-LF by reversing past the sequence if both characters are present
if char == ._lineFeed && head == ._carriageReturn {
utf8.formIndex(before: &previous)
guard previous == start || utf8[previous]._isSub300StartingByte else {
return nil
}
return (char: char, previous: previous, crLF: true)
return (first: char, previous: previous, crLF: true)
}

assert(self[idx].isASCII && self[idx] != "\r\n")
return (char: char, previous: previous, crLF: false)
return (first: char, previous: previous, crLF: false)
}

func _quickMatch(
Expand Down
153 changes: 153 additions & 0 deletions Tests/MatchingEngineTests/ASCIITests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import XCTest

@testable import _StringProcessing

/// Exercises `String._quickASCIICharacter(at:limitedBy:)` at the start, at
/// the last character, at the end limit, on a non-ASCII character, and on a
/// CR-LF pair.
final class QuickASCIICharacterTests: XCTestCase {
  /// An ASCII character followed by more ASCII yields that byte and the index
  /// immediately after it.
  func testHappyPath() throws {
    let input = "foo"
    let origin = input.startIndex

    let (byte, following, sawCRLF) = try XCTUnwrap(
      input._quickASCIICharacter(at: origin, limitedBy: input.endIndex)
    )

    XCTAssertEqual(byte, input.utf8[origin])
    XCTAssertEqual(following, input.index(after: origin))
    XCTAssertFalse(sawCRLF)
  }

  /// Asking for a character at the limit itself produces no result.
  func testAtEnd() throws {
    let input = "foo"
    let limit = input.endIndex

    XCTAssertNil(input._quickASCIICharacter(at: limit, limitedBy: limit))
  }

  /// A non-ASCII character produces no result.
  func testNonASCIIChar() throws {
    let input = "é"

    XCTAssertNil(
      input._quickASCIICharacter(at: input.startIndex, limitedBy: input.endIndex)
    )
  }

  /// When the character is the last one before the limit, the returned next
  /// index is the limit.
  func testNextIsEnd() throws {
    let input = "foo"
    let lastCharacter = input.index(before: input.endIndex)

    let (byte, following, sawCRLF) = try XCTUnwrap(
      input._quickASCIICharacter(at: lastCharacter, limitedBy: input.endIndex)
    )

    XCTAssertEqual(byte, input.utf8[lastCharacter])
    XCTAssertEqual(following, input.endIndex)
    XCTAssertFalse(sawCRLF)
  }

  // TODO: JH - Figure out how to test sub 300 starting bytes

  /// A CR-LF pair is reported as such: the byte is the carriage return and the
  /// next index lies past the line feed.
  func testIsCRLF() throws {
    let input = "\r\n"

    let (byte, following, sawCRLF) = try XCTUnwrap(
      input._quickASCIICharacter(at: input.utf8.startIndex, limitedBy: input.endIndex)
    )

    XCTAssertEqual(byte, input.utf8[input.startIndex])
    XCTAssertEqual(following, input.endIndex)
    XCTAssertTrue(sawCRLF)
  }
}

/// Tests for `String._quickReverseASCIICharacter(at:limitedBy:)`.
///
/// The function inspects the byte *at* the given index and reports the index
/// that precedes it, so each test positions the index to reach a different
/// branch: the at-limit early exit, the "previous index is the limit" exit,
/// the general path, the CR-LF path, and non-ASCII rejection.
final class QuickReverseASCIICharacterTests: XCTestCase {
  /// General path: the preceding index is *not* the limit, so the function has
  /// to look at the preceding byte before answering.
  func testHappyPath() throws {
    // Given
    let sut = "foo"
    // Start at the final "o". Starting at offset 1 would make the preceding
    // index equal to the limit, which is what `testPreviousIsStart` covers.
    let index = sut.index(before: sut.endIndex)

    // When
    let result = sut._quickReverseASCIICharacter(at: index, limitedBy: sut.startIndex)

    // Then
    let (char, previousIdx, isCRLF) = try XCTUnwrap(result)
    XCTAssertEqual(char, sut.utf8[index])
    XCTAssertEqual(previousIdx, sut.index(before: index))
    XCTAssertNotEqual(previousIdx, sut.startIndex)
    XCTAssertFalse(isCRLF)
  }

  /// At the limit there is nothing to reverse over, so no result is produced.
  func testAtStart() throws {
    // Given
    let sut = "foo"

    // When
    let result = sut._quickReverseASCIICharacter(at: sut.startIndex, limitedBy: sut.startIndex)

    // Then
    XCTAssertNil(result)
  }

  /// A non-ASCII character at the given index is expected to be rejected.
  func testNonASCIIChar() throws {
    // Given
    // The non-ASCII character must not sit at the limit, otherwise the call
    // returns nil through the at-limit early exit and never looks at the byte.
    // The explicit escape pins the precomposed scalar so the test does not
    // depend on how the source file happens to be normalized.
    let sut = "a\u{E9}"
    let index = sut.index(after: sut.startIndex)

    // When
    let result = sut._quickReverseASCIICharacter(at: index, limitedBy: sut.startIndex)

    // Then
    XCTAssertNil(result)
  }

  /// When the preceding index is the limit, the byte is returned together with
  /// the limit and no CR-LF is reported.
  func testPreviousIsStart() throws {
    // Given
    let sut = "foo"
    let index = sut.index(after: sut.startIndex)

    // When
    let result = sut._quickReverseASCIICharacter(at: index, limitedBy: sut.startIndex)

    // Then
    let (char, previousIdx, isCRLF) = try XCTUnwrap(result)
    XCTAssertEqual(char, sut.utf8[index])
    XCTAssertEqual(previousIdx, sut.startIndex)
    XCTAssertFalse(isCRLF)
  }

  // TODO: JH - Figure out how to test sub 300 starting bytes

  /// Starting on the line feed of a CR-LF pair reverses past the whole pair.
  func testIsCRLF() throws {
    // Given
    let sut = "foo\r\n"
    // Start at '\n'
    let index = sut.utf8.index(before: sut.endIndex)

    // When
    let result = sut._quickReverseASCIICharacter(at: index, limitedBy: sut.startIndex)

    // Then
    let (char, previousIndex, isCRLF) = try XCTUnwrap(result)
    XCTAssertEqual(char, sut.utf8[index])
    // The returned index is the last "o", i.e. the position before the "\r".
    XCTAssertEqual(previousIndex, sut.index(sut.startIndex, offsetBy: 2))
    XCTAssertTrue(isCRLF)
  }
}
Loading

0 comments on commit b3f706f

Please sign in to comment.