Skip to content

Commit

Permalink
As part of regular work on the library, I was able to find a 10% performance improvement by reworking some calls here and there.
Browse files Browse the repository at this point in the history
  • Loading branch information
mangiucugna committed Feb 16, 2025
1 parent 6725afb commit c38b9e5
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "json_repair"
version = "0.36.1"
version = "0.37.0"
license = {file = "LICENSE"}
authors = [
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
Expand Down
11 changes: 8 additions & 3 deletions src/json_repair/json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def parse_string(self) -> Union[str, bool, None]:
string_acc += char
self.index += 1
char = self.get_char_at()
if char and len(string_acc) > 0 and string_acc[-1] == "\\":
if char and string_acc[-1] == "\\":
# This is a special case, if people use real strings this might happen
self.log("Found a stray escape sequence, normalizing it")
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
Expand Down Expand Up @@ -646,10 +646,15 @@ def parse_string(self) -> Union[str, bool, None]:
self.log(
"While parsing a string, we missed the closing quote, ignoring",
)
string_acc = string_acc.rstrip()
else:
self.index += 1

return string_acc.rstrip()
if missing_quotes or (string_acc and string_acc[-1] == "\n"):
# Clean the whitespaces for some corner cases
string_acc = string_acc.rstrip()

return string_acc

def parse_number(self) -> Union[float, int, str, JSONReturnType]:
# <number> is a valid real number expressed in one of a number of given formats
Expand All @@ -661,7 +666,7 @@ def parse_number(self) -> Union[float, int, str, JSONReturnType]:
number_str += char
self.index += 1
char = self.get_char_at()
if len(number_str) > 1 and number_str[-1] in "-eE/,":
if number_str and number_str[-1] in "-eE/,":
# The number ends with a non valid character for a number/currency, rolling back one
number_str = number_str[:-1]
self.index -= 1
Expand Down
6 changes: 5 additions & 1 deletion tests/profiler.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from cProfile import Profile
import pstats
from pstats import SortKey, Stats
import time
from json_repair import repair_json
from src.json_repair.json_repair import repair_json


# Hack: Monkey patch pstats to change the formatting method and increase precision
Expand Down

0 comments on commit c38b9e5

Please sign in to comment.