diff --git a/pyproject.toml b/pyproject.toml index f4eb970..8289f52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "json_repair" -version = "0.36.1" +version = "0.37.0" license = {file = "LICENSE"} authors = [ { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" }, diff --git a/src/json_repair/json_parser.py b/src/json_repair/json_parser.py index 225a8f1..ce90153 100644 --- a/src/json_repair/json_parser.py +++ b/src/json_repair/json_parser.py @@ -438,7 +438,7 @@ def parse_string(self) -> Union[str, bool, None]: string_acc += char self.index += 1 char = self.get_char_at() - if char and len(string_acc) > 0 and string_acc[-1] == "\\": + if char and string_acc[-1] == "\\": # This is a special case, if people use real strings this might happen self.log("Found a stray escape sequence, normalizing it") if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]: @@ -646,10 +646,15 @@ def parse_string(self) -> Union[str, bool, None]: self.log( "While parsing a string, we missed the closing quote, ignoring", ) + string_acc = string_acc.rstrip() else: self.index += 1 - return string_acc.rstrip() + if missing_quotes or (string_acc and string_acc[-1] == "\n"): + # Clean the whitespaces for some corner cases + string_acc = string_acc.rstrip() + + return string_acc def parse_number(self) -> Union[float, int, str, JSONReturnType]: # is a valid real number expressed in one of a number of given formats @@ -661,7 +666,7 @@ def parse_number(self) -> Union[float, int, str, JSONReturnType]: number_str += char self.index += 1 char = self.get_char_at() - if len(number_str) > 1 and number_str[-1] in "-eE/,": + if number_str and number_str[-1] in "-eE/,": # The number ends with a non valid character for a number/currency, rolling back one number_str = number_str[:-1] self.index -= 1 diff --git a/tests/profiler.py b/tests/profiler.py index 2fc6caf..01af7b9 100644 --- a/tests/profiler.py +++ b/tests/profiler.py @@ -1,8 +1,12 @@ +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from cProfile import Profile import pstats from pstats import SortKey, Stats import time -from json_repair import repair_json +from src.json_repair.json_repair import repair_json # Hack: Monkey patch pstats to change the formatting method and increase precision