Skip to content

Commit

Permalink
Release 0.29.3, simplified logging logic, removed overhead and made the library overall faster.
Browse files Browse the repository at this point in the history
  • Loading branch information
mangiucugna committed Sep 22, 2024
1 parent 7c5e4cd commit 6172cec
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 56 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "json_repair"
version = "0.29.2"
version = "0.29.3"
license = {file = "LICENSE"}
authors = [
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
Expand Down
69 changes: 29 additions & 40 deletions src/json_repair/json_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal

from .string_file_wrapper import StringFileWrapper
from .logger_config import LoggerConfig, LogLevel
from .json_context import JsonContext, ContextValues

JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
Expand All @@ -26,9 +25,16 @@ def __init__(
# This is used in the object member parsing to manage the special cases of missing quotes in key or value
self.context = JsonContext()
# Use this to log the activity, but only if logging is active
self.logger = LoggerConfig(
log_level=LogLevel.INFO if logging else LogLevel.NONE
)

# This is a trick but a beautiful one. We call self.log in the code over and over even if it's not needed.
# We could add a guard in the code for each call but that would make this code unreadable, so here's this neat trick
# Replace self.log with a noop
self.logging = logging
if logging:
self.logger: List[Dict[str, str]] = []
self.log = self._log
else:
self.log = self.noop

def parse(
self,
Expand All @@ -37,7 +43,6 @@ def parse(
if self.index < len(self.json_str):
self.log(
"The parser returned early, checking if there's more json elements",
LogLevel.INFO,
)
json = [json]
last_index = self.index
Expand All @@ -52,13 +57,12 @@ def parse(
if len(json) == 1:
self.log(
"There were no more elements, returning the element without the array",
LogLevel.INFO,
)
json = json[0]
if self.logger.log_level == LogLevel.NONE:
return json
if self.logging:
return json, self.logger
else:
return json, self.logger.log
return json

def parse_json(
self,
Expand All @@ -81,7 +85,6 @@ def parse_json(
elif char == "}":
self.log(
"At the end of an object we found a key with missing value, skipping",
LogLevel.INFO,
)
return ""
# <string> starts with a quote
Expand Down Expand Up @@ -113,7 +116,6 @@ def parse_object(self) -> Dict[str, JSONReturnType]:
if (self.get_char_at() or "") == ":":
self.log(
"While parsing an object we found a : before a key, ignoring",
LogLevel.INFO,
)
self.index += 1

Expand Down Expand Up @@ -144,7 +146,6 @@ def parse_object(self) -> Dict[str, JSONReturnType]:
if (self.get_char_at() or "") != ":":
self.log(
"While parsing an object we missed a : after a key",
LogLevel.INFO,
)

self.index += 1
Expand Down Expand Up @@ -182,7 +183,6 @@ def parse_array(self) -> List[JSONReturnType]:
if value == "..." and self.get_char_at(-1) == ".":
self.log(
"While parsing an array, found a stray '...'; ignoring it",
LogLevel.INFO,
)
else:
arr.append(value)
Expand All @@ -198,7 +198,6 @@ def parse_array(self) -> List[JSONReturnType]:
if char and char != "]":
self.log(
"While parsing an array we missed the closing ], adding it back",
LogLevel.INFO,
)
self.index -= 1

Expand Down Expand Up @@ -243,11 +242,9 @@ def parse_string(self) -> Union[str, bool, None]:
return value
self.log(
"While parsing a string, we found a literal instead of a quote",
LogLevel.INFO,
)
self.log(
"While parsing a string, we found no starting quote. Will add the quote back",
LogLevel.INFO,
)
missing_quotes = True

Expand All @@ -273,7 +270,6 @@ def parse_string(self) -> Union[str, bool, None]:
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
self.log(
"While parsing a string, we found a valid starting doubled quote, ignoring it",
LogLevel.INFO,
)
doubled_quotes = True
self.index += 1
Expand All @@ -284,7 +280,6 @@ def parse_string(self) -> Union[str, bool, None]:
if next_c not in [",", "]", "}"]:
self.log(
"While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
LogLevel.INFO,
)
self.index += 1

Expand All @@ -306,7 +301,6 @@ def parse_string(self) -> Union[str, bool, None]:
):
self.log(
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
LogLevel.INFO,
)
break
if self.context.is_current(ContextValues.OBJECT_VALUE) and char in [
Expand All @@ -329,15 +323,14 @@ def parse_string(self) -> Union[str, bool, None]:
if rstring_delimiter_missing:
self.log(
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
LogLevel.INFO,
)
break
string_acc += char
self.index += 1
char = self.get_char_at()
if char and len(string_acc) > 0 and string_acc[-1] == "\\":
# This is a special case, if people use real strings this might happen
self.log("Found a stray escape sequence, normalizing it", LogLevel.INFO)
self.log("Found a stray escape sequence, normalizing it")
string_acc = string_acc[:-1]
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
Expand All @@ -349,8 +342,7 @@ def parse_string(self) -> Union[str, bool, None]:
# Special case here, in case of double quotes one after another
if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
self.log(
"While parsing a string, we found a doubled quote, ignoring it",
LogLevel.INFO,
"While parsing a string, we found a doubled quote, ignoring it"
)
self.index += 1
elif missing_quotes and self.context.is_current(
Expand All @@ -377,7 +369,6 @@ def parse_string(self) -> Union[str, bool, None]:
char = self.get_char_at()
self.log(
"In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
LogLevel.INFO,
)
break
else:
Expand Down Expand Up @@ -433,7 +424,6 @@ def parse_string(self) -> Union[str, bool, None]:
# OK this is valid then
self.log(
"While parsing a string, we misplaced a quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
LogLevel.INFO,
)
string_acc += str(char)
self.index += 1
Expand Down Expand Up @@ -464,7 +454,6 @@ def parse_string(self) -> Union[str, bool, None]:
if next_c != ":":
self.log(
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
LogLevel.INFO,
)
string_acc += str(char)
self.index += 1
Expand All @@ -478,7 +467,6 @@ def parse_string(self) -> Union[str, bool, None]:
):
self.log(
"While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
LogLevel.INFO,
)
self.skip_whitespaces_at()
if self.get_char_at() not in [":", ","]:
Expand All @@ -489,7 +477,6 @@ def parse_string(self) -> Union[str, bool, None]:
if char != rstring_delimiter:
self.log(
"While parsing a string, we missed the closing quote, ignoring",
LogLevel.INFO,
)
else:
self.index += 1
Expand Down Expand Up @@ -595,15 +582,17 @@ def skip_to_character(
return idx
return idx

def log(self, text: str, level: LogLevel) -> None:
    """Append ``text`` and the input surrounding the cursor to the log.

    Nothing is recorded unless ``level`` equals ``self.logger.log_level``.
    The recorded context is the slice of ``self.json_str`` reaching
    ``self.logger.window`` characters to each side of ``self.index``,
    clamped to the bounds of the input.
    """
    if level == self.logger.log_level:
        radius = self.logger.window
        lower = max(self.index - radius, 0)
        upper = min(self.index + radius, len(self.json_str))
        entry = {"text": text, "context": self.json_str[lower:upper]}
        self.logger.log.append(entry)
def _log(self, text: str) -> None:
    """Append ``text`` and the input surrounding the cursor to ``self.logger``.

    The recorded context is the slice of ``self.json_str`` reaching 10
    characters to each side of ``self.index``, clamped to the bounds of
    the input.
    """
    radius = 10
    lower = max(self.index - radius, 0)
    upper = min(self.index + radius, len(self.json_str))
    entry = {"text": text, "context": self.json_str[lower:upper]}
    self.logger.append(entry)

def noop(*args: Any, **kwargs: Any) -> None:
    """Accept any positional or keyword arguments and do nothing.

    Assigned to ``self.log`` when logging is disabled, so that log calls
    made throughout the parser need no guard of their own.
    """
    return None
15 changes: 0 additions & 15 deletions src/json_repair/logger_config.py

This file was deleted.

0 comments on commit 6172cec

Please sign in to comment.