Skip to content

Commit

Permalink
Release 0.6.1
Browse files Browse the repository at this point in the history
Fixed a few corner cases that were missed before. This shouldn't affect anybody, but it is good to have them covered.
  • Loading branch information
mangiucugna committed Jan 23, 2024
1 parent 895fb7d commit 6e17875
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "json_repair"
version = "0.6.0"
version = "0.6.1"
license = {file = "LICENSE"}
authors = [
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
Expand Down
29 changes: 22 additions & 7 deletions src/json_repair/json_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def parse_json(
elif char.isdigit() or char == "-":
return self.parse_number()
# <boolean> could be (T)rue or (F)alse or (N)ull
elif char == "t" or char == "f" or char == "n":
elif char.lower() in ["t", "f", "n"]:
return self.parse_boolean_or_null()
# This might be a <string> that is missing the starting '"'
elif char.isalpha():
Expand Down Expand Up @@ -212,11 +212,12 @@ def parse_string(self, use_single_quotes=False) -> str:
self.index += 1
char = self.get_char_at()
# ChatGPT sometimes forget to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
if char == string_terminator and (
fix_broken_markdown_link
or (
self.index - 2 > 0
and self.json_str[self.index - 2 : self.index] == "]("
if (
char == string_terminator
and self.get_next_char() != ","
and (
fix_broken_markdown_link
or (self.get_prev_char(2) + self.get_prev_char()) == "]("
)
):
fix_broken_markdown_link = not fix_broken_markdown_link
Expand Down Expand Up @@ -260,7 +261,7 @@ def parse_boolean_or_null(self) -> Union[bool, None]:
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
for key, (value, length) in boolean_map.items():
if self.json_str.startswith(key, self.index):
if self.json_str.lower().startswith(key, self.index):
self.index += length
return value

Expand All @@ -278,6 +279,20 @@ def get_char_at(self) -> Union[str, bool]:
except IndexError:
return False

def get_prev_char(self, count=1):
    """Return the character ``count`` positions before the current index.

    Args:
        count: How many characters to look back from ``self.index``.

    Returns:
        The character at ``self.index - count``, or ``False`` when that
        position falls outside ``self.json_str``. Because the result may
        be ``False``, callers must not assume it is always a ``str``.
    """
    # Centralising the bounds check here spares every caller from repeating it.
    position = self.index - count
    # A negative subscript does not raise IndexError in Python: it wraps
    # around and reads from the END of the string. Looking back past the
    # start must be reported as out of bounds instead.
    if position < 0:
        return False
    try:
        return self.json_str[position]
    except IndexError:
        return False

def get_next_char(self, count=1):
    """Return the character ``count`` positions after the current index.

    Args:
        count: How many characters to look ahead from ``self.index``.

    Returns:
        The character at ``self.index + count``, or ``False`` when
        subscripting ``self.json_str`` at that position raises
        ``IndexError``.
    """
    # Doing the bounds handling in one place keeps call sites free of
    # repetitive length checks.
    position = self.index + count
    try:
        found = self.json_str[position]
    except IndexError:
        found = False
    return found

def remove_char_at(self) -> None:
    """Delete the character at ``self.index`` from ``self.json_str``.

    The string is rebuilt from the text before the index and the text
    after it; ``self.index`` itself is left unchanged.
    """
    position = self.index
    before = self.json_str[:position]
    after = self.json_str[position + 1 :]
    self.json_str = before + after

Expand Down
3 changes: 3 additions & 0 deletions tests/test_json_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def test_repair_json():
assert repair_json("\"") == '""'
assert repair_json("\n") == '""'
assert repair_json('{"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null}') == '{"key": true, "key2": false, "key3": null}'
assert repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}") == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
assert (
repair_json('{"name": "John", "age": 30, "city": "New York"}')
Expand Down Expand Up @@ -90,6 +91,8 @@ def test_repair_json():

#Test markdown stupidities from ChatGPT
assert repair_json('{ "content": "[LINK]("https://google.com")" }') == '{"content": "[LINK](\\"https://google.com\\")"}'
assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'



Expand Down

0 comments on commit 6e17875

Please sign in to comment.