Skip to content

Commit 65b0ba4

Browse files
Sync upstream
1 parent 0bf655c commit 65b0ba4

File tree

3 files changed

+147
-44
lines changed

3 files changed

+147
-44
lines changed

cli/jsonrepair-cli.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import (
77
"os"
88
)
99

10-
const AppVersion = "0.0.13"
10+
const AppVersion = "0.0.14"
1111

1212
var (
1313
versionFlag bool

jsonrepair.go

+135 -39
Original file line number | Diff line number | Diff line change
@@ -101,29 +101,35 @@ type JSONParser struct {
101101
// receiver p
102102
// return any
103103
func (p *JSONParser) parseJSON() any {
104-
c, b := p.getByte(0)
105-
if !b {
106-
return ""
107-
}
108104

109-
switch {
110-
case c == '{':
111-
p.index++
112-
return p.parseObject()
113-
case c == '[':
105+
for {
106+
c, b := p.getByte(0)
107+
108+
if !b {
109+
return ""
110+
}
111+
112+
isInMarkers := len(p.marker) > 0
113+
114+
switch {
115+
case c == '{':
116+
p.index++
117+
return p.parseObject()
118+
case c == '[':
119+
p.index++
120+
return p.parseArray()
121+
case c == '}':
122+
return ""
123+
// TODO Full-width character support
124+
case isInMarkers && (bytes.IndexByte([]byte{'"', '\''}, c) != -1 || unicode.IsLetter(rune(c))):
125+
return p.parseString()
126+
case isInMarkers && (unicode.IsNumber(rune(c)) || bytes.IndexByte([]byte{'-', '.'}, c) != -1):
127+
return p.parseNumber()
128+
}
129+
114130
p.index++
115-
return p.parseArray()
116-
case c == '}':
117-
return ""
118-
// TODO Full-width character support
119-
case bytes.IndexByte([]byte{'"', '\''}, c) != -1 || unicode.IsLetter(rune(c)):
120-
return p.parseString()
121-
case unicode.IsNumber(rune(c)) || bytes.IndexByte([]byte{'-', '.'}, c) != -1:
122-
return p.parseNumber()
123131
}
124132

125-
p.index++
126-
return p.parseJSON()
127133
}
128134

129135
// parseObject
@@ -166,11 +172,15 @@ func (p *JSONParser) parseObject() map[string]any {
166172
}
167173
}
168174

175+
p.skipWhitespaces()
176+
169177
c, b = p.getByte(0)
170178
if b && c == '}' {
171179
continue
172180
}
173181

182+
p.skipWhitespaces()
183+
174184
c, b = p.getByte(0)
175185
//nolint
176186
if !b || c != ':' {
@@ -226,7 +236,7 @@ func (p *JSONParser) parseArray() []any {
226236
p.skipWhitespaces()
227237
value := p.parseJSON()
228238

229-
if value == nil {
239+
if value == nil || value == "" {
230240
break
231241
}
232242

@@ -307,11 +317,6 @@ func (p *JSONParser) parseString() any {
307317
}
308318
}
309319

310-
if p.getMarker() == "" {
311-
p.index++
312-
return p.parseJSON()
313-
}
314-
315320
missingQuotes = true
316321
}
317322

@@ -333,7 +338,17 @@ func (p *JSONParser) parseString() any {
333338
if nextB && b && c == rStringDelimiter {
334339
doubledQuotes = true
335340
p.index++
341+
} else {
342+
i = 1
343+
nextC, nextB = p.getByte(i)
344+
for nextB && nextC == ' ' {
345+
i++
346+
nextC, nextB = p.getByte(i)
347+
}
336348

349+
if nextB && bytes.IndexByte([]byte{',', ']', '}'}, nextC) == -1 {
350+
p.index++
351+
}
337352
}
338353
}
339354

@@ -346,7 +361,33 @@ func (p *JSONParser) parseString() any {
346361
if p.getMarker() == "object_key" && (c == ':' || unicode.IsSpace(rune(c))) {
347362
break
348363
} else if p.getMarker() == "object_value" && bytes.IndexByte([]byte{',', '}'}, c) != -1 {
349-
break
364+
365+
rStringDelimiterMissing := true
366+
i := 1
367+
nextC, nextB := p.getByte(i)
368+
for nextB && nextC != rStringDelimiter {
369+
i++
370+
nextC, nextB = p.getByte(i)
371+
}
372+
373+
if nextB {
374+
i++
375+
nextC, nextB = p.getByte(i)
376+
}
377+
378+
for nextB && nextC == ' ' {
379+
i++
380+
nextC, nextB = p.getByte(i)
381+
}
382+
383+
if nextB && bytes.IndexByte([]byte{',', '}'}, nextC) != -1 {
384+
rStringDelimiterMissing = false
385+
}
386+
387+
if rStringDelimiterMissing {
388+
break
389+
}
390+
350391
}
351392
}
352393

@@ -383,24 +424,53 @@ func (p *JSONParser) parseString() any {
383424

384425
if doubledQuotes && p.container[p.index+1] == rStringDelimiter {
385426

427+
} else if missingQuotes && p.getMarker() == "object_value" {
428+
429+
i := 1
430+
nextC, nextB := p.getByte(i)
431+
for nextB && bytes.IndexByte([]byte{rStringDelimiter, lStringDelimiter}, nextC) == -1 {
432+
i++
433+
nextC, nextB = p.getByte(i)
434+
}
435+
436+
if nextB {
437+
i++
438+
nextC, nextB = p.getByte(i)
439+
for nextB && nextC == ' ' {
440+
i++
441+
nextC, nextB = p.getByte(i)
442+
}
443+
444+
if nextB && nextC == ':' {
445+
p.index--
446+
c, b = p.getByte(0)
447+
break
448+
}
449+
}
450+
386451
} else {
387452

388453
i := 1
389454
nextC, nextB := p.getByte(i)
390-
for nextB && nextC != rStringDelimiter {
455+
checkCommaInObjectValue := true
456+
for nextB && bytes.IndexByte([]byte{rStringDelimiter, lStringDelimiter}, nextC) == -1 {
391457

392-
if nextC == lStringDelimiter ||
393-
(slices.Contains(p.marker, "object_key") && nextC == ':') ||
394-
(slices.Contains(p.marker, "object_value") && bytes.IndexByte([]byte{'}', ','}, nextC) != -1) ||
395-
(slices.Contains(p.marker, "array") && bytes.IndexByte([]byte{']', ','}, nextC) != -1) {
458+
if unicode.IsLetter(rune(c)) {
459+
checkCommaInObjectValue = false
460+
}
461+
462+
if (slices.Contains(p.marker, "object_key") && bytes.IndexByte([]byte{':', '}'}, nextC) != -1) ||
463+
(slices.Contains(p.marker, "object_value") && nextC == '}') ||
464+
(slices.Contains(p.marker, "array") && bytes.IndexByte([]byte{']', ','}, nextC) != -1) ||
465+
(checkCommaInObjectValue && p.getMarker() == "object_value" && nextC == ',') {
396466
break
397467
}
398468

399469
i++
400470
nextC, nextB = p.getByte(i)
401471
}
402472

403-
if nextC == rStringDelimiter {
473+
if nextC == ',' && p.getMarker() == "object_value" {
404474
i++
405475
nextC, nextB = p.getByte(i)
406476
for nextB && nextC != rStringDelimiter {
@@ -410,20 +480,43 @@ func (p *JSONParser) parseString() any {
410480
i++
411481
nextC, nextB = p.getByte(i)
412482

413-
for nextB && nextC != ':' {
414-
if bytes.IndexByte([]byte{lStringDelimiter, rStringDelimiter, ','}, nextC) != -1 {
415-
break
416-
}
483+
for nextB && nextC == ' ' {
417484
i++
418485
nextC, nextB = p.getByte(i)
419486
}
420487

421-
// upstream
422-
if !nextB || nextC != ':' {
488+
if nextB && nextC == '}' {
423489
rst = append(rst, c)
424490
p.index++
425491
c, b = p.getByte(0)
426492
}
493+
} else if nextB && nextC == rStringDelimiter {
494+
495+
if p.getMarker() == "object_value" {
496+
i++
497+
nextC, nextB = p.getByte(i)
498+
for nextB && nextC != rStringDelimiter {
499+
i++
500+
nextC, nextB = p.getByte(i)
501+
}
502+
i++
503+
nextC, nextB = p.getByte(i)
504+
for nextB && nextC != ':' {
505+
if bytes.IndexByte([]byte{',', lStringDelimiter, rStringDelimiter}, nextC) != -1 {
506+
break
507+
}
508+
i++
509+
nextC, nextB = p.getByte(i)
510+
}
511+
512+
if nextC != ':' {
513+
rst = append(rst, c)
514+
p.index++
515+
c, b = p.getByte(0)
516+
}
517+
518+
}
519+
427520
}
428521
}
429522
}
@@ -462,7 +555,10 @@ func (p *JSONParser) parseNumber() any {
462555

463556
c, b = p.getByte(0)
464557

465-
for b && bytes.IndexByte(numberChars, c) != -1 {
558+
isArray := p.getMarker() == "array"
559+
560+
for b && bytes.IndexByte(numberChars, c) != -1 &&
561+
(c != ',' || !isArray) {
466562
rst = append(rst, c)
467563
p.index++
468564
c, b = p.getByte(0)

jsonrepair_test.go

+11 -4
Original file line number | Diff line number | Diff line change
@@ -142,12 +142,12 @@ func Test_RepairJSON(t *testing.T) {
142142
want: `[]`,
143143
},
144144
{
145-
in: "'\"'",
146-
want: `"\""`,
145+
in: `'\"'`,
146+
want: `""`,
147147
},
148148
{
149-
in: "'string\"",
150-
want: `"string\""`,
149+
in: "string",
150+
want: `""`,
151151
},
152152
{
153153
in: `{foo: [}`,
@@ -291,6 +291,13 @@ func Test_RepairJSON(t *testing.T) {
291291
in: "```json{\"array_key\": [{\"item_key\": 1\n}], \"outer_key\": 2}```",
292292
want: `{"array_key": [{"item_key": 1}], "outer_key": 2}`,
293293
},
294+
295+
{
296+
in: `[
297+
{"Master""господин"}
298+
]`,
299+
want: `[{"Master":"господин"}]`,
300+
},
294301
}
295302

296303
caseNo := 1

0 commit comments

Comments
 (0)