Skip to content

Commit 9f0e081

Browse files
committed
init
1 parent a0510fa commit 9f0e081

File tree

8 files changed

+598
-1
lines changed

8 files changed

+598
-1
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@
1313

1414
# Dependency directories (remove the comment below to include it)
1515
# vendor/
16+
cmd/diff/diff

README.md

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,13 @@
11
# diff
2-
fast diff library for Myers algorithm
2+
3+
A fast diff library implementing the Myers difference algorithm.
4+
The algorithm is described in "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, pp. 251-266.
5+
6+
```
7+
BenchmarkBytesDiff/bytestrings-12 74 16075247 ns/op 91153 B/op 12 allocs/op
8+
BenchmarkDiff-12 638715 1882 ns/op 680 B/op 6 allocs/op
9+
BenchmarkInts-12 582735 1971 ns/op 728 B/op 7 allocs/op
10+
BenchmarkDiffRunes-12 574765 1919 ns/op 728 B/op 7 allocs/op
11+
BenchmarkDiffBytes-12 66489 19385 ns/op 3408 B/op 8 allocs/op
12+
BenchmarkDiffByteStrings-12 72484 16184 ns/op 3392 B/op 8 allocs/op
13+
```

cmd/diff/1.txt

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
X
2+
Y
3+
Z
4+
A
5+
B
6+
C
7+
A
8+
B
9+
C
10+
D
11+
E
12+
F

cmd/diff/2.txt

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
X
2+
Y
3+
Z
4+
B
5+
A
6+
C
7+
C
8+
A
9+
B
10+
D
11+
E
12+
F

cmd/diff/main.go

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"log"
6+
"os"
7+
8+
"github.com/covrom/diff"
9+
)
10+
11+
func main() {
12+
if len(os.Args) != 3 {
13+
fmt.Println("usage: diff [src] [dst]")
14+
os.Exit(1)
15+
}
16+
17+
src, err := diff.GetFileLines(os.Args[1])
18+
if err != nil {
19+
log.Fatal(err)
20+
}
21+
22+
dst, err := diff.GetFileLines(os.Args[2])
23+
if err != nil {
24+
log.Fatal(err)
25+
}
26+
27+
diff.PrintDiffSlices(src, dst)
28+
}

diff.go

+281
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
// Package diff implements a difference algorithm.
2+
// The algorithm is described in "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, pp. 251-266.
3+
package diff
4+
5+
import (
6+
"bufio"
7+
"fmt"
8+
"os"
9+
)
10+
11+
// Data is the interface a type must satisfy to be diffed by this package.
12+
// An implementation typically holds two sequences A and B of comparable elements.
13+
type Data interface {
14+
// Equal reports whether element i of the first sequence and element j of
// the second sequence are considered equal.
15+
Equal(i, j int) bool
16+
}
17+
18+
// ByteStrings returns the differences of two strings in bytes.
19+
func ByteStringSlices(a, b []string) []Change {
20+
return Diff(len(a), len(b), &stringSlices{a, b})
21+
}
22+
23+
// stringSlices adapts a pair of string slices to the Data interface.
type stringSlices struct {
	a, b []string
}

// Equal reports whether a[i] and b[j] are the same string.
func (p *stringSlices) Equal(i, j int) bool {
	left, right := p.a[i], p.b[j]
	return left == right
}
26+
27+
// ByteStrings returns the differences of two strings in bytes.
28+
func ByteStrings(a, b string) []Change {
29+
return Diff(len(a), len(b), &strings{a, b})
30+
}
31+
32+
// strings adapts a pair of strings to the Data interface, byte by byte.
type strings struct {
	a, b string
}

// Equal reports whether byte i of a equals byte j of b.
func (p *strings) Equal(i, j int) bool {
	left, right := p.a[i], p.b[j]
	return left == right
}
35+
36+
// Bytes returns the difference of two byte slices
37+
func Bytes(a, b []byte) []Change {
38+
return Diff(len(a), len(b), &bytes{a, b})
39+
}
40+
41+
// bytes adapts a pair of byte slices to the Data interface.
type bytes struct {
	a, b []byte
}

// Equal reports whether a[i] and b[j] are the same byte.
func (p *bytes) Equal(i, j int) bool {
	left, right := p.a[i], p.b[j]
	return left == right
}
44+
45+
// Ints returns the difference of two int slices
46+
func Ints(a, b []int) []Change {
47+
return Diff(len(a), len(b), &ints{a, b})
48+
}
49+
50+
// ints adapts a pair of int slices to the Data interface.
type ints struct {
	a, b []int
}

// Equal reports whether a[i] and b[j] are the same integer.
func (p *ints) Equal(i, j int) bool {
	left, right := p.a[i], p.b[j]
	return left == right
}
53+
54+
// Runes returns the difference of two rune slices
55+
func Runes(a, b []rune) []Change {
56+
return Diff(len(a), len(b), &runes{a, b})
57+
}
58+
59+
// runes adapts a pair of rune slices to the Data interface.
type runes struct {
	a, b []rune
}

// Equal reports whether a[i] and b[j] are the same rune.
func (p *runes) Equal(i, j int) bool {
	left, right := p.a[i], p.b[j]
	return left == right
}
62+
63+
// Granular merges neighboring changes smaller than the specified granularity.
64+
// The changes must be ordered by ascending positions as returned by this package.
65+
func Granular(granularity int, changes []Change) []Change {
66+
if len(changes) == 0 {
67+
return changes
68+
}
69+
gap := 0
70+
for i := 1; i < len(changes); i++ {
71+
curr := changes[i]
72+
prev := changes[i-gap-1]
73+
// same as curr.B-(prev.B+prev.Ins); consistency is key
74+
if curr.A-(prev.A+prev.Del) <= granularity {
75+
// merge changes:
76+
curr = Change{
77+
A: prev.A, B: prev.B, // start at same spot
78+
Del: curr.A - prev.A + curr.Del, // from first to end of second
79+
Ins: curr.B - prev.B + curr.Ins, // from first to end of second
80+
}
81+
gap++
82+
}
83+
changes[i-gap] = curr
84+
}
85+
return changes[:len(changes)-gap]
86+
}
87+
88+
// Diff returns the differences of data.
89+
// data.Equal is called repeatedly with 0<=i<n and 0<=j<m
90+
func Diff(n, m int, data Data) []Change {
91+
c := &context{data: data}
92+
if n > m {
93+
c.flags = make([]byte, n)
94+
} else {
95+
c.flags = make([]byte, m)
96+
}
97+
c.max = n + m + 1
98+
c.compare(0, 0, n, m)
99+
return c.result(n, m)
100+
}
101+
102+
// A Change describes one contiguous edit: Del elements are deleted from the
// first sequence starting at index A, and Ins elements of the second
// sequence starting at index B are inserted.
103+
// A single Change may carry deletions, insertions, or both.
104+
type Change struct {
105+
A, B int // start index of the change in input a and in input b
106+
Del int // number of elements deleted from input a, starting at A
107+
Ins int // number of elements inserted from input b, starting at B
108+
}
109+
110+
// context holds the working state of a single Diff call.
type context struct {
111+
data Data // supplies the element comparisons
112+
flags []byte // per-index marks: bit 1 = element of a deleted, bit 2 = element of b inserted
113+
max int // n+m+1 as set by Diff; center index of the forward/reverse slices
114+
// forward and reverse d-path endpoint x components, indexed by diagonal;
// allocated on first use by findMiddleSnake with 2*max entries each
115+
forward, reverse []int
116+
}
117+
118+
func (c *context) compare(aoffset, boffset, alimit, blimit int) {
119+
// eat common prefix
120+
for aoffset < alimit && boffset < blimit && c.data.Equal(aoffset, boffset) {
121+
aoffset++
122+
boffset++
123+
}
124+
// eat common suffix
125+
for alimit > aoffset && blimit > boffset && c.data.Equal(alimit-1, blimit-1) {
126+
alimit--
127+
blimit--
128+
}
129+
// both equal or b inserts
130+
if aoffset == alimit {
131+
for boffset < blimit {
132+
c.flags[boffset] |= 2
133+
boffset++
134+
}
135+
return
136+
}
137+
// a deletes
138+
if boffset == blimit {
139+
for aoffset < alimit {
140+
c.flags[aoffset] |= 1
141+
aoffset++
142+
}
143+
return
144+
}
145+
x, y := c.findMiddleSnake(aoffset, boffset, alimit, blimit)
146+
c.compare(aoffset, boffset, x, y)
147+
c.compare(x, y, alimit, blimit)
148+
}
149+
150+
// findMiddleSnake searches the range (aoffset, boffset)..(alimit, blimit)
// from both ends at once and returns a point (x, y) inside it; compare uses
// that point to split the range into two smaller sub-problems.
//
// Diagonals are numbered k = x - y. For each search depth d the forward pass
// extends paths starting at (aoffset, boffset) and the reverse pass extends
// paths ending at (alimit, blimit). The function returns as soon as the
// forward and reverse endpoints recorded for the same diagonal meet or cross.
// It panics if that never happens within maxd rounds.
func (c *context) findMiddleSnake(aoffset, boffset, alimit, blimit int) (int, int) {
151+
// midpoints: the diagonals on which the forward and reverse searches start
152+
fmid := aoffset - boffset
153+
rmid := alimit - blimit
154+
// correct offset in d-path slices: foff+fmid and roff+rmid both equal c.max,
// so each search is centered in its slice
155+
foff := c.max - fmid
156+
roff := c.max - rmid
157+
// parity of the distance between the two start diagonals; it selects which
// pass (forward when odd, reverse when even) checks for an overlap
158+
isodd := (rmid-fmid)&1 != 0
159+
// upper bound on the search depth, derived from the combined range length
160+
maxd := (alimit - aoffset + blimit - boffset + 2) / 2
161+
// allocate when first used; the slices live on the context, so later calls
// reuse them
162+
if c.forward == nil {
163+
c.forward = make([]int, 2*c.max)
164+
c.reverse = make([]int, 2*c.max)
165+
}
166+
// seed the neighbor slots read at d == 0 so the forward search starts at
// x = aoffset and the reverse search at x = alimit
167+
c.forward[c.max+1] = aoffset
168+
c.reverse[c.max-1] = alimit
169+
var x, y int
170+
for d := 0; d <= maxd; d++ {
171+
// forward search
172+
for k := fmid - d; k <= fmid+d; k += 2 {
173+
// choose the neighboring diagonal to continue from: k+1 on the lowest
// diagonal or when it reaches further, otherwise k-1 plus one step in x
174+
if k == fmid-d || k != fmid+d && c.forward[foff+k+1] > c.forward[foff+k-1] {
175+
x = c.forward[foff+k+1] // down
176+
} else {
177+
x = c.forward[foff+k-1] + 1 // right
178+
}
179+
y = x - k
180+
// follow the run of equal elements along diagonal k
181+
for x < alimit && y < blimit && c.data.Equal(x, y) {
182+
x++
183+
y++
184+
}
185+
c.forward[foff+k] = x
186+
// overlap check against the reverse endpoint stored for the same diagonal
187+
if isodd && k > rmid-d && k < rmid+d {
188+
if c.reverse[roff+k] <= c.forward[foff+k] {
189+
return x, x - k
190+
}
191+
}
192+
}
193+
// reverse search x,y correspond to u,v
194+
for k := rmid - d; k <= rmid+d; k += 2 {
195+
// mirror of the forward choice: continue from k-1 on the highest
// diagonal or when it reaches further back, otherwise from k+1 minus
// one step in x
196+
if k == rmid+d || k != rmid-d && c.reverse[roff+k-1] < c.reverse[roff+k+1] {
197+
x = c.reverse[roff+k-1] // up
198+
} else {
199+
x = c.reverse[roff+k+1] - 1 // left
200+
}
201+
y = x - k
202+
// follow the run of equal elements backwards along diagonal k
203+
for x > aoffset && y > boffset && c.data.Equal(x-1, y-1) {
204+
x--
205+
y--
206+
}
207+
c.reverse[roff+k] = x
208+
// overlap check against the forward endpoint stored for the same diagonal
209+
if !isodd && k >= fmid-d && k <= fmid+d {
210+
if c.reverse[roff+k] <= c.forward[foff+k] {
211+
// lookup opposite end: return the forward endpoint on this diagonal
212+
x = c.forward[foff+k]
213+
return x, x - k
214+
}
215+
}
216+
}
217+
}
218+
// every return path is inside the loop; getting here means no overlap was found
219+
panic("should never be reached")
220+
}
211+
212+
func (c *context) result(n, m int) (res []Change) {
213+
var x, y int
214+
for x < n || y < m {
215+
if x < n && y < m && c.flags[x]&1 == 0 && c.flags[y]&2 == 0 {
216+
x++
217+
y++
218+
} else {
219+
a := x
220+
b := y
221+
for x < n && (y >= m || c.flags[x]&1 != 0) {
222+
x++
223+
}
224+
for y < m && (x >= n || c.flags[y]&2 != 0) {
225+
y++
226+
}
227+
if a < x || b < y {
228+
res = append(res, Change{a, b, x - a, y - b})
229+
}
230+
}
231+
}
232+
return
233+
}
234+
235+
// GetFileLines reads the file at filename and returns its lines.
//
// Lines are split by bufio.Scanner's default line splitting: the line
// terminator is not part of the returned strings, and a final line without a
// trailing newline is still returned. If the file cannot be opened or read,
// the returned slice is nil and the error is returned unchanged.
func GetFileLines(filename string) ([]string, error) {
	// bufio.Scanner rejects tokens longer than 64 KiB by default, which makes
	// any file with one long line fail with bufio.ErrTooLong. Raise the cap;
	// the buffer only grows as far as the longest line actually needs.
	const (
		initialBufSize = 64 * 1024
		maxLineSize    = 1 << 30
	)

	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 0, initialBufSize), maxLineSize)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return lines, nil
}
258+
259+
func PrintDiffSlices(src, dst []string) {
260+
entries := ByteStringSlices(src, dst)
261+
// fmt.Printf("%+v\n", entries)
262+
prevA := 0
263+
for _, v := range entries {
264+
if prevA < v.A {
265+
fmt.Printf("\033[39m %s\n", src[prevA:v.A])
266+
}
267+
268+
if v.Del > 0 {
269+
fmt.Printf("\033[31m- %s\n", src[v.A:v.A+v.Del])
270+
}
271+
272+
if v.Ins > 0 {
273+
fmt.Printf("\033[32m+ %s\n", dst[v.B:v.B+v.Ins])
274+
}
275+
276+
prevA = v.A + v.Del
277+
}
278+
if prevA < len(src) {
279+
fmt.Printf("\033[39m %s\n", src[prevA:])
280+
}
281+
}

0 commit comments

Comments
 (0)