|
| 1 | +// Package diff implements a difference algorithm. |
| 2 | +// The algorithm is described in "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, pp. 251-266. |
| 3 | +package diff |
| 4 | + |
| 5 | +import ( |
| 6 | + "bufio" |
| 7 | + "fmt" |
| 8 | + "os" |
| 9 | +) |
| 10 | + |
// Data is the abstraction this package diffs. An implementation typically
// wraps two sequences A and B of comparable elements.
type Data interface {
	// Equal reports whether element i of the first sequence and element j
	// of the second sequence are considered equal.
	Equal(i int, j int) bool
}
| 17 | + |
| 18 | +// ByteStrings returns the differences of two strings in bytes. |
| 19 | +func ByteStringSlices(a, b []string) []Change { |
| 20 | + return Diff(len(a), len(b), &stringSlices{a, b}) |
| 21 | +} |
| 22 | + |
| 23 | +type stringSlices struct{ a, b []string } |
| 24 | + |
| 25 | +func (d *stringSlices) Equal(i, j int) bool { return d.a[i] == d.b[j] } |
| 26 | + |
| 27 | +// ByteStrings returns the differences of two strings in bytes. |
| 28 | +func ByteStrings(a, b string) []Change { |
| 29 | + return Diff(len(a), len(b), &strings{a, b}) |
| 30 | +} |
| 31 | + |
| 32 | +type strings struct{ a, b string } |
| 33 | + |
| 34 | +func (d *strings) Equal(i, j int) bool { return d.a[i] == d.b[j] } |
| 35 | + |
| 36 | +// Bytes returns the difference of two byte slices |
| 37 | +func Bytes(a, b []byte) []Change { |
| 38 | + return Diff(len(a), len(b), &bytes{a, b}) |
| 39 | +} |
| 40 | + |
| 41 | +type bytes struct{ a, b []byte } |
| 42 | + |
| 43 | +func (d *bytes) Equal(i, j int) bool { return d.a[i] == d.b[j] } |
| 44 | + |
| 45 | +// Ints returns the difference of two int slices |
| 46 | +func Ints(a, b []int) []Change { |
| 47 | + return Diff(len(a), len(b), &ints{a, b}) |
| 48 | +} |
| 49 | + |
| 50 | +type ints struct{ a, b []int } |
| 51 | + |
| 52 | +func (d *ints) Equal(i, j int) bool { return d.a[i] == d.b[j] } |
| 53 | + |
| 54 | +// Runes returns the difference of two rune slices |
| 55 | +func Runes(a, b []rune) []Change { |
| 56 | + return Diff(len(a), len(b), &runes{a, b}) |
| 57 | +} |
| 58 | + |
| 59 | +type runes struct{ a, b []rune } |
| 60 | + |
| 61 | +func (d *runes) Equal(i, j int) bool { return d.a[i] == d.b[j] } |
| 62 | + |
| 63 | +// Granular merges neighboring changes smaller than the specified granularity. |
| 64 | +// The changes must be ordered by ascending positions as returned by this package. |
| 65 | +func Granular(granularity int, changes []Change) []Change { |
| 66 | + if len(changes) == 0 { |
| 67 | + return changes |
| 68 | + } |
| 69 | + gap := 0 |
| 70 | + for i := 1; i < len(changes); i++ { |
| 71 | + curr := changes[i] |
| 72 | + prev := changes[i-gap-1] |
| 73 | + // same as curr.B-(prev.B+prev.Ins); consistency is key |
| 74 | + if curr.A-(prev.A+prev.Del) <= granularity { |
| 75 | + // merge changes: |
| 76 | + curr = Change{ |
| 77 | + A: prev.A, B: prev.B, // start at same spot |
| 78 | + Del: curr.A - prev.A + curr.Del, // from first to end of second |
| 79 | + Ins: curr.B - prev.B + curr.Ins, // from first to end of second |
| 80 | + } |
| 81 | + gap++ |
| 82 | + } |
| 83 | + changes[i-gap] = curr |
| 84 | + } |
| 85 | + return changes[:len(changes)-gap] |
| 86 | +} |
| 87 | + |
| 88 | +// Diff returns the differences of data. |
| 89 | +// data.Equal is called repeatedly with 0<=i<n and 0<=j<m |
| 90 | +func Diff(n, m int, data Data) []Change { |
| 91 | + c := &context{data: data} |
| 92 | + if n > m { |
| 93 | + c.flags = make([]byte, n) |
| 94 | + } else { |
| 95 | + c.flags = make([]byte, m) |
| 96 | + } |
| 97 | + c.max = n + m + 1 |
| 98 | + c.compare(0, 0, n, m) |
| 99 | + return c.result(n, m) |
| 100 | +} |
| 101 | + |
// A Change describes one or more deletions and/or insertions located at a
// single position in the two sequences.
type Change struct {
	A   int // position in input a
	B   int // position in input b
	Del int // delete Del elements from input a, starting at A
	Ins int // insert Ins elements from input b, starting at B
}
| 109 | + |
| 110 | +type context struct { |
| 111 | + data Data |
| 112 | + flags []byte // element bits 1 delete, 2 insert |
| 113 | + max int |
| 114 | + // forward and reverse d-path endpoint x components |
| 115 | + forward, reverse []int |
| 116 | +} |
| 117 | + |
| 118 | +func (c *context) compare(aoffset, boffset, alimit, blimit int) { |
| 119 | + // eat common prefix |
| 120 | + for aoffset < alimit && boffset < blimit && c.data.Equal(aoffset, boffset) { |
| 121 | + aoffset++ |
| 122 | + boffset++ |
| 123 | + } |
| 124 | + // eat common suffix |
| 125 | + for alimit > aoffset && blimit > boffset && c.data.Equal(alimit-1, blimit-1) { |
| 126 | + alimit-- |
| 127 | + blimit-- |
| 128 | + } |
| 129 | + // both equal or b inserts |
| 130 | + if aoffset == alimit { |
| 131 | + for boffset < blimit { |
| 132 | + c.flags[boffset] |= 2 |
| 133 | + boffset++ |
| 134 | + } |
| 135 | + return |
| 136 | + } |
| 137 | + // a deletes |
| 138 | + if boffset == blimit { |
| 139 | + for aoffset < alimit { |
| 140 | + c.flags[aoffset] |= 1 |
| 141 | + aoffset++ |
| 142 | + } |
| 143 | + return |
| 144 | + } |
| 145 | + x, y := c.findMiddleSnake(aoffset, boffset, alimit, blimit) |
| 146 | + c.compare(aoffset, boffset, x, y) |
| 147 | + c.compare(x, y, alimit, blimit) |
| 148 | +} |
| 149 | + |
| 150 | +func (c *context) findMiddleSnake(aoffset, boffset, alimit, blimit int) (int, int) { |
| 151 | + // midpoints |
| 152 | + fmid := aoffset - boffset |
| 153 | + rmid := alimit - blimit |
| 154 | + // correct offset in d-path slices |
| 155 | + foff := c.max - fmid |
| 156 | + roff := c.max - rmid |
| 157 | + isodd := (rmid-fmid)&1 != 0 |
| 158 | + maxd := (alimit - aoffset + blimit - boffset + 2) / 2 |
| 159 | + // allocate when first used |
| 160 | + if c.forward == nil { |
| 161 | + c.forward = make([]int, 2*c.max) |
| 162 | + c.reverse = make([]int, 2*c.max) |
| 163 | + } |
| 164 | + c.forward[c.max+1] = aoffset |
| 165 | + c.reverse[c.max-1] = alimit |
| 166 | + var x, y int |
| 167 | + for d := 0; d <= maxd; d++ { |
| 168 | + // forward search |
| 169 | + for k := fmid - d; k <= fmid+d; k += 2 { |
| 170 | + if k == fmid-d || k != fmid+d && c.forward[foff+k+1] > c.forward[foff+k-1] { |
| 171 | + x = c.forward[foff+k+1] // down |
| 172 | + } else { |
| 173 | + x = c.forward[foff+k-1] + 1 // right |
| 174 | + } |
| 175 | + y = x - k |
| 176 | + for x < alimit && y < blimit && c.data.Equal(x, y) { |
| 177 | + x++ |
| 178 | + y++ |
| 179 | + } |
| 180 | + c.forward[foff+k] = x |
| 181 | + if isodd && k > rmid-d && k < rmid+d { |
| 182 | + if c.reverse[roff+k] <= c.forward[foff+k] { |
| 183 | + return x, x - k |
| 184 | + } |
| 185 | + } |
| 186 | + } |
| 187 | + // reverse search x,y correspond to u,v |
| 188 | + for k := rmid - d; k <= rmid+d; k += 2 { |
| 189 | + if k == rmid+d || k != rmid-d && c.reverse[roff+k-1] < c.reverse[roff+k+1] { |
| 190 | + x = c.reverse[roff+k-1] // up |
| 191 | + } else { |
| 192 | + x = c.reverse[roff+k+1] - 1 // left |
| 193 | + } |
| 194 | + y = x - k |
| 195 | + for x > aoffset && y > boffset && c.data.Equal(x-1, y-1) { |
| 196 | + x-- |
| 197 | + y-- |
| 198 | + } |
| 199 | + c.reverse[roff+k] = x |
| 200 | + if !isodd && k >= fmid-d && k <= fmid+d { |
| 201 | + if c.reverse[roff+k] <= c.forward[foff+k] { |
| 202 | + // lookup opposite end |
| 203 | + x = c.forward[foff+k] |
| 204 | + return x, x - k |
| 205 | + } |
| 206 | + } |
| 207 | + } |
| 208 | + } |
| 209 | + panic("should never be reached") |
| 210 | +} |
| 211 | + |
| 212 | +func (c *context) result(n, m int) (res []Change) { |
| 213 | + var x, y int |
| 214 | + for x < n || y < m { |
| 215 | + if x < n && y < m && c.flags[x]&1 == 0 && c.flags[y]&2 == 0 { |
| 216 | + x++ |
| 217 | + y++ |
| 218 | + } else { |
| 219 | + a := x |
| 220 | + b := y |
| 221 | + for x < n && (y >= m || c.flags[x]&1 != 0) { |
| 222 | + x++ |
| 223 | + } |
| 224 | + for y < m && (x >= n || c.flags[y]&2 != 0) { |
| 225 | + y++ |
| 226 | + } |
| 227 | + if a < x || b < y { |
| 228 | + res = append(res, Change{a, b, x - a, y - b}) |
| 229 | + } |
| 230 | + } |
| 231 | + } |
| 232 | + return |
| 233 | +} |
| 234 | + |
// GetFileLines reads the named file and returns its contents split into
// lines. Line terminators are not included in the returned strings
// (bufio.ScanLines strips the trailing "\n" and an optional preceding "\r").
// An empty file yields a nil slice and a nil error. On any open or read error
// it returns nil and that error.
func GetFileLines(filename string) ([]string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// bufio.Scanner rejects tokens longer than bufio.MaxScanTokenSize
	// (64 KiB) by default, which makes files with one very long line fail
	// with bufio.ErrTooLong. Raise the ceiling; the buffer only grows when a
	// line actually needs it.
	const maxLineBytes = 1 << 30
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return lines, nil
}
| 258 | + |
| 259 | +func PrintDiffSlices(src, dst []string) { |
| 260 | + entries := ByteStringSlices(src, dst) |
| 261 | + // fmt.Printf("%+v\n", entries) |
| 262 | + prevA := 0 |
| 263 | + for _, v := range entries { |
| 264 | + if prevA < v.A { |
| 265 | + fmt.Printf("\033[39m %s\n", src[prevA:v.A]) |
| 266 | + } |
| 267 | + |
| 268 | + if v.Del > 0 { |
| 269 | + fmt.Printf("\033[31m- %s\n", src[v.A:v.A+v.Del]) |
| 270 | + } |
| 271 | + |
| 272 | + if v.Ins > 0 { |
| 273 | + fmt.Printf("\033[32m+ %s\n", dst[v.B:v.B+v.Ins]) |
| 274 | + } |
| 275 | + |
| 276 | + prevA = v.A + v.Del |
| 277 | + } |
| 278 | + if prevA < len(src) { |
| 279 | + fmt.Printf("\033[39m %s\n", src[prevA:]) |
| 280 | + } |
| 281 | +} |
0 commit comments