-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add: detect typos and suggest correct naming (#78)
* Detect typos and suggest correct naming Using the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance) we can detect whether 2 strings can be turned into each other with at most x edits. The implementation I made is a variation of the two-matrix-rows implementation, so it is well suited to checking whether we can "bail out" early once the number of edits exceeds maxEdits. It should resolve openstyles/stylus#893 Even though that issue mentions only erroring on typos, I'm not sure that is the correct behavior, but it would take only a small edit to get such behavior. Regards, Gusted * Patch up comments and variables * Fix failing tests * Fix variables and avoid oldDistanceCost variable * Better variable naming * Code style * Apply requested changes * Patch up the implementation and add some tests Let's account for add/del so it won't match `name` to `advance` * Shouldn't match 2 different words * Add new test case * Update readme * Pass test-case * Add message * Switch to Math.log2 * Apply suggested changes * Improvement to bail out early
- Loading branch information
Gusted
authored
Aug 1, 2021
1 parent
16cfc67
commit 19ea62a
Showing
7 changed files
with
115 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/**
 * Builds an array holding the integers 0 through amount - 1 in ascending order.
 * @param {number} amount Number of elements to generate
 * @returns {number[]} The array [0, 1, ..., amount - 1]; empty when amount is 0
 */
function range(amount) {
  // The mapper's second argument is the element index, which is the value we want.
  return Array.from({length: amount}, (_, index) => index);
}
|
||
/**
 * Checks whether the number of edits between firstString and secondString is <= maxEdit.
 * It uses the Levenshtein distance algorithm in its two-row variant: only the
 * previous and the current row of the distance matrix are kept in memory.
 * Characters are compared by string index, i.e. per UTF-16 code unit.
 * @param {string} firstString First string to be checked against the other string
 * @param {string} secondString Second string to be checked against the other string
 * @param {number} maxEdit The maximum number of edits these 2 strings may be apart
 * @returns {boolean} true when the edit distance of the 2 strings is less than or equal to maxEdit
 */
function LevenshteinDistanceWithMax(firstString, secondString, maxEdit) {
  const lenOne = firstString.length;
  const lenTwo = secondString.length;

  // The distance can never be smaller than the difference in length,
  // so a gap larger than maxEdit lets us bail out before filling any row.
  const lenDiff = Math.abs(lenOne - lenTwo);
  if (lenDiff > maxEdit) {
    return false;
  }

  // Row 0: turning the empty prefix of secondString into the first j
  // characters of firstString costs exactly j edits.
  let prevRowDistance = range(lenOne + 1);
  let currentRowDistance = Array(lenOne + 1);
  for (let i = 1; i <= lenTwo; i++) {
    // Calculate the current row distances from the previous row.
    currentRowDistance[0] = i;
    let minDistance = i;
    for (let j = 1; j <= lenOne; j++) {
      const editCost = firstString[j - 1] === secondString[i - 1] ? 0 : 1;

      const addCost = prevRowDistance[j] + 1;
      const delCost = currentRowDistance[j - 1] + 1;
      const substitutionCost = prevRowDistance[j - 1] + editCost;

      currentRowDistance[j] = Math.min(addCost, delCost, substitutionCost);
      minDistance = Math.min(minDistance, currentRowDistance[j]);
    }

    // Later rows are derived from this one and never drop below its minimum,
    // so once the whole row exceeds maxEdit the final answer must be false.
    if (minDistance > maxEdit) {
      return false;
    }

    // Swap the rows: the finished row becomes the reference for the next one,
    // and the old reference row is reused as scratch space.
    [prevRowDistance, currentRowDistance] = [currentRowDistance, prevRowDistance];
  }

  // After the final swap the last computed row lives in prevRowDistance.
  return prevRowDistance[lenOne] <= maxEdit;
}
|
||
module.exports = { | ||
LevenshteinDistanceWithMax | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
const test = require('ava');
const {LevenshteinDistanceWithMax} = require('../lib/levensthein-distance');

// Pairs that need more edits than the allowed maximum.
test("shouldn't match", t => {
  t.is(LevenshteinDistanceWithMax('water', 'atect', 2), false);
  t.is(LevenshteinDistanceWithMax('water', 'christmas', 3), false);
  t.is(LevenshteinDistanceWithMax('water', 'water1', 0), false);
  t.is(LevenshteinDistanceWithMax('thea', 'ythee', 1), false);
  t.is(LevenshteinDistanceWithMax('12345', '567', 4), false);
});

// Pairs that can be converted into each other within the allowed maximum.
test('should match', t => {
  t.is(LevenshteinDistanceWithMax('advanced', 'advance', 3), true);
  t.is(LevenshteinDistanceWithMax('water', 'water', 0), true);
  t.is(LevenshteinDistanceWithMax('wayer', 'water', 1), true);
  t.is(LevenshteinDistanceWithMax('thea', 'ythee', 2), true);
  t.is(LevenshteinDistanceWithMax('12345', '567', 5), true);
  t.is(LevenshteinDistanceWithMax('wayter', 'water', 1), true);
  t.is(LevenshteinDistanceWithMax('var', 'abc', 3), true);
});

// With an empty string the distance equals the length of the other string.
test('handles empty strings', t => {
  t.is(LevenshteinDistanceWithMax('', '', 0), true);
  t.is(LevenshteinDistanceWithMax('', 'abc', 3), true);
  t.is(LevenshteinDistanceWithMax('abc', '', 3), true);
  t.is(LevenshteinDistanceWithMax('', 'abc', 2), false);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters