From 19ea62ae7f0678aabf663393db1af81692c86e7f Mon Sep 17 00:00:00 2001 From: Gusted Date: Sun, 1 Aug 2021 08:39:02 +0200 Subject: [PATCH] Add: detect typos and suggest correct naming (#78) * Detect typos and suggest correct naming Using the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance) we can detect whether one string can be turned into another within a given number of edits. The implementation I made is a variation of the 2 matrix-rows implementation, so we can "bail out" early as soon as the number of edits exceeds maxEdits. It should resolve https://github.com/openstyles/stylus/issues/893 Even though that issue suggests only erroring on typos, I'm not sure that's the correct behavior, but it would take only a small edit to get that behavior. Regards, Gusted * Patch up comments and variables * Fix failing tests * Fix variables and avoid oldDistanceCost variable * Better variable naming * Code style * Apply requested changes * Patch up the implementation and add some tests Let's account for add/del so it won't match `name` to `advance` * Shouldn't match 2 different words * Add new test case * Update readme * Pass test-case * Add message * Switch to Math.log2 * Apply suggested changes * Improvement to bail out early --- README.md | 2 +- browser-test.js | 1 + lib/levensthein-distance.js | 68 ++++++++++++++++++++++++++++++ lib/parse.js | 11 ++++- package.json | 1 + tests/levensthein-distance.test.js | 20 +++++++++ tests/parser.test.js | 15 +++++++ 7 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 lib/levensthein-distance.js create mode 100644 tests/levensthein-distance.test.js diff --git a/README.md b/README.md index a41f25d..ef339d8 100644 --- a/README.md +++ b/README.md @@ -275,7 +275,7 @@ A table of errors thrown by the parser: |`missingMandatory`|A list of missing keys|This error doesn't have `err.index`.| |`missingValue`||Expect a non-whitespace value.| |`unknownJSONLiteral`|Literal value|JSON has only 3 
literals: `true`, `false`, and `null`.| -|`unknownMeta`|Key of unknown metadata|Unknown `@metadata`.| +|`unknownMeta`|`[META_KEY, SUGGESTED_META_KEY]`|Unknown `@metadata`. It may suggest the correct metadata name if there is a typo. `SUGGESTED_META_KEY` can be null| |`unknownVarType`|`[META_KEY, VARIABLE_TYPE]`|Unknown variable type. `META_KEY` could be `var` or `advanced`.| ### util diff --git a/browser-test.js b/browser-test.js index d6a1877..a986d67 100644 --- a/browser-test.js +++ b/browser-test.js @@ -87,5 +87,6 @@ rl.question('Open browser-test.html and check if all tests have passed (y/N): ', if (!/y/i.test(ans)) { process.exit(1); // eslint-disable-line unicorn/no-process-exit } + rl.close(); }); diff --git a/lib/levensthein-distance.js b/lib/levensthein-distance.js new file mode 100644 index 0000000..efb83f2 --- /dev/null +++ b/lib/levensthein-distance.js @@ -0,0 +1,68 @@ +/** + * Returns an array filled with 0...amount - 1. + * @param {number} amount Length of the array to create + * @returns {number[]} Array where every index holds its own index value + */ +function range(amount) { + const range = Array(amount); + for (let i = 0; i < amount; i++) { + range[i] = i; + } + + return range; +} + +/** + * Check if the amount of edits between firstString and secondString is <= maxEdit. + * It uses the Levenshtein distance algorithm with the two matrix rows variant. + * @param {string} firstString First string to be checked against the other string + * @param {string} secondString Second string to be checked against the other string + * @param {number} maxEdit The maximum amount of edits allowed between these 2 strings. + * @returns {boolean} true if the amount of edits between the 2 strings is less than or equal to maxEdit + */ +function LevenshteinDistanceWithMax(firstString, secondString, maxEdit) { + const lenOne = firstString.length; + const lenTwo = secondString.length; + + const lenDiff = Math.abs(lenOne - lenTwo); + // If the difference between the 2 lengths is greater than + // maxEdit, we know to bail out early on. 
+ if (lenDiff > maxEdit) { + return false; + } + + let prevRowDistance = range(lenOne + 1); + let currentRowDistance = Array(lenOne + 1); + for (let i = 1; i <= lenTwo; i++) { + // Calculate the current row distances from the previous row. + currentRowDistance[0] = i; + let minDistance = i; + for (let j = 1; j <= lenOne; j++) { + const editCost = firstString[j - 1] === secondString[i - 1] ? 0 : 1; + + const addCost = prevRowDistance[j] + 1; + const delCost = currentRowDistance[j - 1] + 1; + const substitionCost = prevRowDistance[j - 1] + editCost; + + currentRowDistance[j] = Math.min(addCost, delCost, substitionCost); + if (currentRowDistance[j] < minDistance) { + minDistance = currentRowDistance[j]; + } + } + + if (minDistance > maxEdit) { + return false; + } + + // Swap the rows so the current row becomes the previous row + const vtemp = currentRowDistance; + currentRowDistance = prevRowDistance; + prevRowDistance = vtemp; + } + + return prevRowDistance[lenOne] <= maxEdit; +} + +module.exports = { + LevenshteinDistanceWithMax +}; diff --git a/lib/parse.js b/lib/parse.js index 66104f1..e64a077 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -4,6 +4,7 @@ const {URL} = require('url'); const UNITS = require('../data/units'); const {ParseError, MissingCharError} = require('./error'); +const {LevenshteinDistanceWithMax} = require('./levensthein-distance'); const { eatLine, eatWhitespace, @@ -377,6 +378,7 @@ function createParser({ } const parser = Object.assign(Object.create(null), DEFAULT_PARSER, userParseKey); + const keysOfParser = [...Object.keys(parser), 'advanced', 'var']; const varParser = Object.assign({}, DEFAULT_VAR_PARSER, userParseVar); const validator = Object.assign({}, DEFAULT_VALIDATOR, userValidateKey); const varValidator = Object.assign({}, DEFAULT_VAR_VALIDATOR, userValidateVar); @@ -530,11 +532,16 @@ function createParser({ return; } + // TODO: Suggest the item with the smallest distance or even multiple results? + // Implementation note: switch to Levenshtein automaton variation. 
+ const MAX_EDIT = Math.log2(state.key.length); + const maybeSuggestion = keysOfParser.find(metaKey => LevenshteinDistanceWithMax(metaKey, state.key, MAX_EDIT)); + // throw throw new ParseError({ code: 'unknownMeta', - args: [state.key], - message: `Unknown metadata: @${state.key}`, + args: [state.key, maybeSuggestion], + message: `Unknown metadata: @${state.key}${maybeSuggestion ? `, did you mean @${maybeSuggestion}?` : ''}`, index: state.index }); } diff --git a/package.json b/package.json index a8082ea..1670229 100644 --- a/package.json +++ b/package.json @@ -86,6 +86,7 @@ "node/no-path-concat": 0, "unicorn/catch-error-name": 0, "unicorn/explicit-length-check": 0, + "unicorn/new-for-builtins": 0, "unicorn/no-reduce": 0, "unicorn/no-array-for-each": 0, "unicorn/no-array-reduce": 0, diff --git a/tests/levensthein-distance.test.js b/tests/levensthein-distance.test.js new file mode 100644 index 0000000..cad1ef5 --- /dev/null +++ b/tests/levensthein-distance.test.js @@ -0,0 +1,20 @@ +const test = require('ava'); +const {LevenshteinDistanceWithMax} = require('../lib/levensthein-distance'); + +test("shouldn't match", t => { + t.is(LevenshteinDistanceWithMax('water', 'atect', 2), false); + t.is(LevenshteinDistanceWithMax('water', 'christmas', 3), false); + t.is(LevenshteinDistanceWithMax('water', 'water1', 0), false); + t.is(LevenshteinDistanceWithMax('thea', 'ythee', 1), false); + t.is(LevenshteinDistanceWithMax('12345', '567', 4), false); +}); + +test('should match', t => { + t.is(LevenshteinDistanceWithMax('advanced', 'advance', 3), true); + t.is(LevenshteinDistanceWithMax('water', 'water', 0), true); + t.is(LevenshteinDistanceWithMax('wayer', 'water', 1), true); + t.is(LevenshteinDistanceWithMax('thea', 'ythee', 2), true); + t.is(LevenshteinDistanceWithMax('12345', '567', 5), true); + t.is(LevenshteinDistanceWithMax('wayter', 'water', 1), true); + t.is(LevenshteinDistanceWithMax('var', 'abc', 3), true); +}); diff --git a/tests/parser.test.js b/tests/parser.test.js 
index 092bfee..3ac0e5b 100644 --- a/tests/parser.test.js +++ b/tests/parser.test.js @@ -244,6 +244,21 @@ test('validateVar', t => { t.is(drawRange(text, err.index), raw); }); +test('suggestive metadata', t => { + const {text, raw} = extractRange(` + /* ==UserStyle== + |@advance color font-color "Font color" #ffffff + ==/UserStyle== */ + `); + + const error = t.throws(() => { + parse(text, {unknownKey: 'throw', mandatoryKeys: []}); + }); + t.is(error.message, 'Unknown metadata: @advance, did you mean @advanced?'); + t.is(error.args[1], 'advanced'); + t.is(drawRange(text, error.index), raw); +}); + test('allowErrors', t => { const parser = createParser({ allowErrors: true