Skip to content

Commit

Permalink
Add: detect typos and suggest correct naming (#78)
Browse files Browse the repository at this point in the history
* Detect typos and suggest correct naming

 Using the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance) we can detect whether one string can be turned into another within a given number of edits. The implementation I made is a variation of the two-matrix-rows implementation, so it can cheaply "bail out" as soon as the number of edits is known to exceed maxEdits.

It should resolve openstyles/stylus#893
Even though that issue suggests erroring only on typos, I'm not sure that is the correct behavior; it would take only a small edit to implement it, though.

Regards,
Gusted

* Patch up comments and variables

* Fix failing tests

* Fix variables and avoid oldDistanceCost variable

* Better variable naming

* Code style

* Apply requested changes

* Patch up the implementation and add some tests

Let's account for add/del so it won't match `name` to `advance`

* Shouldn't match 2 different words

* Add new test case

* Update readme

* Pass test-case

* Add message

* Switch to Math.log2

* Apply suggested changes

* Improvement to bail out early
  • Loading branch information
Gusted authored Aug 1, 2021
1 parent 16cfc67 commit 19ea62a
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ A table of errors thrown by the parser:
|`missingMandatory`|A list of missing keys|This error doesn't have `err.index`.|
|`missingValue`||Expect a non-whitespace value.|
|`unknownJSONLiteral`|Literal value|JSON has only 3 literals: `true`, `false`, and `null`.|
|`unknownMeta`|Key of unknown metadata|Unknown `@metadata`.|
|`unknownMeta`|`[META_KEY, SUGGESTED_META_KEY]`|Unknown `@metadata`. It may suggest the correct metadata name if there is a typo. `SUGGESTED_META_KEY` is `undefined` when no close match is found.|
|`unknownVarType`|`[META_KEY, VARIABLE_TYPE]`|Unknown variable type. `META_KEY` could be `var` or `advanced`.|

### util
Expand Down
1 change: 1 addition & 0 deletions browser-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,6 @@ rl.question('Open browser-test.html and check if all tests have passed (y/N): ',
if (!/y/i.test(ans)) {
process.exit(1); // eslint-disable-line unicorn/no-process-exit
}

rl.close();
});
68 changes: 68 additions & 0 deletions lib/levensthein-distance.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/**
 * Builds the ascending sequence `[0, 1, ..., amount - 1]`.
 * @param {number} amount Number of entries to generate
 * @returns {number[]} Array whose value at every index equals that index
 */
function range(amount) {
  const sequence = new Array(amount);
  let index = 0;
  while (index < amount) {
    sequence[index] = index;
    index += 1;
  }

  return sequence;
}

/**
 * Tells whether `firstString` can be turned into `secondString` using at most
 * `maxEdit` single-character insertions, deletions or substitutions.
 *
 * Only two rows of the Levenshtein matrix are kept in memory at any time, and
 * the computation stops as soon as the budget is provably exceeded.
 * @param {string} firstString One of the two strings to compare
 * @param {string} secondString The other string to compare
 * @param {number} maxEdit Largest edit distance still counted as a match
 * @returns {boolean} `true` when the edit distance is less than or equal to `maxEdit`
 */
function LevenshteinDistanceWithMax(firstString, secondString, maxEdit) {
  const columns = firstString.length;
  const rows = secondString.length;

  // Every surplus character costs at least one edit, so a length gap
  // larger than the budget can never be bridged.
  if (Math.abs(columns - rows) > maxEdit) {
    return false;
  }

  // `previous` starts as the distances from the empty prefix of
  // secondString to every prefix of firstString.
  let previous = range(columns + 1);
  let current = new Array(columns + 1);

  for (let row = 1; row <= rows; row++) {
    const rowChar = secondString[row - 1];

    current[0] = row;
    let rowMinimum = row;

    for (let col = 1; col <= columns; col++) {
      const mismatch = firstString[col - 1] === rowChar ? 0 : 1;
      const best = Math.min(
        previous[col] + 1,
        current[col - 1] + 1,
        previous[col - 1] + mismatch
      );

      current[col] = best;
      if (best < rowMinimum) {
        rowMinimum = best;
      }
    }

    // Values never decrease from one row to the next, so once the whole
    // row is over budget the final distance must be over budget as well.
    if (rowMinimum > maxEdit) {
      return false;
    }

    // Reuse the two buffers: the finished row becomes the previous row.
    [previous, current] = [current, previous];
  }

  return previous[columns] <= maxEdit;
}

// Public API of this module: only the bounded edit-distance check is exported.
module.exports = {
LevenshteinDistanceWithMax
};
11 changes: 9 additions & 2 deletions lib/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const {URL} = require('url');

const UNITS = require('../data/units');
const {ParseError, MissingCharError} = require('./error');
const {LevenshteinDistanceWithMax} = require('./levensthein-distance');
const {
eatLine,
eatWhitespace,
Expand Down Expand Up @@ -377,6 +378,7 @@ function createParser({
}

const parser = Object.assign(Object.create(null), DEFAULT_PARSER, userParseKey);
const keysOfParser = [...Object.keys(parser), 'advanced', 'var'];
const varParser = Object.assign({}, DEFAULT_VAR_PARSER, userParseVar);
const validator = Object.assign({}, DEFAULT_VALIDATOR, userValidateKey);
const varValidator = Object.assign({}, DEFAULT_VAR_VALIDATOR, userValidateVar);
Expand Down Expand Up @@ -530,11 +532,16 @@ function createParser({
return;
}

// TODO: Suggest the item with the smallest distance or even multiple results?
// Implementation note: switch to the Levenshtein automaton variation.
const MAX_EDIT = Math.log2(state.key.length);
const maybeSuggestion = keysOfParser.find(metaKey => LevenshteinDistanceWithMax(metaKey, state.key, MAX_EDIT));

// throw
throw new ParseError({
code: 'unknownMeta',
args: [state.key],
message: `Unknown metadata: @${state.key}`,
args: [state.key, maybeSuggestion],
message: `Unknown metadata: @${state.key}${maybeSuggestion ? `, did you mean @${maybeSuggestion}?` : ''}`,
index: state.index
});
}
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
"node/no-path-concat": 0,
"unicorn/catch-error-name": 0,
"unicorn/explicit-length-check": 0,
"unicorn/new-for-builtins": 0,
"unicorn/no-reduce": 0,
"unicorn/no-array-for-each": 0,
"unicorn/no-array-reduce": 0,
Expand Down
20 changes: 20 additions & 0 deletions tests/levensthein-distance.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
const test = require('ava');
const {LevenshteinDistanceWithMax} = require('../lib/levensthein-distance');

// Each entry is [firstString, secondString, maxEdit]; the real edit distance
// of every pair is larger than maxEdit, so the check must report false.
test("shouldn't match", t => {
  const rejectedCases = [
    ['water', 'atect', 2],
    ['water', 'christmas', 3],
    ['water', 'water1', 0],
    ['thea', 'ythee', 1],
    ['12345', '567', 4]
  ];

  for (const [first, second, maxEdit] of rejectedCases) {
    t.is(LevenshteinDistanceWithMax(first, second, maxEdit), false);
  }
});

// Each entry is [firstString, secondString, maxEdit]; the real edit distance
// of every pair fits within maxEdit, so the check must report true.
test('should match', t => {
  const acceptedCases = [
    ['advanced', 'advance', 3],
    ['water', 'water', 0],
    ['wayer', 'water', 1],
    ['thea', 'ythee', 2],
    ['12345', '567', 5],
    ['wayter', 'water', 1],
    ['var', 'abc', 3]
  ];

  for (const [first, second, maxEdit] of acceptedCases) {
    t.is(LevenshteinDistanceWithMax(first, second, maxEdit), true);
  }
});
15 changes: 15 additions & 0 deletions tests/parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,21 @@ test('validateVar', t => {
t.is(drawRange(text, err.index), raw);
});

// Verifies that a misspelled metadata key (`@advance`) is reported together
// with a suggestion for the closest known key (`@advanced`).
test('suggestive metadata', t => {
// NOTE(review): the `|` in the fixture presumably marks the expected error
// position that extractRange removes and records in `raw` — confirm against
// that helper.
const {text, raw} = extractRange(`
/* ==UserStyle==
|@advance color font-color "Font color" #ffffff
==/UserStyle== */
`);

// NOTE(review): `unknownKey: 'throw'` presumably makes the parser throw on
// unrecognised keys instead of ignoring them — confirm against parse options.
const error = t.throws(() => {
parse(text, {unknownKey: 'throw', mandatoryKeys: []});
});
// The message carries the suggestion, and the suggested key is the second
// entry of the error's args.
t.is(error.message, 'Unknown metadata: @advance, did you mean @advanced?');
t.is(error.args[1], 'advanced');
// The reported index must point at the position marked in the fixture.
t.is(drawRange(text, error.index), raw);
});

test('allowErrors', t => {
const parser = createParser({
allowErrors: true
Expand Down

0 comments on commit 19ea62a

Please sign in to comment.