Skip to content

Commit

Permalink
feat(unicode): switch to NFKC normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink authored and orangejulius committed Nov 7, 2019
1 parent fc999ff commit 3b4abdf
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion helper/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ function normalize(str) {
if(!_.isString(str)){ return str; }

return str
- .normalize('NFC')
+ .normalize('NFKC')
.replace(CONTROL_CODES, '')
.replace(ALTERNATE_SPACES, ' ')
.replace(MISC_UNSUPPORTED_SYMBOLS, '')
Expand Down
9 changes: 8 additions & 1 deletion test/unit/helper/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ module.exports.tests = {};

module.exports.tests.normalize = function (test) {
const norm = unicode.normalize;
- test('normalize: NFC', function (t) {
+ test('normalize: NFKC', function (t) {
let decomposed = String.fromCharCode(105) + String.fromCharCode(776);
let composed = String.fromCharCode(239);
t.equal(norm(decomposed), composed);
t.equal(norm(composed), composed);
t.end();
});
+ test('normalize: NFKC', function (t) {
+ let decomposed = '²';
+ let composed = '2';
+ t.equal(norm(decomposed), composed);
+ t.equal(norm(composed), composed);
+ t.end();
+ });
test('normalize: remove control codes', function (t) {
t.equal(norm('a\u0000b\u001Fc'), 'abc');
t.equal(norm('a\u007Fb\u007Fc'), 'abc');
Expand Down

0 comments on commit 3b4abdf

Please sign in to comment.