From fd98d047f07b35528d973f47a5d2b915be66f065 Mon Sep 17 00:00:00 2001 From: Pawel Czekaj Date: Thu, 8 Sep 2022 09:31:20 +0200 Subject: [PATCH 1/2] fix: prevent linking text with directional change char --- src/parser/parse-matches.ts | 3 ++- src/parser/uri-utils.ts | 14 ++++++++++++++ tests/autolinker-url.spec.ts | 7 +++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/parser/parse-matches.ts b/src/parser/parse-matches.ts index 0259ac0b..99feddc1 100644 --- a/src/parser/parse-matches.ts +++ b/src/parser/parse-matches.ts @@ -3,6 +3,7 @@ import { UrlMatch, UrlMatchType } from '../match/url-match'; import { Match } from '../match/match'; import { remove, assertNever } from '../utils'; import { + hasDirectionalChar, httpSchemeRe, isDomainLabelChar, isDomainLabelStartChar, @@ -411,7 +412,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] { } else if (isUrlSuffixStartChar(char)) { // '/', '?', or '#' stateMachine.state = State.Path; - } else if (isDomainLabelChar(char)) { + } else if (isDomainLabelChar(char) || hasDirectionalChar(char)) { // Stay in the DomainLabelChar state } else { // Anything else, end the domain name diff --git a/src/parser/uri-utils.ts b/src/parser/uri-utils.ts index 8cd34f7c..dfff1501 100644 --- a/src/parser/uri-utils.ts +++ b/src/parser/uri-utils.ts @@ -110,6 +110,10 @@ export function isDomainLabelChar(char: string): boolean { return char === '_' || isDomainLabelStartChar(char); } +export function hasDirectionalChar(char: string) { + return /[\u202a-\u202e\u200e-\u200f]/g.test(char); +} + /** * Determines if the character is a path character ("pchar") as defined by * https://tools.ietf.org/html/rfc3986#appendix-A @@ -168,6 +172,11 @@ export function isValidSchemeUrl(url: string): boolean { return false; } + // If url contains directional change character prevent it from linking + if (hasDirectionalChar(url)) { + return false; + } + const isAuthorityMatch = !!schemeMatch![1]; const host = schemeMatch![2]; if (isAuthorityMatch) { @@ -213,6 +222,11 @@ export function isValidTldMatch(url: string): boolean { return false; } + // If url contains directional change character prevent it from linking + if (hasDirectionalChar(url)) { + return false; + } + const tld = hostLabels[hostLabels.length - 1]; if (!isKnownTld(tld)) { return false; diff --git a/tests/autolinker-url.spec.ts b/tests/autolinker-url.spec.ts index 643b62bf..1376f1b1 100644 --- a/tests/autolinker-url.spec.ts +++ b/tests/autolinker-url.spec.ts @@ -1032,6 +1032,13 @@ describe('Autolinker Url Matching >', () => { }); }); + describe('unicode exploits', () => { + fit('text with directional change characters should not be linked', () => { + expect(autolinker.link('foo.com\u202Ebar.com')).toBe('foo.com\u202Ebar.com'); + expect(autolinker.link('foo.com\u202Emoc.rab')).toBe('foo.com\u202Emoc.rab'); + }); + }); + function generateCombinationTests({ schemes, hosts, From 1ae8bd075aa945a84b0215dab1f1b9ac744a604b Mon Sep 17 00:00:00 2001 From: Pawel Czekaj Date: Fri, 9 Sep 2022 06:54:44 +0200 Subject: [PATCH 2/2] fix: url encode directional characters --- src/match/url-match.ts | 6 +++--- src/parser/parse-matches.ts | 4 ++-- src/parser/uri-utils.ts | 20 ++++++++------------ tests/autolinker-url.spec.ts | 31 ++++++++++++++++++++++++++++--- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/match/url-match.ts b/src/match/url-match.ts index ddccb3a3..ab728405 100644 --- a/src/match/url-match.ts +++ b/src/match/url-match.ts @@ -1,5 +1,5 @@ import { AbstractMatch, AbstractMatchConfig } from './abstract-match'; -import { httpSchemePrefixRe } from '../parser/uri-utils'; +import { directionalCharRe, httpSchemePrefixRe } from '../parser/uri-utils'; import type { StripPrefixConfigObj } from '../autolinker'; /** @@ -162,7 +162,7 @@ export class UrlMatch extends AbstractMatch { public getAnchorHref(): string { let url = this.getUrl(); - return url.replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html + return url.replace(directionalCharRe, encodeURIComponent).replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html } /** @@ -189,7 +189,7 @@ export class UrlMatch extends AbstractMatch { if (this.decodePercentEncoding) { anchorText = removePercentEncoding(anchorText); } - return anchorText; + return anchorText.replace(directionalCharRe, encodeURIComponent); } } diff --git a/src/parser/parse-matches.ts b/src/parser/parse-matches.ts index 99feddc1..7672e222 100644 --- a/src/parser/parse-matches.ts +++ b/src/parser/parse-matches.ts @@ -3,7 +3,7 @@ import { UrlMatch, UrlMatchType } from '../match/url-match'; import { Match } from '../match/match'; import { remove, assertNever } from '../utils'; import { - hasDirectionalChar, + isDirectionalChar, httpSchemeRe, isDomainLabelChar, isDomainLabelStartChar, @@ -412,7 +412,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] { } else if (isUrlSuffixStartChar(char)) { // '/', '?', or '#' stateMachine.state = State.Path; - } else if (isDomainLabelChar(char) || hasDirectionalChar(char)) { + } else if (isDomainLabelChar(char) || isDirectionalChar(char)) { // Stay in the DomainLabelChar state } else { // Anything else, end the domain name diff --git a/src/parser/uri-utils.ts b/src/parser/uri-utils.ts index dfff1501..b9c5f641 100644 --- a/src/parser/uri-utils.ts +++ b/src/parser/uri-utils.ts @@ -72,6 +72,8 @@ export const schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/; // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology export const tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars +export const directionalCharRe = /[\u202a-\u202e\u200e-\u200f]/; + /** * Determines if the given character may start a scheme (ex: 'http'). */ @@ -110,8 +112,12 @@ export function isDomainLabelChar(char: string): boolean { return char === '_' || isDomainLabelStartChar(char); } -export function hasDirectionalChar(char: string) { - return /[\u202a-\u202e\u200e-\u200f]/g.test(char); +/** + * Detects directional change character + * https://github.com/gregjacobs/Autolinker.js/issues/377 + */ +export function isDirectionalChar(char: string): boolean { + return directionalCharRe.test(char); } /** @@ -172,11 +178,6 @@ export function isValidSchemeUrl(url: string): boolean { return false; } - // If url contains directional change character prevent it from linking - if (hasDirectionalChar(url)) { - return false; - } - const isAuthorityMatch = !!schemeMatch![1]; const host = schemeMatch![2]; if (isAuthorityMatch) { @@ -222,11 +223,6 @@ export function isValidTldMatch(url: string): boolean { return false; } - // If url contains directional change character prevent it from linking - if (hasDirectionalChar(url)) { - return false; - } - const tld = hostLabels[hostLabels.length - 1]; if (!isKnownTld(tld)) { return false; diff --git a/tests/autolinker-url.spec.ts b/tests/autolinker-url.spec.ts index 1376f1b1..620196fd 100644 --- a/tests/autolinker-url.spec.ts +++ b/tests/autolinker-url.spec.ts @@ -1033,9 +1033,34 @@ describe('Autolinker Url Matching >', () => { }); describe('unicode exploits', () => { - fit('text with directional change characters should not be linked', () => { - expect(autolinker.link('foo.com\u202Ebar.com')).toBe('foo.com\u202Ebar.com'); - expect(autolinker.link('foo.com\u202Emoc.rab')).toBe('foo.com\u202Emoc.rab'); + it('text with directional change characters should not be linked', () => { + expect(autolinker.link('foo.combar.com')).toBe( + 'foo.combar.com' + ); + expect(autolinker.link('foo.com\u202Ebar.com')).toBe( + 'foo.com%E2%80%AEbar.com' + ); + expect(autolinker.link('foo.com\u202abar.com')).toBe( + 'foo.com%E2%80%AAbar.com' + ); + expect(autolinker.link('foo.com\u202bbar.com')).toBe( + 'foo.com%E2%80%ABbar.com' + ); + expect(autolinker.link('foo.com\u202cbar.com')).toBe( + 'foo.com%E2%80%ACbar.com' + ); + expect(autolinker.link('foo.com\u202dbar.com')).toBe( + 'foo.com%E2%80%ADbar.com' + ); + expect(autolinker.link('foo.com\u202ebar.com')).toBe( + 'foo.com%E2%80%AEbar.com' + ); + expect(autolinker.link('foo.com/?query\u202etest')).toBe( + 'foo.com/?query\u202Etest' + ); + expect(autolinker.link('foo.com/#query\u202etest')).toBe( + 'foo.com/#query\u202Etest' + ); }); });