diff --git a/src/match/url-match.ts b/src/match/url-match.ts index ddccb3a3..ab728405 100644 --- a/src/match/url-match.ts +++ b/src/match/url-match.ts @@ -1,5 +1,5 @@ import { AbstractMatch, AbstractMatchConfig } from './abstract-match'; -import { httpSchemePrefixRe } from '../parser/uri-utils'; +import { directionalCharRe, httpSchemePrefixRe } from '../parser/uri-utils'; import type { StripPrefixConfigObj } from '../autolinker'; /** @@ -162,7 +162,7 @@ export class UrlMatch extends AbstractMatch { public getAnchorHref(): string { let url = this.getUrl(); - return url.replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html + return url.replace(directionalCharRe, encodeURIComponent).replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html } /** @@ -189,7 +189,7 @@ export class UrlMatch extends AbstractMatch { if (this.decodePercentEncoding) { anchorText = removePercentEncoding(anchorText); } - return anchorText; + return anchorText.replace(directionalCharRe, encodeURIComponent); } } diff --git a/src/parser/parse-matches.ts b/src/parser/parse-matches.ts index 0259ac0b..7672e222 100644 --- a/src/parser/parse-matches.ts +++ b/src/parser/parse-matches.ts @@ -3,6 +3,7 @@ import { UrlMatch, UrlMatchType } from '../match/url-match'; import { Match } from '../match/match'; import { remove, assertNever } from '../utils'; import { + isDirectionalChar, httpSchemeRe, isDomainLabelChar, isDomainLabelStartChar, @@ -411,7 +412,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] { } else if (isUrlSuffixStartChar(char)) { // '/', '?', or '#' stateMachine.state = State.Path; - } else if (isDomainLabelChar(char)) { + } else if (isDomainLabelChar(char) || isDirectionalChar(char)) { // Stay in the DomainLabelChar state } else { // Anything else, end the domain name diff --git a/src/parser/uri-utils.ts b/src/parser/uri-utils.ts index 8cd34f7c..b9c5f641 100644 --- a/src/parser/uri-utils.ts +++ b/src/parser/uri-utils.ts @@ -72,6 +72,8 @@ export const schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/; // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology export const tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars +export const directionalCharRe = /[\u202a-\u202e\u200e-\u200f]/; + /** * Determines if the given character may start a scheme (ex: 'http'). */ @@ -110,6 +112,14 @@ export function isDomainLabelChar(char: string): boolean { return char === '_' || isDomainLabelStartChar(char); } +/** + * Detects directional change character + * https://github.com/gregjacobs/Autolinker.js/issues/377 + */ +export function isDirectionalChar(char: string): boolean { + return directionalCharRe.test(char); +} + /** * Determines if the character is a path character ("pchar") as defined by * https://tools.ietf.org/html/rfc3986#appendix-A diff --git a/tests/autolinker-url.spec.ts b/tests/autolinker-url.spec.ts index 643b62bf..620196fd 100644 --- a/tests/autolinker-url.spec.ts +++ b/tests/autolinker-url.spec.ts @@ -1032,6 +1032,38 @@ describe('Autolinker Url Matching >', () => { }); }); + describe('unicode exploits', () => { + it('text with directional change characters should not be linked', () => { + expect(autolinker.link('foo.combar.com')).toBe( + 'foo.combar.com' + ); + expect(autolinker.link('foo.com\u202Ebar.com')).toBe( + 'foo.com%E2%80%AEbar.com' + ); + expect(autolinker.link('foo.com\u202abar.com')).toBe( + 'foo.com%E2%80%AAbar.com' + ); + expect(autolinker.link('foo.com\u202bbar.com')).toBe( + 'foo.com%E2%80%ABbar.com' + ); + expect(autolinker.link('foo.com\u202cbar.com')).toBe( + 'foo.com%E2%80%ACbar.com' + ); + expect(autolinker.link('foo.com\u202dbar.com')).toBe( + 'foo.com%E2%80%ADbar.com' + ); + expect(autolinker.link('foo.com\u202ebar.com')).toBe( + 'foo.com%E2%80%AEbar.com' + ); + expect(autolinker.link('foo.com/?query\u202etest')).toBe( + 'foo.com/?query\u202Etest' + ); + expect(autolinker.link('foo.com/#query\u202etest')).toBe( + 'foo.com/#query\u202Etest' + ); + }); + }); + function generateCombinationTests({ schemes, hosts,