diff --git a/src/match/url-match.ts b/src/match/url-match.ts
index ddccb3a3..ab728405 100644
--- a/src/match/url-match.ts
+++ b/src/match/url-match.ts
@@ -1,5 +1,5 @@
import { AbstractMatch, AbstractMatchConfig } from './abstract-match';
-import { httpSchemePrefixRe } from '../parser/uri-utils';
+import { directionalCharRe, httpSchemePrefixRe } from '../parser/uri-utils';
import type { StripPrefixConfigObj } from '../autolinker';
/**
@@ -162,7 +162,7 @@ export class UrlMatch extends AbstractMatch {
public getAnchorHref(): string {
let url = this.getUrl();
- return url.replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html
+ return url.replace(new RegExp(directionalCharRe.source, 'g'), encodeURIComponent).replace(/&amp;/g, '&'); // percent-encode EVERY directional char (directionalCharRe has no `g` flag, so used directly it would only rewrite the first one), then convert any &amp;'s in the URL back to '&' in case they were displayed as &amp; in the source html
}
/**
@@ -189,7 +189,7 @@ export class UrlMatch extends AbstractMatch {
if (this.decodePercentEncoding) {
anchorText = removePercentEncoding(anchorText);
}
- return anchorText;
+ return anchorText.replace(directionalCharRe, encodeURIComponent);
}
}
diff --git a/src/parser/parse-matches.ts b/src/parser/parse-matches.ts
index 0259ac0b..7672e222 100644
--- a/src/parser/parse-matches.ts
+++ b/src/parser/parse-matches.ts
@@ -3,6 +3,7 @@ import { UrlMatch, UrlMatchType } from '../match/url-match';
import { Match } from '../match/match';
import { remove, assertNever } from '../utils';
import {
+ isDirectionalChar,
httpSchemeRe,
isDomainLabelChar,
isDomainLabelStartChar,
@@ -411,7 +412,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] {
} else if (isUrlSuffixStartChar(char)) {
// '/', '?', or '#'
stateMachine.state = State.Path;
- } else if (isDomainLabelChar(char)) {
+ } else if (isDomainLabelChar(char) || isDirectionalChar(char)) {
// Stay in the DomainLabelChar state
} else {
// Anything else, end the domain name
diff --git a/src/parser/uri-utils.ts b/src/parser/uri-utils.ts
index 8cd34f7c..b9c5f641 100644
--- a/src/parser/uri-utils.ts
+++ b/src/parser/uri-utils.ts
@@ -72,6 +72,8 @@ export const schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;
// See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
export const tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars
+export const directionalCharRe = /[\u202a-\u202e\u200e-\u200f]/; // one Unicode directional formatting char: U+200E-U+200F (LRM, RLM) or U+202A-U+202E (LRE, RLE, PDF, LRO, RLO); no `g` flag, so `.test()` is stateless but `.replace()` with it only rewrites the first match
+
/**
* Determines if the given character may start a scheme (ex: 'http').
*/
@@ -110,6 +112,14 @@ export function isDomainLabelChar(char: string): boolean {
return char === '_' || isDomainLabelStartChar(char);
}
+/**
+ * Returns `true` when `char` contains a character matched by `directionalCharRe` (a Unicode directional formatting character).
+ * Background: https://github.com/gregjacobs/Autolinker.js/issues/377
+ */
+export function isDirectionalChar(char: string): boolean {
+ return char.search(directionalCharRe) !== -1;
+}
+
/**
* Determines if the character is a path character ("pchar") as defined by
* https://tools.ietf.org/html/rfc3986#appendix-A
diff --git a/tests/autolinker-url.spec.ts b/tests/autolinker-url.spec.ts
index 643b62bf..620196fd 100644
--- a/tests/autolinker-url.spec.ts
+++ b/tests/autolinker-url.spec.ts
@@ -1032,6 +1032,38 @@ describe('Autolinker Url Matching >', () => {
});
});
+ describe('unicode exploits', () => { // directional-override link spoofing, see https://github.com/gregjacobs/Autolinker.js/issues/377
+ it('directional change characters should be percent-encoded in, or excluded from, the generated link', () => { // NOTE(review): expected markup assumes the shared `autolinker` fixture is built with `newWindow: false` - confirm
+ expect(autolinker.link('foo.combar.com')).toBe(
+ '<a href="http://foo.combar.com">foo.combar.com</a>'
+ );
+ expect(autolinker.link('foo.com\u202Ebar.com')).toBe(
+ '<a href="http://foo.com%E2%80%AEbar.com">foo.com%E2%80%AEbar.com</a>'
+ );
+ expect(autolinker.link('foo.com\u202abar.com')).toBe(
+ '<a href="http://foo.com%E2%80%AAbar.com">foo.com%E2%80%AAbar.com</a>'
+ );
+ expect(autolinker.link('foo.com\u202bbar.com')).toBe(
+ '<a href="http://foo.com%E2%80%ABbar.com">foo.com%E2%80%ABbar.com</a>'
+ );
+ expect(autolinker.link('foo.com\u202cbar.com')).toBe(
+ '<a href="http://foo.com%E2%80%ACbar.com">foo.com%E2%80%ACbar.com</a>'
+ );
+ expect(autolinker.link('foo.com\u202dbar.com')).toBe(
+ '<a href="http://foo.com%E2%80%ADbar.com">foo.com%E2%80%ADbar.com</a>'
+ );
+ expect(autolinker.link('foo.com\u202ebar.com')).toBe(
+ '<a href="http://foo.com%E2%80%AEbar.com">foo.com%E2%80%AEbar.com</a>'
+ );
+ expect(autolinker.link('foo.com/?query\u202etest')).toBe( // the raw U+202E in the expected text means the link must end before it
+ '<a href="http://foo.com/?query">foo.com/?query</a>\u202Etest'
+ );
+ expect(autolinker.link('foo.com/#query\u202etest')).toBe( // same for the fragment: the directional char and what follows stay plain text
+ '<a href="http://foo.com/#query">foo.com/#query</a>\u202Etest'
+ );
+ });
+ });
+
function generateCombinationTests({
schemes,
hosts,