diff --git a/src/parser/parse-matches.ts b/src/parser/parse-matches.ts
index 0259ac0b..99feddc1 100644
--- a/src/parser/parse-matches.ts
+++ b/src/parser/parse-matches.ts
@@ -3,6 +3,7 @@ import { UrlMatch, UrlMatchType } from '../match/url-match';
 import { Match } from '../match/match';
 import { remove, assertNever } from '../utils';
 import {
+    hasDirectionalChar,
     httpSchemeRe,
     isDomainLabelChar,
     isDomainLabelStartChar,
@@ -411,7 +412,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] {
         } else if (isUrlSuffixStartChar(char)) {
             // '/', '?', or '#'
             stateMachine.state = State.Path;
-        } else if (isDomainLabelChar(char)) {
+        } else if (isDomainLabelChar(char) || hasDirectionalChar(char)) {
             // Stay in the DomainLabelChar state
         } else {
             // Anything else, end the domain name
diff --git a/src/parser/uri-utils.ts b/src/parser/uri-utils.ts
index 8cd34f7c..dfff1501 100644
--- a/src/parser/uri-utils.ts
+++ b/src/parser/uri-utils.ts
@@ -110,6 +110,28 @@ export function isDomainLabelChar(char: string): boolean {
     return char === '_' || isDomainLabelStartChar(char);
 }
 
+/**
+ * Matches any Unicode bidirectional control character (the `Bidi_Control`
+ * property): ALM (U+061C), LRM/RLM (U+200E-U+200F), the embeddings and
+ * overrides LRE/RLE/PDF/LRO/RLO (U+202A-U+202E), and the isolates
+ * LRI/RLI/FSI/PDI (U+2066-U+2069).
+ *
+ * Deliberately has no `g` flag so that `test()` never depends on `lastIndex`.
+ */
+const directionalCharRe = /[\u061c\u200e\u200f\u202a-\u202e\u2066-\u2069]/;
+
+/**
+ * Determines if the given text contains a bidirectional control character.
+ * Such characters can visually reorder the displayed text (e.g.
+ * 'foo.com\u202Emoc.rab'), so URLs containing them must not be autolinked.
+ *
+ * Accepts either a single character (as used by the parser's state machine)
+ * or a whole string (as used by the URL validity checks).
+ */
+export function hasDirectionalChar(char: string): boolean {
+    return directionalCharRe.test(char);
+}
+
 /**
  * Determines if the character is a path character ("pchar") as defined by
  * https://tools.ietf.org/html/rfc3986#appendix-A
@@ -168,6 +190,11 @@ export function isValidSchemeUrl(url: string): boolean {
         return false;
     }
 
+    // If the url contains a directional change character, prevent it from linking
+    if (hasDirectionalChar(url)) {
+        return false;
+    }
+
     const isAuthorityMatch = !!schemeMatch![1];
     const host = schemeMatch![2];
     if (isAuthorityMatch) {
@@ -213,6 +240,11 @@ export function isValidTldMatch(url: string): boolean {
         return false;
     }
 
+    // If the url contains a directional change character, prevent it from linking
+    if (hasDirectionalChar(url)) {
+        return false;
+    }
+
     const tld = hostLabels[hostLabels.length - 1];
     if (!isKnownTld(tld)) {
         return false;
diff --git a/tests/autolinker-url.spec.ts b/tests/autolinker-url.spec.ts
index 643b62bf..1376f1b1 100644
--- a/tests/autolinker-url.spec.ts
+++ b/tests/autolinker-url.spec.ts
@@ -1032,6 +1032,14 @@ describe('Autolinker Url Matching >', () => {
         });
     });
 
+    describe('unicode exploits', () => {
+        it('text with directional change characters should not be linked', () => {
+            expect(autolinker.link('foo.com\u202Ebar.com')).toBe('foo.com\u202Ebar.com');
+            expect(autolinker.link('foo.com\u202Emoc.rab')).toBe('foo.com\u202Emoc.rab');
+            expect(autolinker.link('foo.com\u2066bar.com')).toBe('foo.com\u2066bar.com');
+        });
+    });
+
     function generateCombinationTests({
         schemes,
         hosts,