From e3c3d389845334aac7f2109df08b72b9b628f656 Mon Sep 17 00:00:00 2001
From: Bill Mill
Date: Wed, 14 Aug 2024 16:32:47 -0400
Subject: [PATCH] fix: limit match length of email regular expression

closes #8
---
 lib/index.js | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib/index.js b/lib/index.js
index d8399da..da5af23 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -141,7 +141,22 @@ function transformGfmAutolinkLiterals(tree) {
     tree,
     [
       [/(https?:\/\/|www(?=\.))([-.\w]+)([^ \t\r\n]*)/gi, findUrl],
-      [/([-.\w+]+)@([-\w]+(?:\.[-\w]+)+)/g, findEmail]
+      // Use limited buffer sizes instead of `+` to avoid pathological regular
+      // expression behavior; see
+      // https://github.com/syntax-tree/mdast-util-gfm-autolink-literal/issues/8
+      //
+      // limits on email addresses:
+      //
+      // In addition to restrictions on syntax, there is a length limit on
+      // email addresses. That limit is a maximum of 64 characters (octets)
+      // in the "local part" (before the "@") and a maximum of 255 characters
+      // (octets) in the domain part (after the "@") for a total length of 320
+      // characters. However, there is a restriction in RFC 2821 on the length of an
+      // address in MAIL and RCPT commands of 254 characters. Since addresses
+      // that do not fit in those fields are not normally useful, the upper
+      // limit on address lengths should normally be considered to be 254.
+      // - http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
+      [/([-.\w+]{1,64})@([-\w]{1,255}(?:\.[-\w]{1,255}){1,255})/g, findEmail]
     ],
     {ignore: ['link', 'linkReference']}
   )