Skip to content

Commit 8afa90d

Browse files
committed
Add the pyIsEmail test cases
1 parent dad7b6c commit 8afa90d

File tree

2 files changed

+209
-6
lines changed

2 files changed

+209
-6
lines changed

Diff for: README.md

+10-6
Original file line numberDiff line numberDiff line change
@@ -410,14 +410,18 @@ or likely to cause trouble:
410410
checks are turned on.
411411
Most [Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml)
412412
and their subdomains are considered invalid (except see
413-
the `test_environment` parameter above).
414-
* The "quoted string" form of the local part of the email address (RFC
415-
5321 4.1.2) is not permitted --- no one uses this anymore anyway.
413+
the `test_environment` parameter above), if deliverability checks are
414+
turned on. Domain names without a `.` are rejected as a syntax error
415+
since no one has an email address directly at a TLD, and a missing
416+
TLD is a common user error.
417+
* Obsolete email syntaxes are rejected:
418+
The "quoted string" form of the local part of the email address (RFC
419+
5321 4.1.2) is not permitted.
416420
Quoted forms allow multiple @-signs, space characters, and other
417421
troublesome conditions. The unusual [(comment) syntax](https://github.com/JoshData/python-email-validator/issues/77)
418-
in email addresses is also rejected.
419-
* The "literal" form for the domain part of an email address (an
420-
IP address) is not accepted --- no one uses this anymore anyway.
422+
is also rejected. The "literal" form for the domain part of an email address (an
423+
IP address in brackets) is rejected. Other obsolete and deprecated syntaxes are
424+
rejected. No one uses these forms anymore.
421425

422426
Testing
423427
-------

Diff for: tests/test_main.py

+199
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,205 @@ def test_email_test_domain_name_in_test_environment():
320320
validate_email("[email protected]", test_environment=True)
321321

322322

323+
# This is the pyIsEmail (https://github.com/michaelherold/pyIsEmail) test suite.
324+
#
325+
# The test data was extracted by:
326+
#
327+
# $ wget https://raw.githubusercontent.com/michaelherold/pyIsEmail/master/tests/data/tests.xml
328+
# $ xmllint --xpath '/tests/test/address/text()' tests.xml > t1
329+
# $ xmllint --xpath "/tests/test[not(address='')]/diagnosis/text()" tests.xml > t2
330+
#
331+
# tests = []
332+
# def fixup_char(c):
333+
# if ord(c) >= 0x2400 and ord(c) <= 0x2432:
334+
# c = chr(ord(c)-0x2400)
335+
# return c
336+
# for email, diagnosis in zip(open("t1"), open("t2")):
337+
# email = email[:-1] # strip trailing \n but not more because trailing whitespace is significant
338+
# email = "".join(fixup_char(c) for c in email).replace("&amp;", "&")
339+
# tests.append([email, diagnosis.strip()])
340+
# print(repr(tests).replace("'], ['", "'],\n['"))
341+
@pytest.mark.parametrize(
342+
('email_input', 'status'),
343+
[
344+
['test', 'ISEMAIL_ERR_NODOMAIN'],
345+
['@', 'ISEMAIL_ERR_NOLOCALPART'],
346+
['test@', 'ISEMAIL_ERR_NODOMAIN'],
347+
# ['test@io', 'ISEMAIL_VALID'], # we reject domains without a dot, knowing they are not deliverable
348+
['@io', 'ISEMAIL_ERR_NOLOCALPART'],
349+
['@iana.org', 'ISEMAIL_ERR_NOLOCALPART'],
350+
['[email protected]', 'ISEMAIL_VALID'],
351+
['[email protected]', 'ISEMAIL_VALID'],
352+
['[email protected]', 'ISEMAIL_VALID'],
353+
['[email protected]', 'ISEMAIL_VALID'],
354+
['[email protected]', 'ISEMAIL_VALID'],
355+
['[email protected]', 'ISEMAIL_ERR_DOT_START'],
356+
['[email protected]', 'ISEMAIL_ERR_DOT_END'],
357+
['test..iana.org', 'ISEMAIL_ERR_CONSECUTIVEDOTS'],
358+
['test_exa-mple.com', 'ISEMAIL_ERR_NODOMAIN'],
359+
['!#$%&`*+/=?^`{|}[email protected]', 'ISEMAIL_VALID'],
360+
['test\\@[email protected]', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
361+
['[email protected]', 'ISEMAIL_VALID'],
362+
['[email protected]', 'ISEMAIL_VALID'],
363+
['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org', 'ISEMAIL_VALID'],
364+
['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklmn@iana.org', 'ISEMAIL_RFC5322_LOCAL_TOOLONG'],
365+
['test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm.com', 'ISEMAIL_RFC5322_LABEL_TOOLONG'],
366+
['[email protected]', 'ISEMAIL_VALID'],
367+
['[email protected]', 'ISEMAIL_ERR_DOMAINHYPHENSTART'],
368+
['[email protected]', 'ISEMAIL_ERR_DOMAINHYPHENEND'],
369+
['[email protected]', 'ISEMAIL_VALID'],
370+
['[email protected]', 'ISEMAIL_ERR_DOT_START'],
371+
['[email protected].', 'ISEMAIL_ERR_DOT_END'],
372+
['[email protected]', 'ISEMAIL_ERR_CONSECUTIVEDOTS'],
373+
['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghij', 'ISEMAIL_RFC5322_TOOLONG'],
374+
['a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hij', 'ISEMAIL_RFC5322_TOOLONG'],
375+
['a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hijk', 'ISEMAIL_RFC5322_DOMAIN_TOOLONG'],
376+
['"test"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'],
377+
['""@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'],
378+
['"""@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
379+
['"\\a"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'],
380+
['"\\""@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'],
381+
['"\\"@iana.org', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'],
382+
['"\\\\"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'],
383+
['test"@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
384+
['"[email protected]', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'],
385+
['"test"[email protected]', 'ISEMAIL_ERR_ATEXT_AFTER_QS'],
386+
['test"text"@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
387+
['"test""test"@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
388+
['"test"."test"@iana.org', 'ISEMAIL_DEPREC_LOCALPART'],
389+
['"test\\ test"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'],
390+
['"test"[email protected]', 'ISEMAIL_DEPREC_LOCALPART'],
391+
['"test\x00"@iana.org', 'ISEMAIL_ERR_EXPECTING_QTEXT'],
392+
['"test\\\x00"@iana.org', 'ISEMAIL_DEPREC_QP'],
393+
['"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghj"@iana.org', 'ISEMAIL_RFC5322_LOCAL_TOOLONG'],
394+
['"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefg\\h"@iana.org', 'ISEMAIL_RFC5322_LOCAL_TOOLONG'],
395+
['test@[255.255.255.255]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
396+
['test@a[255.255.255.255]', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
397+
['test@[255.255.255]', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
398+
['test@[255.255.255.255.255]', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
399+
['test@[255.255.255.256]', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
400+
['test@[1111:2222:3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
401+
['test@[IPv6:1111:2222:3333:4444:5555:6666:7777]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'],
402+
['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
403+
['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'],
404+
['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:888G]', 'ISEMAIL_RFC5322_IPV6_BADCHAR'],
405+
['test@[IPv6:1111:2222:3333:4444:5555:6666::8888]', 'ISEMAIL_RFC5321_IPV6DEPRECATED'],
406+
['test@[IPv6:1111:2222:3333:4444:5555::8888]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
407+
['test@[IPv6:1111:2222:3333:4444:5555:6666::7777:8888]', 'ISEMAIL_RFC5322_IPV6_MAXGRPS'],
408+
['test@[IPv6::3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5322_IPV6_COLONSTRT'],
409+
['test@[IPv6:::3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
410+
['test@[IPv6:1111::4444:5555::8888]', 'ISEMAIL_RFC5322_IPV6_2X2XCOLON'],
411+
['test@[IPv6:::]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
412+
['test@[IPv6:1111:2222:3333:4444:5555:255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'],
413+
['test@[IPv6:1111:2222:3333:4444:5555:6666:255.255.255.255]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
414+
['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'],
415+
['test@[IPv6:1111:2222:3333:4444::255.255.255.255]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'],
416+
['test@[IPv6:1111:2222:3333:4444:5555:6666::255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_MAXGRPS'],
417+
['test@[IPv6:1111:2222:3333:4444:::255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_2X2XCOLON'],
418+
['test@[IPv6::255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_COLONSTRT'],
419+
[' test @iana.org', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'],
420+
['test@ iana .com', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'],
421+
['test . [email protected]', 'ISEMAIL_DEPREC_FWS'],
422+
['\r\n [email protected]', 'ISEMAIL_CFWS_FWS'],
423+
['\r\n \r\n [email protected]', 'ISEMAIL_DEPREC_FWS'],
424+
['(comment)[email protected]', 'ISEMAIL_CFWS_COMMENT'],
425+
['((comment)[email protected]', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'],
426+
['(comment(comment))[email protected]', 'ISEMAIL_CFWS_COMMENT'],
427+
['test@(comment)iana.org', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'],
428+
['test(comment)[email protected]', 'ISEMAIL_ERR_ATEXT_AFTER_CFWS'],
429+
['test@(comment)[255.255.255.255]', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'],
430+
['(comment)abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org', 'ISEMAIL_CFWS_COMMENT'],
431+
['test@(comment)abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.com', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'],
432+
['(comment)test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghik.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghik.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstu', 'ISEMAIL_CFWS_COMMENT'],
433+
['[email protected]\n', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
434+
['[email protected]', 'ISEMAIL_VALID'],
435+
['[email protected]', 'ISEMAIL_VALID'],
436+
['[email protected]', 'ISEMAIL_ERR_DOMAINHYPHENEND'],
437+
['"[email protected]', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'],
438+
['([email protected]', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'],
439+
['test@(iana.org', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'],
440+
['test@[1.2.3.4', 'ISEMAIL_ERR_UNCLOSEDDOMLIT'],
441+
['"test\\"@iana.org', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'],
442+
['(comment\\)[email protected]', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'],
443+
['[email protected](comment\\)', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'],
444+
['[email protected](comment\\', 'ISEMAIL_ERR_BACKSLASHEND'],
445+
['test@[RFC-5322-domain-literal]', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
446+
['test@[RFC-5322]-domain-literal]', 'ISEMAIL_ERR_ATEXT_AFTER_DOMLIT'],
447+
['test@[RFC-5322-[domain-literal]', 'ISEMAIL_ERR_EXPECTING_DTEXT'],
448+
['test@[RFC-5322-\\\x07-domain-literal]', 'ISEMAIL_RFC5322_DOMLIT_OBSDTEXT'],
449+
['test@[RFC-5322-\\\t-domain-literal]', 'ISEMAIL_RFC5322_DOMLIT_OBSDTEXT'],
450+
['test@[RFC-5322-\\]-domain-literal]', 'ISEMAIL_RFC5322_DOMLIT_OBSDTEXT'],
451+
['test@[RFC-5322-domain-literal\\]', 'ISEMAIL_ERR_UNCLOSEDDOMLIT'],
452+
['test@[RFC-5322-domain-literal\\', 'ISEMAIL_ERR_BACKSLASHEND'],
453+
['test@[RFC 5322 domain literal]', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
454+
['test@[RFC-5322-domain-literal] (comment)', 'ISEMAIL_RFC5322_DOMAINLITERAL'],
455+
['\x7f@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
456+
['test@\x7f.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
457+
['"\x7f"@iana.org', 'ISEMAIL_DEPREC_QTEXT'],
458+
['"\\\x7f"@iana.org', 'ISEMAIL_DEPREC_QP'],
459+
['(\x7f)[email protected]', 'ISEMAIL_DEPREC_CTEXT'],
460+
['[email protected]\r', 'ISEMAIL_ERR_CR_NO_LF'],
461+
['\r[email protected]', 'ISEMAIL_ERR_CR_NO_LF'],
462+
['"\rtest"@iana.org', 'ISEMAIL_ERR_CR_NO_LF'],
463+
['(\r)[email protected]', 'ISEMAIL_ERR_CR_NO_LF'],
464+
['[email protected](\r)', 'ISEMAIL_ERR_CR_NO_LF'],
465+
['\n[email protected]', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
466+
['"\n"@iana.org', 'ISEMAIL_ERR_EXPECTING_QTEXT'],
467+
['"\\\n"@iana.org', 'ISEMAIL_DEPREC_QP'],
468+
['(\n)[email protected]', 'ISEMAIL_ERR_EXPECTING_CTEXT'],
469+
['\x07@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
470+
['test@\x07.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'],
471+
['"\x07"@iana.org', 'ISEMAIL_DEPREC_QTEXT'],
472+
['"\\\x07"@iana.org', 'ISEMAIL_DEPREC_QP'],
473+
['(\x07)[email protected]', 'ISEMAIL_DEPREC_CTEXT'],
474+
['\r\n[email protected]', 'ISEMAIL_ERR_FWS_CRLF_END'],
475+
['\r\n \r\n[email protected]', 'ISEMAIL_ERR_FWS_CRLF_END'],
476+
[' \r\n[email protected]', 'ISEMAIL_ERR_FWS_CRLF_END'],
477+
[' \r\n [email protected]', 'ISEMAIL_CFWS_FWS'],
478+
[' \r\n \r\n[email protected]', 'ISEMAIL_ERR_FWS_CRLF_END'],
479+
[' \r\n\r\n[email protected]', 'ISEMAIL_ERR_FWS_CRLF_X2'],
480+
[' \r\n\r\n [email protected]', 'ISEMAIL_ERR_FWS_CRLF_X2'],
481+
['[email protected]\r\n ', 'ISEMAIL_CFWS_FWS'],
482+
['[email protected]\r\n \r\n ', 'ISEMAIL_DEPREC_FWS'],
483+
['[email protected]\r\n', 'ISEMAIL_ERR_FWS_CRLF_END'],
484+
['[email protected]\r\n \r\n', 'ISEMAIL_ERR_FWS_CRLF_END'],
485+
['[email protected] \r\n', 'ISEMAIL_ERR_FWS_CRLF_END'],
486+
['[email protected] \r\n ', 'ISEMAIL_CFWS_FWS'],
487+
['[email protected] \r\n \r\n', 'ISEMAIL_ERR_FWS_CRLF_END'],
488+
['[email protected] \r\n\r\n', 'ISEMAIL_ERR_FWS_CRLF_X2'],
489+
['[email protected] \r\n\r\n ', 'ISEMAIL_ERR_FWS_CRLF_X2'],
490+
[' [email protected]', 'ISEMAIL_CFWS_FWS'],
491+
['[email protected] ', 'ISEMAIL_CFWS_FWS'],
492+
['test@[IPv6:1::2:]', 'ISEMAIL_RFC5322_IPV6_COLONEND'],
493+
['"test\\©"@iana.org', 'ISEMAIL_ERR_EXPECTING_QPAIR'],
494+
['test@iana/icann.org', 'ISEMAIL_RFC5322_DOMAIN'],
495+
['test.(comment)[email protected]', 'ISEMAIL_DEPREC_COMMENT']
496+
]
497+
)
498+
def test_pyisemail_tests(email_input, status):
499+
if status == "ISEMAIL_VALID":
500+
# All standard email address forms should not raise an exception.
501+
validate_email(email_input, test_environment=True)
502+
elif "_ERR_" in status or "_TOOLONG" in status \
503+
or "_CFWS_FWS" in status or "_CFWS_COMMENT" in status \
504+
or "_IPV6" in status or status == "ISEMAIL_RFC5322_DOMAIN":
505+
# Invalid syntax, extraneous whitespace, and "(comments)" should be rejected.
506+
# The _IPV6_ diagnoses appear to represent syntactically invalid domain literals.
507+
# The ISEMAIL_RFC5322_DOMAIN diagnosis appears to be a syntactically invalid domain.
508+
with pytest.raises(EmailSyntaxError):
509+
validate_email(email_input, test_environment=True)
510+
elif "_DEPREC_" in status \
511+
or "RFC5321_QUOTEDSTRING" in status \
512+
or "DOMAINLITERAL" in status or "_DOMLIT_" in status or "_ADDRESSLITERAL" in status:
513+
# Quoted strings in the local part, domain literals (IP addresses in brackets),
514+
# and other deprecated syntax are valid email addresses and are accepted by pyIsEmail,
515+
# but we reject them.
516+
with pytest.raises(EmailSyntaxError):
517+
validate_email(email_input, test_environment=True)
518+
else:
519+
raise ValueError("status {} is not recognized".format(status))
520+
521+
323522
def test_dict_accessor():
324523
input_email = "[email protected]"
325524
valid_email = validate_email(input_email, check_deliverability=False)

0 commit comments

Comments
 (0)