From dade6b94d3591d46f260707eaa44953991cb5e37 Mon Sep 17 00:00:00 2001 From: CaedenPH Date: Sat, 29 Jul 2023 18:22:52 +0100 Subject: [PATCH 1/8] feat(strings): Create is valid email address --- strings/is_valid_email_address.py | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 strings/is_valid_email_address.py diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py new file mode 100644 index 000000000000..8ce960760400 --- /dev/null +++ b/strings/is_valid_email_address.py @@ -0,0 +1,71 @@ +""" +Implements an is valid email address algorithm + +@ https://en.wikipedia.org/wiki/Email_address +""" + + +email_tests: tuple[tuple[str, bool], ...] = ( + ("simple@example.com", True), + ("very.common@example.com", True), + ("disposable.style.email.with+symbol@example.com", True), + ("other-email-with-hyphen@and.subdomains.example.com", True), + ("fully-qualified-domain@example.com", True), + ("user.name+tag+sorting@example.com", True), + ("x@example.com", True), + ("example-indeed@strange-example.com", True), + ("test/test@test.com", True), + ( + "1234567890123456789012345678901234567890123456789012345678901234567890@example.com", + True, + ), + ("admin@mailserver1", True), + ("example@s.example", True), + ("Abc.example.com", False), + ("A@b@c@example.com", False), + ("a(c)d,e:f;gi[j\\k]l@example.com", False), + ( + "12345678901234567890123456789012345678901234567890123456789012345678901@example.com", + False, + ), + ("i.like.underscores@but_its_not_allowed_in_this_part", False), +) + +# The maximum octets (one character as a standard unicode character is one byte) +# that the local part and the domain part can have +MAX_LOCAL_PART_OCTETS = 64 +MAX_DOMAIN_OCTETS = 255 + + +def is_valid_email_address(email: str) -> bool: + """ + Returns True if the passed email address is valid. + + The local part of the email precedes the singular @ symbol and + is associated with a display-name. 
For example, "john.smith" + The domain is stricter than the local part and follows the @ symbol. + + >>> for email, valid in email_tests: + ... assert is_valid_email_address(email) is valid + """ + + # Make sure that there is only one @ symbol in the email address + if email.count("@") != 1: + return False + + local_part, domain = email.split("@") + # Check octet length of the local part and domain + if len(local_part) > MAX_LOCAL_PART_OCTETS or len(domain) > MAX_DOMAIN_OCTETS: + return False + + return True + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + + for email, _ in email_tests: + is_valid = is_valid_email_address(email) + print(f"Email address {email} is {'not' if is_valid is False else ''} valid") From c606c2f0d45bc74b6487ae0eae1a6e068e6eab45 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Sat, 29 Jul 2023 17:23:07 +0000 Subject: [PATCH 2/8] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 77938f45011b..fed50c5b220b 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -1168,6 +1168,7 @@ * [Is Pangram](strings/is_pangram.py) * [Is Spain National Id](strings/is_spain_national_id.py) * [Is Srilankan Phone Number](strings/is_srilankan_phone_number.py) + * [Is Valid Email Address](strings/is_valid_email_address.py) * [Jaro Winkler](strings/jaro_winkler.py) * [Join](strings/join.py) * [Knuth Morris Pratt](strings/knuth_morris_pratt.py) From b94f13cba85e5d72e2f41c5f6c222af63ea7e9b9 Mon Sep 17 00:00:00 2001 From: caedenph Date: Sat, 29 Jul 2023 22:44:43 +0100 Subject: [PATCH 3/8] feat(strings): Create is_valid_email_address algorithm --- strings/is_valid_email_address.py | 62 ++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py index 8ce960760400..f7aa9002ee0a 100644 --- a/strings/is_valid_email_address.py +++ 
b/strings/is_valid_email_address.py @@ -4,6 +4,8 @@ @ https://en.wikipedia.org/wiki/Email_address """ +import re +import string email_tests: tuple[tuple[str, bool], ...] = ( ("simple@example.com", True), @@ -16,16 +18,17 @@ ("example-indeed@strange-example.com", True), ("test/test@test.com", True), ( - "1234567890123456789012345678901234567890123456789012345678901234567890@example.com", + "123456789012345678901234567890123456789012345678901234567890123@example.com", True, ), ("admin@mailserver1", True), ("example@s.example", True), ("Abc.example.com", False), ("A@b@c@example.com", False), + ("abc@example..com", False), ("a(c)d,e:f;gi[j\\k]l@example.com", False), ( - "12345678901234567890123456789012345678901234567890123456789012345678901@example.com", + "12345678901234567890123456789012345678901234567890123456789012345@example.com", False, ), ("i.like.underscores@but_its_not_allowed_in_this_part", False), @@ -45,19 +48,65 @@ def is_valid_email_address(email: str) -> bool: is associated with a display-name. For example, "john.smith" The domain is stricter than the local part and follows the @ symbol. + Global email checks: + 1. There can only be one @ symbol in the email address. Technically if the + @ symbol is quoted in the local-part, then it is valid, however this + implementation ignores "" for now. + (See https://en.wikipedia.org/wiki/Email_address#:~:text=If%20quoted,) + 2. The local-part and the domain are limited to a certain number of octets. As + checks 3 and 5 only accept ASCII characters, each accepted character is one + octet. Hence, we can just check the length of the string. + Checks for the local-part: + 3. The local-part may contain: upper and lowercase latin letters, digits 0 to 9, + and printable characters (!#$%&'*+-/=?^_`{|}~) + 4. The local-part may also contain a "." in any place that is not the first or + last character, and may not have more than one "." consecutively. + + Checks for the domain: + 5.
The domain may contain: upper and lowercase latin letters and digits 0 to 9 + 6. Hyphen "-", provided that it is not the first or last character + 7. The domain may also contain a "." in any place that is not the first or + last character, and may not have more than one "." consecutively. + >>> for email, valid in email_tests: ... assert is_valid_email_address(email) is valid """ - # Make sure that there is only one @ symbol in the email address + # (1.) Make sure that there is only one @ symbol in the email address if email.count("@") != 1: return False local_part, domain = email.split("@") - # Check octet length of the local part and domain + # (2.) Check octet length of the local part and domain if len(local_part) > MAX_LOCAL_PART_OCTETS or len(domain) > MAX_DOMAIN_OCTETS: return False + # (3.) Validate the characters in the local-part + if any( + char not in string.ascii_letters + string.digits + ".(!#$%&'*+-/=?^_`{|}~)" + for char in local_part + ): + return False + + # (4.) Validate the placement of "." characters + if ( + local_part.startswith(".") + or local_part.endswith(".") + or re.search(r"\.\.+", local_part) + ): + return False + + # (5.) Validate the characters in the domain + if any(char not in string.ascii_letters + string.digits + ".-" for char in domain): + return False + + # (6.) Validate the placement of "-" characters + if domain.startswith("-") or domain.endswith("-"): + return False + + # (7.) Validate the placement of "."
characters + if domain.startswith(".") or domain.endswith(".") or re.search(r"\.\.+", domain): + return False return True @@ -66,6 +115,7 @@ def is_valid_email_address(email: str) -> bool: doctest.testmod() - for email, _ in email_tests: + for email, valid in email_tests: is_valid = is_valid_email_address(email) - print(f"Email address {email} is {'not' if is_valid is False else ''} valid") + assert is_valid == valid, f"{email} is {is_valid}" + print(f"Email address {email} is {'not ' if is_valid is False else ''}valid") From 15c54829313db2ae9690254f64b792b2af76b14d Mon Sep 17 00:00:00 2001 From: CaedenPH Date: Thu, 10 Aug 2023 15:13:36 +0100 Subject: [PATCH 4/8] chore(is_valid_email_address): Implement changes from code review --- strings/is_valid_email_address.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py index f7aa9002ee0a..fd1578a78ab1 100644 --- a/strings/is_valid_email_address.py +++ b/strings/is_valid_email_address.py @@ -4,7 +4,6 @@ @ https://en.wikipedia.org/wiki/Email_address """ -import re import string email_tests: tuple[tuple[str, bool], ...] = ( @@ -32,6 +31,7 @@ False, ), ("i.like.underscores@but_its_not_allowed_in_this_part", False), + ("", False) ) # The maximum octets (one character as a standard unicode character is one byte) @@ -69,7 +69,7 @@ def is_valid_email_address(email: str) -> bool: last character, and may not have more than one "." consecutively. >>> for email, valid in email_tests: - ... assert is_valid_email_address(email) is valid + ... assert is_valid_email_address(email) == valid """ # (1.) Make sure that there is only one @ symbol in the email address @@ -92,7 +92,7 @@ def is_valid_email_address(email: str) -> bool: if ( local_part.startswith(".") or local_part.endswith(".") - or re.search(r"\.\.+", local_part) + or ".." 
in local_part ): return False @@ -105,7 +105,7 @@ def is_valid_email_address(email: str) -> bool: return False # (7.) Validate the placement of "." characters - if domain.startswith(".") or domain.endswith(".") or re.search(r"\.\.+", domain): + if domain.startswith(".") or domain.endswith(".") or ".." in domain: return False return True @@ -118,4 +118,4 @@ def is_valid_email_address(email: str) -> bool: for email, valid in email_tests: is_valid = is_valid_email_address(email) assert is_valid == valid, f"{email} is {is_valid}" - print(f"Email address {email} is {'not ' if is_valid is False else ''}valid") + print(f"Email address {email} is {'not ' if is_valid == False else ''}valid") From 09fc6e8cb319b6505f3b0460ba1766d55b582d3e Mon Sep 17 00:00:00 2001 From: Caeden Perelli-Harris Date: Thu, 10 Aug 2023 15:14:21 +0100 Subject: [PATCH 5/8] Update strings/is_valid_email_address.py Co-authored-by: Tianyi Zheng --- strings/is_valid_email_address.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py index fd1578a78ab1..84abfa7ce2d7 100644 --- a/strings/is_valid_email_address.py +++ b/strings/is_valid_email_address.py @@ -88,7 +88,7 @@ def is_valid_email_address(email: str) -> bool: ): return False - # (4.) Validate the placement of "." characters + # (4.) Validate the placement of "." 
characters in the local-part if ( local_part.startswith(".") or local_part.endswith(".") From bfd99cbc495ddcd81b56661f679752ba0c0e7f13 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Aug 2023 14:14:55 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/is_valid_email_address.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py index 84abfa7ce2d7..a0653990f5cc 100644 --- a/strings/is_valid_email_address.py +++ b/strings/is_valid_email_address.py @@ -31,7 +31,7 @@ False, ), ("i.like.underscores@but_its_not_allowed_in_this_part", False), - ("", False) + ("", False), ) # The maximum octets (one character as a standard unicode character is one byte) @@ -89,11 +89,7 @@ def is_valid_email_address(email: str) -> bool: return False # (4.) Validate the placement of "." characters in the local-part - if ( - local_part.startswith(".") - or local_part.endswith(".") - or ".." in local_part - ): + if local_part.startswith(".") or local_part.endswith(".") or ".." in local_part: return False # (5.) 
Validate the characters in the domain From 558aa54560100cdcb0a7d55b2d4494bfbc9ba042 Mon Sep 17 00:00:00 2001 From: CaedenPH Date: Thu, 10 Aug 2023 15:16:50 +0100 Subject: [PATCH 7/8] chore(is_valid_email_address): Fix ruff error --- strings/is_valid_email_address.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py index fd1578a78ab1..d5e1afa8af36 100644 --- a/strings/is_valid_email_address.py +++ b/strings/is_valid_email_address.py @@ -118,4 +118,4 @@ def is_valid_email_address(email: str) -> bool: for email, valid in email_tests: is_valid = is_valid_email_address(email) assert is_valid == valid, f"{email} is {is_valid}" - print(f"Email address {email} is {'not ' if is_valid == False else ''}valid") + print(f"Email address {email} is {'not ' if is_valid else ''}valid") From bfec37ccfb42b4b56d6ea8bf50283b4fe2db1f9d Mon Sep 17 00:00:00 2001 From: Caeden Perelli-Harris Date: Mon, 14 Aug 2023 09:13:04 +0100 Subject: [PATCH 8/8] Update strings/is_valid_email_address.py Co-authored-by: Tianyi Zheng --- strings/is_valid_email_address.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/is_valid_email_address.py b/strings/is_valid_email_address.py index c329f2e61cc7..205394f81297 100644 --- a/strings/is_valid_email_address.py +++ b/strings/is_valid_email_address.py @@ -114,4 +114,4 @@ def is_valid_email_address(email: str) -> bool: for email, valid in email_tests: is_valid = is_valid_email_address(email) assert is_valid == valid, f"{email} is {is_valid}" - print(f"Email address {email} is {'not ' if is_valid else ''}valid") + print(f"Email address {email} is {'not ' if not is_valid else ''}valid")