Skip to content

Commit 1431de0

Browse files
HeyHugo and JoshData authored
Add possibility to cache dns lookups (#58)
Add optional argument dns_resolver to validate_email. If provided it will be used instead of the default resolver. The provided resolver can have a configured cache and custom timeout. Co-authored-by: Joshua Tauberer <[email protected]>
1 parent ad53fb4 commit 1431de0

File tree

3 files changed

+70
-25
lines changed

3 files changed

+70
-25
lines changed

README.md

+18-11
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Key features:
1414
login forms or other uses related to identifying users.
1515
* Gives friendly error messages when validation fails (appropriate to show
1616
to end users).
17-
* (optionally) Checks deliverability: Does the domain name resolve?
17+
* (optionally) Checks deliverability: Does the domain name resolve? You can also override the default DNS resolver.
1818
* Supports internationalized domain names and (optionally)
1919
internationalized local parts.
2020
* Normalizes email addresses (super important for internationalized
@@ -69,23 +69,27 @@ This validates the address and gives you its normalized form. You should
6969
put the normalized form in your database and always normalize before
7070
checking if an address is in your database.
7171

72-
The validator will accept internationalized email addresses, but email
73-
addresses with non-ASCII characters in the *local* part of the address
74-
(before the @-sign) require the
75-
[SMTPUTF8](https://tools.ietf.org/html/rfc6531) extension which may not
76-
be supported by your mail submission library or your outbound mail
77-
server. If you know ahead of time that SMTPUTF8 is not supported then
78-
**add the keyword argument allow\_smtputf8=False to fail validation for
79-
addresses that would require SMTPUTF8**:
72+
To validate many email addresses efficiently, or to control the DNS timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) and reuse it in each call:
8073

8174
```python
82-
valid = validate_email(email, allow_smtputf8=False)
75+
from email_validator import validate_email, caching_resolver
76+
77+
resolver = caching_resolver(timeout=10)
78+
79+
while True:
80+
valid = validate_email(email, dns_resolver=resolver)
8381
```
8482

83+
The validator will accept internationalized email addresses, but not all
84+
mail systems can send email to addresses with non-ASCII characters in
85+
the *local* part of the address (before the @-sign). See the `allow_smtputf8`
86+
option below.
87+
88+
8589
Overview
8690
--------
8791

88-
The module provides a single function `validate_email(email_address)` which
92+
The module provides a function `validate_email(email_address)` which
8993
takes an email address (either a `str` or ASCII `bytes`) and:
9094

9195
- Raises an `EmailNotValidError` with a helpful, human-readable error
@@ -128,6 +132,9 @@ shown):
128132

129133
`allow_empty_local=False`: Set to `True` to allow an empty local part (i.e.
130134
`@example.com`), e.g. for validating Postfix aliases.
135+
136+
`dns_resolver=None`: Pass an instance of [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to control DNS resolution, including setting a timeout and [a cache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html). The `caching_resolver` function shown above is a helper that constructs a `dns.resolver.Resolver` with an [LRUCache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html#dns.resolver.LRUCache). Reuse the same resolver instance across calls to `validate_email` to make use of the cache.
137+
131138

132139
Internationalized email addresses
133140
---------------------------------

email_validator/__init__.py

+25-13
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,20 @@ def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH):
180180
return reason.format(prefix, diff, suffix)
181181

182182

183+
def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None):
184+
resolver = dns.resolver.Resolver()
185+
resolver.cache = cache or dns.resolver.LRUCache()
186+
resolver.lifetime = timeout # timeout, in seconds
187+
return resolver
188+
189+
183190
def validate_email(
184191
email,
185192
allow_smtputf8=True,
186193
allow_empty_local=False,
187194
check_deliverability=True,
188195
timeout=DEFAULT_TIMEOUT,
196+
dns_resolver=None
189197
):
190198
"""
191199
Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of
@@ -273,7 +281,9 @@ def validate_email(
273281
if check_deliverability:
274282
# Validate the email address's deliverability and update the
275283
# return dict with metadata.
276-
deliverability_info = validate_email_deliverability(ret["domain"], ret["domain_i18n"], timeout)
284+
deliverability_info = validate_email_deliverability(
285+
ret["domain"], ret["domain_i18n"], timeout, dns_resolver
286+
)
277287
if "mx" in deliverability_info:
278288
ret.mx = deliverability_info["mx"]
279289
ret.mx_fallback_type = deliverability_info["mx-fallback"]
@@ -443,15 +453,22 @@ def validate_email_domain_part(domain):
443453
}
444454

445455

446-
def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT):
456+
def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT, dns_resolver=None):
447457
# Check that the domain resolves to an MX record. If there is no MX record,
448458
# try an A or AAAA record which is a deprecated fallback for deliverability.
449459

450-
def dns_resolver_resolve_shim(resolver, domain, record):
460+
# If no dns.resolver.Resolver was given, get dnspython's default resolver.
461+
# Override the default resolver's timeout. This may affect other uses of
462+
# dnspython in this process.
463+
if dns_resolver is None:
464+
dns_resolver = dns.resolver.get_default_resolver()
465+
dns_resolver.lifetime = timeout
466+
467+
def dns_resolver_resolve_shim(domain, record):
451468
try:
452469
# dns.resolver.Resolver.resolve is new to dnspython 2.x.
453470
# https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.resolve
454-
return resolver.resolve(domain, record)
471+
return dns_resolver.resolve(domain, record)
455472
except AttributeError:
456473
# dnspython 2.x is only available in Python 3.6 and later. For earlier versions
457474
# of Python, we maintain compatibility with dnspython 1.x which has a
@@ -460,7 +477,7 @@ def dns_resolver_resolve_shim(resolver, domain, record):
460477
# which we prevent by adding a "." to the domain name to make it absolute.
461478
# dns.resolver.Resolver.query is deprecated in dnspython version 2.x.
462479
# https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.query
463-
return resolver.query(domain + ".", record)
480+
return dns_resolver.query(domain + ".", record)
464481

465482
try:
466483
# We need a way to check how timeouts are handled in the tests. So we
@@ -469,28 +486,23 @@ def dns_resolver_resolve_shim(resolver, domain, record):
469486
if getattr(validate_email_deliverability, 'TEST_CHECK_TIMEOUT', False):
470487
raise dns.exception.Timeout()
471488

472-
resolver = dns.resolver.get_default_resolver()
473-
474-
if timeout:
475-
resolver.lifetime = timeout
476-
477489
try:
478490
# Try resolving for MX records and get them in sorted priority order.
479-
response = dns_resolver_resolve_shim(resolver, domain, "MX")
491+
response = dns_resolver_resolve_shim(domain, "MX")
480492
mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response])
481493
mx_fallback = None
482494
except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
483495

484496
# If there was no MX record, fall back to an A record.
485497
try:
486-
response = dns_resolver_resolve_shim(resolver, domain, "A")
498+
response = dns_resolver_resolve_shim(domain, "A")
487499
mtas = [(0, str(r)) for r in response]
488500
mx_fallback = "A"
489501
except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
490502

491503
# If there was no A record, fall back to an AAAA record.
492504
try:
493-
response = dns_resolver_resolve_shim(resolver, domain, "AAAA")
505+
response = dns_resolver_resolve_shim(domain, "AAAA")
494506
mtas = [(0, str(r)) for r in response]
495507
mx_fallback = "AAAA"
496508
except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):

tests/test_main.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
from unittest import mock
2+
import dns.resolver
13
import pytest
24
from email_validator import EmailSyntaxError, EmailUndeliverableError, \
35
validate_email, validate_email_deliverability, \
4-
ValidatedEmail
6+
caching_resolver, ValidatedEmail
57
# Let's test main but rename it to be clear
68
from email_validator import main as validator_main
79

@@ -344,3 +346,27 @@ def test_main_output_shim(monkeypatch, capsys):
344346
# The \n is part of the print statement, not part of the string, which is what the b'...' is
345347
# Since we're mocking py 2.7 here instead of actually using 2.7, this was the closest I could get
346348
assert stdout == "b'An email address cannot have a period immediately after the @-sign.'\n"
349+
350+
351+
@mock.patch("dns.resolver.LRUCache.put")
352+
def test_validate_email__with_caching_resolver(mocked_put):
353+
dns_resolver = caching_resolver()
354+
validate_email("[email protected]", dns_resolver=dns_resolver)
355+
assert mocked_put.called
356+
357+
with mock.patch("dns.resolver.LRUCache.get") as mocked_get:
358+
validate_email("[email protected]", dns_resolver=dns_resolver)
359+
assert mocked_get.called
360+
361+
362+
@mock.patch("dns.resolver.LRUCache.put")
363+
def test_validate_email__with_configured_resolver(mocked_put):
364+
dns_resolver = dns.resolver.Resolver()
365+
dns_resolver.lifetime = 10
366+
dns_resolver.cache = dns.resolver.LRUCache(max_size=1000)
367+
validate_email("[email protected]", dns_resolver=dns_resolver)
368+
assert mocked_put.called
369+
370+
with mock.patch("dns.resolver.LRUCache.get") as mocked_get:
371+
validate_email("[email protected]", dns_resolver=dns_resolver)
372+
assert mocked_get.called

0 commit comments

Comments
 (0)