Skip to content

Commit b57eb94

Browse files
authored
Add inbound mail handling
Add normalized event, signal, and webhooks for inbound mail. Closes #43 Closes #86
1 parent c924c9e commit b57eb94

35 files changed

+2970
-132
lines changed

README.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ built-in `django.core.mail` package. It includes:
3737
* Normalized sent-message status and tracking notification, by connecting
3838
your ESP's webhooks to Django signals
3939
* "Batch transactional" sends using your ESP's merge and template features
40+
* Inbound message support, to receive email through your ESP's webhooks,
41+
with simplified, portable access to attachments and other inbound content
4042

4143
Anymail is released under the BSD license. It is extensively tested against Django 1.8--2.0
4244
(including Python 2.7, Python 3 and PyPy).
@@ -67,6 +69,7 @@ Anymail 1-2-3
6769

6870
.. This quickstart section is also included in docs/quickstart.rst
6971
72+
Here's how to send a message.
7073
This example uses Mailgun, but you can substitute Mailjet or Postmark or SendGrid
7174
or SparkPost or any other supported ESP where you see "mailgun":
7275

@@ -144,4 +147,5 @@ or SparkPost or any other supported ESP where you see "mailgun":
144147
145148
146149
See the `full documentation <https://anymail.readthedocs.io/en/stable/>`_
147-
for more features and options.
150+
for more features and options, including receiving messages and tracking
151+
sent message status.

anymail/inbound.py

+355
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
from base64 import b64decode
2+
from email import message_from_string
3+
from email.message import Message
4+
from email.utils import unquote
5+
6+
import six
7+
from django.core.files.uploadedfile import SimpleUploadedFile
8+
9+
from .utils import angle_wrap, get_content_disposition, parse_address_list, parse_rfc2822date
10+
11+
# Provide a HeaderParser that unfolds headers correctly on both Python 2 and 3:
# the equivalent of email.parser.HeaderParser(policy=email.policy.default).
try:
    # Python 3.3+ (the "email6" package) supports this directly
    # when HeaderParser is given the default policy.
    from email.parser import HeaderParser
    from email.policy import default as accurate_header_unfolding_policy  # vs. compat32

except ImportError:
    # Older Pythons lack HeaderParser and/or email.policy, and their parsers
    # deliberately preserve earlier compatibility bugs by not properly
    # unfolding headers (see RFC 5322 section 2.2.3). Backport just enough here.
    from email.parser import Parser
    import re
    accurate_header_unfolding_policy = object()  # sentinel standing in for the real policy

    class HeaderParser(Parser, object):
        """Minimal headers-only parser that unfolds header lines before parsing."""

        # "Unfolding is accomplished by simply removing any CRLF that is immediately
        # followed by WSP" (WSP is space or tab; like email.parser, this accepts
        # CRLF, CR, or LF line endings)
        _FOLD_RE = re.compile(r'(\r\n|\r|\n)(?=[ \t])')

        def __init__(self, _class, policy=None):
            # Policies aren't really supported by this backport; the check only
            # guards against callers expecting HeaderParser's default compat32
            # policy (which doesn't properly unfold headers).
            assert policy is accurate_header_unfolding_policy
            super(HeaderParser, self).__init__(_class)

        def parsestr(self, text, headersonly=True):
            """Parse text as headers only, after unfolding any folded header lines."""
            flattened = self._unfold_headers(text)
            return super(HeaderParser, self).parsestr(flattened, headersonly=True)

        @staticmethod
        def _unfold_headers(text):
            """Return text with every line break that precedes a space or tab removed."""
            return HeaderParser._FOLD_RE.sub("", text)
41+
42+
43+
class AnymailInboundMessage(Message, object):  # `object` ensures new-style class in Python 2
    """
    A normalized, parsed inbound email message.

    A subclass of email.message.Message, with some additional
    convenience properties, plus helpful methods backported
    from Python 3.6+ email.message.EmailMessage (or really, MIMEPart)
    """

    # Why Python email.message.Message rather than django.core.mail.EmailMessage?
    # Django's EmailMessage is really intended for constructing a (limited subset of)
    # Message to send; Message is better designed for representing arbitrary messages:
    #
    # * Message is easily parsed from raw mime (which is an inbound format provided
    #   by many ESPs), and can accurately represent any mime email that might be received
    # * Message can represent repeated header fields (e.g., "Received") which
    #   are common in inbound messages
    # * Django's EmailMessage defaults a bunch of properties in ways that aren't helpful
    #   (e.g., from_email from settings)

    def __init__(self, *args, **kwargs):
        # Note: this must accept zero arguments, for use with message_from_string (email.parser)
        super(AnymailInboundMessage, self).__init__(*args, **kwargs)

        # Additional attrs provided by some ESPs:
        self.envelope_sender = None
        self.envelope_recipient = None
        self.stripped_text = None
        self.stripped_html = None
        self.spam_detected = None
        self.spam_score = None

    #
    # Convenience accessors
    #

    @property
    def from_email(self):
        """The From header address: a single EmailAddress, None if there is
        no From address, or a list if the header has several addresses"""
        # equivalent to Python 3.2+ message['From'].addresses[0]
        from_email = self.get_address_header('From')
        if len(from_email) == 1:
            return from_email[0]
        elif len(from_email) == 0:
            return None
        else:
            return from_email  # unusual, but technically-legal multiple-From; preserve list

    @property
    def to(self):
        """list of EmailAddress objects from To header"""
        # equivalent to Python 3.2+ message['To'].addresses
        return self.get_address_header('To')

    @property
    def cc(self):
        """list of EmailAddress objects from Cc header"""
        # equivalent to Python 3.2+ message['Cc'].addresses
        return self.get_address_header('Cc')

    @property
    def subject(self):
        """str value of Subject header, or None"""
        return self['Subject']

    @property
    def date(self):
        """datetime.datetime from Date header, or None if missing/invalid"""
        # equivalent to Python 3.2+ message['Date'].datetime
        return self.get_date_header('Date')

    @property
    def text(self):
        """Contents of the (first) text/plain body part, or None"""
        return self._get_body_content('text/plain')

    @property
    def html(self):
        """Contents of the (first) text/html body part, or None"""
        return self._get_body_content('text/html')

    @property
    def attachments(self):
        """list of attachments (as MIMEPart objects); excludes inlines"""
        return [part for part in self.walk() if part.is_attachment()]

    @property
    def inline_attachments(self):
        """dict of Content-ID: attachment (as MIMEPart objects)"""
        # unquote() strips the surrounding angle brackets from the Content-ID value
        return {unquote(part['Content-ID']): part for part in self.walk()
                if part.is_inline_attachment() and part['Content-ID']}

    def get_address_header(self, header):
        """Return the value of header parsed into a (possibly-empty) list of EmailAddress objects"""
        values = self.get_all(header)  # all occurrences of the header, or None if absent
        if values is not None:
            values = parse_address_list(values)
        return values or []

    def get_date_header(self, header):
        """Return the value of header parsed by parse_rfc2822date, or None if the header is absent"""
        value = self[header]
        if value is not None:
            value = parse_rfc2822date(value)
        return value

    def _get_body_content(self, content_type):
        """Return the decoded text of the first non-attachment part of content_type, or None"""
        # This doesn't handle as many corner cases as Python 3.6 email.message.EmailMessage.get_body,
        # but should work correctly for nearly all real-world inbound messages.
        # We're guaranteed to have `is_attachment` available, because all AnymailInboundMessage parts
        # should themselves be AnymailInboundMessage.
        for part in self.walk():
            if part.get_content_type() == content_type and not part.is_attachment():
                payload = part.get_payload(decode=True)
                if payload is not None:
                    # Honor the charset declared on the part itself (inbound mail is
                    # frequently not utf-8); assume utf-8 only when none is declared.
                    return self._decode_payload(payload, part.get_content_charset('utf-8'))
        return None

    @staticmethod
    def _decode_payload(payload, charset):
        """Decode payload bytes using charset, falling back to utf-8 for an unknown charset name"""
        try:
            return payload.decode(charset)
        except LookupError:
            # The declared charset isn't a codec Python knows about
            return payload.decode('utf-8')

    # Backport from Python 3.5 email.message.Message
    def get_content_disposition(self):
        """Return the lowercased Content-Disposition value without params, or None"""
        try:
            return super(AnymailInboundMessage, self).get_content_disposition()
        except AttributeError:
            # Base class lacks the method (older Python): use the helper from .utils
            return get_content_disposition(self)

    # Backport from Python 3.4.2 email.message.MIMEPart
    def is_attachment(self):
        """Return True if this part's Content-Disposition is 'attachment'"""
        return self.get_content_disposition() == 'attachment'

    # New for Anymail
    def is_inline_attachment(self):
        """Return True if this part's Content-Disposition is 'inline'"""
        return self.get_content_disposition() == 'inline'

    def get_content_bytes(self):
        """
        Return the raw payload bytes

        :raises ValueError: if this part is itself multipart
        """
        maintype = self.get_content_maintype()
        if maintype == 'message':
            # The attachment's payload is a single (parsed) email Message; flatten it to bytes.
            # (Note that self.is_multipart() misleadingly returns True in this case.)
            payload = self.get_payload()
            assert len(payload) == 1  # should be exactly one message
            try:
                return payload[0].as_bytes()  # Python 3
            except AttributeError:
                return payload[0].as_string().encode('utf-8')
        elif maintype == 'multipart':
            # The attachment itself is multipart; the payload is a list of parts,
            # and it's not clear which one is the "content".
            raise ValueError("get_content_bytes() is not valid on multipart messages "
                             "(perhaps you want as_bytes()?)")
        return self.get_payload(decode=True)

    def get_content_text(self, charset='utf-8'):
        """
        Return the payload decoded to text

        :param charset: {str} codec used to decode the payload bytes; default utf-8
        :return: {str|None} the decoded text, or None if this part has no payload
        :raises ValueError: if this part is itself multipart
        """
        maintype = self.get_content_maintype()
        if maintype == 'message':
            # The attachment's payload is a single (parsed) email Message; flatten it to text.
            # (Note that self.is_multipart() misleadingly returns True in this case.)
            payload = self.get_payload()
            assert len(payload) == 1  # should be exactly one message
            return payload[0].as_string()
        elif maintype == 'multipart':
            # The attachment itself is multipart; the payload is a list of parts,
            # and it's not clear which one is the "content".
            raise ValueError("get_content_text() is not valid on multipart messages "
                             "(perhaps you want as_string()?)")
        payload = self.get_payload(decode=True)
        if payload is None:
            return None  # nothing to decode (part has no payload)
        return payload.decode(charset)

    def as_uploaded_file(self):
        """Return the attachment converted to a Django UploadedFile"""
        if self['Content-Disposition'] is None:
            return None  # this part is not an attachment
        name = self.get_filename()
        content_type = self.get_content_type()
        content = self.get_content_bytes()
        return SimpleUploadedFile(name, content, content_type)

    #
    # Construction
    #
    # These methods are intended primarily for internal Anymail use
    # (in inbound webhook handlers)

    @classmethod
    def parse_raw_mime(cls, s):
        """Returns a new AnymailInboundMessage parsed from str s"""
        return message_from_string(s, cls)

    @classmethod
    def construct(cls, raw_headers=None, from_email=None, to=None, cc=None, subject=None, headers=None,
                  text=None, text_charset='utf-8', html=None, html_charset='utf-8',
                  attachments=None):
        """
        Returns a new AnymailInboundMessage constructed from params.

        This is designed to handle the sorts of email fields typically present
        in ESP parsed inbound messages. (It's not a generalized MIME message constructor.)

        :param raw_headers: {str|None} base (or complete) message headers as a single string
        :param from_email: {str|None} value for From header
        :param to: {str|None} value for To header
        :param cc: {str|None} value for Cc header
        :param subject: {str|None} value for Subject header
        :param headers: {sequence[(str, str)]|mapping|None} additional headers
        :param text: {str|None} plaintext body
        :param text_charset: {str} charset of plaintext body; default utf-8
        :param html: {str|None} html body
        :param html_charset: {str} charset of html body; default utf-8
        :param attachments: {list[MIMEBase]|None} as returned by construct_attachment
        :return: {AnymailInboundMessage}
        """
        if raw_headers is not None:
            msg = HeaderParser(cls, policy=accurate_header_unfolding_policy).parsestr(raw_headers)
            msg.set_payload(None)  # headersonly forces an empty string payload, which breaks things later
        else:
            msg = cls()

        # Explicit params override the corresponding raw_headers values, if any
        for name, value in (('From', from_email), ('To', to), ('Cc', cc), ('Subject', subject)):
            if value is not None:
                del msg[name]
                msg[name] = value

        if headers is not None:
            try:
                header_items = headers.items()  # mapping
            except AttributeError:
                header_items = headers  # sequence of (key, value)
            for name, value in header_items:
                msg.add_header(name, value)

        # For simplicity, we always build a MIME structure that could support plaintext/html
        # alternative bodies, inline attachments for the body(ies), and message attachments.
        # This may be overkill for simpler messages, but the structure is never incorrect.
        del msg['MIME-Version']  # override raw_headers values, if any
        del msg['Content-Type']
        msg['MIME-Version'] = '1.0'
        msg['Content-Type'] = 'multipart/mixed'

        related = cls()  # container for alternative bodies and inline attachments
        related['Content-Type'] = 'multipart/related'
        msg.attach(related)

        alternatives = cls()  # container for text and html bodies
        alternatives['Content-Type'] = 'multipart/alternative'
        related.attach(alternatives)

        if text is not None:
            part = cls()
            part['Content-Type'] = 'text/plain'
            part.set_payload(text, charset=text_charset)
            alternatives.attach(part)
        if html is not None:
            part = cls()
            part['Content-Type'] = 'text/html'
            part.set_payload(html, charset=html_charset)
            alternatives.attach(part)

        if attachments is not None:
            for attachment in attachments:
                # Inline attachments sit beside the bodies that reference them;
                # everything else hangs off the top-level multipart/mixed.
                if attachment.is_inline_attachment():
                    related.attach(attachment)
                else:
                    msg.attach(attachment)

        return msg

    @classmethod
    def construct_attachment_from_uploaded_file(cls, file, content_id=None):
        """Return a new attachment part built from an uploaded file object
        (reads its content_type, name, charset, and full content)"""
        # This pulls the entire file into memory; it would be better to implement
        # some sort of lazy attachment where the content is only pulled in if/when
        # requested (and then use file.chunks() to minimize memory usage)
        return cls.construct_attachment(
            content_type=file.content_type,
            content=file.read(),
            filename=file.name,
            content_id=content_id,
            charset=file.charset)

    @classmethod
    def construct_attachment(cls, content_type, content,
                             charset=None, filename=None, content_id=None, base64=False):
        """
        Return a new message part representing a single attachment.

        :param content_type: {str} value for the part's Content-Type header
        :param content: attachment content (base64-encoded if `base64` is True)
        :param charset: {str|None} charset passed to set_payload; ignored for message/* content
        :param filename: {str|None} attachment filename, if any
        :param content_id: {str|None} if not None, the part is marked inline with this Content-ID
        :param base64: {bool} whether `content` must be base64-decoded first
        :return: {AnymailInboundMessage}
        """
        part = cls()
        part['Content-Type'] = content_type
        part['Content-Disposition'] = 'inline' if content_id is not None else 'attachment'

        if filename is not None:
            part.set_param('name', filename, header='Content-Type')
            part.set_param('filename', filename, header='Content-Disposition')

        if content_id is not None:
            part['Content-ID'] = angle_wrap(content_id)

        if base64:
            content = b64decode(content)

        payload = content
        if part.get_content_maintype() == 'message':
            # email.Message parses message/rfc822 parts as a "multipart" (list) payload
            # whose single item is the recursively-parsed message attachment
            if isinstance(content, six.binary_type):
                content = content.decode()
            payload = [cls.parse_raw_mime(content)]
            charset = None

        part.set_payload(payload, charset)
        return part

anymail/signals.py

+12
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,18 @@ class AnymailInboundEvent(AnymailEvent):
4545

4646
def __init__(self, **kwargs):
    """Initialize the base event from kwargs, then set the inbound-specific
    attributes, each defaulting to None when not supplied."""
    super(AnymailInboundEvent, self).__init__(**kwargs)
    inbound_attrs = (
        'message',  # anymail.inbound.AnymailInboundMessage
        'recipient',  # str: envelope recipient
        'sender',  # str: envelope sender
        'stripped_text',  # cleaned of quotes/signatures (varies by ESP)
        'stripped_html',
        'spam_detected',  # bool
        'spam_score',  # float: usually SpamAssassin
    )
    for attr in inbound_attrs:
        setattr(self, attr, kwargs.pop(attr, None))

    # Not (yet) provided:
    # SPF status?
    # DKIM status?
    # DMARC status? (no ESP has documented support yet)
4860

4961

5062
class EventType:

0 commit comments

Comments
 (0)