|
| 1 | +from base64 import b64decode |
| 2 | +from email import message_from_string |
| 3 | +from email.message import Message |
| 4 | +from email.utils import unquote |
| 5 | + |
| 6 | +import six |
| 7 | +from django.core.files.uploadedfile import SimpleUploadedFile |
| 8 | + |
| 9 | +from .utils import angle_wrap, get_content_disposition, parse_address_list, parse_rfc2822date |
| 10 | + |
| 11 | +# Python 2/3.*-compatible email.parser.HeaderParser(policy=email.policy.default) |
try:
    # The Python 3.3+ "email6" package provides a HeaderParser that accepts a policy,
    # and a default policy that unfolds headers accurately
    from email.parser import HeaderParser
    from email.policy import default as accurate_header_unfolding_policy  # vs. compat32

except ImportError:
    # On earlier Pythons at least one of the imports above is unavailable, and the
    # stock parser fails to properly unfold headers (see RFC 5322 section 2.2.3),
    # so supply a minimal stand-in with the same calling convention.
    import re
    from email.parser import Parser

    # Opaque sentinel: callers pass it as `policy=` to show they want accurate unfolding
    accurate_header_unfolding_policy = object()

    class HeaderParser(Parser, object):
        """Headers-only parser that unfolds header lines before parsing them."""

        def __init__(self, _class, policy=None):
            # Policies aren't really supported by this stand-in; the check only makes
            # sure nobody is relying on HeaderParser's default (compat32) behavior,
            # which doesn't properly unfold headers.
            assert policy is accurate_header_unfolding_policy
            super(HeaderParser, self).__init__(_class)

        def parsestr(self, text, headersonly=True):
            # `headersonly` is accepted for signature compatibility, but parsing
            # is always headers-only.
            return super(HeaderParser, self).parsestr(
                self._unfold_headers(text), headersonly=True)

        @staticmethod
        def _unfold_headers(text):
            # RFC 5322: "Unfolding is accomplished by simply removing any CRLF that is
            # immediately followed by WSP". WSP is space or tab; like email.parser,
            # accept CRLF, CR, or LF as the line ending.
            return re.sub(r'(\r\n|\r|\n)(?=[ \t])', "", text)
| 41 | + |
| 42 | + |
class AnymailInboundMessage(Message, object):  # `object` ensures new-style class in Python 2
    """
    A normalized, parsed inbound email message.

    A subclass of email.message.Message, with some additional
    convenience properties, plus helpful methods backported
    from Python 3.6+ email.message.EmailMessage (or really, MIMEPart)
    """

    # Why Python email.message.Message rather than django.core.mail.EmailMessage?
    # Django's EmailMessage is really intended for constructing a (limited subset of)
    # Message to send; Message is better designed for representing arbitrary messages:
    #
    # * Message is easily parsed from raw mime (which is an inbound format provided
    #   by many ESPs), and can accurately represent any mime email that might be received
    # * Message can represent repeated header fields (e.g., "Received") which
    #   are common in inbound messages
    # * Django's EmailMessage defaults a bunch of properties in ways that aren't helpful
    #   (e.g., from_email from settings)

    def __init__(self, *args, **kwargs):
        # Note: this must accept zero arguments, for use with message_from_string (email.parser)
        super(AnymailInboundMessage, self).__init__(*args, **kwargs)

        # Additional attrs provided by some ESPs:
        self.envelope_sender = None
        self.envelope_recipient = None
        self.stripped_text = None
        self.stripped_html = None
        self.spam_detected = None
        self.spam_score = None

    #
    # Convenience accessors
    #

    @property
    def from_email(self):
        """Parsed From header: a single address, None if absent, or a list if there are several"""
        # equivalent to Python 3.2+ message['From'].addresses[0]
        addresses = self.get_address_header('From')
        if not addresses:
            return None
        if len(addresses) == 1:
            return addresses[0]
        return addresses  # unusual, but technically-legal multiple-From; preserve list

    @property
    def to(self):
        """list of EmailAddress objects from To header"""
        # equivalent to Python 3.2+ message['To'].addresses
        return self.get_address_header('To')

    @property
    def cc(self):
        """list of EmailAddress objects from Cc header"""
        # equivalent to Python 3.2+ message['Cc'].addresses
        return self.get_address_header('Cc')

    @property
    def subject(self):
        """str value of Subject header, or None"""
        return self['Subject']

    @property
    def date(self):
        """datetime.datetime from Date header, or None if missing/invalid"""
        # equivalent to Python 3.2+ message['Date'].datetime
        return self.get_date_header('Date')

    @property
    def text(self):
        """Contents of the (first) text/plain body part, or None"""
        return self._get_body_content('text/plain')

    @property
    def html(self):
        """Contents of the (first) text/html body part, or None"""
        return self._get_body_content('text/html')

    @property
    def attachments(self):
        """list of attachments (as MIMEPart objects); excludes inlines"""
        return [part for part in self.walk() if part.is_attachment()]

    @property
    def inline_attachments(self):
        """dict of Content-ID: attachment (as MIMEPart objects)"""
        # Inline parts without a Content-ID are omitted (there is nothing to key them on)
        return {unquote(part['Content-ID']): part for part in self.walk()
                if part.is_inline_attachment() and part['Content-ID']}

    def get_address_header(self, header):
        """Return the value of header parsed into a (possibly-empty) list of EmailAddress objects"""
        # get_all, because address headers may legitimately be repeated
        values = self.get_all(header)
        if values is not None:
            values = parse_address_list(values)
        return values or []

    def get_date_header(self, header):
        """Return the value of header parsed by parse_rfc2822date, or None if the header is missing"""
        value = self[header]
        if value is not None:
            value = parse_rfc2822date(value)
        return value

    def _get_body_content(self, content_type):
        """
        Return the decoded text of the first non-attachment part
        whose content type is content_type, or None if there isn't one.

        The part's declared charset is used for decoding (utf-8 if none is declared).
        If that charset is unknown or the bytes aren't valid in it, fall back to
        a lossy utf-8 decode rather than raising from a property access.
        """
        # This doesn't handle as many corner cases as Python 3.6 email.message.EmailMessage.get_body,
        # but should work correctly for nearly all real-world inbound messages.
        # We're guaranteed to have `is_attachment` available, because all AnymailInboundMessage parts
        # should themselves be AnymailInboundMessage.
        for part in self.walk():
            if part.get_content_type() != content_type or part.is_attachment():
                continue
            payload = part.get_payload(decode=True)
            if payload is None:
                continue  # nothing decodable in this part; keep looking
            charset = part.get_content_charset('utf-8')
            try:
                return payload.decode(charset)
            except (LookupError, UnicodeDecodeError):
                # Bogus charset label, or bytes that don't match the declared charset
                return payload.decode('utf-8', 'replace')
        return None

    # Backport from Python 3.5 email.message.Message
    def get_content_disposition(self):
        """Return the lowercased Content-Disposition value (without params), or None"""
        try:
            return super(AnymailInboundMessage, self).get_content_disposition()
        except AttributeError:
            # Base Message lacks the method on this Python; use the helper from .utils
            return get_content_disposition(self)

    # Backport from Python 3.4.2 email.message.MIMEPart
    def is_attachment(self):
        """Return True if this part's Content-Disposition is 'attachment'"""
        return self.get_content_disposition() == 'attachment'

    # New for Anymail
    def is_inline_attachment(self):
        """Return True if this part's Content-Disposition is 'inline'"""
        return self.get_content_disposition() == 'inline'

    def get_content_bytes(self):
        """
        Return the raw payload bytes

        :raises ValueError: if this part is multipart (there is no single payload)
        """
        maintype = self.get_content_maintype()
        if maintype == 'message':
            # The attachment's payload is a single (parsed) email Message; flatten it to bytes.
            # (Note that self.is_multipart() misleadingly returns True in this case.)
            payload = self.get_payload()
            assert len(payload) == 1  # should be exactly one message
            try:
                return payload[0].as_bytes()  # Python 3
            except AttributeError:
                return payload[0].as_string().encode('utf-8')
        elif maintype == 'multipart':
            # The attachment itself is multipart; the payload is a list of parts,
            # and it's not clear which one is the "content".
            raise ValueError("get_content_bytes() is not valid on multipart messages "
                             "(perhaps you want as_bytes()?)")
        return self.get_payload(decode=True)

    def get_content_text(self, charset='utf-8'):
        """
        Return the payload decoded to text, or None if this part has no payload

        :param charset: {str} codec used to decode the payload bytes; default utf-8
        :raises ValueError: if this part is multipart (there is no single payload)
        """
        maintype = self.get_content_maintype()
        if maintype == 'message':
            # The attachment's payload is a single (parsed) email Message; flatten it to text.
            # (Note that self.is_multipart() misleadingly returns True in this case.)
            payload = self.get_payload()
            assert len(payload) == 1  # should be exactly one message
            return payload[0].as_string()
        elif maintype == 'multipart':
            # The attachment itself is multipart; the payload is a list of parts,
            # and it's not clear which one is the "content".
            raise ValueError("get_content_text() is not valid on multipart messages "
                             "(perhaps you want as_string()?)")
        payload = self.get_payload(decode=True)
        if payload is None:
            return None  # no payload at all: nothing to decode
        return payload.decode(charset)

    def as_uploaded_file(self):
        """Return the attachment converted to a Django UploadedFile"""
        if self['Content-Disposition'] is None:
            return None  # this part is not an attachment
        name = self.get_filename()
        content_type = self.get_content_type()
        content = self.get_content_bytes()
        return SimpleUploadedFile(name, content, content_type)

    #
    # Construction
    #
    # These methods are intended primarily for internal Anymail use
    # (in inbound webhook handlers)

    @classmethod
    def parse_raw_mime(cls, s):
        """Returns a new AnymailInboundMessage parsed from str s"""
        return message_from_string(s, cls)

    @classmethod
    def construct(cls, raw_headers=None, from_email=None, to=None, cc=None, subject=None, headers=None,
                  text=None, text_charset='utf-8', html=None, html_charset='utf-8',
                  attachments=None):
        """
        Returns a new AnymailInboundMessage constructed from params.

        This is designed to handle the sorts of email fields typically present
        in ESP parsed inbound messages. (It's not a generalized MIME message constructor.)

        :param raw_headers: {str|None} base (or complete) message headers as a single string
        :param from_email: {str|None} value for From header
        :param to: {str|None} value for To header
        :param cc: {str|None} value for Cc header
        :param subject: {str|None} value for Subject header
        :param headers: {sequence[(str, str)]|mapping|None} additional headers
        :param text: {str|None} plaintext body
        :param text_charset: {str} charset of plaintext body; default utf-8
        :param html: {str|None} html body
        :param html_charset: {str} charset of html body; default utf-8
        :param attachments: {list[MIMEBase]|None} as returned by construct_attachment
        :return: {AnymailInboundMessage}
        """
        if raw_headers is not None:
            msg = HeaderParser(cls, policy=accurate_header_unfolding_policy).parsestr(raw_headers)
            msg.set_payload(None)  # headersonly forces an empty string payload, which breaks things later
        else:
            msg = cls()

        # Explicit params override the corresponding raw_headers values, if any
        explicit_headers = (
            ('From', from_email),
            ('To', to),
            ('Cc', cc),
            ('Subject', subject),
        )
        for name, value in explicit_headers:
            if value is not None:
                del msg[name]
                msg[name] = value

        if headers is not None:
            try:
                header_items = headers.items()  # mapping
            except AttributeError:
                header_items = headers  # sequence of (key, value)
            for name, value in header_items:
                msg.add_header(name, value)  # adds; never replaces an existing header

        # For simplicity, we always build a MIME structure that could support plaintext/html
        # alternative bodies, inline attachments for the body(ies), and message attachments.
        # This may be overkill for simpler messages, but the structure is never incorrect.
        del msg['MIME-Version']  # override raw_headers values, if any
        del msg['Content-Type']
        msg['MIME-Version'] = '1.0'
        msg['Content-Type'] = 'multipart/mixed'

        related = cls()  # container for alternative bodies and inline attachments
        related['Content-Type'] = 'multipart/related'
        msg.attach(related)

        alternatives = cls()  # container for text and html bodies
        alternatives['Content-Type'] = 'multipart/alternative'
        related.attach(alternatives)

        if text is not None:
            part = cls()
            part['Content-Type'] = 'text/plain'
            part.set_payload(text, charset=text_charset)
            alternatives.attach(part)
        if html is not None:
            part = cls()
            part['Content-Type'] = 'text/html'
            part.set_payload(html, charset=html_charset)
            alternatives.attach(part)

        if attachments is not None:
            for attachment in attachments:
                # Inline attachments sit alongside the bodies that reference them;
                # all others hang off the top-level multipart/mixed container
                if attachment.is_inline_attachment():
                    related.attach(attachment)
                else:
                    msg.attach(attachment)

        return msg

    @classmethod
    def construct_attachment_from_uploaded_file(cls, file, content_id=None):
        """Return a new attachment part built from file (read fully via file.read())"""
        # This pulls the entire file into memory; it would be better to implement
        # some sort of lazy attachment where the content is only pulled in if/when
        # requested (and then use file.chunks() to minimize memory usage)
        return cls.construct_attachment(
            content_type=file.content_type,
            content=file.read(),
            filename=file.name,
            content_id=content_id,
            charset=file.charset)

    @classmethod
    def construct_attachment(cls, content_type, content,
                             charset=None, filename=None, content_id=None, base64=False):
        """
        Return a new message part holding content as an attachment.

        :param content_type: {str} value for the part's Content-Type header
        :param content: attachment content (base64-encoded if base64 is True)
        :param charset: {str|None} passed to set_payload (ignored for message/* content)
        :param filename: {str|None} stored as both Content-Type name and Content-Disposition filename
        :param content_id: {str|None} if given, the part is marked inline and gets a Content-ID header
        :param base64: {bool} whether content must be base64-decoded first
        """
        part = cls()
        part['Content-Type'] = content_type
        part['Content-Disposition'] = 'inline' if content_id is not None else 'attachment'

        if filename is not None:
            part.set_param('name', filename, header='Content-Type')
            part.set_param('filename', filename, header='Content-Disposition')

        if content_id is not None:
            part['Content-ID'] = angle_wrap(content_id)

        if base64:
            content = b64decode(content)

        payload = content
        if part.get_content_maintype() == 'message':
            # email.Message parses message/rfc822 parts as a "multipart" (list) payload
            # whose single item is the recursively-parsed message attachment
            if isinstance(content, six.binary_type):
                content = content.decode()
            payload = [cls.parse_raw_mime(content)]
            charset = None

        part.set_payload(payload, charset)
        return part
0 commit comments