[2] | 1 | # Copyright (C) 2001-2006 Python Software Foundation
|
---|
| 2 | # Author: Barry Warsaw
|
---|
| 3 | # Contact: email-sig@python.org
|
---|
| 4 |
|
---|
| 5 | """Basic message object for the email package object model."""
|
---|
| 6 |
|
---|
| 7 | __all__ = ['Message']
|
---|
| 8 |
|
---|
| 9 | import re
|
---|
| 10 | import uu
|
---|
| 11 | import binascii
|
---|
| 12 | import warnings
|
---|
| 13 | from cStringIO import StringIO
|
---|
| 14 |
|
---|
| 15 | # Intrapackage imports
|
---|
| 16 | import email.charset
|
---|
| 17 | from email import utils
|
---|
| 18 | from email import errors
|
---|
| 19 |
|
---|
# Separator used when joining a header's main value and its parameters
# into a single header string.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
|
---|
| 25 |
|
---|
| 26 |
|
---|
| 27 | # Helper functions
|
---|
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple (main value, remainder), both stripped of surrounding
    whitespace.  When the value contains no semicolon the remainder is None.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full fledged parser.
    """
    head, semicolon, tail = param.partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
|
---|
| 37 | |
---|
| 38 |
|
---|
def _formatparam(param, value=None, quote=True):
    """Format a parameter name and optional value as it appears in a header.

    A value of None, or an empty value, yields just the parameter name.
    Otherwise the result is `param=value'; the value is wrapped in double
    quotes (and escaped with utils.quote()) when quote is true or when the
    value contains a character matched by tspecials.

    If value is a three tuple (charset, language, value), it is encoded
    according to RFC 2231 rules and a `*' is appended to the parameter name.
    charset is a string, not a Charset instance.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231
        param += '*'
        value = utils.encode_rfc2231(value[2], value[0], value[1])
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if not quote and not tspecials.search(value):
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))
|
---|
| 62 |
|
---|
def _parseparam(s):
    """Split a semicolon-introduced parameter string into a list of items.

    s is expected to start with `;'.  Each item is the text up to the next
    semicolon that is not inside a double-quoted string.  Items of the form
    key=value have the key lower-cased and whitespace around both sides
    removed; other items are just stripped.
    """
    pieces = []
    while s[:1] == ';':
        s = s[1:]
        cut = s.find(';')
        # An odd number of unescaped double quotes before the semicolon means
        # it sits inside a quoted string, so look for the next one.
        while cut > 0 and (s.count('"', 0, cut) - s.count('\\"', 0, cut)) % 2:
            cut = s.find(';', cut + 1)
        if cut < 0:
            cut = len(s)
        item = s[:cut]
        if '=' in item:
            key, _, val = item.partition('=')
            item = key.strip().lower() + '=' + val.strip()
        pieces.append(item.strip())
        s = s[cut:]
    return pieces
|
---|
| 79 |
|
---|
| 80 |
|
---|
def _unquotevalue(value):
    """Unquote a parameter value, which may be a string or a 3-tuple.

    For a tuple only the third item is unquoted; the first two are passed
    through unchanged.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
|
---|
| 90 |
|
---|
| 91 |
|
---|
| 92 | |
---|
| 93 |
|
---|
| 94 | class Message:
|
---|
| 95 | """Basic message object.
|
---|
| 96 |
|
---|
| 97 | A message object is defined as something that has a bunch of RFC 2822
|
---|
| 98 | headers and a payload. It may optionally have an envelope header
|
---|
| 99 | (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
|
---|
| 100 | multipart or a message/rfc822), then the payload is a list of Message
|
---|
| 101 | objects, otherwise it is a string.
|
---|
[391] | 102 |
|
---|
[2] | 103 | Message objects implement part of the `mapping' interface, which assumes
|
---|
| 104 | there is exactly one occurrence of the header per message. Some headers
|
---|
| 105 | do in fact appear multiple times (e.g. Received) and for those headers,
|
---|
| 106 | you must use the explicit API to set or get all the headers. Not all of
|
---|
| 107 | the mapping methods are implemented.
|
---|
| 108 | """
|
---|
def __init__(self):
    """Create an empty message with no headers, payload or charset."""
    # List of (name, value) header pairs, in insertion order.
    self._headers = []
    self._payload = None
    self._charset = None
    self._unixfrom = None
    # Default content type
    self._default_type = 'text/plain'
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
|
---|
| 119 |
|
---|
def __str__(self):
    """Return the whole message flattened to a string.

    The result comes from as_string() with unixfrom=True, so the envelope
    header is included along with the headers and body.
    """
    text = self.as_string(unixfrom=True)
    return text
|
---|
| 125 |
|
---|
def as_string(self, unixfrom=False):
    """Return the entire formatted message as a string.

    Optional `unixfrom', when True, means include the Unix From_ envelope
    header.

    This is a convenience method and may not generate the message exactly
    as you intend because by default it mangles lines that begin with
    "From ".  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
|
---|
| 141 |
|
---|
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has subparts."""
    payload = self._payload
    return isinstance(payload, list)
|
---|
| 145 |
|
---|
| 146 | #
|
---|
| 147 | # Unix From_ line
|
---|
| 148 | #
|
---|
def set_unixfrom(self, unixfrom):
    """Store the Unix From_ envelope line for this message."""
    self._unixfrom = unixfrom
|
---|
| 151 |
|
---|
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    return self._unixfrom
|
---|
| 154 |
|
---|
| 155 | #
|
---|
| 156 | # Payload manipulation.
|
---|
| 157 | #
|
---|
def attach(self, payload):
    """Append the given payload to the current payload.

    When there is no payload yet, the payload becomes a one-element list;
    otherwise the new item is appended to the existing payload.  If you
    want to set the payload to a scalar object, use set_payload() instead.
    """
    parts = self._payload
    if parts is None:
        self._payload = [payload]
        return
    parts.append(payload)
|
---|
| 169 |
|
---|
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload; TypeError is raised if i is
    given and the payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64', or one
    of the uuencode spellings (`x-uuencode', `uuencode', `uue', `x-uue').
    If some other encoding is used, or the header is missing, or if the
    payload has bogus data (i.e. bogus base64 or uuencoded data), the
    payload is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.
    """
    if i is None:
        payload = self._payload
    elif isinstance(self._payload, list):
        payload = self._payload[i]
    else:
        raise TypeError('Expected list, got %s' % type(self._payload))
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return utils._qdecode(payload)
    if cte == 'base64':
        try:
            return utils._bdecode(payload)
        except binascii.Error:
            # Incorrect padding
            return payload
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        decoded = StringIO()
        try:
            uu.decode(StringIO(payload+'\n'), decoded, quiet=True)
        except uu.Error:
            # Some decoding problem
            return payload
        return decoded.getvalue()
    # Everything else, including encodings with 8bit or 7bit are returned
    # unchanged.
    return payload
|
---|
| 219 |
|
---|
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    When charset is not None it is passed on to set_charset(); see that
    method for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
|
---|
| 229 |
|
---|
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, basestring):
        charset = email.charset.Charset(charset)
    if not isinstance(charset, email.charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if isinstance(self._payload, unicode):
        self._payload = self._payload.encode(charset.output_charset)
    if str(charset) != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # First try the body encoding as a callable applied to the message.
        encoder(self)
    except TypeError:
        # Calling it failed (presumably it is a plain encoding name rather
        # than a callable -- confirm against email.charset): encode the
        # payload here and record the value as the header.
        self._payload = charset.body_encode(self._payload)
        self.add_header('Content-Transfer-Encoding', encoder)
|
---|
| 274 |
|
---|
def get_charset(self):
    """Return the charset stored by set_charset() (None if none is set)."""
    return self._charset
|
---|
| 279 |
|
---|
| 280 | #
|
---|
| 281 | # MAPPING INTERFACE (partial)
|
---|
| 282 | #
|
---|
def __len__(self):
    """Return the number of stored headers; duplicates are counted."""
    headers = self._headers
    return len(headers)
|
---|
| 286 |
|
---|
def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header gives None rather than an exception, because the
    lookup is delegated to get().

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
|
---|
| 297 |
|
---|
def __setitem__(self, name, val):
    """Append a (name, val) header to the message.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.
    """
    entry = (name, val)
    self._headers.append(entry)
|
---|
| 305 |
|
---|
def __delitem__(self, name):
    """Remove every header whose field name matches, ignoring case.

    Does not raise an exception if the header is missing.
    """
    target = name.lower()
    self._headers = [(k, v) for k, v in self._headers
                     if k.lower() != target]
|
---|
| 317 |
|
---|
def __contains__(self, name):
    """Return true if a header with this field name exists (case-insensitive)."""
    wanted = name.lower()
    present = [k.lower() for k, v in self._headers]
    return wanted in present
|
---|
| 320 |
|
---|
def has_key(self, name):
    """Return true if the message contains the header."""
    # A private sentinel distinguishes "absent" from any stored value.
    sentinel = object()
    found = self.get(name, sentinel)
    return found is not sentinel
|
---|
| 325 |
|
---|
def keys(self):
    """Return a new list of the field names of all stored headers.

    The names come in the order the headers are stored and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    return [field for field, _ in self._headers]
|
---|
| 335 |
|
---|
def values(self):
    """Return a new list of the values of all stored headers.

    The values come in the order the headers are stored and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    return [content for _, content in self._headers]
|
---|
| 345 |
|
---|
def items(self):
    """Return a copy of the list of (field name, value) header pairs.

    The pairs come in the order the headers are stored and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    return list(self._headers)
|
---|
| 355 |
|
---|
def get(self, name, failobj=None):
    """Return the value of the first header matching name, ignoring case.

    Like __getitem__() but return failobj instead of None when the field
    is missing.
    """
    wanted = name.lower()
    for field, content in self._headers:
        if field.lower() == wanted:
            return content
    return failobj
|
---|
| 367 |
|
---|
| 368 | #
|
---|
| 369 | # Additional useful stuff
|
---|
| 370 | #
|
---|
| 371 |
|
---|
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Field names are compared case-insensitively.  The values come in the
    order the headers are stored and may contain duplicates.  Any fields
    deleted and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [v for k, v in self._headers if k.lower() == wanted]
    return matches or failobj
|
---|
| 389 |
|
---|
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add.  Keyword arguments can be used to set
    additional parameters for the header field, with underscores converted
    to dashes.  Normally the parameter will be added as key="value" unless
    value is None, in which case only the key will be added.  If a
    parameter value contains non-ASCII characters it must be specified as a
    three-tuple of (charset, language, value), in which case it will be
    encoded according to RFC2231 rules.

    When _value is not None it is placed before the parameters.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    pieces = []
    if _value is not None:
        pieces.append(_value)
    for key, val in _params.items():
        attr = key.replace('_', '-')
        if val is None:
            pieces.append(attr)
        else:
            pieces.append(_formatparam(attr, val))
    self._headers.append((_name, SEMISPACE.join(pieces)))
|
---|
| 414 |
|
---|
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  If no matching header was found, a KeyError is
    raised (carrying the lower-cased field name).
    """
    _name = _name.lower()
    for index, (field, _) in enumerate(self._headers):
        if field.lower() == _name:
            # Keep the original spelling of the field name.
            self._headers[index] = (field, _value)
            return
    raise KeyError(_name)
|
---|
| 429 |
|
---|
| 430 | #
|
---|
| 431 | # Use these three methods instead of the three above.
|
---|
| 432 | #
|
---|
| 433 |
|
---|
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main_value, _ = _splitparam(header)
    ctype = main_value.lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    if ctype.count('/') == 1:
        return ctype
    return 'text/plain'
|
---|
| 457 |
|
---|
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type(), i.e. everything before the first `/'.
    """
    maintype, _, _ = self.get_content_type().partition('/')
    return maintype
|
---|
| 466 |
|
---|
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
|
---|
| 475 |
|
---|
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
|
---|
| 484 |
|
---|
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    # Stored as given; no validation is performed.
    self._default_type = ctype
|
---|
| 493 |
|
---|
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header as (name, value) pairs, still quoted.

    Like get_params() but preserves the quoting of values.  failobj is
    returned when the header is missing.  A bare attribute (no `=' sign)
    gets the empty string as its value.  The collected pairs are passed
    through utils.decode_params() before being returned.

    BAW: should this be part of the public interface?
    """
    sentinel = object()
    value = self.get(header, sentinel)
    if value is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(';' + value):
        if '=' in item:
            name, _, val = item.partition('=')
            pairs.append((name.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((item.strip(), ''))
    return utils.decode_params(pairs)
|
---|
| 514 |
|
---|
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    pairs = self._get_params_preserve(sentinel, header)
    if pairs is sentinel:
        return failobj
    if not unquote:
        return pairs
    return [(name, _unquotevalue(val)) for name, val in pairs]
|
---|
| 536 |
|
---|
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

    Your application should be prepared to deal with 3-tuple return
    values, and can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0] or 'us-ascii')

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set
    to False.
    """
    if header not in self:
        return failobj
    for name, raw in self._get_params_preserve(failobj, header):
        if name.lower() != param.lower():
            continue
        if unquote:
            return _unquotevalue(raw)
        return raw
    return failobj
|
---|
| 572 |
|
---|
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.  This includes a parameter that is
    present with an empty value (a bare attribute or `param=""').

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    When the resulting header text differs from the current one, the
    header is deleted and re-added, so it ends up last in the header list.
    """
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    # Use a sentinel rather than truthiness: an existing parameter whose
    # value is the empty string must be replaced, not appended a second time.
    missing = object()
    if self.get_param(param, missing, header=header) is missing:
        if not ctype:
            ctype = _formatparam(param, value, requote)
        else:
            ctype = SEMISPACE.join(
                [ctype, _formatparam(param, value, requote)])
    else:
        # Rebuild the header, substituting the new value where the
        # parameter name matches (case-insensitively).
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                append_param = _formatparam(param, value, requote)
            else:
                append_param = _formatparam(old_param, old_value, requote)
            if not ctype:
                ctype = append_param
            else:
                ctype = SEMISPACE.join([ctype, append_param])
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
|
---|
| 620 |
|
---|
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header value is rebuilt without the parameter or its value.  All
    values will be quoted as necessary unless requote is False.  Optional
    header specifies an alternative to the Content-Type header.

    Nothing happens when the header is missing.  When the rebuilt value
    differs from the current one, the header is deleted and re-added, so
    it ends up last in the header list.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        formatted = _formatparam(name, val, requote)
        if rebuilt:
            rebuilt = SEMISPACE.join([rebuilt, formatted])
        else:
            rebuilt = formatted
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
|
---|
| 642 |
|
---|
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    old_params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for name, val in old_params[1:]:
        self.set_param(name, val, header, requote)
|
---|
| 674 |
|
---|
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  If that header is missing
    the `filename' parameter, this method falls back to looking for the
    `name' parameter of the Content-Type header.  failobj is returned when
    neither is found.  Surrounding whitespace is stripped from the result.
    """
    sentinel = object()
    found = self.get_param('filename', sentinel, 'content-disposition')
    if found is sentinel:
        found = self.get_param('name', sentinel, 'content-type')
        if found is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(found).strip()
|
---|
| 690 |
|
---|
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The boundary is extracted from the Content-Type header's `boundary'
    parameter, and it is unquoted.  failobj is returned when there is no
    such parameter.
    """
    sentinel = object()
    found = self.get_param('boundary', sentinel)
    if found is sentinel:
        return failobj
    collapsed = utils.collapse_rfc2231_value(found)
    # RFC 2046 says that boundaries may begin but not end in w/s
    return collapsed.rstrip()
|
---|
| 703 |
|
---|
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    This is subtly different than deleting the Content-Type header and
    adding a new one with a new boundary parameter via add_header().  The
    main difference is that using the set_boundary() method preserves the
    order of the Content-Type header in the original message.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    newparams = []
    replaced = False
    for name, val in params:
        if name.lower() == 'boundary':
            newparams.append(('boundary', quoted))
            replaced = True
        else:
            newparams.append((name, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        newparams.append(('boundary', quoted))
    # Render the new header text; bare attributes have an empty value.
    parts = []
    for name, val in newparams:
        if val == '':
            parts.append(name)
        else:
            parts.append('%s=%s' % (name, val))
    new_value = SEMISPACE.join(parts)
    # Replace the existing Content-Type header with the new value, leaving
    # every other header where it was.
    newheaders = []
    for field, content in self._headers:
        if field.lower() == 'content-type':
            newheaders.append((field, new_value))
        else:
            newheaders.append((field, content))
    self._headers = newheaders
|
---|
| 748 |
|
---|
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The result is always lower-cased.  failobj is returned when the
    `charset' parameter is absent, and also when its value cannot be
    represented in us-ascii.

    When get_param() yields a tuple (an RFC 2231 encoded value), item 0 is
    used as the encoding of the text and item 2 as the text itself.  If
    decoding that text fails, the raw text is used as-is.
    """
    # A private sentinel distinguishes "parameter absent" from a real value.
    absent = object()
    value = self.get_param('charset', absent)
    if value is absent:
        return failobj

    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        declared = value[0] or 'us-ascii'
        raw = value[2]
        try:
            # LookupError: the declared encoding is unknown to Python.
            # UnicodeError: the text does not fit the declared encoding,
            # or the decoded text does not fit in us-ascii.
            value = unicode(raw, declared).encode('us-ascii')
        except (LookupError, UnicodeError):
            value = raw

    # Whatever we ended up with must consist solely of us-ascii
    # characters; round-trip it through the codec to find out.
    try:
        if isinstance(value, str):
            value = unicode(value, 'us-ascii')
        value = value.encode('us-ascii')
    except UnicodeError:
        return failobj

    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
|
---|
| 779 |
|
---|
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The message is traversed with walk(), and get_content_charset(failobj)
    is called on every part that walk() yields.  The results are collected
    in traversal order, so each entry is either that part's charset string
    or failobj (None by default) when get_content_charset() could not
    produce one.

    The list has one entry for each part yielded by walk().
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
|
---|
| 797 |
|
---|
| 798 | # I.e. def walk(self): ...
|
---|
| 799 | from email.iterators import walk
|
---|