umessage.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
  2. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
  3. #
  4. # This file is part of logilab-common.
  5. #
  6. # logilab-common is free software: you can redistribute it and/or modify it under
  7. # the terms of the GNU Lesser General Public License as published by the Free
  8. # Software Foundation, either version 2.1 of the License, or (at your option) any
  9. # later version.
  10. #
  11. # logilab-common is distributed in the hope that it will be useful, but WITHOUT
  12. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13. # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  14. # details.
  15. #
  16. # You should have received a copy of the GNU Lesser General Public License along
  17. # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
  18. """Unicode email support (extends email from stdlib).
  19. """
  20. __docformat__ = "restructuredtext en"
  21. import email
  22. from encodings import search_function
  23. import sys
  24. if sys.version_info >= (2, 5):
  25. from email.utils import parseaddr, parsedate
  26. from email.header import decode_header
  27. else:
  28. from email.Utils import parseaddr, parsedate
  29. from email.Header import decode_header
  30. from datetime import datetime
  31. try:
  32. from mx.DateTime import DateTime
  33. except ImportError:
  34. DateTime = datetime
  35. import logilab.common as lgc
  36. def decode_QP(string):
  37. parts = []
  38. for decoded, charset in decode_header(string):
  39. if not charset :
  40. charset = 'iso-8859-15'
  41. parts.append(unicode(decoded, charset, 'replace'))
  42. return u' '.join(parts)
  43. def message_from_file(fd):
  44. try:
  45. return UMessage(email.message_from_file(fd))
  46. except email.Errors.MessageParseError:
  47. return ''
  48. def message_from_string(string):
  49. try:
  50. return UMessage(email.message_from_string(string))
  51. except email.Errors.MessageParseError:
  52. return ''
  53. class UMessage:
  54. """Encapsulates an email.Message instance and returns only unicode objects.
  55. """
  56. def __init__(self, message):
  57. self.message = message
  58. # email.Message interface #################################################
  59. def get(self, header, default=None):
  60. value = self.message.get(header, default)
  61. if value:
  62. return decode_QP(value)
  63. return value
  64. def get_all(self, header, default=()):
  65. return [decode_QP(val) for val in self.message.get_all(header, default)
  66. if val is not None]
  67. def get_payload(self, index=None, decode=False):
  68. message = self.message
  69. if index is None:
  70. payload = message.get_payload(index, decode)
  71. if isinstance(payload, list):
  72. return [UMessage(msg) for msg in payload]
  73. if message.get_content_maintype() != 'text':
  74. return payload
  75. charset = message.get_content_charset() or 'iso-8859-1'
  76. if search_function(charset) is None:
  77. charset = 'iso-8859-1'
  78. return unicode(payload or '', charset, "replace")
  79. else:
  80. payload = UMessage(message.get_payload(index, decode))
  81. return payload
  82. def is_multipart(self):
  83. return self.message.is_multipart()
  84. def get_boundary(self):
  85. return self.message.get_boundary()
  86. def walk(self):
  87. for part in self.message.walk():
  88. yield UMessage(part)
  89. def get_content_maintype(self):
  90. return unicode(self.message.get_content_maintype())
  91. def get_content_type(self):
  92. return unicode(self.message.get_content_type())
  93. def get_filename(self, failobj=None):
  94. value = self.message.get_filename(failobj)
  95. if value is failobj:
  96. return value
  97. try:
  98. return unicode(value)
  99. except UnicodeDecodeError:
  100. return u'error decoding filename'
  101. # other convenience methods ###############################################
  102. def headers(self):
  103. """return an unicode string containing all the message's headers"""
  104. values = []
  105. for header in self.message.keys():
  106. values.append(u'%s: %s' % (header, self.get(header)))
  107. return '\n'.join(values)
  108. def multi_addrs(self, header):
  109. """return a list of 2-uple (name, address) for the given address (which
  110. is expected to be an header containing address such as from, to, cc...)
  111. """
  112. persons = []
  113. for person in self.get_all(header, ()):
  114. name, mail = parseaddr(person)
  115. persons.append((name, mail))
  116. return persons
  117. def date(self, alternative_source=False, return_str=False):
  118. """return a datetime object for the email's date or None if no date is
  119. set or if it can't be parsed
  120. """
  121. value = self.get('date')
  122. if value is None and alternative_source:
  123. unix_from = self.message.get_unixfrom()
  124. if unix_from is not None:
  125. try:
  126. value = unix_from.split(" ", 2)[2]
  127. except IndexError:
  128. pass
  129. if value is not None:
  130. datetuple = parsedate(value)
  131. if datetuple:
  132. if lgc.USE_MX_DATETIME:
  133. return DateTime(*datetuple[:6])
  134. return datetime(*datetuple[:6])
  135. elif not return_str:
  136. return None
  137. return value