* Make it Python 3.3 complient
* Fix and add some filters for Out Of Office messages detection
* Fix findSubject charset encoding exceptions
Signed-off-by: Nicolas Iooss <nicolas.iooss_git@polytechnique.org>
-#!/usr/bin/env python2.5
# -*- coding: utf-8 -*-
#***************************************************************************
#* Copyright (C) 2003-2013 Polytechnique.org *
# -*- coding: utf-8 -*-
#***************************************************************************
#* Copyright (C) 2003-2013 Polytechnique.org *
All emails are saved in different mailboxes to make human post-processing easier.
"""
All emails are saved in different mailboxes to make human post-processing easier.
"""
-import email, mailbox, os, re, sys, time
+import email
+import mailbox
+import os
+import re
+import sys
+import time
#----------------------------------------------------------------------------#
#----------------------------------------------------------------------------#
def finalize_filters(self):
duration = time.clock() - self.start_time
separator = '-' * 80
def finalize_filters(self):
duration = time.clock() - self.start_time
separator = '-' * 80
- print separator
- print 'Processed the %d messages of %s in %.2fs' % (len(self.mbox), self.mbox_file, duration)
- print separator
+ print(separator)
+ print('Processed the %d messages of %s in %.2fs' % (len(self.mbox), self.mbox_file, duration))
+ print(separator)
- f.finalize();
- print separator
+ f.finalize()
+ print(separator)
def run(self):
self.mbox.lock()
def run(self):
self.mbox.lock()
def initialize(self, mbox_file):
"""Called by the processor before processing starts.
def initialize(self, mbox_file):
"""Called by the processor before processing starts.
This is the place to open descriptors required during processing."""
pass
def process(self, message):
"""Called by the processor for each message that reaches this step.
This is the place to open descriptors required during processing."""
pass
def process(self, message):
"""Called by the processor for each message that reaches this step.
Return true to stop processing, and false to go to the next filter."""
pass
def finalize(self):
"""Called by the processor after processing ends.
Return true to stop processing, and false to go to the next filter."""
pass
def finalize(self):
"""Called by the processor after processing ends.
This is the place to display the results and close all descriptors."""
pass
This is the place to display the results and close all descriptors."""
pass
def findSubject(message):
"""Returns the subject of an email.Message as an unicode string."""
def findSubject(message):
"""Returns the subject of an email.Message as an unicode string."""
- if message['Subject'] is not None:
- try:
- return unicode(email.header.make_header(email.header.decode_header(message['Subject'])))
- except:
- pass
- return None
+ if message['Subject'] is None:
+ return None
+
+ # decode_header returns a list of (decoded_string, charset) pairs
+ decoded_seq = email.header.decode_header(message['Subject'])
+ decoded_seq = [(subj, enc or 'utf-8') for subj, enc in decoded_seq]
+ header = email.header.make_header(decoded_seq)
+ # Be Python 2 & 3 compatible
+ return unicode(header) if sys.version_info < (3,) else str(header)
+
_recipient_re = re.compile(r'^rfc822; ?(.+)$', re.I | re.U)
_recipient_re = re.compile(r'^rfc822; ?(.+)$', re.I | re.U)
def findAddressInBounce(bounce):
"""Finds the faulty email address in a bounced email.
def findAddressInBounce(bounce):
"""Finds the faulty email address in a bounced email.
See RFC 1894 for more information.
Returns None or the email address."""
See RFC 1894 for more information.
Returns None or the email address."""
# Check that it is a bounce - a few MTA fail to set this correctly :(
if bounce.get_content_type() != 'multipart/report':
# Check that it is a bounce - a few MTA fail to set this correctly :(
if bounce.get_content_type() != 'multipart/report':
- print '! Not a valid bounce (expected multipart/report, found %s).' % bounce.get_content_type()
+ print('! Not a valid bounce (expected multipart/report, found %s).' % bounce.get_content_type())
return None
# Extract the second component of the multipart/report
num_payloads = len(bounce.get_payload())
if num_payloads < 2:
return None
# Extract the second component of the multipart/report
num_payloads = len(bounce.get_payload())
if num_payloads < 2:
- print '! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads
+ print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
return None
status = bounce.get_payload(1)
if status.get_content_type() != 'message/delivery-status':
return None
status = bounce.get_payload(1)
if status.get_content_type() != 'message/delivery-status':
- print '! Not a valid bounce (expected message/delivery-status, found %s).' % bounce.get_content_type()
+ print('! Not a valid bounce (expected message/delivery-status, found %s).' % bounce.get_content_type())
return None
# The per-message-fields don't matter here, get only the per-recipient-fields
num_payloads = len(status.get_payload())
if num_payloads < 2:
return None
# The per-message-fields don't matter here, get only the per-recipient-fields
num_payloads = len(status.get_payload())
if num_payloads < 2:
- print '! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads
+ print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
return None
content = status.get_payload(1)
if content.get_content_type() != 'text/plain':
return None
content = status.get_payload(1)
if content.get_content_type() != 'text/plain':
- print '! Not a valid bounce (expected text/plain, found %s).' % bounce.get_content_type
+ print('! Not a valid bounce (expected text/plain, found %s).' % bounce.get_content_type)
return None
# Extract the faulty email address
recipient_match = _recipient_re.search(content['Final-Recipient'])
if recipient_match is None:
return None
# Extract the faulty email address
recipient_match = _recipient_re.search(content['Final-Recipient'])
if recipient_match is None:
- print '! Missing final recipient.'
+ print('! Missing final recipient.')
return None
email = recipient_match.group(1)
# Check the action field
if content['Action'] != 'failed':
return None
email = recipient_match.group(1)
# Check the action field
if content['Action'] != 'failed':
- print '! Not a failed action (%s).' % content['Action']
+ print('! Not a failed action (%s).' % content['Action'])
return None
# Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
# Otherwise, the first sub-field should indicate a permanent failure
postfix_error = content['Diagnostic-Code'] is not None \
and content['Diagnostic-Code'].startswith('X-Postfix')
if not postfix_error and int(content['Status'][:1]) != 5:
return None
# Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
# Otherwise, the first sub-field should indicate a permanent failure
postfix_error = content['Diagnostic-Code'] is not None \
and content['Diagnostic-Code'].startswith('X-Postfix')
if not postfix_error and int(content['Status'][:1]) != 5:
- print '! Not a permanent failure status (%s).' % content['Status']
+ print('! Not a permanent failure status (%s).' % content['Status'])
def initialize(self, mbox_file):
self.seen = 0
def initialize(self, mbox_file):
self.seen = 0
self.emails = []
self.mbox_file = '%s.bounced' % mbox_file
self.mbox = mailbox.mbox(self.mbox_file)
self.emails = []
self.mbox_file = '%s.bounced' % mbox_file
self.mbox = mailbox.mbox(self.mbox_file)
self.seen += 1
# Special case: ignore mailman notifications for the mailing-list
# on which the NL is forwarded
self.seen += 1
# Special case: ignore mailman notifications for the mailing-list
# on which the NL is forwarded
- if message['From'] == 'polytechnique.org_newsletter-externes-bounces@listes.polytechnique.org':
- print '! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.'
+ if message['From'] == 'newsletter-externes-bounces@polytechnique.org':
+ print('! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.')
self.seen -= 1
return True
# Additionnal checks, just to be sure
elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
or message['Subject'] != 'Undelivered Mail Returned to Sender':
self.seen -= 1
return True
# Additionnal checks, just to be sure
elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
or message['Subject'] != 'Undelivered Mail Returned to Sender':
- print '! Not an usual direct bounce (From="%s", Subject="%s").' % (message['From'], message['Subject'])
+ print('! Not an usual direct bounce (From="%s", Subject="%s").' % (message['From'], message['Subject']))
else:
email = findAddressInBounce(message)
if email is not None:
else:
email = findAddressInBounce(message)
if email is not None:
self.mbox.add(message)
return True
else:
self.mbox.add(message)
return True
else:
- print '! No email found in direct bounce, this is really bad.'
+ print('! => No email found in direct bounce, this is really bad.')
+ self.bad_problems += 1
return False
def finalize(self):
return False
def finalize(self):
- print 'Found %d messages with no X-Spam-Flag header.' % self.seen
- print 'Found %d of them that are confirmed bounces.' % len(self.mbox)
- if self.seen != len(self.mbox):
- print ' /!\ These numbers shoud be equal! We have a problem! /!\\'
- print 'They were saved in %s.' % self.mbox_file
- print ''
- print 'Here is the list of email adresses for these bounces:'
- print ''
+ print('Found %d messages with no X-Spam-Flag header.' % self.seen)
+ print('Found %d of them that are confirmed bounces.' % len(self.mbox))
+ print('They were saved in %s.' % self.mbox_file)
+ if self.bad_problems:
+ print('Found %d of them that are invalid.' % self.bad_problems)
+ if self.seen != len(self.mbox) + self.bad_problems:
+ print(' /!\ These numbers shoud be equal! We have a problem! /!\\')
+ print('')
+ print('Here is the list of email adresses for these bounces:')
+ print('')
for email in self.emails:
for email in self.emails:
+ print(email)
+ print('')
self.mbox.close()
#----------------------------------------------------------------------------#
self.mbox.close()
#----------------------------------------------------------------------------#
return False
def finalize(self):
return False
def finalize(self):
- print 'Found %d spams. This is reliable.' % len(self.mbox)
- print 'They were saved in %s.' % self.mbox_file
- print 'You might check the contents of this mbox.'
+ print('Found %d spams. This is reliable.' % len(self.mbox))
+ print('They were saved in %s.' % self.mbox_file)
+ print('You might check the contents of this mbox.')
self.mbox.close()
#----------------------------------------------------------------------------#
self.mbox.close()
#----------------------------------------------------------------------------#
return False
def finalize(self):
return False
def finalize(self):
- print 'Found %d unclassified messages. Most of them should be spams.' % len(self.mbox)
- print 'They were saved in %s.' % self.mbox_file
- print 'You must check the contents of this mbox and feed the antispam.'
+ print('Found %d unclassified messages. Most of them should be spams.' % len(self.mbox))
+ print('They were saved in %s.' % self.mbox_file)
+ print('You must check the contents of this mbox and feed the antispam.')
self.mbox.close()
#----------------------------------------------------------------------------#
self.mbox.close()
#----------------------------------------------------------------------------#
def finalize(self):
if self.seen > 0:
def finalize(self):
if self.seen > 0:
- print 'Encountered %d messages that were neither spam, nor unsure, nor non-spams.' % self.seen
- print 'Please investigate.'
+ print('Encountered %d messages that were neither spam, nor unsure, nor non-spams.' % self.seen)
+ print('Please investigate.')
- print 'All messages were either spam, or unsure, or non-spams. Good.'
+ print('All messages were either spam, or unsure, or non-spams. Good.')
#----------------------------------------------------------------------------#
#----------------------------------------------------------------------------#
self.mbox.clear()
subject_re = [
r'^Absen(t|ce)',
self.mbox.clear()
subject_re = [
r'^Absen(t|ce)',
- r'(est|is) absent',
- r'^Out of (the )?office',
- r'is out of (the )?office',
- r'I am out of town',
+ r'^(AUTO: )?Out of (the )?office',
+ r'^Automatic reply: ',
r'automatique d\'absence',
r'automatique d\'absence',
- r'Notification d\'absence'
- u'RĂ©ponse automatique :', #unicode!
+ r'(est|is) absent',
+ r'I am out of town',
+ r'I am currently away',
+ r'is out of (the )?office',
+ r'Notification d\'absence',
+ r'R.{1,2}ponse automatique( :)?', # There may be encoding error of e acute
- self.subject_regexes = map(re.compile, subject_re, [re.I | re.U] * len(subject_re))
+ self.subject_regexes = [re.compile(sre, re.I | re.U) for sre in subject_re]
def process(self, message):
subject = findSubject(message)
def process(self, message):
subject = findSubject(message)
return False
def finalize(self):
return False
def finalize(self):
- print 'Found %d "out of office". This is generally reliable.' % len(self.mbox)
- print 'They were saved in %s.' % self.mbox_file
- print 'You may check the contents of this mbox.'
+ print('Found %d "out of office". This is generally reliable.' % len(self.mbox))
+ print('They were saved in %s.' % self.mbox_file)
+ print('You may check the contents of this mbox.')
self.mbox.close()
#----------------------------------------------------------------------------#
self.mbox.close()
#----------------------------------------------------------------------------#
self.mbox_file = '%s.dsn' % mbox_file
self.mbox = mailbox.mbox(self.mbox_file)
self.mbox.clear()
self.mbox_file = '%s.dsn' % mbox_file
self.mbox = mailbox.mbox(self.mbox_file)
self.mbox.clear()
+ self.mbox_temp_file = '%s.dsn-temp' % mbox_file
+ self.mbox_temp = mailbox.mbox(self.mbox_temp_file)
+ self.mbox_temp.clear()
def process(self, message):
if message.get_content_type() == 'multipart/report':
def process(self, message):
if message.get_content_type() == 'multipart/report':
self.emails.append(email)
self.mbox.add(message)
return True
self.emails.append(email)
self.mbox.add(message)
return True
+ else:
+ print("! => Moved to temporary DSN mailbox")
+ self.mbox_temp.add(message)
+ return True
return False
def finalize(self):
return False
def finalize(self):
- print 'Found %d delivery status notifications. This is generally reliable.' % len(self.mbox)
- print 'They were saved in %s.' % self.mbox_file
- print ''
- print 'Here is the list of email adresses for these bounces:'
- print ''
+ print('Found %d delivery status notifications. This is generally reliable.' % len(self.mbox))
+ print('They were saved in %s.' % self.mbox_file)
+ print('')
+ print('Here is the list of email adresses for these bounces:')
+ print('')
for email in self.emails:
for email in self.emails:
+ print(email)
+ print('')
+ print('Found %d temporary and invalid delivery status notifications.' % len(self.mbox_temp))
+ print('They were saved in %s.' % self.mbox_temp_file)
+ self.mbox_temp.close()
#----------------------------------------------------------------------------#
#----------------------------------------------------------------------------#
def finalize(self):
if len(self.mbox) > 0:
def finalize(self):
if len(self.mbox) > 0:
- print '%d messages reached the catchall.' % len(self.mbox)
- print 'They were saved in %s.' % self.mbox_file
- print 'You must process the contents of this mbox manually.'
+ print('%d messages reached the catchall.' % len(self.mbox))
+ print('They were saved in %s.' % self.mbox_file)
+ print('You must process the contents of this mbox manually.')
- print 'No messages reached the catchall. Nice.'
+ print('No messages reached the catchall. Nice.')
self.mbox.close()
os.unlink(self.mbox_file)
self.mbox.close()
os.unlink(self.mbox_file)
if __name__ == '__main__':
if len(sys.argv) != 2:
if __name__ == '__main__':
if len(sys.argv) != 2:
- print 'Usage: %s mbox' % sys.argv[0]
+ print('Usage: %s mbox' % sys.argv[0])
sys.exit(1)
if not os.path.exists(sys.argv[1]):
sys.exit(1)
if not os.path.exists(sys.argv[1]):
- print 'No such file: %s' % sys.argv[1]
+ print('No such file: %s' % sys.argv[1])
sys.exit(1)
processor = MboxProcessor(sys.argv[1])
sys.exit(1)
processor = MboxProcessor(sys.argv[1])