NL bounces: add new failure hints
[platal.git] / bin / newsletter.bounces.processor.py
index 8693f65..8933349 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #***************************************************************************
-#*  Copyright (C) 2003-2013 Polytechnique.org                              *
+#*  Copyright (C) 2003-2014 Polytechnique.org                              *
 #*  http://opensource.polytechnique.org/                                   *
 #*                                                                         *
 #*  This program is free software; you can redistribute it and/or modify   *
@@ -165,16 +165,24 @@ def findAddressInBounce(bounce):
         print('! Not a valid bounce (expected text/plain, found %s).' % content.get_content_type())
         return None
     # Extract the faulty email address
-    recipient_match = _recipient_re.search(content['Final-Recipient'])
+    # Some MTAs don't set Final-Recipient but use Remote-Recipient instead
+    if 'Final-Recipient' in content:
+        final_recipient = content['Final-Recipient']
+    elif 'Remote-Recipient' in content:
+        final_recipient = content['Remote-Recipient']
+    else:
+        print('! Not a valid bounce (no Final-Recipient).')
+        return None
+    recipient_match = _recipient_re.search(final_recipient)
     if recipient_match is None:
         # Be nice, test another regexp
-        recipient_match = _recipient_re2.search(content['Final-Recipient'])
+        recipient_match = _recipient_re2.search(final_recipient)
         if recipient_match is None:
             print('! Missing final recipient.')
             return None
     email = recipient_match.group(1)
     # Check the action field
-    if content['Action'].lower() != 'failed':
+    if content['Action'].lower().strip() != 'failed':
         print('! Not a failed action (%s).' % content['Action'])
         return None
 
@@ -192,6 +200,8 @@ def findAddressInBounce(bounce):
     failure_hints = [
         "insufficient system storage",
         "mailbox full",
+        "mailbox recipient does not have a mailbox database",
+        "over quota",
         "requested action aborted: local error in processing",
         "user unknown",
         ]
@@ -301,8 +311,9 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     """
     # real_bounce is the full email and bounce only the text/plain part, if email have several MIME parts
     real_bounce = real_bounce or bounce
-    if 'MAILER-DAEMON@' not in real_bounce['From'].upper():
-        print('! Not a valid plain bounce (expected from MAILER-DAEMON, found %s).' % bounce['From'])
+    lower_from = real_bounce['From'].lower()
+    if 'mailer-daemon@' not in lower_from and 'postmaster' not in lower_from:
+        print('! Not a valid plain bounce (expected from MAILER-DAEMON or postmaster, found %s).' % bounce['From'])
         return None
     if bounce.get_content_type() != 'text/plain':
         print('! Not a valid plain bounce (expected text/plain, found %s).' % bounce.get_content_type())
@@ -311,6 +322,7 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     known_subjects = [
         "delivery status notification (failure)",
         "failure notice",
+        "mail delivery failure",
         "returned mail: see transcript for details",
         "undeliverable message",
         "undelivered mail returned to sender",
@@ -334,11 +346,13 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     #   I'm afraid I wasn't able to deliver your message to the following addresses.
     #   The following message to <email@example.com> was undeliverable.
     non_delivery_hints = [
+        "could not be delivered to",
         "Delivery to the following recipient failed permanently",
         "I'm sorry to have to inform you that your message could not",
         "I wasn't able to deliver your message",
+        "try to send your message again at a later time",
+        "User unknown in local recipient table",
         "> was undeliverable.",
-        "could not be delivered to",
         "we were unable to deliver your message",
     ]
     if not any(any(hint in line for hint in non_delivery_hints) for line in lines):
@@ -351,7 +365,10 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     #   5.1.0 - Unknown address error 550-'email@example.com... No such user'
     permanent_error_hints = [
         "Delivery to the following recipient failed permanently",
+        "failed due to an unavailable mailbox",
+        "following addresses had permanent fatal errors",
         "I'm sorry to have to inform you that your message could not",
+        "The email account that you tried to reach does not exist",
         "This is a permanent error",
         "Unknown address error",
         "unreachable for too long",
@@ -366,7 +383,7 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     for line in lines:
         match = re.match(r'.*?<([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)>', line)
         if match is None:
-            match = re.match(r'^\s*([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)\s*$', line)
+            match = re.match(r'^\s*"?([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)"?\s*$', line)
         if match is not None:
             email = match.group(1)
             if email.endswith('@polytechnique.org'):
@@ -403,7 +420,7 @@ class DirectBouncesFilter(MboxFilter):
             # Additionnal checks, just to be sure
             elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
             or message['Subject'] != 'Undelivered Mail Returned to Sender':
-                print('! Not an usual direct bounce (From="%s", Subject="%s").' % (message['From'], message['Subject']))
+                print('! Not an usual direct bounce (From=%r, Subject=%r).' % (message['From'], message['Subject']))
             else:
                 email = findAddressInBounce(message)
                 if email is not None:
@@ -596,7 +613,10 @@ class DeliveryStatusNotificationFilter(MboxFilter):
             return True
 
         # Detect ill-formatted reports, sent as plain text email
-        if 'MAILER-DAEMON@' in message['From'].upper() and report_message.get_content_type() == 'text/plain':
+        if report_message.get_content_type() == 'text/plain' and (
+            'MAILER-DAEMON@' in message.get('From', '').upper() or
+            'mail delivery failure' == message.get('Subject', '').lower()
+            ):
             email = findAddressInPlainBounce(report_message)
             if email is not None:
                 self.emails.append(email)