NL bounces: add new failure hints
[platal.git] / bin / newsletter.bounces.processor.py
index a8f138c..8933349 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #***************************************************************************
-#*  Copyright (C) 2003-2013 Polytechnique.org                              *
+#*  Copyright (C) 2003-2014 Polytechnique.org                              *
 #*  http://opensource.polytechnique.org/                                   *
 #*                                                                         *
 #*  This program is free software; you can redistribute it and/or modify   *
@@ -165,16 +165,24 @@ def findAddressInBounce(bounce):
         print('! Not a valid bounce (expected text/plain, found %s).' % content.get_content_type())
         return None
     # Extract the faulty email address
-    recipient_match = _recipient_re.search(content['Final-Recipient'])
+    # Some MTA don't set Final-Recipient but use Remote-Recipient instead
+    if 'Final-Recipient' in content:
+        final_recipient = content['Final-Recipient']
+    elif 'Remote-Recipient' in content:
+        final_recipient = content['Remote-Recipient']
+    else:
+        print('! Not a valid bounce (no Final-Recipient).')
+        return None
+    recipient_match = _recipient_re.search(final_recipient)
     if recipient_match is None:
         # Be nice, test another regexp
-        recipient_match = _recipient_re2.search(content['Final-Recipient'])
+        recipient_match = _recipient_re2.search(final_recipient)
         if recipient_match is None:
             print('! Missing final recipient.')
             return None
     email = recipient_match.group(1)
     # Check the action field
-    if content['Action'].lower() != 'failed':
+    if content['Action'].lower().strip() != 'failed':
         print('! Not a failed action (%s).' % content['Action'])
         return None
 
@@ -183,7 +191,7 @@ def findAddressInBounce(bounce):
 
     # Permanent failure state
     if int(status[:1]) == 5:
-         return email
+        return email
 
     # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
     if diag_code is not None and diag_code.startswith('X-Postfix'):
@@ -192,6 +200,8 @@ def findAddressInBounce(bounce):
     failure_hints = [
         "insufficient system storage",
         "mailbox full",
+        "mailbox recipient does not have a mailbox database",
+        "over quota",
         "requested action aborted: local error in processing",
         "user unknown",
         ]
@@ -208,13 +218,102 @@ def findAddressInBounce(bounce):
     return None
 
 
+def findAddressInWeirdDeliveryStatus(message):
+    """Finds the faulty email address in the delivery-status part of an email

+    Unlike findAddressInBounce, the status does NOT follow RFC 1894, so
+    try to get the data nevertheless...
+    message is the MIME part expected to be of type message/delivery-status.
+    Returns None or the email address.
+    """
+    if message.get_content_type() != 'message/delivery-status':
+        print('! Not a valid weird bounce (expected message/delivery-status, found %s).' % message.get_content_type())
+        return None
+    # The per-message-fields don't matter here, get only the per-recipient-fields
+    num_payloads = len(message.get_payload())
+    if num_payloads < 2:
+        print('! Not a valid weird bounce (expected at least 2 parts, found %d).' % num_payloads)
+        return None
+    content = message.get_payload(1)
+    # The content may be missing, but interesting headers still present in the first payload...
+    if not content:
+        content = message.get_payload(0)
+        if 'Action' not in content:
+            print('! Not a valid weird bounce (unable to find content).')
+            return None
+    elif content.get_content_type() != 'text/plain':
+        print('! Not a valid weird bounce (expected text/plain, found %s).' % content.get_content_type())
+        return None

+    # Extract the faulty email address
+    if 'Final-Recipient' in content:
+        recipient = content['Final-Recipient']
+        # Be nice, test another regexp when the first one does not match
+        recipient_match = _recipient_re.search(recipient) or _recipient_re2.search(recipient)
+        if recipient_match is None:
+            print('! Unknown final recipient in weird bounce.')
+            return None
+    elif 'Original-Recipient' in content:
+        recipient = content['Original-Recipient']
+        # Be nice, test other regexps, down to a value starting with <user@domain>
+        recipient_match = (_recipient_re.search(recipient) or
+                           _recipient_re2.search(recipient) or
+                           re.match(r'<([^>]+@[^@>]+)>', recipient))
+        if recipient_match is None:
+            print('! Unknown original recipient in weird bounce.')
+            return None
+    else:
+        print('! Missing recipient in weird bounce.')
+        return None
+    email = recipient_match.group(1)

+    # Check the action field: it is None when the per-recipient part lacks it,
+    # and surrounding spaces are ignored like findAddressInBounce does
+    action = content['Action']
+    if action is None or action.lower().strip() != 'failed':
+        print('! Not a failed action (%s).' % action)
+        return None

+    status = content['Status']
+    diag_code = content['Diagnostic-Code']

+    # Permanent failure state (5.x.y); the status may hold free text (see the
+    # quota check below), so compare its first character instead of using int()
+    if status and status.strip().startswith('5'):
+        return email

+    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
+    if diag_code is not None and diag_code.startswith('X-Postfix'):
+        return email

+    failure_hints = [
+        "insufficient system storage",
+        "mailbox full",
+        "requested action aborted: local error in processing",
+        "sender address rejected",
+        "user unknown",
+        ]
+    if status and 'quota' in status.lower():
+        return email
+    if diag_code is not None:
+        ldiag_code = diag_code.lower()
+        if any(hint in ldiag_code for hint in failure_hints):
+            return email

+    print('! Not a permanent failure status (%s).' % status)
+    if diag_code is not None:
+        print('! Diagnostic code was: %s' % diag_code)
+    return None
+
+
 def findAddressInPlainBounce(bounce, real_bounce=None):
     """Finds the faulty email address in a non-RFC-1894 bounced email
     """
     # real_bounce is the full email and bounce only the text/plain part, if email have several MIME parts
     real_bounce = real_bounce or bounce
-    if 'MAILER-DAEMON@' not in real_bounce['From'].upper():
-        print('! Not a valid plain bounce (expected from MAILER-DAEMON, found %s).' % bounce['From'])
+    lower_from = real_bounce['From'].lower()
+    if 'mailer-daemon@' not in lower_from and 'postmaster' not in lower_from:
+        print('! Not a valid plain bounce (expected from MAILER-DAEMON or postmaster, found %s).' % bounce['From'])
         return None
     if bounce.get_content_type() != 'text/plain':
         print('! Not a valid plain bounce (expected text/plain, found %s).' % bounce.get_content_type())
@@ -223,6 +322,7 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     known_subjects = [
         "delivery status notification (failure)",
         "failure notice",
+        "mail delivery failure",
         "returned mail: see transcript for details",
         "undeliverable message",
         "undelivered mail returned to sender",
@@ -246,11 +346,13 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     #   I'm afraid I wasn't able to deliver your message to the following addresses.
     #   The following message to <email@example.com> was undeliverable.
     non_delivery_hints = [
+        "could not be delivered to",
         "Delivery to the following recipient failed permanently",
         "I'm sorry to have to inform you that your message could not",
         "I wasn't able to deliver your message",
+        "try to send your message again at a later time",
+        "User unknown in local recipient table",
         "> was undeliverable.",
-        "could not be delivered to",
         "we were unable to deliver your message",
     ]
     if not any(any(hint in line for hint in non_delivery_hints) for line in lines):
@@ -263,7 +365,10 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     #   5.1.0 - Unknown address error 550-'email@example.com... No such user'
     permanent_error_hints = [
         "Delivery to the following recipient failed permanently",
+        "failed due to an unavailable mailbox",
+        "following addresses had permanent fatal errors",
         "I'm sorry to have to inform you that your message could not",
+        "The email account that you tried to reach does not exist",
         "This is a permanent error",
         "Unknown address error",
         "unreachable for too long",
@@ -278,7 +383,7 @@ def findAddressInPlainBounce(bounce, real_bounce=None):
     for line in lines:
         match = re.match(r'.*?<([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)>', line)
         if match is None:
-            match = re.match(r'^\s*([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)\s*$', line)
+            match = re.match(r'^\s*"?([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)"?\s*$', line)
         if match is not None:
             email = match.group(1)
             if email.endswith('@polytechnique.org'):
@@ -308,14 +413,14 @@ class DirectBouncesFilter(MboxFilter):
             self.seen += 1
             # Special case: ignore mailman notifications for the mailing-list
             # on which the NL is forwarded
-            if message['From'] == 'newsletter-externes-bounces@polytechnique.org':
+            if message['From'] == 'newsletter-externes-owner@polytechnique.org':
                 print('! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.')
                 self.seen -= 1
                 return True
             # Additionnal checks, just to be sure
             elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
             or message['Subject'] != 'Undelivered Mail Returned to Sender':
-                print('! Not an usual direct bounce (From="%s", Subject="%s").' % (message['From'], message['Subject']))
+                print('! Not an usual direct bounce (From=%r, Subject=%r).' % (message['From'], message['Subject']))
             else:
                 email = findAddressInBounce(message)
                 if email is not None:
@@ -481,6 +586,19 @@ class DeliveryStatusNotificationFilter(MboxFilter):
         report_message = message
         # Find real report inside attachment
         if message.get_content_type() == 'multipart/mixed':
+            # Some MTA confuse multipart/mixed with multipart/report
+            # Let's try to find a report!
+            if len(message.get_payload()) >= 2:
+                try_status = message.get_payload(1)
+                if try_status.get_content_type() == 'message/delivery-status':
+                    # The world would be a nice place if delivery-status were
+                    # formatted as expected...
+                    email = findAddressInWeirdDeliveryStatus(try_status)
+                    if email is not None:
+                        self.emails.append(email)
+                        self.mbox.add(message)
+                        return True
+                try_status = None
             report_message = message.get_payload(0)
 
         # Process report if its type is correct
@@ -495,7 +613,10 @@ class DeliveryStatusNotificationFilter(MboxFilter):
             return True
 
         # Detect ill-formatted reports, sent as plain text email
-        if 'MAILER-DAEMON@' in message['From'].upper() and report_message.get_content_type() == 'text/plain':
+        if report_message.get_content_type() == 'text/plain' and (
+            'MAILER-DAEMON@' in message.get('From', '').upper() or
+            'mail delivery failure' == message.get('Subject', '').lower()
+            ):
             email = findAddressInPlainBounce(report_message)
             if email is not None:
                 self.emails.append(email)