NL bounces: add new failure hints
[platal.git] / bin / newsletter.bounces.processor.py
CommitLineData
6208fd26 1#!/usr/bin/env python
58e64caf
AA
2# -*- coding: utf-8 -*-
3#***************************************************************************
c441aabe 4#* Copyright (C) 2003-2014 Polytechnique.org *
58e64caf
AA
5#* http://opensource.polytechnique.org/ *
6#* *
7#* This program is free software; you can redistribute it and/or modify *
8#* it under the terms of the GNU General Public License as published by *
9#* the Free Software Foundation; either version 2 of the License, or *
10#* (at your option) any later version. *
11#* *
12#* This program is distributed in the hope that it will be useful, *
13#* but WITHOUT ANY WARRANTY; without even the implied warranty of *
14#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15#* GNU General Public License for more details. *
16#* *
17#* You should have received a copy of the GNU General Public License *
18#* along with this program; if not, write to the Free Software *
19#* Foundation, Inc., *
20#* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
21#***************************************************************************
22
58e64caf
AA
23"""
24Process as automatically as possible bounces from the newsletter
25
26The goal is to extract the email addresses that actually bounced.
27Bounces conforming to RFC 1894 will be automatically processed.
28
29This script uses the X-Spam-Flag header to remove spam and heuristics
30to detect out-of-office auto-replies and delivery status notifications.
31
32All emails are saved in different mailboxes to make human post-processing easier.
33"""
34
6208fd26
NI
35import email
36import mailbox
37import os
38import re
39import sys
40import time
58e64caf
AA
41
42#----------------------------------------------------------------------------#
43
class MboxProcessor:
    """Applies a series of filters to each message in a mbox.

    Filters are tried in the order of self.filters. The first filter whose
    process() returns a true value claims the message and the remaining
    filters are not called for it.
    """

    # time.clock() was removed in Python 3.8. Use perf_counter() where it
    # exists and fall back to time.time() elsewhere (Python 2).
    _timer = staticmethod(getattr(time, 'perf_counter', time.time))

    def __init__(self, mbox):
        """Wraps the mbox file at path `mbox` and builds the filter chain."""
        self.mbox_file = mbox
        self.mbox = mailbox.mbox(self.mbox_file)
        self.filters = [
            DirectBouncesFilter(),
            SpamFilter(),
            UnsureFilter(),
            CheckNonSpamFilter(),
            OutOfOfficeFilter(),
            DeliveryStatusNotificationFilter(),
            CatchAllFilter()
        ]

    def initialize_filters(self):
        """Initializes every filter and starts the processing timer."""
        for f in self.filters:
            f.initialize(self.mbox_file)
        self.start_time = self._timer()

    def apply_filters(self, message):
        """Runs `message` through the chain.

        Returns True when a filter claimed the message, False otherwise.
        """
        return any(f.process(message) for f in self.filters)

    def finalize_filters(self):
        """Prints the global summary, then lets every filter print its own."""
        duration = self._timer() - self.start_time
        separator = '-' * 80
        print(separator)
        print('Processed the %d messages of %s in %.2fs' % (len(self.mbox), self.mbox_file, duration))
        print(separator)
        for f in self.filters:
            f.finalize()
            print(separator)

    def run(self):
        """Locks the mbox, processes all its messages and prints the report.

        The mbox is closed in every case, including when the lock cannot be
        acquired or when a filter raises.
        """
        try:
            self.mbox.lock()
            try:
                self.initialize_filters()
                for message in self.mbox:
                    self.apply_filters(message)
                self.finalize_filters()
            finally:
                self.mbox.unlock()
        finally:
            self.mbox.close()
86
87#----------------------------------------------------------------------------#
88
class MboxFilter:
    """Interface implemented by every filter of the processing chain.

    The default implementations do nothing and return None.
    """

    def initialize(self, mbox_file):
        """Hook run by the processor once, before the first message.

        Descriptors needed while processing should be opened here."""
        return None

    def process(self, message):
        """Hook run by the processor for each message reaching this filter.

        A true return value stops the chain for this message; a false one
        hands the message over to the next filter."""
        return None

    def finalize(self):
        """Hook run by the processor once, after the last message.

        Results should be displayed and descriptors closed here."""
        return None
109
110#----------------------------------------------------------------------------#
111
def findSubject(message):
    """Returns the subject of an email.Message as an unicode string.

    Returns None when the message has no Subject header.

    A subject whose declared charset is unknown, or whose bytes cannot be
    decoded with that charset, does not raise: it is decoded on a best-effort
    basis as UTF-8, undecodable bytes being replaced.
    """
    # Import the submodule explicitly instead of relying on another import
    # having loaded email.header as a side effect.
    import email.header

    raw_subject = message['Subject']
    if raw_subject is None:
        return None

    # decode_header returns a list of (decoded_string, charset) pairs
    decoded_seq = [(subj, enc or 'utf-8')
                   for subj, enc in email.header.decode_header(raw_subject)]
    try:
        header = email.header.make_header(decoded_seq)
        # Be Python 2 & 3 compatible
        return unicode(header) if sys.version_info < (3,) else str(header)
    except (LookupError, UnicodeError):
        # Bogus charset name or bytes not matching the declared charset:
        # a malformed subject must not abort the whole run.
        chunks = [subj.decode('utf-8', 'replace') if isinstance(subj, bytes) else subj
                  for subj, _ in decoded_seq]
        return u''.join(chunks)
123
58e64caf
AA
124
_recipient_re = re.compile(r'^rfc822; ?(.+)$', re.I | re.U)
# Some MTA set the Final-Recipient with "LOCAL;" instead of "rfc822;"
_recipient_re2 = re.compile(r'^local; ?(.+)$', re.I | re.U)

# Diagnostic-Code fragments (lowercase) treated as a definitive failure by
# findAddressInBounce, even when the status is not a 5.x.x one.
_bounce_failure_hints = (
    "insufficient system storage",
    "mailbox full",
    "mailbox recipient does not have a mailbox database",
    "over quota",
    "requested action aborted: local error in processing",
    "user unknown",
)

# Same, for findAddressInWeirdDeliveryStatus. The two lists are deliberately
# kept distinct.
_weird_failure_hints = (
    "insufficient system storage",
    "mailbox full",
    "requested action aborted: local error in processing",
    "sender address rejected",
    "user unknown",
)


def _matchRecipient(value):
    """Returns the address of a "rfc822; addr" or "local; addr" field, or None."""
    for regex in (_recipient_re, _recipient_re2):
        match = regex.search(value)
        if match is not None:
            return match.group(1)
    return None


def _countParts(message):
    """Returns the number of sub-parts of message, 0 if it is not multipart.

    A message may declare a multipart type while carrying a plain string
    payload; taking len() of that string would give a bogus part count.
    """
    return len(message.get_payload()) if message.is_multipart() else 0


def _addressIfFailed(address, content, failure_hints):
    """Returns address if the per-recipient fields describe a real failure.

    content is the per-recipient part of a delivery status. Its Action must be
    "failed", and either its Status is a 5.x.x one, or its Status/Diagnostic-Code
    contain one of the known hints. Missing fields never raise.
    Returns None otherwise.
    """
    # Check the action field
    action = content['Action']
    if action is None or action.lower().strip() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']

    # Permanent failure state
    if status and status.strip()[:1] == '5':
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    if status and 'quota' in status.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None


def findAddressInBounce(bounce):
    """Finds the faulty email address in a bounced email.

    See RFC 1894 for more information.
    Returns None or the email address."""

    # Check that it is a bounce - a few MTA fail to set this correctly :(
    if bounce.get_content_type() != 'multipart/report':
        print('! Not a valid bounce (expected multipart/report, found %s).' % bounce.get_content_type())
        return None
    # Extract the second component of the multipart/report
    num_payloads = _countParts(bounce)
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    status = bounce.get_payload(1)

    # If the second part is of type "message/rfc822" it is the undelivered message.
    # Let's try to understand the text part
    if status.get_content_type() == 'message/rfc822':
        text_bounce = bounce.get_payload(0)
        if text_bounce.get_content_type() == 'text/plain':
            return findAddressInPlainBounce(text_bounce, bounce)
        # If it's not a text message, let's continue to the next error message

    if status.get_content_type() != 'message/delivery-status':
        print('! Not a valid bounce (expected message/delivery-status, found %s).' % status.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields
    num_payloads = _countParts(status)
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = status.get_payload(1)
    if content.get_content_type() != 'text/plain':
        print('! Not a valid bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    # Some MTA don't set Final-Recipient but use Remote-Recipient instead
    if 'Final-Recipient' in content:
        final_recipient = content['Final-Recipient']
    elif 'Remote-Recipient' in content:
        final_recipient = content['Remote-Recipient']
    else:
        print('! Not a valid bounce (no Final-Recipient).')
        return None
    address = _matchRecipient(final_recipient)
    if address is None:
        print('! Missing final recipient.')
        return None

    return _addressIfFailed(address, content, _bounce_failure_hints)


def findAddressInWeirdDeliveryStatus(message):
    """Finds the faulty email address in the delivery-status part of an email

    Unlike findAddressInBounce, the status does NOT follow RFC 1894, so
    try to get the data nevertheless...
    Returns None or the email address.
    """
    if message.get_content_type() != 'message/delivery-status':
        print('! Not a valid weird bounce (expected message/delivery-status, found %s).' % message.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields
    num_payloads = _countParts(message)
    if num_payloads < 2:
        print('! Not a valid weird bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = message.get_payload(1)
    # The content may be missing, but interesting headers still present in the first payload...
    # (a Message without any header field is falsy)
    if not content:
        content = message.get_payload(0)
        if 'Action' not in content:
            print('! Not a valid weird bounce (unable to find content).')
            return None
    elif content.get_content_type() != 'text/plain':
        print('! Not a valid weird bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    if 'Final-Recipient' in content:
        address = _matchRecipient(content['Final-Recipient'])
        if address is None:
            print('! Unknown final recipient in weird bounce.')
            return None
    elif 'Original-Recipient' in content:
        recipient = content['Original-Recipient']
        address = _matchRecipient(recipient)
        if address is None:
            # Last chance: a bare "<user@domain>" value
            recipient_match = re.match(r'<([^>]+@[^@>]+)>', recipient)
            if recipient_match is None:
                print('! Unknown original recipient in weird bounce.')
                return None
            address = recipient_match.group(1)
    else:
        print('! Missing recipient in weird bounce.')
        return None

    return _addressIfFailed(address, content, _weird_failure_hints)
307
308
def findAddressInPlainBounce(bounce, real_bounce=None):
    """Finds the faulty email address in a non-RFC-1894 bounced email

    bounce is the text/plain part holding the notification text.
    real_bounce is the full email carrying the From and Subject headers; it
    defaults to bounce when the email is not split in several MIME parts.

    Only the 15 first lines of the text are examined. They must hold both a
    non-delivery hint and a permanent-error hint before an address is
    extracted from them.
    Returns None or the email address. A missing From or Subject header
    yields None.
    """
    # real_bounce is the full email and bounce only the text/plain part, if email have several MIME parts
    real_bounce = real_bounce or bounce
    sender = real_bounce['From']
    lower_from = (sender or '').lower()
    if 'mailer-daemon@' not in lower_from and 'postmaster' not in lower_from:
        print('! Not a valid plain bounce (expected from MAILER-DAEMON or postmaster, found %s).' % sender)
        return None
    if bounce.get_content_type() != 'text/plain':
        print('! Not a valid plain bounce (expected text/plain, found %s).' % bounce.get_content_type())
        return None
    subject = findSubject(real_bounce)
    if subject is None:
        print('! Not a valid plain bounce (no subject).')
        return None
    subject = subject.lower()
    known_subjects = [
        "delivery status notification (failure)",
        "failure notice",
        "mail delivery failure",
        "returned mail: see transcript for details",
        "undeliverable message",
        "undelivered mail returned to sender",
    ]
    if subject not in known_subjects and not subject.startswith('mail delivery failed'):
        print('! Not a valid plain bounce (unknown subject: %s).' % subject)
        return None

    # Read the 15 first lines of content and find some relevant keywords to validate the bounce
    lines = bounce.get_payload().splitlines()[:15]

    # ALTOSPAM is a service which requires to click on a link when sending an email
    # Don't consider the "554 5.0.0 Service unavailable" returned by ALTOSPAM as a failure
    # but put this message in the dsn-temp mailbox so that it can be processed by hand.
    if any("ALTOSPAM which is used by the person" in line for line in lines):
        print('! ALTOSPAM has been detected. Moving this message to the dsn-temp mbox')
        return None

    # Match:
    #   A message that you sent could not be delivered to one or more of its recipients.
    #   I'm afraid I wasn't able to deliver your message to the following addresses.
    #   The following message to <email@example.com> was undeliverable.
    non_delivery_hints = [
        "could not be delivered to",
        "Delivery to the following recipient failed permanently",
        "I'm sorry to have to inform you that your message could not",
        "I wasn't able to deliver your message",
        "try to send your message again at a later time",
        "User unknown in local recipient table",
        "> was undeliverable.",
        "we were unable to deliver your message",
    ]
    if not any(any(hint in line for hint in non_delivery_hints) for line in lines):
        print('! Unknown mailer-daemon message, unable to find an hint for non-delivery in message:')
        print('\n'.join(lines))
        return None

    # Match:
    #   This is a permanent error; I've given up. Sorry it didn't work out.
    #   5.1.0 - Unknown address error 550-'email@example.com... No such user'
    permanent_error_hints = [
        "Delivery to the following recipient failed permanently",
        "failed due to an unavailable mailbox",
        "following addresses had permanent fatal errors",
        "I'm sorry to have to inform you that your message could not",
        "The email account that you tried to reach does not exist",
        "This is a permanent error",
        "Unknown address error",
        "unreachable for too long",
        "550 Requested action not taken",
    ]
    if not any(any(hint in line for hint in permanent_error_hints) for line in lines):
        print('! Unknown mailer-daemon message, unable to find an hint for permanent error in message:')
        print('\n'.join(lines))
        return None

    # Retrieve the first occurrence of <email@example.com>, or of an address
    # standing alone on its line
    bracketed_re = re.compile(r'.*?<([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)>')
    alone_re = re.compile(r'^\s*"?([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)"?\s*$')
    for line in lines:
        match = bracketed_re.match(line) or alone_re.match(line)
        if match is None:
            continue
        address = match.group(1)
        if address.endswith('@polytechnique.org'):
            # First valid mail is something like <info_newsletter@polytechnique.org>, so we missed the real one
            break
        return address

    print('! Unknown mailer-daemon message, unable to find email address:')
    print('\n'.join(lines))
    return None
397
58e64caf
AA
398#----------------------------------------------------------------------------#
399
class DirectBouncesFilter(MboxFilter):
    """Handles the messages that carry no X-Spam-Flag header.

    Confirmed bounces are saved in <mbox>.bounced and claimed; the other
    messages are left to the next filters.
    """

    def initialize(self, mbox_file):
        """Resets the counters and empties the <mbox_file>.bounced mailbox."""
        self.seen = 0
        self.bad_problems = 0
        self.emails = []
        self.mbox_file = '%s.bounced' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Claims the message (returns True) when it is a confirmed direct
        bounce, or a mailman notification that is to be dropped."""
        if message['X-Spam-Flag'] is None:
            # During finalization, we will verify that all messages were processed
            self.seen += 1
            # Special case: ignore mailman notifications for the mailing-list
            # on which the NL is forwarded
            if message['From'] == 'newsletter-externes-owner@polytechnique.org':
                print('! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.')
                self.seen -= 1
                return True
            # Additional checks, just to be sure
            elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
                    or message['Subject'] != 'Undelivered Mail Returned to Sender':
                print('! Not an usual direct bounce (From=%r, Subject=%r).' % (message['From'], message['Subject']))
            else:
                address = findAddressInBounce(message)
                if address is not None:
                    self.emails.append(address)
                    self.mbox.add(message)
                    return True
                else:
                    print('! => No email found in direct bounce, this is really bad.')
                    self.bad_problems += 1
        return False

    def finalize(self):
        """Prints the statistics and the bounced addresses, then closes the mailbox."""
        print('Found %d messages with no X-Spam-Flag header.' % self.seen)
        print('Found %d of them that are confirmed bounces.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        if self.bad_problems:
            print('Found %d of them that are invalid.' % self.bad_problems)
        if self.seen != len(self.mbox) + self.bad_problems:
            # Backslashes are escaped explicitly: "\ " is not a valid escape sequence
            print(' /!\\ These numbers shoud be equal! We have a problem! /!\\')
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for address in self.emails:
            print(address)
        print('')
        self.mbox.close()
450
451#----------------------------------------------------------------------------#
452
class SpamFilter(MboxFilter):
    """Saves the messages flagged as spam by bogofilter into <mbox>.spam."""

    def initialize(self, mbox_file):
        """Opens the destination mailbox and empties it."""
        self.mbox_file = '%s.spam' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Claims the message when its X-Spam-Flag header says "Yes"."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Yes, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Prints the number of spams found and closes the mailbox."""
        summary = (
            'Found %d spams. This is reliable.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You might check the contents of this mbox.',
        )
        for line in summary:
            print(line)
        self.mbox.close()
472
473#----------------------------------------------------------------------------#
474
class UnsureFilter(MboxFilter):
    """Saves the messages bogofilter could not classify into <mbox>.unsure."""

    def initialize(self, mbox_file):
        """Opens the destination mailbox and empties it."""
        self.mbox_file = '%s.unsure' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Claims the message when its X-Spam-Flag header says "Unsure"."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Unsure, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Prints the number of unclassified messages and closes the mailbox."""
        summary = (
            'Found %d unclassified messages. Most of them should be spams.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You must check the contents of this mbox and feed the antispam.',
        )
        for line in summary:
            print(line)
        self.mbox.close()
494
495#----------------------------------------------------------------------------#
496
class CheckNonSpamFilter(MboxFilter):
    """Counts the messages reaching this step without a "No" spam verdict.

    This filter never claims a message; it only reports."""

    def initialize(self, mbox_file):
        """Resets the counter (mbox_file is not used)."""
        self.seen = 0

    def process(self, message):
        """Counts the message unless bogofilter flagged it "No"; always returns False."""
        flag = message['X-Spam-Flag']
        is_non_spam = flag is not None and flag.startswith('No, tests=bogofilter')
        if not is_non_spam:
            self.seen += 1
        return False

    def finalize(self):
        """Prints whether unexpected messages were encountered."""
        if self.seen <= 0:
            print('All messages were either spam, or unsure, or non-spams. Good.')
            return
        print('Encountered %d messages that were neither spam, nor unsure, nor non-spams.' % self.seen)
        print('Please investigate.')
58e64caf
AA
514
515#----------------------------------------------------------------------------#
516
class OutOfOfficeFilter(MboxFilter):
    """Saves the out-of-office auto-replies into <mbox>.ooo.

    Detection relies on the subject, and for "Re: " replies on the
    Delivered-To headers or on the first line of a plain text body.
    """

    def initialize(self, mbox_file):
        """Empties the destination mailbox and compiles the subject patterns."""
        self.mbox_file = '%s.ooo' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        subject_re = [
            r'^Absen(t|ce)',
            r'^(AUTO: )?Out of (the )?office',
            r'^Auto( ?): ',
            r'^AutoRe( ?):',
            r'^Automatic reply: ',
            r'automatique d\'absence',
            r'AutoReply',
            r'(est|is) absent',
            r'^En dehors du bureau',
            r'I am out of town',
            r'I am currently away',
            r'(am|is) out of (the )?office',
            r'Notification d\'absence',
            r'^Out of email reach',
            r'R.{1,2}ponse automatique( :)?',  # There may be encoding error of e acute
            r'^Respuesta de Estoy ausente:',
        ]
        self.subject_regexes = [re.compile(sre, re.I | re.U) for sre in subject_re]

    def process(self, message):
        """Claims the message (returns True) when it looks like an auto-reply."""
        subject = findSubject(message)
        if subject is None:
            return False

        if any(regex.search(subject) for regex in self.subject_regexes):
            self.mbox.add(message)
            return True

        # Some systems reply with "Re: ". Be smart here!
        if subject.startswith('Re: '):
            # Delivered-To: Autoresponder
            # (get_all returns its default when the header is absent)
            if 'Autoresponder' in message.get_all('Delivered-To', []):
                self.mbox.add(message)
                return True
            # Parse content if it is simple enough
            if message.get_content_type() == 'text/plain' and not message.is_multipart():
                body_lines = message.get_payload().splitlines()
                # An empty body has no first line
                firstline = body_lines[0].lower() if body_lines else ''
                if (' absent du bureau ' in firstline
                        or ' away from my office ' in firstline):
                    self.mbox.add(message)
                    return True

        return False

    def finalize(self):
        """Prints the number of auto-replies found and closes the mailbox."""
        print('Found %d "out of office". This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('You may check the contents of this mbox.')
        self.mbox.close()
570
571#----------------------------------------------------------------------------#
572
class DeliveryStatusNotificationFilter(MboxFilter):
    """Sorts the delivery status notifications.

    Notifications from which a faulty address could be extracted are saved
    in <mbox>.dsn; multipart/report messages for which it could not are
    saved in <mbox>.dsn-temp for manual processing.
    """

    def initialize(self, mbox_file):
        """Empties both destination mailboxes and resets the address list."""
        self.emails = []
        self.mbox_file = '%s.dsn' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        self.mbox_temp_file = '%s.dsn-temp' % mbox_file
        self.mbox_temp = mailbox.mbox(self.mbox_temp_file)
        self.mbox_temp.clear()

    def process(self, message):
        """Claims the message (returns True) when it was stored in one of the
        two mailboxes."""
        # Don't modify message variable for "self.mbox.add(message)"
        report_message = message
        # Find real report inside attachment
        if message.get_content_type() == 'multipart/mixed':
            # The declared type may lie: only trust a real list of parts
            parts = message.get_payload() if message.is_multipart() else []
            # Some MTA confuse multipart/mixed with multipart/report
            # Let's try to find a report!
            if len(parts) >= 2:
                try_status = parts[1]
                if try_status.get_content_type() == 'message/delivery-status':
                    # The world would be a nice place if delivery-status were
                    # formatted as expected...
                    address = findAddressInWeirdDeliveryStatus(try_status)
                    if address is not None:
                        self.emails.append(address)
                        self.mbox.add(message)
                        return True
            if parts:
                report_message = parts[0]

        # Process report if its type is correct
        if report_message.get_content_type() == 'multipart/report':
            address = findAddressInBounce(report_message)
            if address is not None:
                self.emails.append(address)
                self.mbox.add(message)
            else:
                print("! => Moved to temporary DSN mailbox")
                self.mbox_temp.add(message)
            return True

        # Detect ill-formatted reports, sent as plain text email
        if report_message.get_content_type() == 'text/plain' and (
                'MAILER-DAEMON@' in message.get('From', '').upper() or
                'mail delivery failure' == message.get('Subject', '').lower()):
            # The From and Subject headers belong to the full message, not to
            # the text part extracted from a multipart/mixed one
            address = findAddressInPlainBounce(report_message, message)
            if address is not None:
                self.emails.append(address)
                self.mbox.add(message)
                return True
        return False

    def finalize(self):
        """Prints the statistics and the faulty addresses, then closes both mailboxes."""
        print('Found %d delivery status notifications. This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for address in self.emails:
            print(address)
        print('')
        self.mbox.close()
        print('Found %d temporary and invalid delivery status notifications.' % len(self.mbox_temp))
        print('They were saved in %s.' % self.mbox_temp_file)
        self.mbox_temp.close()
58e64caf
AA
640
641#----------------------------------------------------------------------------#
642
class CatchAllFilter(MboxFilter):
    """Last filter of the chain: stores every remaining message in <mbox>.catchall."""

    def initialize(self, mbox_file):
        """Opens the destination mailbox and empties it."""
        self.mbox_file = '%s.catchall' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Stores the message and always claims it."""
        self.mbox.add(message)
        return True

    def finalize(self):
        """Prints the result, closes the mailbox and removes it when empty."""
        caught = len(self.mbox)
        if caught > 0:
            print('%d messages reached the catchall.' % caught)
            print('They were saved in %s.' % self.mbox_file)
            print('You must process the contents of this mbox manually.')
        else:
            print('No messages reached the catchall. Nice.')
        self.mbox.close()
        if caught <= 0:
            # Nothing was caught: do not leave an empty file behind
            os.unlink(self.mbox_file)
664
665#----------------------------------------------------------------------------#
666
if __name__ == '__main__':

    # Exactly one argument is expected: the path of the mbox to process
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print('Usage: %s mbox' % sys.argv[0])
        sys.exit(1)

    mbox_path = arguments[0]
    if not os.path.exists(mbox_path):
        print('No such file: %s' % mbox_path)
        sys.exit(1)

    MboxProcessor(mbox_path).run()