Revert "Force-normalize user emails for list unsubscribe."
[platal.git] / bin / newsletter.bounces.processor.py
CommitLineData
6208fd26 1#!/usr/bin/env python
58e64caf
AA
2# -*- coding: utf-8 -*-
3#***************************************************************************
c441aabe 4#* Copyright (C) 2003-2014 Polytechnique.org *
58e64caf
AA
5#* http://opensource.polytechnique.org/ *
6#* *
7#* This program is free software; you can redistribute it and/or modify *
8#* it under the terms of the GNU General Public License as published by *
9#* the Free Software Foundation; either version 2 of the License, or *
10#* (at your option) any later version. *
11#* *
12#* This program is distributed in the hope that it will be useful, *
13#* but WITHOUT ANY WARRANTY; without even the implied warranty of *
14#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15#* GNU General Public License for more details. *
16#* *
17#* You should have received a copy of the GNU General Public License *
18#* along with this program; if not, write to the Free Software *
19#* Foundation, Inc., *
20#* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
21#***************************************************************************
22
58e64caf
AA
23"""
24Process as automatically as possible bounces from the newsletter
25
26The goal is to extract the email addresses that actually bounced.
27Bounces conforming to RFC 1894 will be automatically processed.
28
29This script uses the X-Spam-Flag header to remove spam and heuristics
30to detect out-of-office auto-replies and delivery status notifications.
31
32All emails are saved in different mailboxes to make human post-processing easier.
33"""
34
6208fd26
NI
35import email
36import mailbox
37import os
38import re
39import sys
40import time
58e64caf
AA
41
42#----------------------------------------------------------------------------#
43
class MboxProcessor:
    """Applies a series of filters to each message in a mbox.

    Every message is offered to the filters in order; the first filter whose
    process() returns a true value stops the chain for that message.
    """

    def __init__(self, mbox):
        """Open the mbox stored at path `mbox` and build the filter chain."""
        self.mbox_file = mbox
        self.mbox = mailbox.mbox(self.mbox_file)
        # The order is significant: filters are tried from first to last
        self.filters = [
            DirectBouncesFilter(),
            SpamFilter(),
            UnsureFilter(),
            CheckNonSpamFilter(),
            OutOfOfficeFilter(),
            DeliveryStatusNotificationFilter(),
            CatchAllFilter()
        ]
        self.start_time = None

    def initialize_filters(self):
        """Initialize every filter, then start the processing timer."""
        for f in self.filters:
            f.initialize(self.mbox_file)
        # time.clock() was removed in Python 3.8; time.time() is available on
        # both Python 2 and Python 3 (it measures wall-clock time).
        self.start_time = time.time()

    def apply_filters(self, message):
        """Run `message` through the chain; return True if a filter took it."""
        return any(f.process(message) for f in self.filters)

    def finalize_filters(self):
        """Print the global summary, then let each filter print its report."""
        duration = time.time() - self.start_time
        separator = '-' * 80
        print(separator)
        print('Processed the %d messages of %s in %.2fs' % (len(self.mbox), self.mbox_file, duration))
        print(separator)
        for f in self.filters:
            f.finalize()
            print(separator)

    def run(self):
        """Process the whole mbox while holding its lock."""
        self.mbox.lock()
        try:
            self.initialize_filters()
            for message in self.mbox:
                self.apply_filters(message)
            self.finalize_filters()
        finally:
            # Always release the lock and close the mbox, even on error
            self.mbox.unlock()
            self.mbox.close()
86
87#----------------------------------------------------------------------------#
88
class MboxFilter:
    """Interface shared by all the filters run by the processor."""

    def initialize(self, mbox_file):
        """Hook called by the processor before processing starts.

        Descriptors required during processing should be opened here."""
        return None

    def process(self, message):
        """Hook called by the processor for each message reaching this step.

        A true return value stops processing of the message; a false one
        hands the message over to the next filter."""
        return None

    def finalize(self):
        """Hook called by the processor after processing ends.

        Results should be displayed and all descriptors closed here."""
        return None
109
110#----------------------------------------------------------------------------#
111
def findSubject(message):
    """Returns the subject of an email.Message as an unicode string.

    Returns None when the message has no Subject header. When the header
    cannot be decoded (unknown charset, bytes not matching the declared
    charset), the undecoded header text is returned instead of raising, so
    that one malformed message does not abort the whole run.
    """
    # Import the submodule explicitly instead of relying on another module
    # having loaded email.header as a side effect.
    from email.header import decode_header, make_header

    raw_subject = message['Subject']
    if raw_subject is None:
        return None

    # Be Python 2 & 3 compatible
    to_text = unicode if sys.version_info < (3,) else str
    try:
        # decode_header returns a list of (decoded_string, charset) pairs
        decoded_seq = [(subj, enc or 'utf-8') for subj, enc in decode_header(raw_subject)]
        return to_text(make_header(decoded_seq))
    except (LookupError, UnicodeError):
        # LookupError: unknown charset name; UnicodeError: undecodable bytes
        if isinstance(raw_subject, bytes):
            return raw_subject.decode('utf-8', 'replace')
        return to_text(raw_subject)
123
58e64caf
AA
124
# Captures the address of a recipient field of the form "rfc822; <address>"
_recipient_re = re.compile(r'^rfc822; ?(.+)$', re.IGNORECASE | re.UNICODE)
# Some MTA set the Final-Recipient with "LOCAL;" instead of "rfc822;"
_recipient_re2 = re.compile(r'^local; ?(.+)$', re.IGNORECASE | re.UNICODE)
58e64caf 128
6208fd26 129
58e64caf
AA
def findAddressInBounce(bounce):
    """Finds the faulty email address in a bounced email.

    See RFC 1894 for more information.

    `bounce` is an email.Message expected to be a multipart/report. Each
    reason for rejecting the message is printed on stdout.
    Returns None or the email address.
    """
    # Check that it is a bounce - a few MTA fail to set this correctly :(
    if bounce.get_content_type() != 'multipart/report':
        print('! Not a valid bounce (expected multipart/report, found %s).' % bounce.get_content_type())
        return None
    # Extract the second component of the multipart/report.
    # A non-multipart payload is a string: count it as having no part
    # instead of crashing in get_payload(1).
    num_payloads = len(bounce.get_payload()) if bounce.is_multipart() else 0
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    status_part = bounce.get_payload(1)

    # If the second part is of type "message/rfc822" it is the undelivered message.
    # Let's try to understand the text part
    if status_part.get_content_type() == 'message/rfc822':
        text_bounce = bounce.get_payload(0)
        if text_bounce.get_content_type() == 'text/plain':
            return findAddressInPlainBounce(text_bounce, bounce)
        # If it's not a text message, let's continue to the next error message

    if status_part.get_content_type() != 'message/delivery-status':
        print('! Not a valid bounce (expected message/delivery-status, found %s).' % status_part.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields
    num_payloads = len(status_part.get_payload()) if status_part.is_multipart() else 0
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = status_part.get_payload(1)
    if content.get_content_type() != 'text/plain':
        print('! Not a valid bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    # Some MTA don't set Final-Recipient but use Remote-Recipient instead
    if 'Final-Recipient' in content:
        final_recipient = content['Final-Recipient']
    elif 'Remote-Recipient' in content:
        final_recipient = content['Remote-Recipient']
    else:
        print('! Not a valid bounce (no Final-Recipient).')
        return None
    recipient_match = _recipient_re.search(final_recipient)
    if recipient_match is None:
        # Be nice, test another regexp
        recipient_match = _recipient_re2.search(final_recipient)
        if recipient_match is None:
            print('! Missing final recipient.')
            return None
    address = recipient_match.group(1)

    # Check the action field; a missing field is handled as "not failed"
    action = content['Action']
    if action is None or action.lower().strip() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']
    # The Status field may be missing or may not start with a digit
    status_text = (status or '').strip()

    # Permanent failure state
    if status_text.startswith('5'):
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    failure_hints = [
        "insufficient system storage",
        "mailbox full",
        "requested action aborted: local error in processing",
        "user unknown",
    ]
    if 'quota' in status_text.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None
58e64caf 217
15f4834d 218
8438b7d1
NI
def findAddressInWeirdDeliveryStatus(message):
    """Finds the faulty email address in the delivery-status part of an email

    Unlike findAddressInBounce, the status does NOT follow RFC 1894, so
    try to learn to get data nevertheless...

    `message` is the message/delivery-status part itself. Each reason for
    rejecting it is printed on stdout.
    Returns None or the email address.
    """
    if message.get_content_type() != 'message/delivery-status':
        print('! Not a valid weird bounce (expected message/delivery-status, found %s).' % message.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields.
    # A non-multipart payload is a string: count it as having no part.
    num_payloads = len(message.get_payload()) if message.is_multipart() else 0
    if num_payloads < 2:
        print('! Not a valid weird bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = message.get_payload(1)
    # The content may be missing, but interesting headers still present in the first payload...
    if not content:
        content = message.get_payload(0)
        if 'Action' not in content:
            print('! Not a valid weird bounce (unable to find content).')
            return None
    elif content.get_content_type() != 'text/plain':
        print('! Not a valid weird bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    if 'Final-Recipient' in content:
        recipient = content['Final-Recipient']
        recipient_match = _recipient_re.search(recipient)
        if recipient_match is None:
            # Be nice, test another regexp
            recipient_match = _recipient_re2.search(recipient)
            if recipient_match is None:
                print('! Unknown final recipient in weird bounce.')
                return None
        address = recipient_match.group(1)
    elif 'Original-Recipient' in content:
        recipient = content['Original-Recipient']
        recipient_match = _recipient_re.search(recipient)
        if recipient_match is None:
            # Be nice, test another regexp
            recipient_match = _recipient_re2.search(recipient)
            if recipient_match is None:
                # Last chance: a bare "<user@domain>"
                recipient_match = re.match(r'<([^>]+@[^@>]+)>', recipient)
                if recipient_match is None:
                    print('! Unknown original recipient in weird bounce.')
                    return None
        address = recipient_match.group(1)
    else:
        print('! Missing recipient in weird bounce.')
        return None

    # Check the action field; it may be absent from the second payload, and
    # surrounding whitespace is ignored as in findAddressInBounce
    action = content['Action']
    if action is None or action.lower().strip() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']
    # The Status field may be missing or may not start with a digit
    status_text = (status or '').strip()

    # Permanent failure state
    if status_text.startswith('5'):
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    failure_hints = [
        "insufficient system storage",
        "mailbox full",
        "requested action aborted: local error in processing",
        "sender address rejected",
        "user unknown",
    ]
    if 'quota' in status_text.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None
305
306
def findAddressInPlainBounce(bounce, real_bounce=None):
    """Finds the faulty email address in a non-RFC-1894 bounced email

    `bounce` is the text/plain part to analyze. `real_bounce` is the full
    email carrying the From and Subject headers, when the email has several
    MIME parts; it defaults to `bounce`.
    Each reason for rejecting the message is printed on stdout.
    Returns None or the email address.
    """
    # real_bounce is the full email and bounce only the text/plain part, if email have several MIME parts
    real_bounce = real_bounce or bounce
    # The From header may be missing: handle it as an unknown sender
    sender = real_bounce['From']
    lower_from = (sender or '').lower()
    if 'mailer-daemon@' not in lower_from and 'postmaster' not in lower_from:
        print('! Not a valid plain bounce (expected from MAILER-DAEMON or postmaster, found %s).' % sender)
        return None
    if bounce.get_content_type() != 'text/plain':
        print('! Not a valid plain bounce (expected text/plain, found %s).' % bounce.get_content_type())
        return None
    # The Subject header may be missing too: handle it as an unknown subject
    subject = (findSubject(real_bounce) or '').lower()
    known_subjects = [
        "delivery status notification (failure)",
        "failure notice",
        "mail delivery failure",
        "returned mail: see transcript for details",
        "undeliverable message",
        "undelivered mail returned to sender",
    ]
    if subject not in known_subjects and not subject.startswith('mail delivery failed'):
        print('! Not a valid plain bounce (unknown subject: %s).' % subject)
        return None

    # Read the 15 first lines of content and find some relevant keywords to validate the bounce
    lines = bounce.get_payload().splitlines()[:15]

    # ALTOSPAM is a service which requires to click on a link when sending an email
    # Don't consider the "554 5.0.0 Service unavailable" returned by ALTOSPAM as a failure
    # but put this message in the dsn-temp mailbox so that it can be processed by hand.
    if any("ALTOSPAM which is used by the person" in line for line in lines):
        print('! ALTOSPAM has been detected. Moving this message to the dsn-temp mbox')
        return None

    # Match:
    #   A message that you sent could not be delivered to one or more of its recipients.
    #   I'm afraid I wasn't able to deliver your message to the following addresses.
    #   The following message to <email@example.com> was undeliverable.
    non_delivery_hints = [
        "could not be delivered to",
        "Delivery to the following recipient failed permanently",
        "I'm sorry to have to inform you that your message could not",
        "I wasn't able to deliver your message",
        "try to send your message again at a later time",
        "> was undeliverable.",
        "we were unable to deliver your message",
    ]
    if not any(hint in line for line in lines for hint in non_delivery_hints):
        print('! Unknown mailer-daemon message, unable to find an hint for non-delivery in message:')
        print('\n'.join(lines))
        return None

    # Match:
    #   This is a permanent error; I've given up. Sorry it didn't work out.
    #   5.1.0 - Unknown address error 550-'email@example.com... No such user'
    permanent_error_hints = [
        "Delivery to the following recipient failed permanently",
        "failed due to an unavailable mailbox",
        "I'm sorry to have to inform you that your message could not",
        "This is a permanent error",
        "Unknown address error",
        "unreachable for too long",
        "550 Requested action not taken",
    ]
    if not any(hint in line for line in lines for hint in permanent_error_hints):
        print('! Unknown mailer-daemon message, unable to find an hint for permanent error in message:')
        print('\n'.join(lines))
        return None

    # Retrieve the first occurence of <email@example.com>
    for line in lines:
        match = re.match(r'.*?<([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)>', line)
        if match is None:
            # Also accept a line made only of an optionally quoted address
            match = re.match(r'^\s*"?([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)"?\s*$', line)
        if match is not None:
            address = match.group(1)
            if address.endswith('@polytechnique.org'):
                # First valid mail is something like <info_newsletter@polytechnique.org>, so we missed the real one
                break
            return address

    print('! Unknown mailer-daemon message, unable to find email address:')
    print('\n'.join(lines))
    return None
392
58e64caf
AA
393#----------------------------------------------------------------------------#
394
class DirectBouncesFilter(MboxFilter):
    """Handles the messages which have no X-Spam-Flag header.

    Confirmed bounces are saved in '<mbox>.bounced' and the extracted
    addresses are listed during finalization.
    """

    def initialize(self, mbox_file):
        """Reset the counters and open an emptied '<mbox>.bounced' mailbox."""
        self.seen = 0
        self.bad_problems = 0
        self.emails = []
        self.mbox_file = '%s.bounced' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Return True when `message` was handled as a direct bounce.

        Messages carrying an X-Spam-Flag header are left to the next filters.
        """
        if message['X-Spam-Flag'] is None:
            # During finalization, we will verifiy that all messages were processed
            self.seen += 1
            # Special case: ignore mailman notifications for the mailing-list
            # on which the NL is forwarded
            if message['From'] == 'newsletter-externes-owner@polytechnique.org':
                print('! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.')
                self.seen -= 1
                return True
            # Additionnal checks, just to be sure
            elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
                    or message['Subject'] != 'Undelivered Mail Returned to Sender':
                print('! Not an usual direct bounce (From=%r, Subject=%r).' % (message['From'], message['Subject']))
            else:
                address = findAddressInBounce(message)
                if address is not None:
                    self.emails.append(address)
                    self.mbox.add(message)
                    return True
                else:
                    print('! => No email found in direct bounce, this is really bad.')
                    self.bad_problems += 1
        return False

    def finalize(self):
        """Print the statistics and the bounced addresses, then close the mbox."""
        print('Found %d messages with no X-Spam-Flag header.' % self.seen)
        print('Found %d of them that are confirmed bounces.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        if self.bad_problems:
            print('Found %d of them that are invalid.' % self.bad_problems)
        if self.seen != len(self.mbox) + self.bad_problems:
            # Backslashes are escaped explicitly: '\ ' is an invalid escape
            # sequence. The printed text is unchanged.
            print(' /!\\ These numbers shoud be equal! We have a problem! /!\\')
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for address in self.emails:
            print(address)
        print('')
        self.mbox.close()
445
446#----------------------------------------------------------------------------#
447
class SpamFilter(MboxFilter):
    """Saves in '<mbox>.spam' the messages flagged 'Yes' by bogofilter."""

    def initialize(self, mbox_file):
        """Open an emptied '<mbox>.spam' mailbox."""
        self.mbox_file = '%s.spam' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Store `message` and return True when its X-Spam-Flag says spam."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Yes, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the report of this filter and close its mailbox."""
        report = (
            'Found %d spams. This is reliable.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You might check the contents of this mbox.',
        )
        for line in report:
            print(line)
        self.mbox.close()
467
468#----------------------------------------------------------------------------#
469
class UnsureFilter(MboxFilter):
    """Saves in '<mbox>.unsure' the messages flagged 'Unsure' by bogofilter."""

    def initialize(self, mbox_file):
        """Open an emptied '<mbox>.unsure' mailbox."""
        self.mbox_file = '%s.unsure' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Store `message` and return True when its X-Spam-Flag says unsure."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Unsure, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the report of this filter and close its mailbox."""
        report = (
            'Found %d unclassified messages. Most of them should be spams.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You must check the contents of this mbox and feed the antispam.',
        )
        for line in report:
            print(line)
        self.mbox.close()
489
490#----------------------------------------------------------------------------#
491
class CheckNonSpamFilter(MboxFilter):
    """Counts the messages whose X-Spam-Flag is not a bogofilter 'No'.

    This filter never stops the chain, it only gathers a statistic.
    """

    def initialize(self, mbox_file):
        """Reset the counter of unexpected messages."""
        self.seen = 0

    def process(self, message):
        """Count `message` if it is not marked as non-spam; return False."""
        flag = message['X-Spam-Flag']
        is_non_spam = flag is not None and flag.startswith('No, tests=bogofilter')
        if not is_non_spam:
            self.seen += 1
        return False

    def finalize(self):
        """Print whether unexpected messages were encountered."""
        if self.seen <= 0:
            print('All messages were either spam, or unsure, or non-spams. Good.')
            return
        print('Encountered %d messages that were neither spam, nor unsure, nor non-spams.' % self.seen)
        print('Please investigate.')
58e64caf
AA
509
510#----------------------------------------------------------------------------#
511
class OutOfOfficeFilter(MboxFilter):
    """Saves in '<mbox>.ooo' the out-of-office auto-replies.

    Detection relies on subject patterns and, for "Re: " replies, on the
    Delivered-To header and the first line of a plain text body.
    """

    def initialize(self, mbox_file):
        """Open an emptied '<mbox>.ooo' mailbox and compile the subject patterns."""
        self.mbox_file = '%s.ooo' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        subject_re = [
            r'^Absen(t|ce)',
            r'^(AUTO: )?Out of (the )?office',
            r'^Auto( ?): ',
            r'^AutoRe( ?):',
            r'^Automatic reply: ',
            r'automatique d\'absence',
            r'AutoReply',
            r'(est|is) absent',
            r'^En dehors du bureau',
            r'I am out of town',
            r'I am currently away',
            r'(am|is) out of (the )?office',
            r'Notification d\'absence',
            r'^Out of email reach',
            r'R.{1,2}ponse automatique( :)?',  # There may be encoding error of e acute
            r'^Respuesta de Estoy ausente:',
        ]
        self.subject_regexes = [re.compile(sre, re.I | re.U) for sre in subject_re]

    def process(self, message):
        """Store `message` and return True when it looks like an auto-reply."""
        subject = findSubject(message)
        if subject is not None and any(regex.search(subject) for regex in self.subject_regexes):
            self.mbox.add(message)
            return True

        # Some systems reply with "Re: ". Be smart here!
        if subject is not None and subject.startswith('Re: '):
            # Delivered-To: Autoresponder
            # get_all() returns None when the header is absent, hence the
            # explicit empty default
            if 'Autoresponder' in message.get_all('Delivered-To', []):
                self.mbox.add(message)
                return True
            # Parse content if it is simple enough
            if message.get_content_type() == 'text/plain':
                body_lines = message.get_payload().splitlines()
                # An empty body has no first line to inspect
                firstline = body_lines[0].lower() if body_lines else ''
                if (' absent du bureau ' in firstline
                        or ' away from my office ' in firstline):
                    self.mbox.add(message)
                    return True

        return False

    def finalize(self):
        """Print the report of this filter and close its mailbox."""
        print('Found %d "out of office". This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('You may check the contents of this mbox.')
        self.mbox.close()
565
566#----------------------------------------------------------------------------#
567
class DeliveryStatusNotificationFilter(MboxFilter):
    """Saves the delivery status notifications and extracts their addresses.

    Notifications from which an address was extracted go to '<mbox>.dsn';
    multipart/report messages without a usable address go to
    '<mbox>.dsn-temp' for manual processing.
    """

    def initialize(self, mbox_file):
        """Open the emptied '<mbox>.dsn' and '<mbox>.dsn-temp' mailboxes."""
        self.emails = []
        self.mbox_file = '%s.dsn' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        self.mbox_temp_file = '%s.dsn-temp' % mbox_file
        self.mbox_temp = mailbox.mbox(self.mbox_temp_file)
        self.mbox_temp.clear()

    def process(self, message):
        """Return True when `message` was stored as a delivery status notification."""
        # Don't modify message variable for "self.mbox.add(message)"
        report_message = message
        # Find real report inside attachment
        if message.get_content_type() == 'multipart/mixed':
            parts = message.get_payload()
            # A malformed multipart may have a string payload or no part at all
            if not isinstance(parts, list):
                parts = []
            # Some MTA confuse multipart/mixed with multipart/report
            # Let's try to find a report!
            if len(parts) >= 2:
                try_status = parts[1]
                if try_status.get_content_type() == 'message/delivery-status':
                    # The world would be a nice place if delivery-status were
                    # formatted as expected...
                    address = findAddressInWeirdDeliveryStatus(try_status)
                    if address is not None:
                        self.emails.append(address)
                        self.mbox.add(message)
                        return True
            if parts:
                report_message = parts[0]

        # Process report if its type is correct
        if report_message.get_content_type() == 'multipart/report':
            address = findAddressInBounce(report_message)
            if address is not None:
                self.emails.append(address)
                self.mbox.add(message)
            else:
                print("! => Moved to temporary DSN mailbox")
                self.mbox_temp.add(message)
            return True

        # Detect ill-formatted reports, sent as plain text email
        if report_message.get_content_type() == 'text/plain' and (
            'MAILER-DAEMON@' in str(message.get('From', '')).upper() or
            'mail delivery failure' == str(message.get('Subject', '')).lower()
        ):
            # The From and Subject headers live on the enclosing message, not
            # on a text part extracted from a multipart/mixed email
            address = findAddressInPlainBounce(report_message, message)
            if address is not None:
                self.emails.append(address)
                self.mbox.add(message)
                return True
        return False

    def finalize(self):
        """Print the report and the extracted addresses, then close both mailboxes."""
        print('Found %d delivery status notifications. This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for address in self.emails:
            print(address)
        print('')
        self.mbox.close()
        print('Found %d temporary and invalid delivery status notifications.' % len(self.mbox_temp))
        print('They were saved in %s.' % self.mbox_temp_file)
        self.mbox_temp.close()
58e64caf
AA
635
636#----------------------------------------------------------------------------#
637
class CatchAllFilter(MboxFilter):
    """Saves in '<mbox>.catchall' every message that reaches it."""

    def initialize(self, mbox_file):
        """Open an emptied '<mbox>.catchall' mailbox."""
        self.mbox_file = '%s.catchall' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Store `message` unconditionally and stop the chain."""
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the report; the mailbox file is removed when it is empty."""
        if not len(self.mbox) > 0:
            print('No messages reached the catchall. Nice.')
            self.mbox.close()
            os.unlink(self.mbox_file)
            return
        print('%d messages reached the catchall.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('You must process the contents of this mbox manually.')
        self.mbox.close()
659
660#----------------------------------------------------------------------------#
661
if __name__ == '__main__':

    # Exactly one argument is expected: the path of the mbox to process
    if len(sys.argv) != 2:
        print('Usage: %s mbox' % sys.argv[0])
        sys.exit(1)

    mbox_path = sys.argv[1]
    if not os.path.exists(mbox_path):
        print('No such file: %s' % mbox_path)
        sys.exit(1)

    processor = MboxProcessor(mbox_path)
    processor.run()