Fix bug on former users in group sync with lists.
[platal.git] / bin / newsletter.bounces.processor.py
CommitLineData
6208fd26 1#!/usr/bin/env python
58e64caf
AA
2# -*- coding: utf-8 -*-
3#***************************************************************************
c441aabe 4#* Copyright (C) 2003-2014 Polytechnique.org *
58e64caf
AA
5#* http://opensource.polytechnique.org/ *
6#* *
7#* This program is free software; you can redistribute it and/or modify *
8#* it under the terms of the GNU General Public License as published by *
9#* the Free Software Foundation; either version 2 of the License, or *
10#* (at your option) any later version. *
11#* *
12#* This program is distributed in the hope that it will be useful, *
13#* but WITHOUT ANY WARRANTY; without even the implied warranty of *
14#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15#* GNU General Public License for more details. *
16#* *
17#* You should have received a copy of the GNU General Public License *
18#* along with this program; if not, write to the Free Software *
19#* Foundation, Inc., *
20#* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
21#***************************************************************************
22
58e64caf
AA
23"""
24Process as automatically as possible bounces from the newsletter
25
26The goal is to extract the email addresses that actually bounced.
27Bounces conforming to RFC 1894 will be automatically processed.
28
29This script uses the X-Spam-Flag header to remove spam and heuristics
30to detect out-of-office auto-replies and delivery status notifications.
31
32All emails are saved in different mailboxes to make human post-processing easier.
33"""
34
6208fd26
NI
35import email
36import mailbox
37import os
38import re
39import sys
40import time
58e64caf
AA
41
42#----------------------------------------------------------------------------#
43
class MboxProcessor:
    """Applies a series of filters to each message in a mbox.

    The filters are tried in order for every message; the first filter whose
    process() method returns a true value "consumes" the message and the
    remaining filters are skipped for that message.
    """

    def __init__(self, mbox):
        """Open the mbox file located at path `mbox` and build the filter chain."""
        self.mbox_file = mbox
        self.mbox = mailbox.mbox(self.mbox_file)
        # Order matters: the first filter accepting a message wins.
        self.filters = [
            DirectBouncesFilter(),
            SpamFilter(),
            UnsureFilter(),
            CheckNonSpamFilter(),
            OutOfOfficeFilter(),
            DeliveryStatusNotificationFilter(),
            CatchAllFilter()
        ]

    def initialize_filters(self):
        """Initialize every filter and record the start time of the run."""
        for f in self.filters:
            f.initialize(self.mbox_file)
        # time.clock() was removed in Python 3.8; time.time() is available on
        # every Python 2 and Python 3 version and is good enough for a report.
        self.start_time = time.time()

    def apply_filters(self, message):
        """Run `message` through the filters, stopping at the first match.

        Returns True when a filter accepted the message, False otherwise."""
        return any(f.process(message) for f in self.filters)

    def finalize_filters(self):
        """Print the processing summary and let every filter report its results."""
        duration = time.time() - self.start_time
        separator = '-' * 80
        print(separator)
        print('Processed the %d messages of %s in %.2fs' % (len(self.mbox), self.mbox_file, duration))
        print(separator)
        for f in self.filters:
            f.finalize()
            print(separator)

    def run(self):
        """Process the whole mbox while holding its lock."""
        self.mbox.lock()
        try:
            self.initialize_filters()
            for message in self.mbox:
                self.apply_filters(message)
            self.finalize_filters()
        finally:
            # Always release the lock and the file, even when a filter failed.
            self.mbox.unlock()
            self.mbox.close()
86
87#----------------------------------------------------------------------------#
88
class MboxFilter:
    """Base interface shared by all the filters run by the processor.

    Every hook is a no-op here; concrete filters override what they need.
    """

    def initialize(self, mbox_file):
        """Hook invoked by the processor once, before any message is processed.

        Filters open here the descriptors they need during processing."""
        return None

    def process(self, message):
        """Hook invoked by the processor for each message reaching this filter.

        A true return value stops the processing of the message, a false one
        hands the message over to the next filter."""
        return None

    def finalize(self):
        """Hook invoked by the processor once, after the last message.

        Filters display their results and close their descriptors here."""
        return None
109
110#----------------------------------------------------------------------------#
111
def findSubject(message):
    """Returns the subject of an email.Message as an unicode string.

    Returns None when the message has no Subject header. When the header
    cannot be decoded (unknown charset, bytes not matching the announced
    charset, broken encoded-word), the raw header text is returned instead
    of raising, so that a single odd message does not abort the whole run.
    """
    # Import the submodules explicitly: "import email" alone does not
    # guarantee that email.header is loaded.
    import email.errors
    import email.header

    raw_subject = message['Subject']
    if raw_subject is None:
        return None

    # Be Python 2 & 3 compatible
    text_type = unicode if sys.version_info < (3,) else str
    try:
        # decode_header returns a list of (decoded_string, charset) pairs
        decoded_seq = email.header.decode_header(raw_subject)
        decoded_seq = [(subj, enc or 'utf-8') for subj, enc in decoded_seq]
        return text_type(email.header.make_header(decoded_seq))
    except (LookupError, UnicodeError, email.errors.HeaderParseError):
        # Fall back to the undecoded header text
        if isinstance(raw_subject, bytes):
            return raw_subject.decode('utf-8', 'replace')
        return text_type(raw_subject)
123
58e64caf
AA
124
_recipient_re = re.compile(r'^rfc822; ?(.+)$', re.I | re.U)
# Some MTA set the Final-Recipient with "LOCAL;" instead of "rfc822;"
_recipient_re2 = re.compile(r'^local; ?(.+)$', re.I | re.U)


def findAddressInBounce(bounce):
    """Finds the faulty email address in a bounced email.

    See RFC 1894 for more information.
    Returns None or the email address.

    Missing or malformed fields (no Final-Recipient, no Action, no Status,
    non-numeric Status, non-multipart payload) are reported and make the
    function return None instead of raising."""

    # Check that it is a bounce - a few MTA fail to set this correctly :(
    if bounce.get_content_type() != 'multipart/report':
        print('! Not a valid bounce (expected multipart/report, found %s).' % bounce.get_content_type())
        return None
    # Extract the second component of the multipart/report.
    # A message may announce a multipart type and still carry a plain string
    # payload: count it as having no part at all.
    num_payloads = len(bounce.get_payload()) if bounce.is_multipart() else 0
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    status_part = bounce.get_payload(1)

    # If the second part is of type "message/rfc822" it is the undelivered message.
    # Let's try to understand the text part
    if status_part.get_content_type() == 'message/rfc822':
        text_bounce = bounce.get_payload(0)
        if text_bounce.get_content_type() == 'text/plain':
            return findAddressInPlainBounce(text_bounce, bounce)
        # If it's not a text message, let's continue to the next error message

    if status_part.get_content_type() != 'message/delivery-status':
        print('! Not a valid bounce (expected message/delivery-status, found %s).' % status_part.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields
    num_payloads = len(status_part.get_payload()) if status_part.is_multipart() else 0
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = status_part.get_payload(1)
    if content.get_content_type() != 'text/plain':
        print('! Not a valid bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    final_recipient = content['Final-Recipient']
    if final_recipient is None:
        print('! Missing final recipient.')
        return None
    # Be nice, test another regexp when the first one does not match
    recipient_match = (_recipient_re.search(final_recipient)
                       or _recipient_re2.search(final_recipient))
    if recipient_match is None:
        print('! Missing final recipient.')
        return None
    address = recipient_match.group(1)

    # Check the action field
    action = content['Action']
    if action is None or action.lower() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']

    # Permanent failure state (status codes of class 5.x.x)
    if status and status[:1] == '5':
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    failure_hints = [
        "insufficient system storage",
        "mailbox full",
        "requested action aborted: local error in processing",
        "user unknown",
    ]
    if status and 'quota' in status.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None
58e64caf 209
15f4834d 210
8438b7d1
NI
def findAddressInWeirdDeliveryStatus(message):
    """Finds the faulty email address in the delivery-status part of an email

    Unlike findAddressInBounce, the status does NOT follow RFC 1894, so
    try to get the data nevertheless...
    Returns None or the email address.

    A missing Action field or a non-numeric Status field is reported and makes
    the function return None instead of raising.
    """
    if message.get_content_type() != 'message/delivery-status':
        print('! Not a valid weird bounce (expected message/delivery-status, found %s).' % message.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields.
    # A payload which is not a list of parts is counted as having no part.
    num_payloads = len(message.get_payload()) if message.is_multipart() else 0
    if num_payloads < 2:
        print('! Not a valid weird bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = message.get_payload(1)
    # The content may be missing, but interesting headers still present in the first payload...
    if not content:
        content = message.get_payload(0)
        if 'Action' not in content:
            print('! Not a valid weird bounce (unable to find content).')
            return None
    elif content.get_content_type() != 'text/plain':
        print('! Not a valid weird bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    if 'Final-Recipient' in content:
        recipient = content['Final-Recipient']
        # Be nice, test another regexp when the first one does not match
        recipient_match = (_recipient_re.search(recipient)
                           or _recipient_re2.search(recipient))
        if recipient_match is None:
            print('! Unknown final recipient in weird bounce.')
            return None
    elif 'Original-Recipient' in content:
        recipient = content['Original-Recipient']
        # Try the two usual syntaxes, then a bare "<user@domain>" value
        recipient_match = (_recipient_re.search(recipient)
                           or _recipient_re2.search(recipient)
                           or re.match(r'<([^>]+@[^@>]+)>', recipient))
        if recipient_match is None:
            print('! Unknown original recipient in weird bounce.')
            return None
    else:
        print('! Missing recipient in weird bounce.')
        return None
    address = recipient_match.group(1)

    # Check the action field (it may be absent from a text/plain second part)
    action = content['Action']
    if action is None or action.lower() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']

    # Permanent failure state (status codes of class 5.x.x)
    if status and status[:1] == '5':
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    failure_hints = [
        "insufficient system storage",
        "mailbox full",
        "requested action aborted: local error in processing",
        "sender address rejected",
        "user unknown",
    ]
    if status and 'quota' in status.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None
297
298
def findAddressInPlainBounce(bounce, real_bounce=None):
    """Finds the faulty email address in a non-RFC-1894 bounced email

    `bounce` is the text/plain message (or MIME part) holding the report.
    `real_bounce` is the full email when `bounce` is only one of its MIME
    parts; its From and Subject headers are the ones which get checked.
    It defaults to `bounce`.

    Returns None or the email address. Messages without From or Subject
    header are rejected like any other unrecognized message.
    """
    # real_bounce is the full email and bounce only the text/plain part, if email have several MIME parts
    if real_bounce is None:
        real_bounce = bounce
    sender = real_bounce['From']
    lower_from = (sender or '').lower()
    if 'mailer-daemon@' not in lower_from and 'postmaster' not in lower_from:
        print('! Not a valid plain bounce (expected from MAILER-DAEMON or postmaster, found %s).' % sender)
        return None
    if bounce.get_content_type() != 'text/plain':
        print('! Not a valid plain bounce (expected text/plain, found %s).' % bounce.get_content_type())
        return None
    # A missing subject is handled as an empty (hence unknown) one
    subject = (findSubject(real_bounce) or '').lower()
    known_subjects = [
        "delivery status notification (failure)",
        "failure notice",
        "mail delivery failure",
        "returned mail: see transcript for details",
        "undeliverable message",
        "undelivered mail returned to sender",
    ]
    if subject not in known_subjects and not subject.startswith('mail delivery failed'):
        print('! Not a valid plain bounce (unknown subject: %s).' % subject)
        return None

    if bounce.is_multipart():
        # The payload is a list of parts, not a text: nothing to read here
        print('! Not a valid plain bounce (unexpected multipart payload).')
        return None

    # Read the 15 first lines of content and find some relevant keywords to validate the bounce
    lines = bounce.get_payload().splitlines()[:15]

    # ALTOSPAM is a service which requires to click on a link when sending an email
    # Don't consider the "554 5.0.0 Service unavailable" returned by ALTOSPAM as a failure
    # but put this message in the dsn-temp mailbox so that it can be processed by hand.
    if any("ALTOSPAM which is used by the person" in line for line in lines):
        print('! ALTOSPAM has been detected. Moving this message to the dsn-temp mbox')
        return None

    # Match:
    #   A message that you sent could not be delivered to one or more of its recipients.
    #   I'm afraid I wasn't able to deliver your message to the following addresses.
    #   The following message to <email@example.com> was undeliverable.
    non_delivery_hints = [
        "could not be delivered to",
        "Delivery to the following recipient failed permanently",
        "I'm sorry to have to inform you that your message could not",
        "I wasn't able to deliver your message",
        "try to send your message again at a later time",
        "> was undeliverable.",
        "we were unable to deliver your message",
    ]
    if not any(any(hint in line for hint in non_delivery_hints) for line in lines):
        print('! Unknown mailer-daemon message, unable to find an hint for non-delivery in message:')
        print('\n'.join(lines))
        return None

    # Match:
    #   This is a permanent error; I've given up. Sorry it didn't work out.
    #   5.1.0 - Unknown address error 550-'email@example.com... No such user'
    permanent_error_hints = [
        "Delivery to the following recipient failed permanently",
        "failed due to an unavailable mailbox",
        "I'm sorry to have to inform you that your message could not",
        "This is a permanent error",
        "Unknown address error",
        "unreachable for too long",
        "550 Requested action not taken",
    ]
    if not any(any(hint in line for hint in permanent_error_hints) for line in lines):
        print('! Unknown mailer-daemon message, unable to find an hint for permanent error in message:')
        print('\n'.join(lines))
        return None

    # Retrieve the first occurence of <email@example.com>, or of an address
    # standing alone on its line (possibly between double quotes)
    bracketed_re = re.compile(r'.*?<([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)>')
    alone_re = re.compile(r'^\s*"?([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)"?\s*$')
    for line in lines:
        match = bracketed_re.match(line) or alone_re.match(line)
        if match is not None:
            address = match.group(1)
            if address.endswith('@polytechnique.org'):
                # First valid mail is something like <info_newsletter@polytechnique.org>, so we missed the real one
                break
            return address

    print('! Unknown mailer-daemon message, unable to find email address:')
    print('\n'.join(lines))
    return None
384
58e64caf
AA
385#----------------------------------------------------------------------------#
386
class DirectBouncesFilter(MboxFilter):
    """Catches the bounces generated by the local MTA.

    These messages carry no X-Spam-Flag header. Confirmed bounces are saved
    in the "<mbox>.bounced" mailbox and their faulty addresses are listed
    during finalization.
    """

    def initialize(self, mbox_file):
        """Reset the counters and open an emptied "<mbox_file>.bounced" mailbox."""
        self.seen = 0
        self.bad_problems = 0
        self.emails = []
        self.mbox_file = '%s.bounced' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Save `message` when it is a direct bounce with a faulty address.

        Returns True when the message was consumed (saved, or dropped because
        it is a mailman notification), False otherwise."""
        if message['X-Spam-Flag'] is not None:
            return False

        # During finalization, we will verifiy that all messages were processed
        self.seen += 1
        # Special case: ignore mailman notifications for the mailing-list
        # on which the NL is forwarded
        if message['From'] == 'newsletter-externes-owner@polytechnique.org':
            print('! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.')
            self.seen -= 1
            return True
        # Additionnal checks, just to be sure
        if message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
        or message['Subject'] != 'Undelivered Mail Returned to Sender':
            # Still counted in self.seen, so finalize() reports the mismatch
            print('! Not an usual direct bounce (From="%s", Subject="%s").' % (message['From'], message['Subject']))
            return False

        email = findAddressInBounce(message)
        if email is not None:
            self.emails.append(email)
            self.mbox.add(message)
            return True
        print('! => No email found in direct bounce, this is really bad.')
        self.bad_problems += 1
        return False

    def finalize(self):
        """Print the statistics and the bounced addresses, then close the mailbox."""
        print('Found %d messages with no X-Spam-Flag header.' % self.seen)
        print('Found %d of them that are confirmed bounces.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        if self.bad_problems:
            print('Found %d of them that are invalid.' % self.bad_problems)
        if self.seen != len(self.mbox) + self.bad_problems:
            # Backslashes are escaped: "\ " is an invalid escape sequence
            print(' /!\\ These numbers shoud be equal! We have a problem! /!\\')
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for email in self.emails:
            print(email)
        print('')
        self.mbox.close()
437
438#----------------------------------------------------------------------------#
439
class SpamFilter(MboxFilter):
    """Moves the messages flagged as spam by bogofilter to "<mbox>.spam"."""

    def initialize(self, mbox_file):
        """Open an emptied "<mbox_file>.spam" mailbox."""
        self.mbox_file = '%s.spam' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Consume `message` when its X-Spam-Flag header says it is a spam."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Yes, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the number of spams found and close the mailbox."""
        summary = (
            'Found %d spams. This is reliable.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You might check the contents of this mbox.',
        )
        for line in summary:
            print(line)
        self.mbox.close()
459
460#----------------------------------------------------------------------------#
461
class UnsureFilter(MboxFilter):
    """Moves the messages bogofilter was unsure about to "<mbox>.unsure"."""

    def initialize(self, mbox_file):
        """Open an emptied "<mbox_file>.unsure" mailbox."""
        self.mbox_file = '%s.unsure' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Consume `message` when its X-Spam-Flag header says "Unsure"."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Unsure, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the number of unclassified messages and close the mailbox."""
        summary = (
            'Found %d unclassified messages. Most of them should be spams.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You must check the contents of this mbox and feed the antispam.',
        )
        for line in summary:
            print(line)
        self.mbox.close()
481
482#----------------------------------------------------------------------------#
483
class CheckNonSpamFilter(MboxFilter):
    """Counts the messages which bogofilter did not flag as non-spam.

    This filter never consumes a message, it only gathers statistics.
    """

    def initialize(self, mbox_file):
        """Reset the counter of unexpected messages."""
        self.seen = 0

    def process(self, message):
        """Count `message` unless its X-Spam-Flag header says "No"; always returns False."""
        flag = message['X-Spam-Flag']
        is_non_spam = flag is not None and flag.startswith('No, tests=bogofilter')
        if not is_non_spam:
            self.seen += 1
        return False

    def finalize(self):
        """Report whether some messages had an unexpected X-Spam-Flag header."""
        if self.seen <= 0:
            print('All messages were either spam, or unsure, or non-spams. Good.')
            return
        print('Encountered %d messages that were neither spam, nor unsure, nor non-spams.' % self.seen)
        print('Please investigate.')
58e64caf
AA
501
502#----------------------------------------------------------------------------#
503
class OutOfOfficeFilter(MboxFilter):
    """Moves the out-of-office auto-replies to "<mbox>.ooo".

    Auto-replies are detected from their subject or, for replies whose
    subject starts with "Re: ", from their Delivered-To headers or from the
    first line of their plain text body.
    """

    def initialize(self, mbox_file):
        """Open an emptied "<mbox_file>.ooo" mailbox and compile the subject patterns."""
        self.mbox_file = '%s.ooo' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        subject_re = [
            r'^Absen(t|ce)',
            r'^(AUTO: )?Out of (the )?office',
            r'^Auto( ?): ',
            r'^AutoRe( ?):',
            r'^Automatic reply: ',
            r'automatique d\'absence',
            r'AutoReply',
            r'(est|is) absent',
            r'^En dehors du bureau',
            r'I am out of town',
            r'I am currently away',
            r'(am|is) out of (the )?office',
            r'Notification d\'absence',
            r'^Out of email reach',
            r'R.{1,2}ponse automatique( :)?', # There may be encoding error of e acute
            r'^Respuesta de Estoy ausente:',
        ]
        self.subject_regexes = [re.compile(sre, re.I | re.U) for sre in subject_re]

    def process(self, message):
        """Consume `message` when it looks like an out-of-office auto-reply.

        Returns True when the message was saved, False otherwise."""
        subject = findSubject(message)
        if subject is None:
            return False

        if any(regex.search(subject) for regex in self.subject_regexes):
            self.mbox.add(message)
            return True

        # Some systems reply with "Re: ". Be smart here!
        if subject.startswith('Re: '):
            # Delivered-To: Autoresponder
            # (get_all returns None when the header is missing: use a default)
            if 'Autoresponder' in message.get_all('Delivered-To', []):
                self.mbox.add(message)
                return True
            # Parse content if it is simple enough
            if message.get_content_type() == 'text/plain' and not message.is_multipart():
                body_lines = message.get_payload().splitlines()
                # The body may be empty
                firstline = body_lines[0].lower() if body_lines else ''
                if (' absent du bureau ' in firstline
                    or ' away from my office ' in firstline):
                    self.mbox.add(message)
                    return True

        return False

    def finalize(self):
        """Print the number of auto-replies found and close the mailbox."""
        print('Found %d "out of office". This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('You may check the contents of this mbox.')
        self.mbox.close()
557
558#----------------------------------------------------------------------------#
559
class DeliveryStatusNotificationFilter(MboxFilter):
    """Sorts the delivery status notifications (DSN).

    Notifications from which a faulty address could be extracted are saved
    in "<mbox>.dsn"; reports without a usable permanent failure are saved in
    "<mbox>.dsn-temp" for human post-processing.
    """

    def initialize(self, mbox_file):
        """Open emptied "<mbox_file>.dsn" and "<mbox_file>.dsn-temp" mailboxes."""
        self.emails = []
        self.mbox_file = '%s.dsn' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        self.mbox_temp_file = '%s.dsn-temp' % mbox_file
        self.mbox_temp = mailbox.mbox(self.mbox_temp_file)
        self.mbox_temp.clear()

    def process(self, message):
        """Consume `message` when it is a delivery status notification.

        Returns True when the message was saved in one of the two mailboxes,
        False otherwise."""
        # Don't modify message variable for "self.mbox.add(message)"
        report_message = message
        # Find real report inside attachment
        if message.get_content_type() == 'multipart/mixed':
            # The payload is only a list of parts for real multipart messages
            parts = message.get_payload() if message.is_multipart() else []
            # Some MTA confuse multipart/mixed with multipart/report
            # Let's try to find a report!
            if len(parts) >= 2:
                try_status = parts[1]
                if try_status.get_content_type() == 'message/delivery-status':
                    # The world would be a nice place if delivery-status were
                    # formatted as expected...
                    email = findAddressInWeirdDeliveryStatus(try_status)
                    if email is not None:
                        self.emails.append(email)
                        self.mbox.add(message)
                        return True
            if parts:
                report_message = parts[0]

        # Process report if its type is correct
        if report_message.get_content_type() == 'multipart/report':
            email = findAddressInBounce(report_message)
            if email is not None:
                self.emails.append(email)
                self.mbox.add(message)
            else:
                print("! => Moved to temporary DSN mailbox")
                self.mbox_temp.add(message)
            return True

        # Detect ill-formatted reports, sent as plain text email
        if report_message.get_content_type() == 'text/plain':
            # Both headers may be missing
            sender = message['From'] or ''
            subject = message['Subject'] or ''
            if 'MAILER-DAEMON@' in sender.upper() or 'mail delivery failure' == subject.lower():
                # Give the full message too: when the report is only a MIME
                # part, the From and Subject headers are in the full message
                email = findAddressInPlainBounce(report_message, message)
                if email is not None:
                    self.emails.append(email)
                    self.mbox.add(message)
                    return True
        return False

    def finalize(self):
        """Print the statistics and the faulty addresses, then close both mailboxes."""
        print('Found %d delivery status notifications. This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for email in self.emails:
            print(email)
        print('')
        self.mbox.close()
        print('Found %d temporary and invalid delivery status notifications.' % len(self.mbox_temp))
        print('They were saved in %s.' % self.mbox_temp_file)
        self.mbox_temp.close()
58e64caf
AA
627
628#----------------------------------------------------------------------------#
629
class CatchAllFilter(MboxFilter):
    """Last filter of the chain: saves every remaining message in "<mbox>.catchall"."""

    def initialize(self, mbox_file):
        """Open an emptied "<mbox_file>.catchall" mailbox."""
        self.mbox_file = '%s.catchall' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Save `message` unconditionally and stop its processing."""
        self.mbox.add(message)
        return True

    def finalize(self):
        """Report the number of saved messages; remove the mailbox when it is empty."""
        if not len(self.mbox) > 0:
            print('No messages reached the catchall. Nice.')
            self.mbox.close()
            # Nothing to review: don't leave an empty file behind
            os.unlink(self.mbox_file)
            return
        print('%d messages reached the catchall.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('You must process the contents of this mbox manually.')
        self.mbox.close()
651
652#----------------------------------------------------------------------------#
653
if __name__ == '__main__':

    # Exactly one argument is expected: the path of the mbox to process
    arguments = sys.argv
    if len(arguments) != 2:
        print('Usage: %s mbox' % arguments[0])
        sys.exit(1)

    mbox_path = arguments[1]
    if not os.path.exists(mbox_path):
        print('No such file: %s' % mbox_path)
        sys.exit(1)

    MboxProcessor(mbox_path).run()