Commit | Line | Data |
---|---|---|
6208fd26 | 1 | #!/usr/bin/env python |
58e64caf AA |
2 | # -*- coding: utf-8 -*- |
3 | #*************************************************************************** | |
c441aabe | 4 | #* Copyright (C) 2003-2014 Polytechnique.org * |
58e64caf AA |
5 | #* http://opensource.polytechnique.org/ * |
6 | #* * | |
7 | #* This program is free software; you can redistribute it and/or modify * | |
8 | #* it under the terms of the GNU General Public License as published by * | |
9 | #* the Free Software Foundation; either version 2 of the License, or * | |
10 | #* (at your option) any later version. * | |
11 | #* * | |
12 | #* This program is distributed in the hope that it will be useful, * | |
13 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of * | |
14 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | |
15 | #* GNU General Public License for more details. * | |
16 | #* * | |
17 | #* You should have received a copy of the GNU General Public License * | |
18 | #* along with this program; if not, write to the Free Software * | |
19 | #* Foundation, Inc., * | |
20 | #* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * | |
21 | #*************************************************************************** | |
22 | ||
58e64caf AA |
23 | """ |
24 | Process as automatically as possible bounces from the newsletter | |
25 | ||
26 | The goal is to extract the email addresses that actually bounced. | |
27 | Bounces conforming to RFC 1894 will be automatically processed. | |
28 | ||
29 | This script uses the X-Spam-Flag header to remove spam and heuristics | |
30 | to detect out-of-office auto-replies and delivery status notifications. | |
31 | ||
32 | All emails are saved in different mailboxes to make human post-processing easier. | |
33 | """ | |
34 | ||
6208fd26 NI |
35 | import email |
36 | import mailbox | |
37 | import os | |
38 | import re | |
39 | import sys | |
40 | import time | |
58e64caf AA |
41 | |
42 | #----------------------------------------------------------------------------# | |
43 | ||
class MboxProcessor:
    """Applies a series of filters to each message in a mbox.

    Filters are tried in order for every message; the first filter whose
    process() returns a true value claims the message and the remaining
    filters are not called for it.
    """

    def __init__(self, mbox):
        """Open the mbox stored at path `mbox` and build the filter chain."""
        self.mbox_file = mbox
        self.mbox = mailbox.mbox(self.mbox_file)
        # Set by initialize_filters(), read by finalize_filters()
        self.start_time = None
        self.filters = [
            DirectBouncesFilter(),
            SpamFilter(),
            UnsureFilter(),
            CheckNonSpamFilter(),
            OutOfOfficeFilter(),
            DeliveryStatusNotificationFilter(),
            CatchAllFilter(),
        ]

    @staticmethod
    def _timer():
        """Return a timestamp in seconds, usable to measure a duration."""
        # time.clock() was removed in Python 3.8. perf_counter() exists since
        # Python 3.3; keep clock() as a fallback so Python 2 still works.
        clock = getattr(time, 'perf_counter', None) or time.clock
        return clock()

    def initialize_filters(self):
        """Let every filter prepare itself, then start the stopwatch."""
        for f in self.filters:
            f.initialize(self.mbox_file)
        self.start_time = self._timer()

    def apply_filters(self, message):
        """Run `message` through the chain; return True if a filter claimed it."""
        # any() short-circuits, so later filters never see a claimed message
        return any(f.process(message) for f in self.filters)

    def finalize_filters(self):
        """Print the global summary, then the report of every filter."""
        duration = self._timer() - self.start_time
        separator = '-' * 80
        print(separator)
        print('Processed the %d messages of %s in %.2fs' % (len(self.mbox), self.mbox_file, duration))
        print(separator)
        for f in self.filters:
            f.finalize()
            print(separator)

    def run(self):
        """Process the whole mbox while holding its lock."""
        self.mbox.lock()
        try:
            self.initialize_filters()
            for message in self.mbox:
                self.apply_filters(message)
            self.finalize_filters()
        finally:
            self.mbox.unlock()
            self.mbox.close()
86 | ||
87 | #----------------------------------------------------------------------------# | |
88 | ||
class MboxFilter:
    """Interface implemented by every filter of the processing chain.

    The default implementations do nothing and return None.
    """

    def initialize(self, mbox_file):
        """Hook invoked by the processor before any message is processed.

        Descriptors needed during processing should be opened here."""
        return None

    def process(self, message):
        """Hook invoked by the processor for each message reaching this filter.

        A true return value stops the processing of the message; a false one
        hands the message over to the next filter."""
        return None

    def finalize(self):
        """Hook invoked by the processor once all messages were processed.

        Results should be displayed and descriptors closed here."""
        return None
109 | ||
110 | #----------------------------------------------------------------------------# | |
111 | ||
def findSubject(message):
    """Returns the subject of an email.Message as an unicode string.

    Returns None when the message has no Subject header. RFC 2047 encoded
    words are decoded; parts that do not declare a charset are assumed to be
    UTF-8. If a part declares an unknown charset or contains bytes that are
    invalid for its charset, the subject is decoded leniently as UTF-8 (with
    replacement characters) instead of raising.
    """
    # `import email` alone does not guarantee that the submodule is loaded
    import email.header

    raw_subject = message['Subject']
    if raw_subject is None:
        return None

    # decode_header returns a list of (decoded_string, charset) pairs
    decoded_seq = email.header.decode_header(raw_subject)
    decoded_seq = [(subj, enc or 'utf-8') for subj, enc in decoded_seq]
    try:
        header = email.header.make_header(decoded_seq)
        # Be Python 2 & 3 compatible
        return unicode(header) if sys.version_info < (3,) else str(header)
    except (LookupError, UnicodeError):
        # Bogus charset name or undecodable bytes: a subject with a few
        # replacement characters is still good enough for the heuristics.
        pieces = []
        for chunk, _charset in decoded_seq:
            if isinstance(chunk, bytes):
                chunk = chunk.decode('utf-8', 'replace')
            pieces.append(chunk)
        return ''.join(pieces)
123 | ||
58e64caf AA |
124 | |
# Matches a recipient field value of the form "rfc822; <address>"
_recipient_re = re.compile(r'^rfc822; ?(.+)$', re.IGNORECASE | re.UNICODE)
# Some MTA set the Final-Recipient with "LOCAL;" instead of "rfc822;"
_recipient_re2 = re.compile(r'^local; ?(.+)$', re.IGNORECASE | re.UNICODE)
58e64caf | 128 | |
6208fd26 | 129 | |
58e64caf AA |
def findAddressInBounce(bounce):
    """Finds the faulty email address in a bounced email.

    See RFC 1894 for more information.

    `bounce` is expected to be a multipart/report message whose second part
    is a message/delivery-status. When the second part is the undelivered
    message itself (message/rfc822) and the first part is text/plain, the
    text part is analysed with findAddressInPlainBounce instead.

    Malformed reports (string payload, missing Action or Status field,
    non-numeric status) are reported on stdout and rejected instead of
    raising.

    Returns None or the email address."""

    # Check that it is a bounce - a few MTA fail to set this correctly :(
    if bounce.get_content_type() != 'multipart/report':
        print('! Not a valid bounce (expected multipart/report, found %s).' % bounce.get_content_type())
        return None
    # Extract the second component of the multipart/report
    parts = bounce.get_payload()
    # A multipart without a usable boundary is parsed as a plain string
    num_payloads = len(parts) if isinstance(parts, list) else 0
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    report = parts[1]

    # If the second part is of type "message/rfc822" it is the undelivered message.
    # Let's try to understand the text part
    if report.get_content_type() == 'message/rfc822':
        text_bounce = parts[0]
        if text_bounce.get_content_type() == 'text/plain':
            return findAddressInPlainBounce(text_bounce, bounce)
        # If it's not a text message, let's continue to the next error message

    if report.get_content_type() != 'message/delivery-status':
        print('! Not a valid bounce (expected message/delivery-status, found %s).' % report.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields
    fields = report.get_payload()
    num_payloads = len(fields) if isinstance(fields, list) else 0
    if num_payloads < 2:
        print('! Not a valid bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = fields[1]
    if content.get_content_type() != 'text/plain':
        print('! Not a valid bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    # Some MTA don't set Final-Recipient but use Remote-Recipient instead
    if 'Final-Recipient' in content:
        final_recipient = content['Final-Recipient']
    elif 'Remote-Recipient' in content:
        final_recipient = content['Remote-Recipient']
    else:
        print('! Not a valid bounce (no Final-Recipient).')
        return None
    recipient_match = _recipient_re.search(final_recipient)
    if recipient_match is None:
        # Be nice, test another regexp
        recipient_match = _recipient_re2.search(final_recipient)
        if recipient_match is None:
            print('! Missing final recipient.')
            return None
    address = recipient_match.group(1)

    # Check the action field (it may be missing in broken reports)
    action = content['Action']
    if action is None or action.lower().strip() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']
    # Normalised copy used for the tests; `status` itself is kept for messages
    status_text = (status or '').strip()

    # Permanent failure state (status class 5)
    if status_text.startswith('5'):
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    failure_hints = [
        "insufficient system storage",
        "mailbox full",
        "mailbox recipient does not have a mailbox database",
        "over quota",
        "requested action aborted: local error in processing",
        "user unknown",
    ]
    if 'quota' in status_text.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None
58e64caf | 219 | |
15f4834d | 220 | |
8438b7d1 NI |
def findAddressInWeirdDeliveryStatus(message):
    """Finds the faulty email address in the delivery-status part of an email

    Unlike findAddressInBounce, the status does NOT follow RFC 1894, so
    try to get the data nevertheless...

    `message` is the message/delivery-status part itself. The recipient is
    read from Final-Recipient or, failing that, Original-Recipient.
    Malformed parts (string payload, missing Action field, non-numeric
    status) are reported on stdout and rejected instead of raising.

    Returns None or the email address.
    """
    if message.get_content_type() != 'message/delivery-status':
        print('! Not a valid weird bounce (expected message/delivery-status, found %s).' % message.get_content_type())
        return None
    # The per-message-fields don't matter here, get only the per-recipient-fields
    parts = message.get_payload()
    num_payloads = len(parts) if isinstance(parts, list) else 0
    if num_payloads < 2:
        print('! Not a valid weird bounce (expected at least 2 parts, found %d).' % num_payloads)
        return None
    content = parts[1]
    # The content may be missing, but interesting headers still present in the first payload...
    # (a Message without any header is falsy)
    if not content:
        content = parts[0]
        if 'Action' not in content:
            print('! Not a valid weird bounce (unable to find content).')
            return None
    elif content.get_content_type() != 'text/plain':
        print('! Not a valid weird bounce (expected text/plain, found %s).' % content.get_content_type())
        return None

    # Extract the faulty email address
    if 'Final-Recipient' in content:
        recipient = content['Final-Recipient']
        recipient_match = _recipient_re.search(recipient)
        if recipient_match is None:
            # Be nice, test another regexp
            recipient_match = _recipient_re2.search(recipient)
            if recipient_match is None:
                print('! Unknown final recipient in weird bounce.')
                return None
        address = recipient_match.group(1)
    elif 'Original-Recipient' in content:
        recipient = content['Original-Recipient']
        recipient_match = _recipient_re.search(recipient)
        if recipient_match is None:
            # Be nice, test another regexp
            recipient_match = _recipient_re2.search(recipient)
            if recipient_match is None:
                # Last resort: a bare "<user@domain>"
                recipient_match = re.match(r'<([^>]+@[^@>]+)>', recipient)
                if recipient_match is None:
                    print('! Unknown original recipient in weird bounce.')
                    return None
        address = recipient_match.group(1)
    else:
        print('! Missing recipient in weird bounce.')
        return None

    # Check the action field (it may be missing when the second part had headers)
    action = content['Action']
    if action is None or action.lower().strip() != 'failed':
        print('! Not a failed action (%s).' % action)
        return None

    status = content['Status']
    diag_code = content['Diagnostic-Code']
    # Normalised copy used for the tests; `status` itself is kept for messages
    status_text = (status or '').strip()

    # Permanent failure state (status class 5)
    if status_text.startswith('5'):
        return address

    # Mail forwarding loops, DNS errors and connection timeouts cause X-Postfix errors
    if diag_code is not None and diag_code.startswith('X-Postfix'):
        return address

    failure_hints = [
        "insufficient system storage",
        "mailbox full",
        "requested action aborted: local error in processing",
        "sender address rejected",
        "user unknown",
    ]
    if 'quota' in status_text.lower():
        return address
    if diag_code is not None:
        ldiag_code = diag_code.lower()
        if any(hint in ldiag_code for hint in failure_hints):
            return address

    print('! Not a permanent failure status (%s).' % status)
    if diag_code is not None:
        print('! Diagnostic code was: %s' % diag_code)
    return None
307 | ||
308 | ||
def findAddressInPlainBounce(bounce, real_bounce=None):
    """Finds the faulty email address in a non-RFC-1894 bounced email

    `bounce` is the text/plain part to analyse. `real_bounce` is the full
    email carrying the From and Subject headers, when the email has several
    MIME parts; it defaults to `bounce`.

    Only the 15 first lines of the text are inspected. They must contain a
    non-delivery hint and a permanent-error hint before an address is
    extracted. A missing From or Subject header makes the message rejected
    rather than raising.

    Returns None or the email address.
    """
    # real_bounce is the full email and bounce only the text/plain part, if email have several MIME parts
    real_bounce = real_bounce or bounce
    sender = real_bounce['From']
    # str(): NOTE(review) header values read from bytes may be Header objects
    lower_from = str(sender or '').lower()
    if 'mailer-daemon@' not in lower_from and 'postmaster' not in lower_from:
        print('! Not a valid plain bounce (expected from MAILER-DAEMON or postmaster, found %s).' % sender)
        return None
    if bounce.get_content_type() != 'text/plain':
        print('! Not a valid plain bounce (expected text/plain, found %s).' % bounce.get_content_type())
        return None
    subject = (findSubject(real_bounce) or '').lower()
    known_subjects = [
        "delivery status notification (failure)",
        "failure notice",
        "mail delivery failure",
        "returned mail: see transcript for details",
        "undeliverable message",
        "undelivered mail returned to sender",
    ]
    if subject not in known_subjects and not subject.startswith('mail delivery failed'):
        print('! Not a valid plain bounce (unknown subject: %s).' % subject)
        return None

    # Read the 15 first lines of content and find some relevant keywords to validate the bounce
    lines = bounce.get_payload().splitlines()[:15]

    # ALTOSPAM is a service which requires to click on a link when sending an email
    # Don't consider the "554 5.0.0 Service unavailable" returned by ALTOSPAM as a failure
    # but put this message in the dsn-temp mailbox so that it can be processed by hand.
    if any("ALTOSPAM which is used by the person" in line for line in lines):
        print('! ALTOSPAM has been detected. Moving this message to the dsn-temp mbox')
        return None

    # Match:
    #   A message that you sent could not be delivered to one or more of its recipients.
    #   I'm afraid I wasn't able to deliver your message to the following addresses.
    #   The following message to <email@example.com> was undeliverable.
    non_delivery_hints = [
        "could not be delivered to",
        "Delivery to the following recipient failed permanently",
        "I'm sorry to have to inform you that your message could not",
        "I wasn't able to deliver your message",
        "try to send your message again at a later time",
        "User unknown in local recipient table",
        "> was undeliverable.",
        "we were unable to deliver your message",
    ]
    if not any(any(hint in line for hint in non_delivery_hints) for line in lines):
        print('! Unknown mailer-daemon message, unable to find an hint for non-delivery in message:')
        print('\n'.join(lines))
        return None

    # Match:
    #   This is a permanent error; I've given up. Sorry it didn't work out.
    #   5.1.0 - Unknown address error 550-'email@example.com... No such user'
    permanent_error_hints = [
        "Delivery to the following recipient failed permanently",
        "failed due to an unavailable mailbox",
        "following addresses had permanent fatal errors",
        "I'm sorry to have to inform you that your message could not",
        "The email account that you tried to reach does not exist",
        "This is a permanent error",
        "Unknown address error",
        "unreachable for too long",
        "550 Requested action not taken",
    ]
    if not any(any(hint in line for hint in permanent_error_hints) for line in lines):
        print('! Unknown mailer-daemon message, unable to find an hint for permanent error in message:')
        print('\n'.join(lines))
        return None

    # Retrieve the first occurence of <email@example.com>
    for line in lines:
        match = re.match(r'.*?<([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)>', line)
        if match is None:
            # Also accept an address standing alone on its line, optionally quoted
            match = re.match(r'^\s*"?([0-9a-zA-Z_.-]+@[0-9a-zA-Z_.-]+)"?\s*$', line)
        if match is not None:
            address = match.group(1)
            if address.endswith('@polytechnique.org'):
                # First valid mail is something like <info_newsletter@polytechnique.org>, so we missed the real one
                break
            return address

    print('! Unknown mailer-daemon message, unable to find email address:')
    print('\n'.join(lines))
    return None
397 | ||
58e64caf AA |
398 | #----------------------------------------------------------------------------# |
399 | ||
class DirectBouncesFilter(MboxFilter):
    """Collects the bounces among the messages carrying no X-Spam-Flag header.

    Confirmed bounces are saved in "<mbox>.bounced" and their faulty
    addresses are listed by finalize().
    """

    def initialize(self, mbox_file):
        """Reset the counters and open (and empty) the output mailbox."""
        self.seen = 0
        self.bad_problems = 0
        self.emails = []
        self.mbox_file = '%s.bounced' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Claim `message` if it is a mailman notification or a confirmed bounce."""
        if message['X-Spam-Flag'] is None:
            # During finalization, we will verify that all messages were processed
            self.seen += 1
            # Special case: ignore mailman notifications for the mailing-list
            # on which the NL is forwarded
            if message['From'] == 'newsletter-externes-owner@polytechnique.org':
                print('! Dropping a notification from mailman for newsletter-externes@polytechnique.org, this should be OK.')
                self.seen -= 1
                return True
            # Additional checks, just to be sure
            elif message['From'] != 'MAILER-DAEMON@polytechnique.org (Mail Delivery System)' \
                    or message['Subject'] != 'Undelivered Mail Returned to Sender':
                # NOTE(review): such a message stays counted in self.seen only,
                # so finalize() will report mismatching numbers for it.
                print('! Not an usual direct bounce (From=%r, Subject=%r).' % (message['From'], message['Subject']))
            else:
                address = findAddressInBounce(message)
                if address is not None:
                    self.emails.append(address)
                    self.mbox.add(message)
                    return True
                else:
                    print('! => No email found in direct bounce, this is really bad.')
                    self.bad_problems += 1
        return False

    def finalize(self):
        """Print the statistics and the list of bounced addresses, then close the mailbox."""
        print('Found %d messages with no X-Spam-Flag header.' % self.seen)
        print('Found %d of them that are confirmed bounces.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        if self.bad_problems:
            print('Found %d of them that are invalid.' % self.bad_problems)
        if self.seen != len(self.mbox) + self.bad_problems:
            # Backslashes are escaped explicitly: '\ ' is an invalid escape sequence
            print('  /!\\ These numbers shoud be equal! We have a problem! /!\\')
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for address in self.emails:
            print(address)
        print('')
        self.mbox.close()
450 | ||
451 | #----------------------------------------------------------------------------# | |
452 | ||
class SpamFilter(MboxFilter):
    """Saves in "<mbox>.spam" the messages flagged as spam by bogofilter."""

    def initialize(self, mbox_file):
        """Open the output mailbox and empty it."""
        self.mbox_file = '%s.spam' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Claim `message` when its X-Spam-Flag header says "Yes"."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Yes, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the number of spams found and close the mailbox."""
        summary = (
            'Found %d spams. This is reliable.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You might check the contents of this mbox.',
        )
        for sentence in summary:
            print(sentence)
        self.mbox.close()
472 | ||
473 | #----------------------------------------------------------------------------# | |
474 | ||
class UnsureFilter(MboxFilter):
    """Saves in "<mbox>.unsure" the messages bogofilter could not classify."""

    def initialize(self, mbox_file):
        """Open the output mailbox and empty it."""
        self.mbox_file = '%s.unsure' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Claim `message` when its X-Spam-Flag header says "Unsure"."""
        flag = message['X-Spam-Flag']
        if flag is None or not flag.startswith('Unsure, tests=bogofilter'):
            return False
        self.mbox.add(message)
        return True

    def finalize(self):
        """Print the number of unclassified messages and close the mailbox."""
        summary = (
            'Found %d unclassified messages. Most of them should be spams.' % len(self.mbox),
            'They were saved in %s.' % self.mbox_file,
            'You must check the contents of this mbox and feed the antispam.',
        )
        for sentence in summary:
            print(sentence)
        self.mbox.close()
494 | ||
495 | #----------------------------------------------------------------------------# | |
496 | ||
class CheckNonSpamFilter(MboxFilter):
    """Counts the messages reaching it that bogofilter did not mark as "No".

    This filter never claims a message.
    """

    def initialize(self, mbox_file):
        """Reset the counter."""
        self.seen = 0

    def process(self, message):
        """Count `message` unless its X-Spam-Flag header says "No"; never claim it."""
        flag = message['X-Spam-Flag']
        marked_non_spam = flag is not None and flag.startswith('No, tests=bogofilter')
        if not marked_non_spam:
            self.seen += 1
        return False

    def finalize(self):
        """Report whether unexpected messages were encountered."""
        if self.seen <= 0:
            print('All messages were either spam, or unsure, or non-spams. Good.')
            return
        print('Encountered %d messages that were neither spam, nor unsure, nor non-spams.' % self.seen)
        print('Please investigate.')
58e64caf AA |
514 | |
515 | #----------------------------------------------------------------------------# | |
516 | ||
class OutOfOfficeFilter(MboxFilter):
    """Saves in "<mbox>.ooo" the out-of-office auto-replies.

    Detection relies on the subject, on a "Delivered-To: Autoresponder"
    header and, for plain-text replies, on the first line of the body.
    """

    def initialize(self, mbox_file):
        """Open (and empty) the output mailbox and compile the subject patterns."""
        self.mbox_file = '%s.ooo' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        subject_re = [
            r'^Absen(t|ce)',
            r'^(AUTO: )?Out of (the )?office',
            r'^Auto( ?): ',
            r'^AutoRe( ?):',
            r'^Automatic reply: ',
            r'automatique d\'absence',
            r'AutoReply',
            r'(est|is) absent',
            r'^En dehors du bureau',
            r'I am out of town',
            r'I am currently away',
            r'(am|is) out of (the )?office',
            r'Notification d\'absence',
            r'^Out of email reach',
            r'R.{1,2}ponse automatique( :)?',  # There may be encoding error of e acute
            r'^Respuesta de Estoy ausente:',
        ]
        self.subject_regexes = [re.compile(sre, re.I | re.U) for sre in subject_re]

    def process(self, message):
        """Claim `message` when it looks like an out-of-office auto-reply."""
        subject = findSubject(message)
        if subject is None:
            # Every heuristic below needs a subject
            return False

        if any(regex.search(subject) for regex in self.subject_regexes):
            self.mbox.add(message)
            return True

        # Some systems reply with "Re: ". Be smart here!
        if subject.startswith('Re: '):
            # Delivered-To: Autoresponder
            # (get_all returns None without a default when the header is absent)
            if 'Autoresponder' in message.get_all('Delivered-To', []):
                self.mbox.add(message)
                return True
            # Parse content if it is simple enough
            if message.get_content_type() == 'text/plain':
                body_lines = message.get_payload().splitlines()
                # An empty body has no first line to look at
                firstline = body_lines[0].lower() if body_lines else ''
                if (' absent du bureau ' in firstline
                        or ' away from my office ' in firstline):
                    self.mbox.add(message)
                    return True

        return False

    def finalize(self):
        """Print the number of auto-replies found and close the mailbox."""
        print('Found %d "out of office". This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('You may check the contents of this mbox.')
        self.mbox.close()
570 | ||
571 | #----------------------------------------------------------------------------# | |
572 | ||
class DeliveryStatusNotificationFilter(MboxFilter):
    """Extracts the faulty addresses from delivery status notifications.

    Notifications from which an address was extracted are saved in
    "<mbox>.dsn". Reports (multipart/report) from which no address could be
    extracted are saved in "<mbox>.dsn-temp" for manual processing.
    """

    def initialize(self, mbox_file):
        """Reset the address list and open (and empty) both output mailboxes."""
        self.emails = []
        self.mbox_file = '%s.dsn' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()
        self.mbox_temp_file = '%s.dsn-temp' % mbox_file
        self.mbox_temp = mailbox.mbox(self.mbox_temp_file)
        self.mbox_temp.clear()

    def process(self, message):
        """Claim `message` when it is a delivery status notification."""
        # Don't modify message variable for "self.mbox.add(message)"
        report_message = message
        # Find real report inside attachment
        if message.get_content_type() == 'multipart/mixed':
            parts = message.get_payload()
            if not isinstance(parts, list) or not parts:
                # Malformed multipart (no usable parts): leave it to the next filter
                return False
            # Some MTA confuse multipart/mixed with multipart/report
            # Let's try to find a report!
            if len(parts) >= 2:
                try_status = parts[1]
                if try_status.get_content_type() == 'message/delivery-status':
                    # The world would be a nice place if delivery-status were
                    # formatted as expected...
                    address = findAddressInWeirdDeliveryStatus(try_status)
                    if address is not None:
                        self.emails.append(address)
                        self.mbox.add(message)
                        return True
            report_message = parts[0]

        # Process report if its type is correct
        if report_message.get_content_type() == 'multipart/report':
            address = findAddressInBounce(report_message)
            if address is not None:
                self.emails.append(address)
                self.mbox.add(message)
            else:
                print("! => Moved to temporary DSN mailbox")
                self.mbox_temp.add(message)
            return True

        # Detect ill-formatted reports, sent as plain text email
        if report_message.get_content_type() == 'text/plain':
            # str(): NOTE(review) header values read from bytes may be Header objects
            sender = str(message.get('From', ''))
            subject = str(message.get('Subject', ''))
            if 'MAILER-DAEMON@' in sender.upper() or subject.lower() == 'mail delivery failure':
                # Pass the enclosing message too: From and Subject live there
                # when report_message is only the first part of a multipart
                address = findAddressInPlainBounce(report_message, message)
                if address is not None:
                    self.emails.append(address)
                    self.mbox.add(message)
                    return True
        return False

    def finalize(self):
        """Print the statistics and the list of bounced addresses, then close both mailboxes."""
        print('Found %d delivery status notifications. This is generally reliable.' % len(self.mbox))
        print('They were saved in %s.' % self.mbox_file)
        print('')
        print('Here is the list of email adresses for these bounces:')
        print('')
        for address in self.emails:
            print(address)
        print('')
        self.mbox.close()
        print('Found %d temporary and invalid delivery status notifications.' % len(self.mbox_temp))
        print('They were saved in %s.' % self.mbox_temp_file)
        self.mbox_temp.close()
58e64caf AA |
640 | |
641 | #----------------------------------------------------------------------------# | |
642 | ||
class CatchAllFilter(MboxFilter):
    """Saves in "<mbox>.catchall" every message that reaches it."""

    def initialize(self, mbox_file):
        """Open the output mailbox and empty it."""
        self.mbox_file = '%s.catchall' % mbox_file
        self.mbox = mailbox.mbox(self.mbox_file)
        self.mbox.clear()

    def process(self, message):
        """Always claim `message`."""
        self.mbox.add(message)
        return True

    def finalize(self):
        """Report the caught messages; delete the mailbox file when there are none."""
        caught = len(self.mbox)
        if caught > 0:
            print('%d messages reached the catchall.' % caught)
            print('They were saved in %s.' % self.mbox_file)
            print('You must process the contents of this mbox manually.')
            self.mbox.close()
            return
        print('No messages reached the catchall. Nice.')
        self.mbox.close()
        os.unlink(self.mbox_file)
664 | ||
665 | #----------------------------------------------------------------------------# | |
666 | ||
if __name__ == '__main__':
    # Expect exactly one argument: the path of the mbox to process
    arguments = sys.argv
    if len(arguments) != 2:
        print('Usage: %s mbox' % arguments[0])
        sys.exit(1)

    mbox_path = arguments[1]
    if not os.path.exists(mbox_path):
        print('No such file: %s' % mbox_path)
        sys.exit(1)

    MboxProcessor(mbox_path).run()