2 #***************************************************************************
3 #* Copyright (C) 2003-2011 Polytechnique.org *
4 #* http://opensource.polytechnique.org/ *
6 #* This program is free software; you can redistribute it and/or modify *
7 #* it under the terms of the GNU General Public License as published by *
8 #* the Free Software Foundation; either version 2 of the License, or *
9 #* (at your option) any later version. *
11 #* This program is distributed in the hope that it will be useful, *
12 #* but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 #* GNU General Public License for more details. *
16 #* You should have received a copy of the GNU General Public License *
17 #* along with this program; if not, write to the Free Software *
18 #* Foundation, Inc., *
19 #* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
20 #***************************************************************************
22 import sys
, random
, re
24 ########################################################################
26 # A random word generator using Markov chains
class WordGenerator(object):
    """Random word generator driven by a character-level Markov chain.

    The model maps every run of ``order`` consecutive characters seen in the
    training words to the list of characters that followed it.  Each word is
    framed by ``special`` characters: ``order`` of them in front (the start
    state) and one at the end (the terminator).

    Attributes:
        order: number of preceding characters used to pick the next one.
        special: padding/terminator character.  A single character is
            assumed, since generation compares one drawn character to it.
        markov: dict mapping a prefix of ``order`` characters to the list of
            observed next characters (duplicates kept, so that
            ``random.choice`` follows the observed frequencies).
    """

    def __init__(self, order=3, special=u'\n'):
        """Create an empty model; call load() before generate()."""
        self.order = order
        self.special = special
        self.markov = {}

    def load(self, corpus):
        """Add every word of the iterable `corpus` to the model.

        Surrounding whitespace is stripped from each word before it is
        framed with the special character.  Calling load() several times
        accumulates transitions.
        """
        for word in corpus:
            word = self.special * self.order + word.strip() + self.special
            for pos in range(len(word) - self.order):
                prefix = word[pos:pos + self.order]
                suffix = word[pos + self.order]
                # setdefault replaces the Python-2-only dict.has_key() test
                self.markov.setdefault(prefix, []).append(suffix)

    def generate(self):
        """Return one random word drawn from the model.

        Starting from the all-special prefix, characters are drawn until the
        terminator comes out; the leading padding is removed from the result.

        Raises:
            KeyError: if nothing has been loaded (no start state exists).
        """
        word = self.special * self.order
        while True:
            c = random.choice(self.markov[word[-self.order:]])
            if c == self.special:
                return word[self.order:]
            word += c
54 ########################################################################
def parse_aliases(file):
    """Read an aliases file and split every alias into its three fields.

    Each line is stripped of trailing whitespace and matched against the
    pattern ``firstname<sep>lastname<sep>NNNN`` (lowercase letters and dashes
    for the names, four digits for the promotion).  Lines that do not match
    are reported with a warning on standard output and skipped.

    Args:
        file: path of the aliases file (one alias per line).

    Returns:
        A tuple ``(firstnames, lastnames, promos)`` of three parallel lists
        of strings, in the order the aliases appear in the file.
    """
    # NOTE(review): the separators are unescaped dots, so they match any
    # character, not only a literal '.'; kept as-is to accept the same input.
    alias_re = re.compile(r'([a-z\-]+).([a-z\-]+).([0-9]{4})')
    firstnames, lastnames, promos = [], [], []
    # aliases are ASCII only; `with` guarantees the handle gets closed
    with open(file, 'r') as handle:
        for alias in handle:
            alias = alias.rstrip()
            match = alias_re.match(alias)
            if match is None:
                # call form of print: same output on Python 2 and Python 3
                print("Warning: could not parse alias '%s'" % alias)
                continue
            firstnames.append(match.group(1))
            lastnames.append(match.group(2))
            promos.append(match.group(3))
    return firstnames, lastnames, promos
def find_next(value, array, pmin=0, pmax=-1):
    """Return the index of the first item of `array` strictly greater than `value`.

    `array` must be sorted in ascending order.  Only the slice
    ``array[pmin:pmax]`` is searched.

    Args:
        value: the value to locate.
        array: sorted sequence of items comparable with `value`.
        pmin: lower bound of the search window (defaults to 0).
        pmax: upper bound (exclusive) of the search window; the default -1
            stands for ``len(array)``.

    Returns:
        The smallest index ``i`` in ``[pmin, pmax]`` such that every item of
        ``array[i:pmax]`` is strictly greater than `value`; this is `pmax`
        (``len(array)`` by default) when no such item exists, and 0 for an
        empty array.
    """
    # Local import: the standard bisection replaces the hand-written
    # recursion, whose `/` midpoint yields a float index on Python 3 and
    # which raised IndexError on an empty array.
    import bisect

    if pmax == -1:
        pmax = len(array)
    return bisect.bisect_right(array, value, pmin, pmax)
def create_alias(firstname, pred_lastname, succ_lastname, rand_lastnames):
    """Build a fake alias whose last name lies between two given last names.

    Args:
        firstname: first name to use in the alias.
        pred_lastname: one bound of the alphabetical interval.
        succ_lastname: the other bound of the interval.
        rand_lastnames: sorted list of generated last names to pick from.

    Returns:
        A string ``"firstname.lastname.promo"`` where ``promo`` is a random
        integer between 100 and 999 (inclusive).

    Raises:
        IndexError: if `rand_lastnames` is empty.
    """
    i_pred = find_next(pred_lastname, rand_lastnames)
    i_succ = find_next(succ_lastname, rand_lastnames)
    # We don't know the order of the names
    if i_pred > i_succ:
        i_pred, i_succ = i_succ, i_pred

    if i_pred == i_succ:
        # No generated name falls inside the interval: fall back to a
        # hyphenated name built on the first bound.
        lastname = "%s-%s" % (pred_lastname, random.choice(rand_lastnames))
    else:
        # randrange excludes i_succ: that index is already past the interval
        # and may be len(rand_lastnames), which randint() could pick and
        # turn into an IndexError.
        lastname = rand_lastnames[random.randrange(i_pred, i_succ)]

    promo = random.randint(100, 999)
    return "%s.%s.%d" % (firstname, lastname, promo)
101 ########################################################################
103 if __name__
== '__main__':
106 if len(sys
.argv
) != 3:
107 print "Usage: %s aliases poisonous" % sys
.argv
[0]
109 print "Generate the aliases file with:"
110 print "$ mysql x4dat > aliases.txt"
111 print "SELECT alias FROM aliases WHERE type = 'a_vie';"
115 # Parse the list of existing aliases and sort it
116 firstnames
, lastnames
, promos
= parse_aliases(sys
.argv
[1])
118 # Generate many virtual lastnames and sort the list
119 generator
= WordGenerator()
120 generator
.load(lastnames
)
121 rand_lastnames
= [generator
.generate() for i
in range(100 * len(lastnames
))]
122 rand_lastnames
.sort()
124 # For each original, create a new alias
125 # alphabetically between this one and the next one
126 handle
= open(sys
.argv
[2], 'w')
127 lastnames
.append('zzzzzzzz') # hack to avoid off-by-one
128 for i
in range(len(firstnames
)):
129 handle
.write(create_alias(firstnames
[i
], lastnames
[i
], lastnames
[i
+ 1], rand_lastnames
))