| 1 | #!/usr/bin/php5 |
| 2 | <?php |
| 3 | // WARNING: this script takes a few minutes to be executed completly, thus run it into a screen. |
| 4 | |
| 5 | require_once 'connect.db.inc.php'; |
| 6 | require_once '../../classes/phone.php'; |
| 7 | require_once '../../classes/address.php'; |
| 8 | |
| 9 | $globals->debug = 0; // Do not store backtraces. |
| 10 | |
| 11 | $abbreviations = array( |
| 12 | 'commandant' => 'cdt', |
| 13 | 'docteur' => 'dr', |
| 14 | 'haut' => 'ht', |
| 15 | 'haute' => 'ht', |
| 16 | 'hauts' => 'ht', |
| 17 | 'hts' => 'ht', |
| 18 | 'general' => 'gen', |
| 19 | 'gal ' => 'gen ', |
| 20 | 'grand' => 'gd', |
| 21 | 'grande' => 'gd', |
| 22 | 'grands' => 'gd', |
| 23 | 'gde ' => 'gd ', |
| 24 | 'gds ' => 'gd ', |
| 25 | 'lieutenant' => 'lt', |
| 26 | 'marechal' => 'mal', |
| 27 | 'notre dame' => 'n d', |
| 28 | 'nouveau' => 'nouv', |
| 29 | 'president' => 'pdt', |
| 30 | 'saint' => 'st', |
| 31 | 'sainte' => 'st', |
| 32 | 'saintes' => 'st', |
| 33 | 'saints' => 'st', |
| 34 | 'ste ' => 'st ', |
| 35 | 'appartement' => 'app', |
| 36 | 'apt' => 'app', |
| 37 | 'appt' => 'app', |
| 38 | 'appart' => 'app', |
| 39 | 'arrondissement'=> 'arr', |
| 40 | 'batiment' => 'bat', |
| 41 | 'escalier' => 'esc', |
| 42 | 'etage' => 'etg', |
| 43 | 'et ' => 'etg', |
| 44 | 'immeuble' => 'imm', |
| 45 | 'lieu dit' => 'ld', |
| 46 | ' lt ' => ' lt ', |
| 47 | 'porte' => 'pte', |
| 48 | 'quartier' => 'quart', |
| 49 | 'residence' => 'res', |
| 50 | 'resi' => 'res', |
| 51 | 'villa' => 'vla', |
| 52 | 'village' => 'vlge', |
| 53 | 'vil ' => 'vlge ', |
| 54 | 'allee' => 'all', |
| 55 | 'avenue' => 'av', |
| 56 | 'boulevard' => 'bd', |
| 57 | 'bld' => 'bd', |
| 58 | 'chemin' => 'ch', |
| 59 | 'chem ' => 'ch ', |
| 60 | 'che ' => 'ch ', |
| 61 | 'cours' => 'crs', |
| 62 | 'domaine' => 'dom', |
| 63 | 'doma ' => 'dom ', |
| 64 | 'faubourg' => 'fg', |
| 65 | 'fbg' => 'fg', |
| 66 | 'hameau' => 'ham', |
| 67 | 'hame ' => 'ham ', |
| 68 | 'impasse' => 'imp', |
| 69 | 'impa ' => 'imp ', |
| 70 | 'lotissement' => 'lot', |
| 71 | 'montee' => 'mte', |
| 72 | 'passage' => 'pass', |
| 73 | 'place' => 'pl', |
| 74 | 'promenade' => 'pro ', |
| 75 | 'prom ' => 'pro ', |
| 76 | 'quai' => 'qu', |
| 77 | 'rue' => 'r', |
| 78 | 'route' => 'rte', |
| 79 | ' rde ' => ' rte ', |
| 80 | ' rle ' => ' rte ', |
| 81 | 'sentier' => 'sen', |
| 82 | 'sent ' => 'sen ', |
| 83 | 'square' => 'sq', |
| 84 | 'mount' => 'mt', |
| 85 | 'road' => 'rd', |
| 86 | 'street' => 'st', |
| 87 | 'str ' => 'str', |
| 88 | 'bis' => 'b', |
| 89 | 'ter' => 't' |
| 90 | ); |
| 91 | $patterns = array(); |
| 92 | $replacements = array(); |
| 93 | foreach ($abbreviations as $key => $abbreviation) { |
| 94 | $patterns[] = '/' . $key . '/'; |
| 95 | $replacements[] = $abbreviation; |
| 96 | } |
| 97 | |
| 98 | function check($address1, $address2) |
| 99 | { |
| 100 | return $address1['short'] == $address2['short'] || $address1['short'] == $address2['long'] |
| 101 | || $address1['long'] == $address2['short'] || $address1['long'] == $address2['long']; |
| 102 | } |
| 103 | |
| 104 | print "Deletes duplicated addresses. (1/3)\n"; |
| 105 | $pids = XDB::rawFetchColumn("SELECT DISTINCT(pid) |
| 106 | FROM profile_addresses AS a1 |
| 107 | WHERE type = 'home' AND EXISTS (SELECT * |
| 108 | FROM profile_addresses AS a2 |
| 109 | WHERE a2.type = 'home' AND a2.pid = a1.pid AND a2.id != a1.id) |
| 110 | ORDER BY pid"); |
| 111 | $total = count($pids); |
| 112 | $done = 0; |
| 113 | $aux = 0; |
| 114 | $deleted = 0; |
| 115 | $addresses = array(); |
| 116 | $rawAddresses = array(); |
| 117 | $duplicates = array(); |
| 118 | foreach ($pids as $pid) { |
| 119 | $count = 0; |
| 120 | $it = Address::iterate(array($pid), array(Address::LINK_PROFILE), array(0)); |
| 121 | while ($item = $it->next()) { |
| 122 | $addresses[$count] = $item; |
| 123 | $rawAddress = preg_replace('/[^a-z0-9]/', ' ', mb_strtolower(replace_accent($item->text))); |
| 124 | $rawAddresses[$count] = array( |
| 125 | 'long' => preg_replace('/\s+/', '', $rawAddress), |
| 126 | 'short' => preg_replace('/\s+/', '', preg_replace($patterns, $replacements, $rawAddress)), |
| 127 | ); |
| 128 | ++$count; |
| 129 | } |
| 130 | for ($i = 0; $i < $count; ++$i) { |
| 131 | for ($j = $i + 1; $j < $count; ++$j) { |
| 132 | if (check($rawAddresses[$i], $rawAddresses[$j])) { |
| 133 | $duplicates[$j] = true; |
| 134 | $minPub = new ProfileVisibility($addresses[$j]->pub); |
| 135 | if ($minPub->isVisible($addresses[$i]->pub)) { |
| 136 | $addresses[$i]->pub = $addresses[$j]->pub; |
| 137 | } |
| 138 | if ($addresses[$j]->hasFlag('mail') && !$addresses[$i]->hasFlag('mail')) { |
| 139 | $addresses[$i]->addFlag('mail'); |
| 140 | } |
| 141 | } |
| 142 | } |
| 143 | } |
| 144 | foreach ($duplicates as $key => $bool) { |
| 145 | unset($addresses[$key]); |
| 146 | } |
| 147 | if (count($addresses) != $count) { |
| 148 | $deleted += ($count - count($addresses)); |
| 149 | Address::deleteAddresses($pid, 'home'); |
| 150 | $id = 0; |
| 151 | foreach ($addresses as $address) { |
| 152 | $address->setId($id); |
| 153 | $address->save(); |
| 154 | ++$id; |
| 155 | } |
| 156 | XDB::execute('UPDATE IGNORE profile_merge_issues |
| 157 | SET issues = REPLACE(issues, \'address\', \'\') |
| 158 | WHERE pid = {?}', $pid); |
| 159 | } |
| 160 | unset($rawAddresses); |
| 161 | unset($addresses); |
| 162 | unset($duplicates); |
| 163 | |
| 164 | ++$done; |
| 165 | ++$aux; |
| 166 | if ($aux == 100) { |
| 167 | $aux = 0; |
| 168 | printf("\r%u / %u", $done, $total); |
| 169 | } |
| 170 | } |
| 171 | printf("\r%u / %u", $done, $total); |
| 172 | print "\n$deleted addresses deleted.\n\n"; |
| 173 | |
| 174 | print "Formats non formated phones. (2/3)\n"; |
| 175 | $it = XDB::rawIterator("SELECT search_tel AS search, display_tel AS display, comment, link_id, |
| 176 | tel_type AS type, link_type, tel_id AS id, pid, pub |
| 177 | FROM profile_phones |
| 178 | WHERE search_tel = '' OR search_tel IS NULL |
| 179 | ORDER BY pid, link_id, tel_id"); |
| 180 | $total = $it->total(); |
| 181 | $i = 0; |
| 182 | $j = 0; |
| 183 | while ($item = $it->next()) { |
| 184 | $phone = new Phone($item); |
| 185 | $phone->delete(); |
| 186 | $phone->save(); |
| 187 | |
| 188 | ++$i; |
| 189 | ++$j; |
| 190 | if ($j == 100) { |
| 191 | $j = 0; |
| 192 | printf("\r%u / %u", $i, $total); |
| 193 | } |
| 194 | } |
| 195 | printf("\r%u / %u", $i, $total); |
| 196 | print "\nFormating done.\n\n"; |
| 197 | |
| 198 | print "Deletes duplicated phones. (3/3)\n"; |
| 199 | $pids = XDB::rawFetchColumn("SELECT DISTINCT(pid) |
| 200 | FROM profile_phones AS p1 |
| 201 | WHERE link_type = 'user' AND EXISTS (SELECT * |
| 202 | FROM profile_phones AS p2 |
| 203 | WHERE p2.link_type = 'user' AND p2.pid = p1.pid AND p2.tel_id != p1.tel_id) |
| 204 | ORDER BY pid"); |
| 205 | $total = count($pids); |
| 206 | $done = 0; |
| 207 | $aux = 0; |
| 208 | $deleted = 0; |
| 209 | $phones = array(); |
| 210 | $duplicates = array(); |
| 211 | foreach ($pids as $pid) { |
| 212 | $count = 0; |
| 213 | $it = Phone::iterate(array($pid), array(Phone::LINK_PROFILE), array(0)); |
| 214 | while ($item = $it->next()) { |
| 215 | $phones[] = $item; |
| 216 | ++$count; |
| 217 | } |
| 218 | for ($i = 0; $i < $count; ++$i) { |
| 219 | for ($j = $i + 1; $j < $count; ++$j) { |
| 220 | if ($phones[$i]->search() == $phones[$j]->search()) { |
| 221 | $duplicates[$j] = true; |
| 222 | $minPub = new ProfileVisibility($phones[$j]->pub); |
| 223 | if ($minPub->isVisible($phones[$i]->pub)) { |
| 224 | $phones[$i]->pub = $phones[$j]->pub; |
| 225 | } |
| 226 | |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | foreach ($duplicates as $key => $bool) { |
| 231 | unset($phones[$key]); |
| 232 | } |
| 233 | if (count($phones) != $count) { |
| 234 | $deleted += ($count - count($phones)); |
| 235 | Phone::deletePhones($pid, 'user'); |
| 236 | $id = 0; |
| 237 | foreach ($phones as $phone) { |
| 238 | $phone->setId($id); |
| 239 | $phone->save(); |
| 240 | ++$id; |
| 241 | } |
| 242 | XDB::execute('UPDATE IGNORE profile_merge_issues |
| 243 | SET issues = REPLACE(issues, \'phone\', \'\') |
| 244 | WHERE pid = {?}', $pid); |
| 245 | } |
| 246 | unset($duplicates); |
| 247 | unset($phones); |
| 248 | |
| 249 | ++$done; |
| 250 | ++$aux; |
| 251 | if ($aux == 10) { |
| 252 | $aux = 0; |
| 253 | printf("\r%u / %u", $done, $total); |
| 254 | } |
| 255 | } |
| 256 | printf("\r%u / %u", $done, $total); |
| 257 | print "\n$deleted phones deleted.\n\n"; |
| 258 | |
| 259 | print "That's all folks!\n"; |
| 260 | |
| 261 | /* vim:set et sw=4 sts=4 ts=4: */ |
| 262 | ?> |