Improves and fixes script about merge issues linked to addresses.
author Stéphane Jacob <sj@m4x.org>
Tue, 2 Nov 2010 17:40:54 +0000 (18:40 +0100)
committer Stéphane Jacob <sj@m4x.org>
Tue, 2 Nov 2010 18:11:53 +0000 (19:11 +0100)
Signed-off-by: Stéphane Jacob <sj@m4x.org>
upgrade/1.0.1/merge_issues_addresses.php

index 2557586..4c876f7 100755 (executable)
@@ -7,72 +7,102 @@ require_once '../../classes/address.php';
 
 $globals->debug = 0; // Do not store backtraces.
 
-echo "Tries to geocode addresses (due a bug in the previous release, all addresses must run once again). (1/2)\n";
+print "Tries to geocode addresses (due a bug in the previous release, all addresses must run once again). (1/2)\n";
+$time = XDB::fetchOneCell('SELECT  COUNT(distinct(pid), jobid)
+                             FROM  profile_addresses');
+$time = ceil($time / 60 / 24);
+print "It will approximately take $time days.\n";
+
 $it = XDB::rawIterator('SELECT  *
                           FROM  profile_addresses
                       ORDER BY  pid, jobid, type, id');
+$total = $it->total();
+$i = 0;
+$j = 0;
+printf("\r%u / %u",  $i, $total);
 $pid = 0;
 $jobid = 0;
-while ($address = new Address($it->next())) {
-  $address->format(array(true, true));
-  $address->delete();
-  $address->save();
-  if (!($pid == $address->pid && $jobid == $address->jobid)) {
-    $pid = $address->pid;
-    $jobid = $address->jobid;
-    sleep(60);
-  }
+while ($item = $it->next()) {
+    $address = new Address($item);
+    $address->format(array(true, true));
+    $address->delete();
+    $address->save();
+    if (!($pid == $address->pid && $jobid == $address->jobid)) {
+        $pid = $address->pid;
+        $jobid = $address->jobid;
+        sleep(60);
+    }
+
+    ++$i;
+    ++$j;
+    if ($j == 10) {
+        $j = 0;
+        printf("\r%u / %u",  $i, $total);
+    }
 }
+printf("\r%u / %u",  $i, $total);
+print "\nGeocoding done.\n\n";
 
-echo "Deletes duplicated addresses. (2/2)\n";
-$it = XDB::rawIterator("SELECT  *
-                          FROM  profile_addresses
-                         WHERE  type = 'home'
-                      ORDER BY  pid, id");
+print "Deletes duplicated addresses. (2/2)\n";
+$pids = XDB::rawFetchColumn("SELECT  DISTINCT(pid)
+                               FROM  profile_addresses AS a1
+                              WHERE  type = 'home' AND EXISTS (SELECT  *
+                                                                 FROM  profile_addresses AS a2
+                                                                WHERE  a2.type = 'home' AND a2.pid = a1.pid AND a2.id != a1.id)
+                           ORDER BY  pid");
+$total = count($pids);
+$done = 0;
+$aux = 0;
+$deleted = 0;
 $addresses = array();
+$rawAddresses = array();
 $duplicates = array();
-$address = new Address($it->next());
-$pid = $address->pid;
-$addresses[] = $address;
-$count = 1;
-while ($address = new Address($it->next())) {
-  if ($address->pid == $pid) {
-    $address[] = $address;
-    ++$count;
-  } else {
-    if ($count != 1) {
-      for ($i = 0; $i < $count; ++$i) {
+foreach ($pids as $pid) {
+    $count = 0;
+    $it = Address::iterate(array($pid), array(Address::LINK_PROFILE), array(0));
+    while ($item = $it->next()) {
+        $addresses[] = $item;
+        $rawAddresses[] = preg_replace('/[^a-zA-Z0-9]/', '', replace_accent($item->text));
+        ++$count;
+    }
+    for ($i = 0; $i < $count; ++$i) {
         for ($j = $i + 1; $j < $count; ++$j) {
-          if ($addresses[$i]->text == $addresses[$j]->text) {
-            $duplicates[$i] = true;
-          }
+            if ($rawAddresses[$i] == $rawAddresses[$j]) {
+                $duplicates[$i] = true;
+            }
         }
-      }
-      foreach ($duplicates as $key => $bool) {
+    }
+    foreach ($duplicates as $key => $bool) {
         unset($addresses[$key]);
-      }
-      if (count($addresses) != $count) {
+    }
+    if (count($addresses) != $count) {
+        $deleted += ($count - count($addresses));
         Address::deleteAddresses($pid, 'home');
         $id = 0;
         foreach ($addresses as $address) {
-          $address->setId($id);
-          $address->save();
-          ++$id;
+            $address->setId($id);
+            $address->save();
+            ++$id;
         }
-        XDB::execute('UPDATE  profile_merge_issues
-                         SET  issues = REPLACE(issues, \'address\', \'\')
-                       WHERE  pid = {?}', $pid);
-      }
-      unset($duplicates);
+        XDB::execute('UPDATE IGNORE  profile_merge_issues
+                                SET  issues = REPLACE(issues, \'address\', \'\')
+                              WHERE  pid = {?}', $pid);
     }
+    unset($rawAddresses);
     unset($addresses);
-    $pid = $address->pid;
-    $addresses[] = $address;
-    $count = 1;
-  }
+    unset($duplicates);
+
+    ++$done;
+    ++$aux;
+    if ($aux == 100) {
+        $aux = 0;
+        printf("\r%u / %u",  $done, $total);
+    }
 }
+printf("\r%u / %u",  $done, $total);
+print "\n$deleted addresses deleted.\n\n";
 
-echo "That's all folks!\n";
+print "That's all folks!\n";
 
 /* vim:set et sw=4 sts=4 ts=4: */
 ?>