Uses the address given by the user by default; allows bigger differences between the...
[platal.git] / include / geocoding.inc.php
index 8052070..3e193e6 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /***************************************************************************
- *  Copyright (C) 2003-2009 Polytechnique.org                              *
+ *  Copyright (C) 2003-2010 Polytechnique.org                              *
  *  http://opensource.polytechnique.org/                                   *
  *                                                                         *
  *  This program is free software; you can redistribute it and/or modify   *
@@ -47,9 +47,10 @@ abstract class Geocoder {
                                 WHERE  name = {?}",
                               $address[$area . 'Name']);
             if ($res->numRows() == 0) {
-                $address[$area . 'Id'] = XDB::execute("INSERT INTO  " . $databases[$area] . " (name, country)
-                                                            VALUES  ({?}, {?})",
-                                                      $address[$area . 'Name'], $address['countryId']);
+                XDB::execute('INSERT INTO  ' . $databases[$area] . ' (name, country)
+                                   VALUES  ({?}, {?})',
+                             $address[$area . 'Name'], $address['countryId']);
+                $address[$area . 'Id'] = XDB::insertId();
             } else {
                 $address[$area . 'Id'] = $res->fetchOneCell();
             }
@@ -92,10 +93,14 @@ class GMapsGeocoder extends Geocoder {
 
     // Maximum number of Geocoding calls to the Google Maps API.
     const MAX_GMAPS_RPC_CALLS = 5;
+    // Maximum Levenshtein distance allowed between the input and the geocoded text on a single line.
+    const MAX_LINE_DISTANCE = 5;
+    // Maximum Levenshtein distance allowed between the input and the geocoded text, summed over all lines.
+    const MAX_TOTAL_DISTANCE = 6;
 
     public function getGeocodedAddress(array $address) {
         $address = $this->prepareAddress($address);
-        $textAddress = $address['text'];
+        $textAddress = $this->getTextToGeocode($address);
 
         // Try to geocode the full address.
         if (($geocodedData = $this->getPlacemarkForAddress($textAddress))) {
@@ -128,25 +133,12 @@ class GMapsGeocoder extends Geocoder {
         $address['accuracy'] = 0;
         return $address;
     }
+
     // Updates the address with the geocoded information from Google Maps. Also
     // cleans up the final informations.
     private function getUpdatedAddress(array $address, array $geocodedData, $extraLines) {
         $this->fillAddressWithGeocoding(&$address, $geocodedData);
-
-        // If the accuracy is 6, it means only the street has been gecoded
-        // but not the number, thus we need to fix it.
-        if ($address['accuracy'] == 6) {
-            $this->fixStreetNumber($address);
-        }
-
-        // We can now format the address.
         $this->formatAddress($address, $extraLines);
-
-        // Some entities in ISO 3166 are not countries, thus they have to be replaced
-        // by the country they belong to.
-        // TODO: fixCountry($address);
-
         return $address;
     }
 
@@ -326,22 +318,30 @@ class GMapsGeocoder extends Geocoder {
         $countGeoloc = count($arrayGeoloc);
         $countText   = count($arrayText);
 
+        $totalDistance = 0;
         if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1)
             || (($countText == $countGeoloc - 1)
                 && ($arrayText[$countText - 1] == strtoupper($address['country'])))) {
             $same = false;
         } else {
             for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) {
-                if (levenshtein($arrayText[$i], trim($arrayGeoloc[$i])) > 3) {
+                $lineDistance = levenshtein($arrayText[$i], trim($arrayGeoloc[$i]));
+                $totalDistance += $lineDistance;
+                if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) {
                     $same = false;
+                    break;
                 }
             }
         }
+
         if ($same) {
-            $address['text'] = $address['geoloc'];
-            $address['postalText'] = $address['geocodedPostalText'];
             unset($address['geoloc'], $address['geocodedPostalText']);
+        } else {
+            $address['geoloc'] = str_replace("\n", "\r\n", $address['geoloc']);
+            $address['geocodedPostalText'] = str_replace("\n", "\r\n", $address['geocodedPostalText']);
         }
+        $address['text'] = str_replace("\n", "\r\n", $address['text']);
+        $address['postalText'] = str_replace("\n", "\r\n", $address['postalText']);
     }
  
     // Returns the address formated for postal use.
@@ -388,36 +388,34 @@ class GMapsGeocoder extends Geocoder {
         return $postalText;
     }
 
-    // Search for the lign from the given address that is the closest to the geocoded thoroughfareName
-    // and replaces the corresponding lign in the geocoded text by it.
-    static protected function fixStreetNumber(&$address)
+    // Returns the text to geocode. If the last but one line of the address matches
+    // an ISO 3166-1 item that is not a country (belongsTo is set), the last line
+    // (expected to be the real country) is dropped; else the text is left as is.
+    private function getTextToGeocode($address)
     {
-        if (isset($address['thoroughfareName'])) {
-            $thoroughfareName  = $address['thoroughfareName'];
-            $thoroughfareToken = strtoupper(trim(preg_replace(array("/[,\"'#~:;_\-]/", "/\r\n/"),
-                                                              array("", "\n"), $thoroughfareName)));
-            $geolocLines = explode("\n", $address['geoloc']);
-            $textLines   = explode("\n", $address['text']);
-            $mindist = strlen($thoroughfareToken);
-            $minpos  = 0;
-            $pos     = 0;
-            foreach ($textLines as $i => $token) {
-                if (($l = levenshtein(strtoupper(trim(preg_replace(array("/[,\"'#~:;_\-]/", "/\r\n/"),
-                                                                   array("", "\n"), $token))),
-                                      $thoroughfareToken)) < $mindist) {
-                    $mindist = $l;
-                    $minpos  = $i;
-                }
-            }
-            foreach ($geolocLines as $i => $line) {
-                if (strtoupper(trim($thoroughfareName)) == strtoupper(trim($line))) {
-                    $pos = $i;
-                    break;
-                }
+        $res = XDB::iterator('SELECT  country, countryFR
+                                FROM  geoloc_countries
+                               WHERE  belongsTo IS NOT NULL');
+        $countries = array();
+        foreach ($res as $item) {
+            $countries[] = $item[0];
+            $countries[] = $item[1];
+        }
+        $textLines  = explode("\n", $address['text']);
+        $countLines = count($textLines);
+        $needle     = strtoupper(trim($textLines[$countLines - 2]));
+        $isPseudoCountry = false;
+        foreach ($countries as $country) {
+            if (strtoupper($country) == $needle) {
+                $isPseudoCountry = true;
+                break;
             }
-            $geolocLines[$pos] = $textLines[$minpos];
-            $address['geoloc'] = implode("\n", $geolocLines);
         }
+
+        if ($isPseudoCountry) {
+            return implode("\n", array_slice($textLines, 0, -1));
+        }
+        return $address['text'];
     }
 }