X-Git-Url: http://git.polytechnique.org/?a=blobdiff_plain;f=include%2Fgeocoding.inc.php;h=da809d78a5014f9541451bcdda8f80e119358727;hb=c67b874fd8d442bfded35295a819e7ce11a2d6dc;hp=c888a8552bc9507917d1d8aa63ad194da5aa73db;hpb=2aa2c77aef77e0bfba275dfcb3b257a8f8d66ef1;p=platal.git diff --git a/include/geocoding.inc.php b/include/geocoding.inc.php index c888a85..da809d7 100644 --- a/include/geocoding.inc.php +++ b/include/geocoding.inc.php @@ -1,6 +1,6 @@ 'geoloc_localities', ); - if (isset($address[$area . 'Name']) && isset($databases[$area])) { + if (isset($address[$area . 'Name']) && isset($databases[$area]) && !empty($address[$area . 'Name'])) { $res = XDB::query("SELECT id FROM " . $databases[$area] . " WHERE name = {?}", $address[$area . 'Name']); if ($res->numRows() == 0) { - $address[$area . 'Id'] = XDB::execute("INSERT INTO " . $databases[$area] . " (name, country) - VALUES ({?}, {?})", - $address[$area . 'Name'], $address['countryId']); + XDB::execute('INSERT INTO ' . $databases[$area] . ' (name, country) + VALUES ({?}, {?})', + $address[$area . 'Name'], $address['countryId']); + $address[$area . 'Id'] = XDB::insertId(); } else { $address[$area . 'Id'] = $res->fetchOneCell(); } + } else { + $address[$area . 'Id'] = null; } } @@ -92,10 +95,14 @@ class GMapsGeocoder extends Geocoder { // Maximum number of Geocoding calls to the Google Maps API. const MAX_GMAPS_RPC_CALLS = 5; + // Maximum levenshtein distance authorized between input and geocoded text in a single line. + const MAX_LINE_DISTANCE = 5; + // Maximum levenshtein distance authorized between input and geocoded text in the whole text. + const MAX_TOTAL_DISTANCE = 6; public function getGeocodedAddress(array $address) { $address = $this->prepareAddress($address); - $textAddress = $address['text']; + $textAddress = $this->getTextToGeocode($address); // Try to geocode the full address. if (($geocodedData = $this->getPlacemarkForAddress($textAddress))) { @@ -121,32 +128,19 @@ class GMapsGeocoder extends Geocoder { } public function stripGeocodingFromAddress(array $address) { - unset($address['geoloc'], $address['geoloc_choice'], $address['countryId'], - $address['country'], $address['administrativeAreaName'], + unset($address['geoloc'], $address['geoloc_choice'], $address['geocodedPostalText'], + $address['countryId'], $address['country'], $address['administrativeAreaName'], $address['subAdministrativeAreaName'], $address['localityName'], $address['thoroughfareName'], $address['postalCode']); $address['accuracy'] = 0; return $address; } - + // Updates the address with the geocoded information from Google Maps. Also // cleans up the final informations. private function getUpdatedAddress(array $address, array $geocodedData, $extraLines) { $this->fillAddressWithGeocoding(&$address, $geocodedData); - - // If the accuracy is 6, it means only the street has been gecoded - // but not the number, thus we need to fix it. - if ($address['accuracy'] == 6) { - $this->fixStreetNumber($address); - } - - // We can now format the address. $this->formatAddress($address, $extraLines); - - // Some entities in ISO 3166 are not countries, thus they have to be replaced - // by the country they belong to. - // TODO: fixCountry($address); - return $address; } @@ -163,7 +157,7 @@ class GMapsGeocoder extends Geocoder { // Prepares address to be geocoded private function prepareAddress($address) { $address['text'] = preg_replace('/\s*\n\s*/m', "\n", trim($address['text'])); - // TODO: $address['postalAddress'] = getPostalAddress($address['text']); + $address['postalText'] = $this->getPostalAddress($address['text']); $address['updateTime'] = time(); unset($address['changed']); return $address; @@ -316,6 +310,7 @@ class GMapsGeocoder extends Geocoder { if ($extraLines) { $address['geoloc'] = $extraLines . "\n" . $address['geoloc']; } + $address['geocodedPostalText'] = $this->getPostalAddress($address['geoloc']); $geoloc = strtoupper(preg_replace(array("/[0-9,\"'#~:;_\- ]/", "/\r\n/"), array("", "\n"), $address['geoloc'])); $text = strtoupper(preg_replace(array("/[0-9,\"'#~:;_\- ]/", "/\r\n/"), @@ -325,53 +320,104 @@ class GMapsGeocoder extends Geocoder { $countGeoloc = count($arrayGeoloc); $countText = count($arrayText); + $totalDistance = 0; if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1) || (($countText == $countGeoloc - 1) && ($arrayText[$countText - 1] == strtoupper($address['country'])))) { $same = false; } else { for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) { - if (levenshtein($arrayText[$i], trim($arrayGeoloc[$i])) > 3) { + $lineDistance = levenshtein($arrayText[$i], trim($arrayGeoloc[$i])); + $totalDistance += $lineDistance; + if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) { $same = false; + break; } } } + if ($same) { - $address['text'] = $address['geoloc']; - unset($address['geoloc']); + unset($address['geoloc'], $address['geocodedPostalText']); + } else { + $address['geoloc'] = str_replace("\n", "\r\n", $address['geoloc']); + $address['geocodedPostalText'] = str_replace("\n", "\r\n", $address['geocodedPostalText']); } + $address['text'] = str_replace("\n", "\r\n", $address['text']); + $address['postalText'] = str_replace("\n", "\r\n", $address['postalText']); } - - // Search for the lign from the given address that is the closest to the geocoded thoroughfareName - // and replaces the corresponding lign in the geocoded text by it. - static protected function fixStreetNumber(&$address) - { - if (isset($address['thoroughfareName'])) { - $thoroughfareName = $address['thoroughfareName']; - $thoroughfareToken = strtoupper(trim(preg_replace(array("/[,\"'#~:;_\-]/", "/\r\n/"), - array("", "\n"), $thoroughfareName))); - $geolocLines = explode("\n", $address['geoloc']); - $textLines = explode("\n", $address['text']); - $mindist = strlen($thoroughfareToken); - $minpos = 0; - $pos = 0; - foreach ($textLines as $i => $token) { - if (($l = levenshtein(strtoupper(trim(preg_replace(array("/[,\"'#~:;_\-]/", "/\r\n/"), - array("", "\n"), $token))), - $thoroughfareToken)) < $mindist) { - $mindist = $l; - $minpos = $i; + + // Returns the address formated for postal use. + // The main rules are (cf AFNOR XPZ 10-011): + // -everything in upper case; + // -if there are more then than 38 characters in a lign, split it; + // -if there are more then than 32 characters in the description of the "street", use abbreviations. + private function getPostalAddress($text) { + static $abbreviations = array( + "IMPASSE" => "IMP", + "RUE" => "R", + "AVENUE" => "AV", + "BOULEVARD" => "BVD", + "ROUTE" => "R", + "STREET" => "ST", + "ROAD" => "RD", + ); + + $text = strtoupper($text); + $arrayText = explode("\n", $text); + $postalText = ""; + + foreach ($arrayText as $i => $lign) { + $postalText .= (($i == 0) ? "" : "\n"); + if (($length = strlen($lign)) > 32) { + $words = explode(" ", $lign); + $count = 0; + foreach ($words as $word) { + if (isset($abbreviations[$word])) { + $word = $abbreviations[$word]; + } + if ($count + ($wordLength = strlen($word)) <= 38) { + $postalText .= (($count == 0) ? "" : " ") . $word; + $count += (($count == 0) ? 0 : 1) + $wordLength; + } else { + $postalText .= "\n" . $word; + $count = strlen($word); + } } + } else { + $postalText .= $lign; } - foreach ($geolocLines as $i => $line) { - if (strtoupper(trim($thoroughfareName)) == strtoupper(trim($line))) { - $pos = $i; - break; - } + } + return $postalText; + } + + // Trims the name of the real country if it contains an ISO 3166-1 non-country + // item. For that purpose, we compare the last but one line of the address with + // all non-country items of ISO 3166-1. + private function getTextToGeocode($address) + { + $res = XDB::iterator('SELECT country, countryFR + FROM geoloc_countries + WHERE belongsTo IS NOT NULL'); + $countries = array(); + foreach ($res as $item) { + $countries[] = $item[0]; + $countries[] = $item[1]; + } + $textLines = explode("\n", $address['text']); + $countLines = count($textLines); + $needle = strtoupper(trim($textLines[$countLines - 2])); + $isPseudoCountry = false; + foreach ($countries as $country) { + if (strtoupper($country) == $needle) { + $isPseudoCountry = true; + break; } - $geolocLines[$pos] = $textLines[$minpos]; - $address['geoloc'] = implode("\n", $geolocLines); } + + if ($isPseudoCountry) { + return implode("\n", array_slice($textLines, 0, -1)); + } + return $address['text']; } }