<?php
/***************************************************************************
- * Copyright (C) 2003-2009 Polytechnique.org *
+ * Copyright (C) 2003-2010 Polytechnique.org *
* http://opensource.polytechnique.org/ *
* *
* This program is free software; you can redistribute it and/or modify *
'locality' => 'geoloc_localities',
);
- if (isset($address[$area . 'Name']) && isset($databases[$area])) {
+ if (isset($address[$area . 'Name']) && isset($databases[$area]) && !empty($address[$area . 'Name'])) {
$res = XDB::query("SELECT id
FROM " . $databases[$area] . "
WHERE name = {?}",
$address[$area . 'Name']);
if ($res->numRows() == 0) {
- $address[$area . 'Id'] = XDB::execute("INSERT INTO " . $databases[$area] . " (name, country)
- VALUES ({?}, {?})",
- $address[$area . 'Name'], $address['countryId']);
+ XDB::execute('INSERT INTO ' . $databases[$area] . ' (name, country)
+ VALUES ({?}, {?})',
+ $address[$area . 'Name'], $address['countryId']);
+ $address[$area . 'Id'] = XDB::insertId();
} else {
$address[$area . 'Id'] = $res->fetchOneCell();
}
+ } else {
+ $address[$area . 'Id'] = null;
}
}
// Maximum number of Geocoding calls to the Google Maps API.
const MAX_GMAPS_RPC_CALLS = 5;
+ // Maximum Levenshtein distance allowed between the input and the geocoded text on a single line.
+ const MAX_LINE_DISTANCE = 5;
+ // Maximum Levenshtein distance allowed between the input and the geocoded text over the whole text.
+ const MAX_TOTAL_DISTANCE = 6;
public function getGeocodedAddress(array $address) {
$address = $this->prepareAddress($address);
- $textAddress = $address['text'];
+ $textAddress = $this->getTextToGeocode($address);
// Try to geocode the full address.
if (($geocodedData = $this->getPlacemarkForAddress($textAddress))) {
}
public function stripGeocodingFromAddress(array $address) {
- unset($address['geoloc'], $address['geoloc_choice'], $address['countryId'],
- $address['country'], $address['administrativeAreaName'],
+ unset($address['geoloc'], $address['geoloc_choice'], $address['geocodedPostalText'],
+ $address['countryId'], $address['country'], $address['administrativeAreaName'],
$address['subAdministrativeAreaName'], $address['localityName'],
$address['thoroughfareName'], $address['postalCode']);
$address['accuracy'] = 0;
return $address;
}
-
+
// Updates the address with the geocoded information from Google Maps. Also
// cleans up the final informations.
private function getUpdatedAddress(array $address, array $geocodedData, $extraLines) {
$this->fillAddressWithGeocoding(&$address, $geocodedData);
-
- // If the accuracy is 6, it means only the street has been gecoded
- // but not the number, thus we need to fix it.
- if ($address['accuracy'] == 6) {
- $this->fixStreetNumber($address);
- }
-
- // We can now format the address.
$this->formatAddress($address, $extraLines);
-
- // Some entities in ISO 3166 are not countries, thus they have to be replaced
- // by the country they belong to.
- // TODO: fixCountry($address);
-
return $address;
}
// Prepares address to be geocoded
private function prepareAddress($address) {
$address['text'] = preg_replace('/\s*\n\s*/m', "\n", trim($address['text']));
- // TODO: $address['postalAddress'] = getPostalAddress($address['text']);
+ $address['postalText'] = $this->getPostalAddress($address['text']);
$address['updateTime'] = time();
unset($address['changed']);
return $address;
if ($extraLines) {
$address['geoloc'] = $extraLines . "\n" . $address['geoloc'];
}
+ $address['geocodedPostalText'] = $this->getPostalAddress($address['geoloc']);
$geoloc = strtoupper(preg_replace(array("/[0-9,\"'#~:;_\- ]/", "/\r\n/"),
array("", "\n"), $address['geoloc']));
$text = strtoupper(preg_replace(array("/[0-9,\"'#~:;_\- ]/", "/\r\n/"),
$countGeoloc = count($arrayGeoloc);
$countText = count($arrayText);
+ $totalDistance = 0;
if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1)
|| (($countText == $countGeoloc - 1)
&& ($arrayText[$countText - 1] == strtoupper($address['country'])))) {
$same = false;
} else {
for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) {
- if (levenshtein($arrayText[$i], trim($arrayGeoloc[$i])) > 3) {
+ $lineDistance = levenshtein($arrayText[$i], trim($arrayGeoloc[$i]));
+ $totalDistance += $lineDistance;
+ if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) {
$same = false;
+ break;
}
}
}
+
if ($same) {
- $address['text'] = $address['geoloc'];
- unset($address['geoloc']);
+ unset($address['geoloc'], $address['geocodedPostalText']);
+ } else {
+ $address['geoloc'] = str_replace("\n", "\r\n", $address['geoloc']);
+ $address['geocodedPostalText'] = str_replace("\n", "\r\n", $address['geocodedPostalText']);
}
+ $address['text'] = str_replace("\n", "\r\n", $address['text']);
+ $address['postalText'] = str_replace("\n", "\r\n", $address['postalText']);
}
-
- // Search for the lign from the given address that is the closest to the geocoded thoroughfareName
- // and replaces the corresponding lign in the geocoded text by it.
- static protected function fixStreetNumber(&$address)
- {
- if (isset($address['thoroughfareName'])) {
- $thoroughfareName = $address['thoroughfareName'];
- $thoroughfareToken = strtoupper(trim(preg_replace(array("/[,\"'#~:;_\-]/", "/\r\n/"),
- array("", "\n"), $thoroughfareName)));
- $geolocLines = explode("\n", $address['geoloc']);
- $textLines = explode("\n", $address['text']);
- $mindist = strlen($thoroughfareToken);
- $minpos = 0;
- $pos = 0;
- foreach ($textLines as $i => $token) {
- if (($l = levenshtein(strtoupper(trim(preg_replace(array("/[,\"'#~:;_\-]/", "/\r\n/"),
- array("", "\n"), $token))),
- $thoroughfareToken)) < $mindist) {
- $mindist = $l;
- $minpos = $i;
+
+ // Returns the address formatted for postal use.
+ // The main rules are (cf. AFNOR XPZ 10-011):
+ // - everything is in upper case;
+ // - if a line is longer than 38 characters, split it;
+ // - if the description of the "street" is longer than 32 characters, use abbreviations.
+ private function getPostalAddress($text) {
+ static $abbreviations = array(
+ "IMPASSE" => "IMP",
+ "RUE" => "R",
+ "AVENUE" => "AV",
+ "BOULEVARD" => "BVD",
+ "ROUTE" => "R",
+ "STREET" => "ST",
+ "ROAD" => "RD",
+ );
+
+ $text = strtoupper($text);
+ $arrayText = explode("\n", $text);
+ $postalText = "";
+
+ foreach ($arrayText as $i => $lign) {
+ $postalText .= (($i == 0) ? "" : "\n");
+ if (($length = strlen($lign)) > 32) {
+ $words = explode(" ", $lign);
+ $count = 0;
+ foreach ($words as $word) {
+ if (isset($abbreviations[$word])) {
+ $word = $abbreviations[$word];
+ }
+ if ($count + ($wordLength = strlen($word)) <= 38) {
+ $postalText .= (($count == 0) ? "" : " ") . $word;
+ $count += (($count == 0) ? 0 : 1) + $wordLength;
+ } else {
+ $postalText .= "\n" . $word;
+ $count = strlen($word);
+ }
}
+ } else {
+ $postalText .= $lign;
}
- foreach ($geolocLines as $i => $line) {
- if (strtoupper(trim($thoroughfareName)) == strtoupper(trim($line))) {
- $pos = $i;
- break;
- }
+ }
+ return $postalText;
+ }
+
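+ // Returns the text to send to the geocoder. When the last but one line of the
+ // address is an ISO 3166-1 item that is not a country (belongsTo IS NOT NULL in
+ // geoloc_countries), the last line (the name of the real country) is dropped;
+ // otherwise the address text is returned unchanged.
+ // NOTE(review): the lookup reads $textLines[$countLines - 2], which is undefined
+ // for a single-line address; a guard on $countLines is probably needed -- confirm.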
+ private function getTextToGeocode($address)
+ {
+ $res = XDB::iterator('SELECT country, countryFR
+ FROM geoloc_countries
+ WHERE belongsTo IS NOT NULL');
+ $countries = array();
+ foreach ($res as $item) {
+ $countries[] = $item[0];
+ $countries[] = $item[1];
+ }
+ $textLines = explode("\n", $address['text']);
+ $countLines = count($textLines);
+ $needle = strtoupper(trim($textLines[$countLines - 2]));
+ $isPseudoCountry = false;
+ foreach ($countries as $country) {
+ if (strtoupper($country) == $needle) {
+ $isPseudoCountry = true;
+ break;
}
- $geolocLines[$pos] = $textLines[$minpos];
- $address['geoloc'] = implode("\n", $geolocLines);
}
+
+ if ($isPseudoCountry) {
+ return implode("\n", array_slice($textLines, 0, -1));
+ }
+ return $address['text'];
}
}