Non geocoded addresses could not be saved.
[platal.git] / include / geocoding.inc.php
1 <?php
2 /***************************************************************************
3 * Copyright (C) 2003-2009 Polytechnique.org *
4 * http://opensource.polytechnique.org/ *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the Free Software *
18 * Foundation, Inc., *
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
20 ***************************************************************************/
21
22 // Interface for an address geocoder. It provides support for transforming a free
23 // form address into a fully structured one.
24 // TODO: define and use an Address object instead of a key-value map.
abstract class Geocoder {
    // Geocodes the @p address, and returns the corresponding updated address.
    // Unknown key-value pairs available in the input map are retained as-is.
    abstract public function getGeocodedAddress(array $address);

    // Cleans the address from its geocoded data.
    abstract public function stripGeocodingFromAddress(array $address);

    // Resolves the database id of one area level of @p address.
    //
    // @p area is one of 'administrativeArea', 'subAdministrativeArea' or
    // 'locality'; any other value is silently ignored, as is an address that
    // has no "<area>Name" entry. The name is looked up in the matching
    // geoloc_* table and inserted (together with the address' countryId) when
    // it is not there yet.
    //
    // Nothing is returned: the id is written into $address["<area>Id"], which
    // is why the address is taken by reference.
    static public function getAreaId(array &$address, $area)
    {
        static $databases = array(
            'administrativeArea'    => 'geoloc_administrativeareas',
            'subAdministrativeArea' => 'geoloc_subadministrativeareas',
            'locality'              => 'geoloc_localities',
        );

        if (isset($address[$area . 'Name']) && isset($databases[$area])) {
            // The table name comes from the fixed map above, never from user
            // input, so concatenating it into the query is safe.
            $res = XDB::query("SELECT id
                                 FROM " . $databases[$area] . "
                                WHERE name = {?}",
                              $address[$area . 'Name']);
            if ($res->numRows() == 0) {
                // NOTE(review): the return value of XDB::execute() is stored as
                // the new id. Confirm that it really yields the inserted row id
                // (and not a success flag / affected-rows count).
                $address[$area . 'Id'] = XDB::execute("INSERT INTO " . $databases[$area] . " (name, country)
                                                            VALUES ({?}, {?})",
                                                      $address[$area . 'Name'], $address['countryId']);
            } else {
                $address[$area . 'Id'] = $res->fetchOneCell();
            }
        }
    }

    // Returns the part of the text preceding the line that contains the postal
    // code, within the limit of $limit lines.
    //
    // @p text is split on "\n". The result contains the lines found before the
    // first line in which @p postalCode occurs, and never more than @p limit
    // lines. When the postal code is empty or is not found, the first
    // @p limit lines (or the whole text if it is shorter) are returned.
    static public function getFirstLines($text, $postalCode, $limit)
    {
        $lines  = explode("\n", $text);
        $count  = count($lines);
        // An empty needle is never searched for: strpos() rejects it on old PHP
        // versions and matches at offset 0 on recent ones.
        $needle = (string) $postalCode;

        for ($i = 0; $i < $count; ++$i) {
            if ($i >= $limit || ($needle !== '' && strpos($lines[$i], $needle) !== false)) {
                $limit = $i;
                break;
            }
        }
        return implode("\n", array_slice($lines, 0, $limit));
    }
}
72
73 // Implementation of a Geocoder using the Google Maps API. Please refer to
74 // the following links for details:
75 // http://code.google.com/apis/maps/documentation/services.html#Geocoding
76 // http://code.google.com/intl/en/apis/maps/documentation/geocoding/
77 // http://code.google.com/apis/maps/documentation/reference.html#GGeoAddressAccuracy
78 //
79 // It requires the properties gmaps_key and gmaps_url to be defined in section
80 // Geocoder in plat/al's configuration (platal.ini & platal.conf).
class GMapsGeocoder extends Geocoder {

    // Maximum number of Geocoding calls to the Google Maps API.
    const MAX_GMAPS_RPC_CALLS = 5;

    // Geocodes @p address (a key-value map whose 'text' entry holds the free
    // form address) and returns the updated map.
    //
    // The whole text is tried first. On failure, shorter and shorter suffixes
    // of the address lines are tried, the skipped leading lines being kept
    // aside and put back in front of the geocoded text. If nothing can be
    // geocoded, the prepared address is returned without geocoding data.
    public function getGeocodedAddress(array $address) {
        $address     = $this->prepareAddress($address);
        $textAddress = $address['text'];

        // Try to geocode the full address.
        if (($geocodedData = $this->getPlacemarkForAddress($textAddress))) {
            return $this->getUpdatedAddress($address, $geocodedData, null);
        }

        // If the full geocoding failed, try to geocode only the final part of the address.
        // We start by geocoding everything but the first line, and continue until we get
        // a result. To respect the limit of GMaps calls, we ignore the first few lines
        // if there are too many address lines: one call was already spent above, so at
        // most MAX_GMAPS_RPC_CALLS - 1 suffixes are tried here.
        $addressLines = explode("\n", $textAddress);
        $linesCount   = count($addressLines);
        for ($i = max(1, $linesCount - self::MAX_GMAPS_RPC_CALLS + 1); $i < $linesCount; ++$i) {
            $extraLines = implode("\n", array_slice($addressLines, 0, $i));
            $toGeocode  = implode("\n", array_slice($addressLines, $i));
            if (($geocodedData = $this->getPlacemarkForAddress($toGeocode))) {
                return $this->getUpdatedAddress($address, $geocodedData, $extraLines);
            }
        }

        // No geocoding could be done, the initial address is returned as-is.
        return $address;
    }

    // Removes the geocoded text, the country, the area names, the thoroughfare
    // and the postal code from @p address, resets its accuracy to 0 and returns
    // the resulting map. Other entries are left untouched.
    public function stripGeocodingFromAddress(array $address) {
        unset($address['geoloc'], $address['geoloc_choice'], $address['countryId'],
              $address['country'], $address['administrativeAreaName'],
              $address['subAdministrativeAreaName'], $address['localityName'],
              $address['thoroughfareName'], $address['postalCode']);
        $address['accuracy'] = 0;
        return $address;
    }

    // Updates the address with the geocoded information from Google Maps. Also
    // cleans up the final information.
    //
    // @p geocodedData is a Placemark array, @p extraLines the leading address
    // lines that were not sent to the geocoder (or null). Returns the updated
    // address.
    private function getUpdatedAddress(array $address, array $geocodedData, $extraLines) {
        // The callee declares its parameter by reference; call-time
        // pass-by-reference (&$address at the call site) is a fatal error on
        // PHP >= 5.4 and must not be used.
        $this->fillAddressWithGeocoding($address, $geocodedData);

        // If the accuracy is 6, it means only the street has been geocoded
        // but not the number, thus we need to fix it.
        if (isset($address['accuracy']) && $address['accuracy'] == 6) {
            $this->fixStreetNumber($address);
        }

        // We can now format the address.
        $this->formatAddress($address, $extraLines);

        // Some entities in ISO 3166 are not countries, thus they have to be replaced
        // by the country they belong to.
        // TODO: fixCountry($address);

        return $address;
    }

    // Retrieves the Placemark object (see #getPlacemarkFromJson()) for the @p
    // address, by querying the Google Maps API. Returns the array on success,
    // and null otherwise.
    private function getPlacemarkForAddress($address) {
        $url     = $this->getGeocodingUrl($address);
        $geoData = $this->getGeoJsonFromUrl($url);

        return ($geoData ? $this->getPlacemarkFromJson($geoData) : null);
    }

    // Prepares the address to be geocoded: trims the text and the whitespace
    // surrounding each line break, stamps the update time and drops the
    // 'changed' marker. Returns the prepared address.
    private function prepareAddress($address) {
        $address['text'] = preg_replace('/\s*\n\s*/m', "\n", trim($address['text']));
        // TODO: $address['postalAddress'] = getPostalAddress($address['text']);
        $address['updateTime'] = time();
        unset($address['changed']);
        return $address;
    }

    // Builds the Google Maps geocoder url to fetch information about @p address.
    // Returns the built url.
    private function getGeocodingUrl($address) {
        global $globals;

        $parameters = array(
            'key'    => $globals->geocoder->gmaps_key,
            'sensor' => 'false',   // The queried address wasn't obtained from a GPS sensor.
            'hl'     => 'fr',      // Output language.
            'oe'     => 'utf8',    // Output encoding.
            'output' => 'json',    // Output format.
            'gl'     => 'fr',      // Location preferences (addresses are in France by default).
            'q'      => $address,  // The queried address.
        );

        // The separator is given explicitly so that an arg_separator.output
        // ini setting such as "&amp;" cannot corrupt the request url.
        return $globals->geocoder->gmaps_url . '?' . http_build_query($parameters, '', '&');
    }

    // Fetches JSON-encoded data from a Google Maps API url, and decodes them.
    // Returns the json array on success, and null otherwise (fetch failure or
    // data that do not decode to an array).
    private function getGeoJsonFromUrl($url) {
        global $globals;

        // Prepare a backtrace object to log errors.
        $bt = null;
        if ($globals->debug & DEBUG_BT) {
            if (!isset(PlBacktrace::$bt['Geoloc'])) {
                new PlBacktrace('Geoloc');
            }
            $bt = &PlBacktrace::$bt['Geoloc'];
            $bt->start($url);
        }

        // Fetch the geocoding data.
        $rawData = file_get_contents($url);
        if (!$rawData) {
            if ($bt) {
                $bt->stop(0, "Could not retrieve geocoded address from GoogleMaps.");
            }
            return null;
        }

        // Decode the JSON-encoded data, and check for their validity: anything
        // but an array (invalid JSON, bare scalar) cannot be a geocoder answer
        // and would break the array-typed consumers.
        $data = json_decode($rawData, true);
        if (!is_array($data)) {
            if ($bt) {
                $bt->stop(0, "Could not decode geocoded address from GoogleMaps.");
            }
            return null;
        }
        if ($bt) {
            $bt->stop(count($data), null, $data);
        }

        return $data;
    }

    // Extracts the most appropriate placemark from the JSON data fetched from
    // Google Maps. Returns a Placemark array on success, and null otherwise. See
    // http://code.google.com/apis/maps/documentation/services.html#Geocoding_Structured
    // for details on the Placemark structure.
    private function getPlacemarkFromJson(array $data) {
        // Check for geocoding failures.
        if (!isset($data['Status']['code']) || $data['Status']['code'] != 200) {
            // TODO: handle non-200 codes in a better way, since the code might
            // indicate a temporary error on Google's side.
            return null;
        }

        // Check that at least one placemark was found.
        if (empty($data['Placemark']) || !is_array($data['Placemark'])) {
            return null;
        }

        // Extract the placemark with the best accuracy. This is not always the
        // best result (since the same address may yield two different placemarks).
        // On equal accuracy the first placemark wins.
        $result = $data['Placemark'][0];
        foreach ($data['Placemark'] as $place) {
            if ($place['AddressDetails']['Accuracy'] > $result['AddressDetails']['Accuracy']) {
                $result = $place;
            }
        }

        return $result;
    }

    // Fills the address with the geocoded data of the @p geocodedData Placemark.
    // Only the entries present in the placemark are written into @p address.
    private function fillAddressWithGeocoding(&$address, $geocodedData) {
        // The geocoded address tree is
        // Country -> AdministrativeArea -> SubAdministrativeArea -> Locality -> Thoroughfare
        // with all the possible shortcuts.
        // The address is formatted as xAL, or eXtensible Address Language, an international
        // standard for address formatting.
        // xAL documentation: http://www.oasis-open.org/committees/ciq/ciq.html#6
        $formatted = isset($geocodedData['address']) ? $geocodedData['address'] : '';
        $address['geoloc'] = str_replace(", ", "\n", $formatted);
        if (isset($geocodedData['AddressDetails']['Accuracy'])) {
            $address['accuracy'] = $geocodedData['AddressDetails']['Accuracy'];
        }

        // Walk down the tree; a missing level is simply skipped and the search
        // for the next level goes on from the current node.
        $currentPosition = isset($geocodedData['AddressDetails']) ? $geocodedData['AddressDetails'] : array();
        if (isset($currentPosition['Country'])) {
            $currentPosition      = $currentPosition['Country'];
            $address['countryId'] = $currentPosition['CountryNameCode'];
            $address['country']   = $currentPosition['CountryName'];
        }
        if (isset($currentPosition['AdministrativeArea'])) {
            $currentPosition                   = $currentPosition['AdministrativeArea'];
            $address['administrativeAreaName'] = $currentPosition['AdministrativeAreaName'];
        }
        if (isset($currentPosition['SubAdministrativeArea'])) {
            $currentPosition                      = $currentPosition['SubAdministrativeArea'];
            $address['subAdministrativeAreaName'] = $currentPosition['SubAdministrativeAreaName'];
        }
        if (isset($currentPosition['Locality'])) {
            $currentPosition         = $currentPosition['Locality'];
            $address['localityName'] = $currentPosition['LocalityName'];
        }
        if (isset($currentPosition['Thoroughfare'])) {
            $address['thoroughfareName'] = $currentPosition['Thoroughfare']['ThoroughfareName'];
        }
        if (isset($currentPosition['PostalCode'])) {
            $address['postalCode'] = $currentPosition['PostalCode']['PostalCodeNumber'];
        }

        // Gets coordinates.
        // NOTE(review): the Google Maps v2 geocoder is documented to return
        // Point.coordinates as [longitude, latitude, altitude]; if so, the two
        // assignments below are swapped. Left unchanged because stored data and
        // callers may rely on the current mapping -- confirm before fixing.
        if (isset($geocodedData['Point']['coordinates'][0])) {
            $address['latitude'] = $geocodedData['Point']['coordinates'][0];
        }
        if (isset($geocodedData['Point']['coordinates'][1])) {
            $address['longitude'] = $geocodedData['Point']['coordinates'][1];
        }

        // Gets the bounding box, one side at a time.
        foreach (array('north', 'south', 'east', 'west') as $side) {
            if (isset($geocodedData['ExtendedData']['LatLonBox'][$side])) {
                $address[$side] = $geocodedData['ExtendedData']['LatLonBox'][$side];
            }
        }
    }

    // Formats the text of the geocoded address using the unused data and
    // compares it to the given address. If they are too different, the user
    // will be asked to choose between them: 'geoloc' is then kept next to
    // 'text'. Otherwise the geocoded text replaces 'text' and 'geoloc' is
    // removed.
    private function formatAddress(&$address, $extraLines) {
        if ($extraLines) {
            $address['geoloc'] = $extraLines . "\n" . $address['geoloc'];
        }

        // Both texts are compared line by line, ignoring case, digits, spaces
        // and punctuation.
        $patterns     = array("/[0-9,\"'#~:;_\- ]/", "/\r\n/");
        $replacements = array("", "\n");
        $arrayGeoloc  = explode("\n", strtoupper(preg_replace($patterns, $replacements, $address['geoloc'])));
        $arrayText    = explode("\n", strtoupper(preg_replace($patterns, $replacements, $address['text'])));
        $countGeoloc  = count($arrayGeoloc);
        $countText    = count($arrayText);
        $country      = isset($address['country']) ? strtoupper($address['country']) : '';

        // The geocoded text may have at most one line more than the given one
        // (typically the country); that tolerance does not apply when the given
        // text already ends with the country.
        $same = true;
        if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1)
            || (($countText == $countGeoloc - 1)
                && ($arrayText[$countText - 1] == $country))) {
            $same = false;
        } else {
            for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) {
                // More than 3 edits on a line means the texts really differ.
                if (levenshtein($arrayText[$i], trim($arrayGeoloc[$i])) > 3) {
                    $same = false;
                    break;
                }
            }
        }

        if ($same) {
            $address['text'] = $address['geoloc'];
            unset($address['geoloc']);
        }
    }

    // Searches for the line of the given address that is the closest to the
    // geocoded thoroughfareName and replaces the corresponding line of the
    // geocoded text by it, so that the street number typed by the user is
    // kept. Does nothing when the address has no thoroughfareName.
    static protected function fixStreetNumber(&$address)
    {
        if (!isset($address['thoroughfareName'])) {
            return;
        }

        $patterns          = array("/[,\"'#~:;_\-]/", "/\r\n/");
        $replacements      = array("", "\n");
        $thoroughfareName  = $address['thoroughfareName'];
        $thoroughfareToken = strtoupper(trim(preg_replace($patterns, $replacements, $thoroughfareName)));
        $geolocLines       = explode("\n", $address['geoloc']);
        $textLines         = explode("\n", $address['text']);

        // Closest line of the given text; a line only qualifies if its distance
        // is smaller than the length of the thoroughfare name, otherwise the
        // first line is used.
        $mindist = strlen($thoroughfareToken);
        $minpos  = 0;
        foreach ($textLines as $i => $token) {
            $candidate = strtoupper(trim(preg_replace($patterns, $replacements, $token)));
            $distance  = levenshtein($candidate, $thoroughfareToken);
            if ($distance < $mindist) {
                $mindist = $distance;
                $minpos  = $i;
            }
        }

        // Line of the geocoded text holding the thoroughfare name; defaults to
        // the first line when no line matches exactly.
        $pos    = 0;
        $wanted = strtoupper(trim($thoroughfareName));
        foreach ($geolocLines as $i => $line) {
            if ($wanted == strtoupper(trim($line))) {
                $pos = $i;
                break;
            }
        }

        $geolocLines[$pos] = $textLines[$minpos];
        $address['geoloc'] = implode("\n", $geolocLines);
    }
}
367
368 // vim:set et sw=4 sts=4 sws=4 foldmethod=marker enc=utf-8:
369 ?>