Prevents warnings in flags template.
[platal.git] / include / geocoding.inc.php
CommitLineData
4c906759
SJ
1<?php
2/***************************************************************************
d4c08d89 3 * Copyright (C) 2003-2010 Polytechnique.org *
4c906759
SJ
4 * http://opensource.polytechnique.org/ *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the Free Software *
18 * Foundation, Inc., *
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
20 ***************************************************************************/
21
// Interface for an address geocoder. It provides support for transforming a free
// form address into a fully structured one.
abstract class Geocoder {
    // Geocodes the @p address in place: the implementation updates the fields
    // of the given Address object with the geocoded data. Fields that the
    // geocoder does not know about are retained as-is.
    abstract public function getGeocodedAddress(Address &$address);

    // Cleans the @p address from its geocoded data.
    abstract public function stripGeocodingFromAddress(Address &$address);

    // Updates geoloc_administrativeareas, geoloc_subadministrativeareas and
    // geoloc_localities databases with new geocoded data.
    //
    // @p area is one of 'administrativeArea', 'subAdministrativeArea' or
    // 'locality'. The name read from $address->{$area}Name is looked up in the
    // matching table (and inserted, together with $address->countryId, when it
    // is not there yet); the resulting id is stored in $address->{$area}Id.
    // When there is no name, or when @p area is unknown, an empty id is reset
    // to null. Nothing is returned: the result is only written to @p address.
    //
    // NOTE(review): the lookup matches on the name only, whereas the insertion
    // also records the country; confirm that homonymous areas from different
    // countries are meant to share the same id.
    static public function getAreaId(Address &$address, $area)
    {
        // Whitelist of tables: the table name is concatenated into the query,
        // so it must never come from anything but this map.
        static $databases = array(
            'administrativeArea'    => 'geoloc_administrativeareas',
            'subAdministrativeArea' => 'geoloc_subadministrativeareas',
            'locality'              => 'geoloc_localities',
        );

        $areaName = $area . 'Name';
        $areaId = $area . 'Id';
        if (!is_null($address->$areaName) && isset($databases[$area])) {
            $res = XDB::query('SELECT  id
                                 FROM  ' . $databases[$area] . '
                                WHERE  name = {?}',
                              $address->$areaName);
            if ($res->numRows() == 0) {
                XDB::execute('INSERT INTO  ' . $databases[$area] . ' (name, country)
                                   VALUES  ({?}, {?})',
                             $address->$areaName, $address->countryId);
                $address->$areaId = XDB::insertId();
            } else {
                $address->$areaId = $res->fetchOneCell();
            }
        } elseif (empty($address->$areaId)) {
            $address->$areaId = null;
        }
    }

    // Returns the part of the text preceding the line with the postal code
    // and the city name, within the limit of $limit number of lines.
    //
    // @p text is split on "\n". The result holds the lines located before the
    // first line containing @p postalCode, and never more than @p limit lines.
    // An empty @p postalCode cannot identify a line, hence only the limit
    // applies in that case. A negative @p limit is treated as 0.
    static public function getFirstLines($text, $postalCode, $limit)
    {
        $textArray = explode("\n", $text);
        // Cast to string: an integer needle would not be searched as text by
        // older versions of strpos(), and an empty needle is never meaningful.
        $needle = (string) $postalCode;
        $maxLines = min(count($textArray), max(0, (int) $limit));

        for ($i = 0; $i < $maxLines; ++$i) {
            if ($needle !== '' && strpos($textArray[$i], $needle) !== false) {
                // Everything from the postal code line onwards is dropped.
                $maxLines = $i;
                break;
            }
        }
        return implode("\n", array_slice($textArray, 0, $maxLines));
    }

    // Returns the number of non geocoded addresses (accuracy = 0) of the given
    // @p type. The count is restricted to profile @p pid and/or job @p jobid
    // when they are not null.
    static public function countNonGeocoded($pid, $jobid = null, $type = Address::LINK_PROFILE)
    {
        $where = array();
        if (!is_null($pid)) {
            $where[] = XDB::format('pid = {?}', $pid);
        }
        if (!is_null($jobid)) {
            $where[] = XDB::format('jobid = {?}', $jobid);
        }
        $where[] = XDB::format('FIND_IN_SET({?}, type) AND accuracy = 0', $type);

        // Every condition above is already formatted, so the final query has
        // no placeholder left and takes no bind argument.
        $res = XDB::query('SELECT  COUNT(*)
                             FROM  profile_addresses
                            WHERE  ' . implode(' AND ', $where));
        return $res->fetchOneCell();
    }
}
95
// Implementation of a Geocoder using the Google Maps API. Please refer to
// the following links for details:
// http://code.google.com/apis/maps/documentation/services.html#Geocoding
// http://code.google.com/intl/en/apis/maps/documentation/geocoding/
// http://code.google.com/apis/maps/documentation/reference.html#GGeoAddressAccuracy
//
// It requires the properties gmaps_key and gmaps_url to be defined in section
// Geocoder in plat/al's configuration (platal.ini & platal.conf).
class GMapsGeocoder extends Geocoder {

    // Maximum number of Geocoding calls to the Google Maps API.
    const MAX_GMAPS_RPC_CALLS = 5;
    // Maximum levenshtein distance authorized between input and geocoded text in a single line.
    const MAX_LINE_DISTANCE = 5;
    // Maximum levenshtein distance authorized between input and geocoded text in the whole text.
    const MAX_TOTAL_DISTANCE = 6;

    // Geocodes the @p address in place. The full text is tried first; on
    // failure, shorter and shorter tails of the address are tried, the
    // skipped leading lines being kept as extra lines of the geocoded text.
    // When every attempt fails, only the normalisation done by
    // prepareAddress() has been applied to the address.
    public function getGeocodedAddress(Address &$address) {
        $this->prepareAddress($address);
        $textAddress = $this->getTextToGeocode($address->text);

        // Try to geocode the full address.
        if (($geocodedData = $this->getPlacemarkForAddress($textAddress))) {
            $this->getUpdatedAddress($address, $geocodedData, null);
            return;
        }

        // If the full geocoding failed, try to geocode only the final part of the address.
        // We start by geocoding everything but the first line, and continue until we get
        // a result. To respect the limit of GMaps calls, we ignore the first few lines
        // if there are too many address lines.
        $addressLines = explode("\n", $textAddress);
        $linesCount = count($addressLines);
        for ($i = max(1, $linesCount - self::MAX_GMAPS_RPC_CALLS + 1); $i < $linesCount; ++$i) {
            $extraLines = implode("\n", array_slice($addressLines, 0, $i));
            $toGeocode = implode("\n", array_slice($addressLines, $i));
            if (($geocodedData = $this->getPlacemarkForAddress($toGeocode))) {
                $this->getUpdatedAddress($address, $geocodedData, $extraLines);
                return;
            }
        }
    }

    // Resets every geocoded field of the @p address; the accuracy is set to 0.
    public function stripGeocodingFromAddress(Address &$address) {
        $address->geocodedText = null;
        $address->geocodedPostalText = null;
        $address->geoloc_choice = null;
        $address->countryId = null;
        $address->country = null;
        $address->administrativeAreaName = null;
        $address->subAdministrativeAreaName = null;
        $address->localityName = null;
        $address->thoroughfareName = null;
        $address->postalCode = null;
        $address->accuracy = 0;
    }

    // Updates the address with the geocoded information from Google Maps. Also
    // cleans up the final information. @p extraLines holds the leading lines
    // of the address that were not sent to the geocoder (or null).
    private function getUpdatedAddress(Address &$address, array $geocodedData, $extraLines) {
        $this->fillAddressWithGeocoding($address, $geocodedData);
        $this->formatAddress($address, $extraLines);
    }

    // Retrieves the Placemark object (see #getPlacemarkFromJson()) for the @p
    // address, by querying the Google Maps API. Returns the array on success,
    // and null otherwise.
    private function getPlacemarkForAddress($address) {
        $url = $this->getGeocodingUrl($address);
        $geoData = $this->getGeoJsonFromUrl($url);

        // The is_array() check protects the array type hint of
        // getPlacemarkFromJson() against unexpected payloads.
        return (is_array($geoData) && $geoData ? $this->getPlacemarkFromJson($geoData) : null);
    }

    // Prepares address to be geocoded: the text is trimmed and whitespace
    // around line breaks is collapsed to a single "\n"; the postal version of
    // the text is then recomputed.
    private function prepareAddress(Address &$address) {
        $address->text = preg_replace('/\s*\n\s*/m', "\n", trim($address->text));
        $address->postalText = $this->getPostalAddress($address->text);
    }

    // Builds the Google Maps geocoder url to fetch information about @p address.
    // Returns the built url.
    private function getGeocodingUrl($address) {
        global $globals;

        $parameters = array(
            'key'    => $globals->geocoder->gmaps_key,
            'sensor' => 'false',   // The queried address wasn't obtained from a GPS sensor.
            'hl'     => 'fr',      // Output language.
            'oe'     => 'utf8',    // Output encoding.
            'output' => 'json',    // Output format.
            'gl'     => 'fr',      // Location preferences (addresses are in France by default).
            'q'      => $address,  // The queried address.
        );

        return $globals->geocoder->gmaps_url . '?' . http_build_query($parameters);
    }

    // Fetches JSON-encoded data from a Google Maps API url, and decodes them.
    // Returns the json array on success, and null otherwise (fetch failure or
    // payload that does not decode to an array).
    private function getGeoJsonFromUrl($url) {
        global $globals;

        // Prepare a backtrace object to log errors.
        $bt = null;
        if ($globals->debug & DEBUG_BT) {
            if (!isset(PlBacktrace::$bt['Geoloc'])) {
                new PlBacktrace('Geoloc');
            }
            $bt = &PlBacktrace::$bt['Geoloc'];
            $bt->start($url);
        }

        // Fetch the geocoding data.
        $rawData = file_get_contents($url);
        if (!$rawData) {
            if ($bt) {
                $bt->stop(0, 'Could not retrieve geocoded address from GoogleMaps.');
            }
            return null;
        }

        // Decode the JSON-encoded data, and check for their validity:
        // json_decode() yields null on malformed input and a scalar for a
        // scalar document, neither of which can be processed further.
        $data = json_decode($rawData, true);
        if (!is_array($data)) {
            if ($bt) {
                $bt->stop(0, 'Could not decode geocoded address from GoogleMaps.');
            }
            return null;
        }
        if ($bt) {
            $bt->stop(count($data), null, $data);
        }

        return $data;
    }

    // Extracts the most appropriate placemark from the JSON data fetched from
    // Google Maps. Returns a Placemark array on success, and null otherwise. See
    // http://code.google.com/apis/maps/documentation/services.html#Geocoding_Structured
    // for details on the Placemark structure.
    private function getPlacemarkFromJson(array $data) {
        // Check for geocoding failures.
        if (!isset($data['Status']['code']) || $data['Status']['code'] != 200) {
            // TODO: handle non-200 codes in a better way, since the code might
            // indicate a temporary error on Google's side.
            return null;
        }

        // Check that at least one placemark was found.
        if (empty($data['Placemark']) || !is_array($data['Placemark'])) {
            return null;
        }

        // Extract the placemark with the best accuracy. This is not always the
        // best result (since the same address may yield two different placemarks).
        // On ties the first placemark wins; a missing accuracy counts as 0.
        $result = null;
        $bestAccuracy = 0;
        foreach ($data['Placemark'] as $place) {
            if (!is_array($place)) {
                continue;
            }
            $accuracy = isset($place['AddressDetails']['Accuracy']) ? $place['AddressDetails']['Accuracy'] : 0;
            if (is_null($result) || $accuracy > $bestAccuracy) {
                $result = $place;
                $bestAccuracy = $accuracy;
            }
        }

        return $result;
    }

    // Returns $data[$key] when it exists, and null otherwise.
    private static function valueOrNull($data, $key) {
        return (is_array($data) && isset($data[$key])) ? $data[$key] : null;
    }

    // Fills the address with the geocoded data.
    private function fillAddressWithGeocoding(Address &$address, $geocodedData) {
        // The geocoded address tree is
        // Country -> AdministrativeArea -> SubAdministrativeArea -> Locality -> Thoroughfare
        // with all the possible shortcuts: any level may be missing, in which
        // case its children are attached to the closest available ancestor.
        // The address is formatted as xAL, or eXtensible Address Language, an international
        // standard for address formatting.
        // xAL documentation: http://www.oasis-open.org/committees/ciq/ciq.html#6
        $address->geocodedText = str_replace(', ', "\n", (string) self::valueOrNull($geocodedData, 'address'));
        if (isset($geocodedData['AddressDetails']['Accuracy'])) {
            $address->accuracy = $geocodedData['AddressDetails']['Accuracy'];
        }

        // Walk down the tree, keeping the deepest level reached so far.
        $currentPosition = self::valueOrNull($geocodedData, 'AddressDetails');
        if (isset($currentPosition['Country'])) {
            $currentPosition = $currentPosition['Country'];
            $address->countryId = self::valueOrNull($currentPosition, 'CountryNameCode');
            $address->country = self::valueOrNull($currentPosition, 'CountryName');
        }
        if (isset($currentPosition['AdministrativeArea'])) {
            $currentPosition = $currentPosition['AdministrativeArea'];
            $address->administrativeAreaName = self::valueOrNull($currentPosition, 'AdministrativeAreaName');
        }
        if (isset($currentPosition['SubAdministrativeArea'])) {
            $currentPosition = $currentPosition['SubAdministrativeArea'];
            $address->subAdministrativeAreaName = self::valueOrNull($currentPosition, 'SubAdministrativeAreaName');
        }
        if (isset($currentPosition['Locality'])) {
            $currentPosition = $currentPosition['Locality'];
            $address->localityName = self::valueOrNull($currentPosition, 'LocalityName');
        }
        if (isset($currentPosition['Thoroughfare'])) {
            $address->thoroughfareName = self::valueOrNull($currentPosition['Thoroughfare'], 'ThoroughfareName');
        }
        if (isset($currentPosition['PostalCode'])) {
            $address->postalCode = self::valueOrNull($currentPosition['PostalCode'], 'PostalCodeNumber');
        }

        // Gets coordinates.
        // NOTE(review): the legacy Google Maps geocoder is believed to return
        // Point.coordinates as (longitude, latitude, altitude); confirm that
        // the index mapping below is the intended one before relying on it.
        if (isset($geocodedData['Point']['coordinates'][0])) {
            $address->latitude = $geocodedData['Point']['coordinates'][0];
        }
        if (isset($geocodedData['Point']['coordinates'][1])) {
            $address->longitude = $geocodedData['Point']['coordinates'][1];
        }
        // Gets the bounding box.
        foreach (array('north', 'south', 'east', 'west') as $side) {
            if (isset($geocodedData['ExtendedData']['LatLonBox'][$side])) {
                $address->$side = $geocodedData['ExtendedData']['LatLonBox'][$side];
            }
        }
    }

    // Normalises a text for comparison purposes: digits, punctuation and
    // spaces are removed, line breaks are unified to "\n" and the result is
    // upper-cased.
    private static function normalizeForComparison($text) {
        return strtoupper(preg_replace(array("/[0-9,\"'#~:;_\- ]/", "/\r\n/"),
                                       array('', "\n"), $text));
    }

    // Formats the text of the geocoded address using the unused data and
    // compares it to the given address. If they are too different, the user
    // will be asked to choose between them: the geocoded texts are then kept
    // (with "\r\n" line breaks); otherwise they are reset to null. In both
    // cases the line breaks of the original texts are converted to "\r\n".
    private function formatAddress(Address &$address, $extraLines) {
        if ($extraLines) {
            $address->geocodedText = $extraLines . "\n" . $address->geocodedText;
        }
        $address->geocodedPostalText = $this->getPostalAddress($address->geocodedText);

        $arrayGeoloc = explode("\n", self::normalizeForComparison($address->geocodedText));
        $arrayText = explode("\n", self::normalizeForComparison($address->text));
        $countGeoloc = count($arrayGeoloc);
        $countText = count($arrayText);

        $same = true;
        $totalDistance = 0;
        // The geocoded text may hold one more line than the input (the line
        // count test below tolerates exactly that), unless the input already
        // ends with the country.
        if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1)
            || (($countText == $countGeoloc - 1)
                && ($arrayText[$countText - 1] == strtoupper($address->country)))) {
            $same = false;
        } else {
            for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) {
                $lineDistance = levenshtein($arrayText[$i], trim($arrayGeoloc[$i]));
                $totalDistance += $lineDistance;
                if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) {
                    $same = false;
                    break;
                }
            }
        }

        if ($same) {
            $address->geocodedText = null;
            $address->geocodedPostalText = null;
        } else {
            $address->geocodedText = str_replace("\n", "\r\n", $address->geocodedText);
            $address->geocodedPostalText = str_replace("\n", "\r\n", $address->geocodedPostalText);
        }
        $address->text = str_replace("\n", "\r\n", $address->text);
        $address->postalText = str_replace("\n", "\r\n", $address->postalText);
    }

    // Returns the address formatted for postal use.
    // The main rules are (cf AFNOR XPZ 10-011):
    // -everything in upper case;
    // -if there are more than 38 characters in a line, split it;
    // -if there are more than 32 characters in the description of the "street", use abbreviations.
    // Lengths are counted in bytes (strlen), as is the upper-casing (strtoupper).
    private function getPostalAddress($text) {
        static $abbreviations = array(
            'IMPASSE'   => 'IMP',
            'RUE'       => 'R',
            'AVENUE'    => 'AV',
            'BOULEVARD' => 'BVD',
            'ROUTE'     => 'R',
            'STREET'    => 'ST',
            'ROAD'      => 'RD',
        );

        $arrayText = explode("\n", strtoupper($text));
        $postalText = '';

        foreach ($arrayText as $i => $line) {
            $postalText .= (($i == 0) ? '' : "\n");
            if (strlen($line) <= 32) {
                // Short enough: kept verbatim, without abbreviations.
                $postalText .= $line;
                continue;
            }

            // Long line: abbreviate known words and wrap at 38 characters.
            $count = 0; // Length of the output line being built.
            foreach (explode(' ', $line) as $word) {
                if (isset($abbreviations[$word])) {
                    $word = $abbreviations[$word];
                }
                $wordLength = strlen($word);
                if ($count == 0) {
                    // First word of an output line: never preceded by a line
                    // break, even when it is longer than the limit by itself.
                    $postalText .= $word;
                    $count = $wordLength;
                } elseif ($count + 1 + $wordLength <= 38) {
                    // The separating space counts towards the limit.
                    $postalText .= ' ' . $word;
                    $count += 1 + $wordLength;
                } else {
                    $postalText .= "\n" . $word;
                    $count = $wordLength;
                }
            }
        }
        return $postalText;
    }

    // Trims the name of the real country if the address contains an ISO 3166-1
    // non-country item. For that purpose, we compare the last but one line of
    // the address with all non-country items of ISO 3166-1 (the rows of
    // geoloc_countries having a belongsTo value); on a match, the last line is
    // dropped from the returned text. Otherwise @p text is returned unchanged.
    private function getTextToGeocode($text)
    {
        $textLines = explode("\n", $text);
        $countLines = count($textLines);
        if ($countLines < 2) {
            // There is no "last but one" line to examine.
            return $text;
        }
        $needle = strtoupper(trim($textLines[$countLines - 2]));

        $res = XDB::iterator('SELECT  country, countryFR
                                FROM  geoloc_countries
                               WHERE  belongsTo IS NOT NULL');
        $isPseudoCountry = false;
        foreach ($res as $item) {
            if (strtoupper($item[0]) === $needle || strtoupper($item[1]) === $needle) {
                $isPseudoCountry = true;
            }
        }

        if ($isPseudoCountry) {
            return implode("\n", array_slice($textLines, 0, -1));
        }
        return $text;
    }
}
434
435// vim:set et sw=4 sts=4 sws=4 foldmethod=marker enc=utf-8:
436?>