Merge branch 'xorg/master' into xorg/f/geocoding
[platal.git] / classes / gmapsgeocoder.php
CommitLineData
4c906759
SJ
1<?php
2/***************************************************************************
12262f13 3 * Copyright (C) 2003-2011 Polytechnique.org *
4c906759
SJ
4 * http://opensource.polytechnique.org/ *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the Free Software *
18 * Foundation, Inc., *
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
20 ***************************************************************************/
21
4c906759
SJ
22// Implementation of a Geocoder using the Google Maps API. Please refer to
23// the following links for details:
24// http://code.google.com/apis/maps/documentation/services.html#Geocoding
25// http://code.google.com/intl/en/apis/maps/documentation/geocoding/
26// http://code.google.com/apis/maps/documentation/reference.html#GGeoAddressAccuracy
27//
28// It requires the properties gmaps_key and gmaps_url to be defined in section
29// Geocoder in plat/al's configuration (platal.ini & platal.conf).
30class GMapsGeocoder extends Geocoder {
31
32 // Maximum number of Geocoding calls to the Google Maps API.
33 const MAX_GMAPS_RPC_CALLS = 5;
7bc2c396
SJ
34 // Maximum levenshtein distance authorized between input and geocoded text in a single line.
35 const MAX_LINE_DISTANCE = 5;
36 // Maximum levenshtein distance authorized between input and geocoded text in the whole text.
37 const MAX_TOTAL_DISTANCE = 6;
4c906759 38
// Geocodes @p address through the Google Maps API and updates it in place.
// The address text is first normalized, then geocoded as a whole. If that
// fails, shorter and shorter tails of the address are tried; the leading
// lines left out of the query are handed over to the formatting step.
// @p defaultLanguage defaults to the gmaps_hl setting of the geocoder
// configuration; @p forceLanguage is forwarded to the formatting step.
// Nothing is returned: when every attempt fails, the address is only
// normalized.
public function getGeocodedAddress(Address $address, $defaultLanguage = null, $forceLanguage = false) {
    $this->prepareAddress($address);
    $textAddress = $this->getTextToGeocode($address->text);
    if ($defaultLanguage === null) {
        $defaultLanguage = Platal::globals()->geocoder->gmaps_hl;
    }

    // First attempt: the whole address.
    $placemark = $this->getPlacemarkForAddress($textAddress, $defaultLanguage);
    if ($placemark) {
        $this->getUpdatedAddress($address, $placemark, null, $forceLanguage);
        return;
    }

    // Fallback: drop leading lines one at a time and geocode what remains.
    // The starting offset is chosen so that, together with the attempt
    // above, at most MAX_GMAPS_RPC_CALLS requests are sent.
    $lines = explode("\n", $textAddress);
    $total = count($lines);
    $firstOffset = max(1, $total - self::MAX_GMAPS_RPC_CALLS + 1);
    for ($offset = $firstOffset; $offset < $total; ++$offset) {
        $tail = implode("\n", array_slice($lines, $offset));
        $placemark = $this->getPlacemarkForAddress($tail, $defaultLanguage);
        if (!$placemark) {
            continue;
        }

        $head = implode("\n", array_slice($lines, 0, $offset));
        $this->getUpdatedAddress($address, $placemark, $head, $forceLanguage);
        return;
    }
}
67
// Removes the geocoded information from @p address: the fields listed
// below are reset to null and the accuracy is reset to 0.
public function stripGeocodingFromAddress(Address $address) {
    $clearedFields = array(
        'geocodedText',
        'geoloc_choice',
        'countryId',
        'country',
        'administrativeAreaName',
        'subAdministrativeAreaName',
        'localityName',
        'thoroughfareName',
        'postalCode',
    );
    foreach ($clearedFields as $field) {
        $address->$field = null;
    }
    $address->accuracy = 0;
}
00e5200b 80
4c906759
SJ
// Applies a Google Maps placemark to @p address in two steps: the geocoded
// fields are copied into the address (non-local variant), then the textual
// form is rebuilt and compared with the text given by the user.
// @p extraLines holds the leading address lines that were not sent to the
// geocoder (null when the whole address was geocoded).
private function getUpdatedAddress(Address $address, array $geocodedData, $extraLines, $forceLanguage) {
    $isLocal = false;
    $this->fillAddressWithGeocoding($address, $geocodedData, $isLocal);
    $this->formatAddress($address, $extraLines, $forceLanguage);
}
87
// Queries the Google Maps API for the textual @p address, asking for
// results in @p defaultLanguage. Returns the selected Placemark array (see
// #getPlacemarkFromJson()), or null when nothing usable was fetched.
private function getPlacemarkForAddress($address, $defaultLanguage) {
    $geoData = $this->getGeoJsonFromUrl($this->getGeocodingUrl($address, $defaultLanguage));
    if (!$geoData) {
        return null;
    }

    return $this->getPlacemarkFromJson($geoData);
}
97
// Normalizes the text of @p address before geocoding: surrounding
// whitespace is trimmed, and any whitespace around a line break collapses
// into a single "\n".
private function prepareAddress(Address $address) {
    $trimmed = trim($address->text);
    $address->text = preg_replace('/\s*\n\s*/m', "\n", $trimmed);
}
102
// Builds and returns the Google Maps geocoding url for the textual
// @p address, with results requested in @p defaultLanguage. The base url,
// the API key and the 'gl' value come from the geocoder section of the
// global configuration.
private function getGeocodingUrl($address, $defaultLanguage) {
    global $globals;

    $query = http_build_query(array(
        'key'    => $globals->geocoder->gmaps_key,
        'sensor' => 'false',   // The queried address wasn't obtained from a GPS sensor.
        'hl'     => $defaultLanguage,
        'oe'     => 'utf8',    // Output encoding.
        'output' => 'json',    // Output format.
        'gl'     => $globals->geocoder->gmaps_gl,
        'q'      => $address,  // The queried address.
    ));

    return $globals->geocoder->gmaps_url . '?' . $query;
}
120
// Fetches JSON-encoded data from a Google Maps API url, and decodes them.
// Returns the decoded array on success, and null otherwise (the url could
// not be fetched, the answer was empty, or it did not decode to an array).
// When backtraces are enabled (DEBUG_BT), the request and its outcome are
// logged in the 'Geoloc' backtrace.
private function getGeoJsonFromUrl($url) {
    global $globals;

    // Prepare a backtrace object to log errors.
    $bt = null;
    if ($globals->debug & DEBUG_BT) {
        if (!isset(PlBacktrace::$bt['Geoloc'])) {
            new PlBacktrace('Geoloc');
        }
        $bt = &PlBacktrace::$bt['Geoloc'];
        $bt->start($url);
    }

    // Fetch the geocoding data.
    $rawData = file_get_contents($url);
    if (!$rawData) {
        if ($bt) {
            $bt->stop(0, 'Could not retrieve geocoded address from GoogleMaps.');
        }
        return null;
    }

    // Decode the JSON-encoded data, and check for their validity.
    // json_decode() yields null on malformed input and a scalar for a bare
    // JSON value; neither is usable by the callers, which expect an array,
    // and count() rejects them on recent PHP versions.
    $data = json_decode($rawData, true);
    if (!is_array($data)) {
        if ($bt) {
            $bt->stop(0, 'Could not decode geocoded address from GoogleMaps.');
        }
        return null;
    }

    if ($bt) {
        $bt->stop(count($data), null, $data);
    }

    return $data;
}
153
// Extracts the most appropriate placemark from the JSON data fetched from
// Google Maps. Returns a Placemark array on success, and null otherwise
// (non-200 status, or no usable placemark in the answer). See
// http://code.google.com/apis/maps/documentation/services.html#Geocoding_Structured
// for details on the Placemark structure.
private function getPlacemarkFromJson(array $data) {
    // Check for geocoding failures.
    if (!isset($data['Status']['code']) || $data['Status']['code'] != 200) {
        // TODO: handle non-200 codes in a better way, since the code might
        // indicate a temporary error on Google's side.
        return null;
    }

    // Check that at least one placemark was found. The key may be missing
    // altogether, so it must not be counted or indexed blindly.
    if (empty($data['Placemark']) || !is_array($data['Placemark'])) {
        return null;
    }

    // Extract the placemark with the best accuracy. This is not always the
    // best result (since the same address may yield two different placemarks).
    // On ties the first placemark wins; a missing accuracy counts as 0.
    $result = null;
    $bestAccuracy = 0;
    foreach ($data['Placemark'] as $place) {
        if (!is_array($place)) {
            continue;
        }
        $accuracy = isset($place['AddressDetails']['Accuracy'])
                  ? $place['AddressDetails']['Accuracy'] : 0;
        if ($result === null || $accuracy > $bestAccuracy) {
            $result = $place;
            $bestAccuracy = $accuracy;
        }
    }

    return $result;
}
182
// Copies the content of the Placemark @p geocodedData into @p address.
//
// The geocoded address tree is
// Country -> AdministrativeArea -> SubAdministrativeArea -> Locality -> Thoroughfare
// with all the possible shortcuts: each level is looked up inside the
// deepest level found so far, and missing levels are simply skipped.
// The address is formatted as xAL, or eXtensible Address Language, an
// international standard for address formatting.
// xAL documentation: http://www.oasis-open.org/committees/ciq/ciq.html#6
//
// Name fields are stored in properties suffixed with 'Local' when
// @p isLocal is true, and with the capitalized gmaps_hl setting otherwise.
// In the latter case the geocoded text of the address is refreshed too.
private function fillAddressWithGeocoding(Address $address, $geocodedData, $isLocal) {
    $ext = 'Local';
    if (!$isLocal) {
        $ext = ucfirst(Platal::globals()->geocoder->gmaps_hl);
        $address->geocodedText = str_replace(', ', "\n", $geocodedData['address']);
    }

    $details = $geocodedData['AddressDetails'];
    if (isset($details['Accuracy'])) {
        $address->accuracy = $details['Accuracy'];
    }

    // Walk down the address tree, starting with the country.
    $node = $details;
    if (isset($node['Country'])) {
        $node = $node['Country'];
        $address->countryId = $node['CountryNameCode'];
        $address->{'country' . $ext} = $node['CountryName'];
    }

    // JSON level => prefix of the address property receiving its name.
    $levels = array(
        'AdministrativeArea'    => 'administrativeAreaName',
        'SubAdministrativeArea' => 'subAdministrativeAreaName',
        'Locality'              => 'localityName',
    );
    foreach ($levels as $level => $property) {
        if (isset($node[$level])) {
            $node = $node[$level];
            $address->{$property . $ext} = $node[$level . 'Name'];
        }
    }
    if (isset($node['PostalCode'])) {
        $address->postalCode = $node['PostalCode']['PostalCodeNumber'];
    }

    // Gets coordinates.
    // NOTE(review): index 0 is stored as the latitude and index 1 as the
    // longitude. The Google Maps v2 geocoder is believed to return
    // [longitude, latitude, altitude] -- confirm against the API reference
    // and the consumers of these fields before changing anything.
    $coordinates = array(0 => 'latitude', 1 => 'longitude');
    foreach ($coordinates as $index => $property) {
        if (isset($geocodedData['Point']['coordinates'][$index])) {
            $address->$property = $geocodedData['Point']['coordinates'][$index];
        }
    }

    // Gets the bounding box.
    foreach (array('north', 'south', 'east', 'west') as $side) {
        if (isset($geocodedData['ExtendedData']['LatLonBox'][$side])) {
            $address->$side = $geocodedData['ExtendedData']['LatLonBox'][$side];
        }
    }
}
248
803612ae
SJ
// Compares the geocoded text of @p address with the text given by the user
// and returns true iff they are close enough to be considered equal.
//
// Both texts are stripped of digits, spaces and a few punctuation
// characters, then upper-cased and split into lines. They differ when:
//  - the given text has more lines than the geocoded one, or is more than
//    one line shorter;
//  - the given text is exactly one line shorter although its last line
//    already is the country of the address;
//  - the levenshtein distance of one pair of lines exceeds
//    MAX_LINE_DISTANCE, or the running sum of the distances exceeds
//    MAX_TOTAL_DISTANCE.
private function compareAddress($address)
{
    $patterns = array("/[0-9,\"'#~:;_\- ]/", "/\r\n/");
    $replacements = array('', "\n");

    $geocodedLines = explode("\n", strtoupper(preg_replace($patterns, $replacements, $address->geocodedText)));
    $givenLines = explode("\n", strtoupper(preg_replace($patterns, $replacements, $address->text)));
    $geocodedCount = count($geocodedLines);
    $givenCount = count($givenLines);

    if ($givenCount > $geocodedCount || $givenCount < $geocodedCount - 1) {
        return false;
    }
    if ($givenCount == $geocodedCount - 1
        && $givenLines[$givenCount - 1] == strtoupper($address->country)) {
        return false;
    }

    $totalDistance = 0;
    $comparedLines = min($geocodedCount, $givenCount);
    for ($line = 0; $line < $comparedLines; ++$line) {
        $lineDistance = levenshtein($givenLines[$line], trim($geocodedLines[$line]));
        $totalDistance += $lineDistance;
        if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) {
            return false;
        }
    }

    return true;
}
281
// Formats the text of the geocoded address using the unused data and
// compares it to the given address. If they are too different, the user
// will be asked to choose between them: the geocoded text is kept as a
// suggestion, and it is cleared (null) when both texts match.
//
// @p extraLines are the leading address lines that were not geocoded; they
// are put back in front of the geocoded text. Unless @p forceLanguage is
// set, a mismatch triggers new attempts in the languages registered for the
// country of the address; the first one that matches fills the 'Local'
// fields of the address.
//
// On exit, lines of both texts are separated by "\r\n".
private function formatAddress(Address $address, $extraLines, $forceLanguage)
{
    if ($extraLines) {
        $address->geocodedText = $extraLines . "\n" . $address->geocodedText;
    }

    if ($this->compareAddress($address)) {
        $address->geocodedText = null;
    } elseif (!$forceLanguage) {
        $languages = XDB::fetchOneCell('SELECT  IF(ISNULL(gc1.belongsTo), gl1.language, gl2.language)
                                          FROM  geoloc_countries AS gc1
                                    INNER JOIN  geoloc_languages AS gl1 ON (gc1.iso_3166_1_a2 = gl1.iso_3166_1_a2)
                                     LEFT JOIN  geoloc_countries AS gc2 ON (gc1.belongsTo = gc2.iso_3166_1_a2)
                                     LEFT JOIN  geoloc_languages AS gl2 ON (gc2.iso_3166_1_a2 = gl2.iso_3166_1_a2)
                                         WHERE  gc1.iso_3166_1_a2 = {?}',
                                       $address->countryId);
        $toGeocode = substr($address->text, strlen((string) $extraLines));
        $defaultLanguage = Platal::globals()->geocoder->gmaps_hl;

        foreach (explode(',', (string) $languages) as $language) {
            // An unknown country yields an empty list: do not waste a
            // request on an empty language, nor on the default one.
            if ($language == '' || $language == $defaultLanguage) {
                continue;
            }

            // A failed lookup must not wipe the suggestion obtained so far,
            // nor be used to fill the address.
            $geocodedData = $this->getPlacemarkForAddress($toGeocode, $language);
            if (!$geocodedData || !isset($geocodedData['address'])) {
                continue;
            }

            $address->geocodedText = str_replace(', ', "\n", $geocodedData['address']);
            if ($extraLines) {
                $address->geocodedText = $extraLines . "\n" . $address->geocodedText;
            }
            if ($this->compareAddress($address)) {
                $this->fillAddressWithGeocoding($address, $geocodedData, true);
                $address->geocodedText = null;
                break;
            }
        }
    }

    // Use the same line separator for both texts, whatever the branch taken
    // above, and keep a cleared suggestion null instead of turning it into
    // an empty string.
    if ($address->geocodedText !== null) {
        $address->geocodedText = str_replace("\n", "\r\n", $address->geocodedText);
    }
    $address->text = str_replace("\n", "\r\n", $address->text);
}
320
00e5200b
SJ
// Trims the name of the real country if the address contains an ISO 3166-1
// non-country item. For that purpose, we compare the last but one line of
// the address with all non-country items of ISO 3166-1 (the rows of
// geoloc_countries having a belongsTo value), using both of their names.
// On a match the last line is dropped from the returned text; otherwise
// @p text is returned untouched.
private function getTextToGeocode($text)
{
    $textLines = explode("\n", $text);
    $countLines = count($textLines);

    // There is no "last but one" line in a single-line address: reading it
    // would hit an undefined offset.
    if ($countLines < 2) {
        return $text;
    }

    // Nothing to look up for an empty line, so skip the query altogether.
    $needle = strtoupper(trim($textLines[$countLines - 2]));
    if (!$needle) {
        return $text;
    }

    $res = XDB::iterator('SELECT  countryEn, country
                            FROM  geoloc_countries
                           WHERE  belongsTo IS NOT NULL');
    $countries = array();
    foreach ($res as $item) {
        $countries[] = $item[0];
        $countries[] = $item[1];
    }

    foreach ($countries as $country) {
        if (strtoupper($country) === $needle) {
            return implode("\n", array_slice($textLines, 0, -1));
        }
    }

    return $text;
}
4c906759
SJ
352}
353
354// vim:set et sw=4 sts=4 sws=4 foldmethod=marker enc=utf-8:
355?>