Moves geocoder into appropriate classes.
[platal.git] / classes / gmapsgeocoder.php
1 <?php
2 /***************************************************************************
3 * Copyright (C) 2003-2010 Polytechnique.org *
4 * http://opensource.polytechnique.org/ *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the Free Software *
18 * Foundation, Inc., *
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
20 ***************************************************************************/
21
22 // Implementation of a Geocoder using the Google Maps API. Please refer to
23 // the following links for details:
24 // http://code.google.com/apis/maps/documentation/services.html#Geocoding
25 // http://code.google.com/intl/en/apis/maps/documentation/geocoding/
26 // http://code.google.com/apis/maps/documentation/reference.html#GGeoAddressAccuracy
27 //
28 // It requires the properties gmaps_key and gmaps_url to be defined in section
29 // Geocoder in plat/al's configuration (platal.ini & platal.conf).
30 class GMapsGeocoder extends Geocoder {
31
32 // Maximum number of Geocoding calls to the Google Maps API.
33 const MAX_GMAPS_RPC_CALLS = 5;
34 // Maximum levenshtein distance authorized between input and geocoded text in a single line.
35 const MAX_LINE_DISTANCE = 5;
36 // Maximum levenshtein distance authorized between input and geocoded text in the whole text.
37 const MAX_TOTAL_DISTANCE = 6;
38
// Geocodes @p address in place.
// The address text is first normalised (prepareAddress()) and reduced to the
// text worth geocoding (getTextToGeocode()). The whole text is tried first;
// if that fails, shorter and shorter tails of the address are tried, the
// dropped leading lines being handed to getUpdatedAddress() as extra lines.
// The starting point of the tail is chosen so that the number of calls to
// the Google Maps API stays within MAX_GMAPS_RPC_CALLS.
// Returns nothing; when every attempt fails no geocoded data is written.
public function getGeocodedAddress(Address &$address) {
    $this->prepareAddress($address);
    $textAddress = $this->getTextToGeocode($address->text);

    // First attempt: the complete address.
    $placemark = $this->getPlacemarkForAddress($textAddress);
    if ($placemark) {
        $this->getUpdatedAddress($address, $placemark, null);
        return;
    }

    // Fallback: drop leading lines one at a time. The full attempt above
    // already consumed one call, hence the "+ 1" when skipping the first lines.
    $lines = explode("\n", $textAddress);
    $total = count($lines);
    $skipped = max(1, $total - self::MAX_GMAPS_RPC_CALLS + 1);
    while ($skipped < $total) {
        $tail = implode("\n", array_slice($lines, $skipped));
        $placemark = $this->getPlacemarkForAddress($tail);
        if ($placemark) {
            $head = implode("\n", array_slice($lines, 0, $skipped));
            $this->getUpdatedAddress($address, $placemark, $head);
            return;
        }
        ++$skipped;
    }
}
64
// Removes the geocoded information from @p address: the geocoded texts, the
// geolocation choice, the country and the xAL components are set to null, and
// the accuracy is reset to 0.
public function stripGeocodingFromAddress(Address &$address) {
    $clearedFields = array(
        'geocodedText',
        'geocodedPostalText',
        'geoloc_choice',
        'countryId',
        'country',
        'administrativeAreaName',
        'subAdministrativeAreaName',
        'localityName',
        'thoroughfareName',
        'postalCode',
    );
    foreach ($clearedFields as $field) {
        $address->$field = null;
    }
    $address->accuracy = 0;
}
78
// Applies a geocoding result to @p address in two steps: the fields are
// filled from @p geocodedData (fillAddressWithGeocoding()), then the texts are
// formatted and compared with the user input (formatAddress()).
// @p extraLines holds the leading address lines that were not sent to the
// geocoder, or null when the whole address was geocoded.
private function getUpdatedAddress(Address &$address, array $geocodedData, $extraLines) {
    // Order matters: formatAddress() reads the geocoded text set just before.
    $this->fillAddressWithGeocoding($address, $geocodedData);
    $this->formatAddress($address, $extraLines);
}
85
// Retrieves the Placemark array (see #getPlacemarkFromJson()) for the @p
// address, by querying the Google Maps API. Returns the array on success,
// and null otherwise.
private function getPlacemarkForAddress($address) {
    $url = $this->getGeocodingUrl($address);
    $geoData = $this->getGeoJsonFromUrl($url);

    // getPlacemarkFromJson() only accepts arrays: a decoded JSON scalar
    // (string, number, true) must be treated as a failure, not passed along.
    if (!is_array($geoData) || !$geoData) {
        return null;
    }

    return $this->getPlacemarkFromJson($geoData);
}
95
// Prepares @p address to be geocoded: surrounding whitespace is trimmed, each
// line break (with the whitespace around it) is collapsed into a single "\n",
// and the postal version of the text is rebuilt from the cleaned text.
private function prepareAddress(Address &$address) {
    $trimmedText = trim($address->text);
    $address->text = preg_replace('/\s*\n\s*/m', "\n", $trimmedText);
    $address->postalText = $this->getPostalAddress($address->text);
}
101
// Builds the Google Maps geocoder url to fetch information about @p address.
// The base url and the API key are read from $globals->geocoder (gmaps_url
// and gmaps_key). Returns the built url.
private function getGeocodingUrl($address) {
    global $globals;

    $parameters = array(
        'key'    => $globals->geocoder->gmaps_key,
        'sensor' => 'false',  // The queried address wasn't obtained from a GPS sensor.
        'hl'     => 'fr',     // Output language.
        'oe'     => 'utf8',   // Output encoding.
        'output' => 'json',   // Output format.
        'gl'     => 'fr',     // Location preferences (addresses are in France by default).
        'q'      => $address, // The queried address.
    );

    // The separator is given explicitly: by default http_build_query() uses
    // the arg_separator.output ini setting, which may be '&amp;' and would
    // yield a url that cannot be fetched as is.
    return $globals->geocoder->gmaps_url . '?' . http_build_query($parameters, '', '&');
}
119
// Fetches JSON-encoded data from a Google Maps API url, and decodes them.
// Returns the decoded json array on success, and null when the data could not
// be retrieved or is not a JSON object/array.
// When backtraces are enabled ($globals->debug & DEBUG_BT), the query and its
// outcome are reported to the 'Geoloc' PlBacktrace.
private function getGeoJsonFromUrl($url) {
    global $globals;

    // Prepare a backtrace object to log errors.
    $bt = null;
    if ($globals->debug & DEBUG_BT) {
        if (!isset(PlBacktrace::$bt['Geoloc'])) {
            // NOTE(review): the constructor presumably registers itself in
            // PlBacktrace::$bt -- confirm against PlBacktrace.
            new PlBacktrace('Geoloc');
        }
        $bt = &PlBacktrace::$bt['Geoloc'];
        $bt->start($url);
    }

    // Fetch the geocoding data.
    $rawData = file_get_contents($url);
    if (!$rawData) {
        if ($bt) {
            $bt->stop(0, 'Could not retrieve geocoded address from GoogleMaps.');
        }
        return null;
    }

    // Decode the JSON-encoded data, and check for their validity: invalid
    // JSON decodes to null and a bare JSON scalar decodes to a non-array,
    // neither of which can be counted or used by the callers.
    $data = json_decode($rawData, true);
    if (!is_array($data)) {
        if ($bt) {
            $bt->stop(0, 'Could not decode the geocoding data received from GoogleMaps.');
        }
        return null;
    }

    if ($bt) {
        $bt->stop(count($data), null, $data);
    }

    return $data;
}
152
// Extracts the most appropriate placemark from the JSON data fetched from
// Google Maps. Returns a Placemark array on success, and null otherwise. See
// http://code.google.com/apis/maps/documentation/services.html#Geocoding_Structured
// for details on the Placemark structure.
private function getPlacemarkFromJson(array $data) {
    // Check for geocoding failures.
    if (!isset($data['Status']['code']) || $data['Status']['code'] != 200) {
        // TODO: handle non-200 codes in a better way, since the code might
        // indicate a temporary error on Google's side.
        return null;
    }

    // Check that the answer actually carries a list of placemarks.
    if (!isset($data['Placemark']) || !is_array($data['Placemark'])) {
        return null;
    }

    // Extract the placemark with the best accuracy (the first one wins on
    // ties). This is not always the best result, since the same address may
    // yield two different placemarks. A placemark without accuracy counts as 0.
    $best = null;
    $bestAccuracy = 0;
    foreach ($data['Placemark'] as $place) {
        if (!is_array($place)) {
            continue;
        }
        $accuracy = isset($place['AddressDetails']['Accuracy'])
                  ? $place['AddressDetails']['Accuracy'] : 0;
        if ($best === null || $accuracy > $bestAccuracy) {
            $best = $place;
            $bestAccuracy = $accuracy;
        }
    }

    // Still null when no usable placemark was found.
    return $best;
}
181
// Fills @p address with the geocoded data of the placemark @p geocodedData:
// geocoded text, accuracy, country and xAL components, coordinates and
// bounding box. Components absent from the placemark are left untouched,
// except for the names of a present component, which default to null.
private function fillAddressWithGeocoding(Address &$address, $geocodedData) {
    // The geocoded address tree is
    // Country -> AdministrativeArea -> SubAdministrativeArea -> Locality -> Thoroughfare
    // with all the possible shortcuts.
    // The address is formatted as xAL, or eXtensible Address Language, an international
    // standard for address formatting.
    // xAL documentation: http://www.oasis-open.org/committees/ciq/ciq.html#6
    $address->geocodedText = isset($geocodedData['address'])
                           ? str_replace(', ', "\n", $geocodedData['address']) : '';

    $details = (isset($geocodedData['AddressDetails']) && is_array($geocodedData['AddressDetails']))
             ? $geocodedData['AddressDetails'] : array();
    if (isset($details['Accuracy'])) {
        $address->accuracy = $details['Accuracy'];
    }

    // Walk down the tree, keeping the deepest level reached so far.
    $currentPosition = $details;
    if (isset($currentPosition['Country'])) {
        $currentPosition = $currentPosition['Country'];
        $address->countryId = isset($currentPosition['CountryNameCode'])
                            ? $currentPosition['CountryNameCode'] : null;
        $address->country = isset($currentPosition['CountryName'])
                          ? $currentPosition['CountryName'] : null;
    }
    if (isset($currentPosition['AdministrativeArea'])) {
        $currentPosition = $currentPosition['AdministrativeArea'];
        $address->administrativeAreaName = isset($currentPosition['AdministrativeAreaName'])
                                         ? $currentPosition['AdministrativeAreaName'] : null;
    }
    if (isset($currentPosition['SubAdministrativeArea'])) {
        $currentPosition = $currentPosition['SubAdministrativeArea'];
        $address->subAdministrativeAreaName = isset($currentPosition['SubAdministrativeAreaName'])
                                            ? $currentPosition['SubAdministrativeAreaName'] : null;
    }
    if (isset($currentPosition['Locality'])) {
        $currentPosition = $currentPosition['Locality'];
        $address->localityName = isset($currentPosition['LocalityName'])
                               ? $currentPosition['LocalityName'] : null;
    }
    if (isset($currentPosition['Thoroughfare'])) {
        $address->thoroughfareName = isset($currentPosition['Thoroughfare']['ThoroughfareName'])
                                   ? $currentPosition['Thoroughfare']['ThoroughfareName'] : null;
    }
    if (isset($currentPosition['PostalCode'])) {
        $address->postalCode = isset($currentPosition['PostalCode']['PostalCodeNumber'])
                             ? $currentPosition['PostalCode']['PostalCodeNumber'] : null;
    }

    // Gets coordinates. The placemark follows the KML convention: the
    // coordinates are ordered longitude, latitude (then altitude).
    if (isset($geocodedData['Point']['coordinates'][0])) {
        $address->longitude = $geocodedData['Point']['coordinates'][0];
    }
    if (isset($geocodedData['Point']['coordinates'][1])) {
        $address->latitude = $geocodedData['Point']['coordinates'][1];
    }

    // Gets the bounding box of the result.
    foreach (array('north', 'south', 'east', 'west') as $side) {
        if (isset($geocodedData['ExtendedData']['LatLonBox'][$side])) {
            $address->$side = $geocodedData['ExtendedData']['LatLonBox'][$side];
        }
    }
}
240
// Formats the text of the geocoded address using the unused data and
// compares it to the given address. If they are too different, the geocoded
// texts are kept so that the user can be asked to choose between them;
// otherwise they are reset to null.
// @p extraLines holds the address lines that were not geocoded (or null);
// they are put back in front of the geocoded text.
// On exit, every text of @p address uses "\r\n" line endings.
private function formatAddress(Address &$address, $extraLines) {
    if ($extraLines) {
        $address->geocodedText = $extraLines . "\n" . $address->geocodedText;
    }
    $address->geocodedPostalText = $this->getPostalAddress($address->geocodedText);

    // Comparison is done on upper-cased texts stripped from digits, spaces
    // and punctuation. The country goes through the same normalisation,
    // otherwise multi-word or hyphenated country names could never match a
    // stripped text line.
    $patterns = array("/[0-9,\"'#~:;_\- ]/", "/\r\n/");
    $replacements = array('', "\n");
    $geoloc = strtoupper(preg_replace($patterns, $replacements, $address->geocodedText));
    $text = strtoupper(preg_replace($patterns, $replacements, $address->text));
    $country = strtoupper(preg_replace($patterns, $replacements, (string) $address->country));

    $arrayGeoloc = explode("\n", $geoloc);
    $arrayText = explode("\n", $text);
    $countGeoloc = count($arrayGeoloc);
    $countText = count($arrayText);

    $same = true;
    if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1)
        || (($countText == $countGeoloc - 1) && ($arrayText[$countText - 1] === $country))) {
        // The texts do not have the same number of lines, a single missing
        // line being tolerated only when the user's last line is not already
        // the country.
        $same = false;
    } else {
        $totalDistance = 0;
        for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) {
            $lineDistance = levenshtein($arrayText[$i], trim($arrayGeoloc[$i]));
            // Before PHP 8, levenshtein() returns -1 for strings longer than
            // 255 characters: such lines cannot be considered identical.
            if ($lineDistance < 0) {
                $same = false;
                break;
            }
            $totalDistance += $lineDistance;
            if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) {
                $same = false;
                break;
            }
        }
    }

    if ($same) {
        $address->geocodedText = null;
        $address->geocodedPostalText = null;
    } else {
        $address->geocodedText = str_replace("\n", "\r\n", $address->geocodedText);
        $address->geocodedPostalText = str_replace("\n", "\r\n", $address->geocodedPostalText);
    }
    $address->text = str_replace("\n", "\r\n", $address->text);
    $address->postalText = str_replace("\n", "\r\n", $address->postalText);
}
285
// Returns the address formatted for postal use.
// The main rules are (cf AFNOR XPZ 10-011):
// -everything in upper case;
// -if there are more than 38 characters in a line, split it;
// -if there are more than 32 characters in the description of the "street", use abbreviations.
// Lengths are counted in bytes (strlen), and lines of at most 32 characters
// are kept untouched. A single word longer than 38 characters is never cut.
private function getPostalAddress($text) {
    // NOTE(review): 'RUE' and 'ROUTE' share the abbreviation 'R', and
    // 'BOULEVARD' maps to 'BVD' -- confirm against the AFNOR table.
    static $abbreviations = array(
        'IMPASSE'   => 'IMP',
        'RUE'       => 'R',
        'AVENUE'    => 'AV',
        'BOULEVARD' => 'BVD',
        'ROUTE'     => 'R',
        'STREET'    => 'ST',
        'ROAD'      => 'RD',
    );
    $abbreviationThreshold = 32;
    $maxLineLength = 38;

    $postalLines = array();
    foreach (explode("\n", strtoupper($text)) as $line) {
        if (strlen($line) <= $abbreviationThreshold) {
            $postalLines[] = $line;
            continue;
        }

        // Long line: abbreviate the known words and wrap at word boundaries.
        $current = '';
        foreach (explode(' ', $line) as $word) {
            if (isset($abbreviations[$word])) {
                $word = $abbreviations[$word];
            }
            if ($current === '') {
                // Start of an output line: nothing to separate, nothing to wrap.
                $current = $word;
            } elseif (strlen($current) + 1 + strlen($word) <= $maxLineLength) {
                // The separating space is part of the line length.
                $current .= ' ' . $word;
            } else {
                $postalLines[] = $current;
                $current = $word;
            }
        }
        $postalLines[] = $current;
    }

    return implode("\n", $postalLines);
}
329
// Trims the name of the real country if the address contains an ISO 3166-1
// non-country item. For that purpose, we compare the last but one line of the
// address with all non-country items of ISO 3166-1 (the entries of
// geoloc_countries having a belongsTo value); on a match, the last line is
// dropped. Returns the text to send to the geocoder.
private function getTextToGeocode($text)
{
    $textLines = explode("\n", $text);
    $countLines = count($textLines);
    // With fewer than two lines there is no "last but one" line to check,
    // hence nothing to trim (and no need to query the database).
    if ($countLines < 2) {
        return $text;
    }
    $needle = strtoupper(trim($textLines[$countLines - 2]));

    $res = XDB::iterator('SELECT  country, countryFR
                            FROM  geoloc_countries
                           WHERE  belongsTo IS NOT NULL');
    // The result set is read entirely before being searched.
    $countries = array();
    foreach ($res as $item) {
        $countries[] = $item[0];
        $countries[] = $item[1];
    }

    foreach ($countries as $country) {
        if (strtoupper($country) == $needle) {
            return implode("\n", array_slice($textLines, 0, -1));
        }
    }
    return $text;
}
359 }
360
361 // vim:set et sw=4 sts=4 sws=4 foldmethod=marker enc=utf-8:
362 ?>