Fixes display of comments on disabled accounts (Closes #1126).
[platal.git] / include / geocoding.inc.php
1 <?php
2 /***************************************************************************
3 * Copyright (C) 2003-2010 Polytechnique.org *
4 * http://opensource.polytechnique.org/ *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the Free Software *
18 * Foundation, Inc., *
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
20 ***************************************************************************/
21
22 // Interface for an address geocoder. It provides support for transforming a free
23 // form address into a fully structured one.
24 // TODO: define and use an Address object instead of a key-value map.
abstract class Geocoder {
    // Geocodes the address @p address, and returns the corresponding updated
    // address. Unknown key-value pairs available in the input map are retained
    // as-is.
    abstract public function getGeocodedAddress(array $address);

    // Returns @p address cleaned from its geocoded data.
    abstract public function stripGeocodingFromAddress(array $address);

    // Looks up the area named $address[$area . 'Name'] in the table matching
    // @p area ('administrativeArea', 'subAdministrativeArea' or 'locality'),
    // inserting a new row when no row with that name exists yet.
    // The resulting id is stored in $address[$area . 'Id'] (the address is
    // modified in place) and is also returned. When @p area is unknown or the
    // address has no name for that area, the id is set to null.
    // NOTE(review): the lookup matches on the name only, while the insertion
    // also records the country; two areas sharing a name in different
    // countries would share one id -- confirm this is intended.
    static public function getAreaId(array &$address, $area)
    {
        static $databases = array(
            'administrativeArea'    => 'geoloc_administrativeareas',
            'subAdministrativeArea' => 'geoloc_subadministrativeareas',
            'locality'              => 'geoloc_localities',
        );

        $nameKey = $area . 'Name';
        $idKey   = $area . 'Id';

        // empty() also covers the "key not set" case.
        if (!isset($databases[$area]) || empty($address[$nameKey])) {
            $address[$idKey] = null;
            return null;
        }

        // The table name comes from the whitelist above, never from user input.
        $table = $databases[$area];
        $res = XDB::query("SELECT  id
                             FROM  " . $table . "
                            WHERE  name = {?}",
                          $address[$nameKey]);
        if ($res->numRows() == 0) {
            $countryId = isset($address['countryId']) ? $address['countryId'] : null;
            XDB::execute('INSERT INTO  ' . $table . ' (name, country)
                               VALUES  ({?}, {?})',
                         $address[$nameKey], $countryId);
            $address[$idKey] = XDB::insertId();
        } else {
            $address[$idKey] = $res->fetchOneCell();
        }

        return $address[$idKey];
    }

    // Returns the part of @p text preceding the first line that contains
    // @p postalCode (i.e. the line with the postal code and the city name),
    // truncated to at most @p limit lines. If no line within the first
    // @p limit lines contains the postal code, or if @p postalCode is empty,
    // the first @p limit lines are returned.
    static public function getFirstLines($text, $postalCode, $limit)
    {
        $lines  = explode("\n", $text);
        $needle = (string) $postalCode;
        // Never return more than $limit lines, nor more lines than available.
        $end = min(max(0, (int) $limit), count($lines));

        // An empty needle would either match everything or trigger a warning,
        // depending on the PHP version; treat it as "no postal code known".
        if ($needle !== '') {
            for ($i = 0; $i < $end; ++$i) {
                if (strpos($lines[$i], $needle) !== false) {
                    $end = $i;
                    break;
                }
            }
        }

        return implode("\n", array_slice($lines, 0, $end));
    }

    // Returns the number of addresses of type 'home' that are not geocoded
    // (accuracy = 0) for the profile @p pid.
    static public function countNonGeocoded($pid)
    {
        $res = XDB::query("SELECT  COUNT(*)
                             FROM  profile_addresses
                            WHERE  pid = {?} AND FIND_IN_SET('home', type) AND accuracy = 0",
                          $pid);
        return $res->fetchOneCell();
    }
}
85
86 // Implementation of a Geocoder using the Google Maps API. Please refer to
87 // the following links for details:
88 // http://code.google.com/apis/maps/documentation/services.html#Geocoding
89 // http://code.google.com/intl/en/apis/maps/documentation/geocoding/
90 // http://code.google.com/apis/maps/documentation/reference.html#GGeoAddressAccuracy
91 //
92 // It requires the properties gmaps_key and gmaps_url to be defined in section
93 // Geocoder in plat/al's configuration (platal.ini & platal.conf).
class GMapsGeocoder extends Geocoder {

    // Maximum number of Geocoding calls to the Google Maps API.
    const MAX_GMAPS_RPC_CALLS = 5;
    // Maximum levenshtein distance authorized between input and geocoded text in a single line.
    const MAX_LINE_DISTANCE = 5;
    // Maximum levenshtein distance authorized between input and geocoded text in the whole text.
    const MAX_TOTAL_DISTANCE = 6;
    // Maximum number of characters allowed on one line of a postal address.
    const MAX_POSTAL_LINE_LENGTH = 38;
    // Lines longer than this are abbreviated and re-wrapped in postal addresses.
    const POSTAL_REFORMAT_THRESHOLD = 32;

    // Geocodes @p address (a key-value map that must contain the free-form
    // 'text' entry) and returns the updated map. The full text is tried
    // first; on failure, shorter and shorter tails of the address are tried,
    // within the limit of MAX_GMAPS_RPC_CALLS calls in total. If nothing can
    // be geocoded, the prepared address is returned without geocoded data.
    public function getGeocodedAddress(array $address) {
        $address = $this->prepareAddress($address);
        $textAddress = $this->getTextToGeocode($address);

        // Try to geocode the full address.
        if (($geocodedData = $this->getPlacemarkForAddress($textAddress))) {
            return $this->getUpdatedAddress($address, $geocodedData, null);
        }

        // If the full geocoding failed, try to geocode only the final part of the address.
        // We start by geocoding everything but the first line, and continue until we get
        // a result. To respect the limit of GMaps calls, we ignore the first few lines
        // if there are too many address lines.
        $addressLines = explode("\n", $textAddress);
        $linesCount = count($addressLines);
        for ($i = max(1, $linesCount - self::MAX_GMAPS_RPC_CALLS + 1); $i < $linesCount; ++$i) {
            $extraLines = implode("\n", array_slice($addressLines, 0, $i));
            $toGeocode = implode("\n", array_slice($addressLines, $i));
            if (($geocodedData = $this->getPlacemarkForAddress($toGeocode))) {
                return $this->getUpdatedAddress($address, $geocodedData, $extraLines);
            }
        }

        // No geocoding could be done, the initial address is returned as-is.
        return $address;
    }

    // Removes the geocoded entries from @p address, resets its accuracy to 0
    // and returns the resulting map.
    public function stripGeocodingFromAddress(array $address) {
        unset($address['geoloc'], $address['geoloc_choice'], $address['geocodedPostalText'],
              $address['countryId'], $address['country'], $address['administrativeAreaName'],
              $address['subAdministrativeAreaName'], $address['localityName'],
              $address['thoroughfareName'], $address['postalCode']);
        $address['accuracy'] = 0;
        return $address;
    }

    // Updates the address with the geocoded information from Google Maps. Also
    // cleans up the final informations. @p extraLines holds the leading lines
    // of the text that were left out of the geocoding request (or null).
    private function getUpdatedAddress(array $address, array $geocodedData, $extraLines) {
        // Both helpers take the address by reference (see their signatures).
        // Call-time pass-by-reference ("&$address" at the call site) must not
        // be used: it is a fatal error since PHP 5.4.
        $this->fillAddressWithGeocoding($address, $geocodedData);
        $this->formatAddress($address, $extraLines);
        return $address;
    }

    // Retrieves the Placemark object (see #getPlacemarkFromJson()) for the @p
    // address, by querying the Google Maps API. Returns the array on success,
    // and null otherwise.
    private function getPlacemarkForAddress($address) {
        $url = $this->getGeocodingUrl($address);
        $geoData = $this->getGeoJsonFromUrl($url);

        return ($geoData ? $this->getPlacemarkFromJson($geoData) : null);
    }

    // Prepares address to be geocoded: normalizes the line breaks of the
    // text, computes its postal version, stamps the update time and drops the
    // 'changed' entry.
    private function prepareAddress($address) {
        $address['text'] = preg_replace('/\s*\n\s*/m', "\n", trim($address['text']));
        $address['postalText'] = $this->getPostalAddress($address['text']);
        $address['updateTime'] = time();
        unset($address['changed']);
        return $address;
    }

    // Builds the Google Maps geocoder url to fetch information about @p address.
    // Returns the built url.
    private function getGeocodingUrl($address) {
        global $globals;

        $parameters = array(
            'key'    => $globals->geocoder->gmaps_key,
            'sensor' => 'false', // The queried address wasn't obtained from a GPS sensor.
            'hl'     => 'fr',    // Output language.
            'oe'     => 'utf8',  // Output encoding.
            'output' => 'json',  // Output format.
            'gl'     => 'fr',    // Location preferences (addresses are in France by default).
            'q'      => $address, // The queried address.
        );

        return $globals->geocoder->gmaps_url . '?' . http_build_query($parameters);
    }

    // Fetches JSON-encoded data from a Google Maps API url, and decode them.
    // Returns the json array on success, and null otherwise (fetch failure or
    // payload that does not decode to an array).
    private function getGeoJsonFromUrl($url) {
        global $globals;

        // Prepare a backtrace object to log errors.
        $bt = null;
        if ($globals->debug & DEBUG_BT) {
            if (!isset(PlBacktrace::$bt['Geoloc'])) {
                new PlBacktrace('Geoloc');
            }
            $bt = &PlBacktrace::$bt['Geoloc'];
            $bt->start($url);
        }

        // Fetch the geocoding data.
        $rawData = file_get_contents($url);
        if (!$rawData) {
            if ($bt) {
                $bt->stop(0, "Could not retrieve geocoded address from GoogleMaps.");
            }
            return null;
        }

        // Decode the JSON-encoded data, and check for their validity:
        // json_decode() yields null on malformed input and may yield a scalar,
        // neither of which can be counted or used as a placemark container.
        $data = json_decode($rawData, true);
        $isValid = is_array($data);
        if ($bt) {
            $bt->stop($isValid ? count($data) : 0, null, $data);
        }

        return $isValid ? $data : null;
    }

    // Extracts the most appropriate placemark from the JSON data fetched from
    // Google Maps. Returns a Placemark array on success, and null otherwise. See
    // http://code.google.com/apis/maps/documentation/services.html#Geocoding_Structured
    // for details on the Placemark structure.
    private function getPlacemarkFromJson(array $data) {
        // Check for geocoding failures.
        if (!isset($data['Status']['code']) || $data['Status']['code'] != 200) {
            // TODO: handle non-200 codes in a better way, since the code might
            // indicate a temporary error on Google's side.
            return null;
        }

        // Check that at least one placemark was found.
        if (!isset($data['Placemark']) || !is_array($data['Placemark']) || !isset($data['Placemark'][0])) {
            return null;
        }

        // Extract the placemark with the best accuracy. This is not always the
        // best result (since the same address may yield two different placemarks).
        // On ties, the first placemark wins.
        $result = $data['Placemark'][0];
        foreach ($data['Placemark'] as $place) {
            if ($place['AddressDetails']['Accuracy'] > $result['AddressDetails']['Accuracy']) {
                $result = $place;
            }
        }

        return $result;
    }

    // Fills the address with the geocoded data (@p address is modified in place).
    private function fillAddressWithGeocoding(&$address, $geocodedData) {
        // The geocoded address tree is
        // Country -> AdministrativeArea -> SubAdministrativeArea -> Locality -> Thoroughfare
        // with all the possible shortcuts
        // The address is formatted as xAL, or eXtensible Address Language, an international
        // standard for address formatting.
        // xAL documentation: http://www.oasis-open.org/committees/ciq/ciq.html#6
        $geocodedText = isset($geocodedData['address']) ? $geocodedData['address'] : '';
        $address['geoloc'] = str_replace(", ", "\n", $geocodedText);
        if (isset($geocodedData['AddressDetails']['Accuracy'])) {
            $address['accuracy'] = $geocodedData['AddressDetails']['Accuracy'];
        }

        // Walk down the tree; any level may be missing, in which case its
        // children are looked up directly in the current node.
        $currentPosition = isset($geocodedData['AddressDetails']) ? $geocodedData['AddressDetails'] : array();
        if (isset($currentPosition['Country'])) {
            $currentPosition = $currentPosition['Country'];
            $address['countryId'] = $currentPosition['CountryNameCode'];
            $address['country'] = $currentPosition['CountryName'];
        }
        if (isset($currentPosition['AdministrativeArea'])) {
            $currentPosition = $currentPosition['AdministrativeArea'];
            $address['administrativeAreaName'] = $currentPosition['AdministrativeAreaName'];
        }
        if (isset($currentPosition['SubAdministrativeArea'])) {
            $currentPosition = $currentPosition['SubAdministrativeArea'];
            $address['subAdministrativeAreaName'] = $currentPosition['SubAdministrativeAreaName'];
        }
        if (isset($currentPosition['Locality'])) {
            $currentPosition = $currentPosition['Locality'];
            $address['localityName'] = $currentPosition['LocalityName'];
        }
        // Thoroughfare and PostalCode are siblings: neither descends further.
        if (isset($currentPosition['Thoroughfare'])) {
            $address['thoroughfareName'] = $currentPosition['Thoroughfare']['ThoroughfareName'];
        }
        if (isset($currentPosition['PostalCode'])) {
            $address['postalCode'] = $currentPosition['PostalCode']['PostalCodeNumber'];
        }

        // Gets coordinates.
        // NOTE(review): the Google Maps v2 geocoder is believed to return
        // Point.coordinates as (longitude, latitude, altitude); if so, the two
        // assignments below are swapped. Left unchanged because stored data
        // and consumers may rely on the current mapping -- confirm.
        if (isset($geocodedData['Point']['coordinates'][0])) {
            $address['latitude'] = $geocodedData['Point']['coordinates'][0];
        }
        if (isset($geocodedData['Point']['coordinates'][1])) {
            $address['longitude'] = $geocodedData['Point']['coordinates'][1];
        }

        // Gets the bounding box, one side at a time.
        foreach (array('north', 'south', 'east', 'west') as $side) {
            if (isset($geocodedData['ExtendedData']['LatLonBox'][$side])) {
                $address[$side] = $geocodedData['ExtendedData']['LatLonBox'][$side];
            }
        }
    }

    // Returns @p text in the form used to compare the user input with the
    // geocoded text: digits, spaces and some punctuation are removed, line
    // breaks are normalized to "\n" and the result is upper-cased.
    private function normalizeForComparison($text) {
        return strtoupper(preg_replace(array("/[0-9,\"'#~:;_\- ]/", "/\r\n/"),
                                       array("", "\n"), $text));
    }

    // Formats the text of the geocoded address using the unused data and
    // compares it to the given address. If they are too different, the user
    // will be asked to choose between them: in that case 'geoloc' and
    // 'geocodedPostalText' are kept in @p address, otherwise they are removed.
    // All the text entries end up with "\r\n" line breaks.
    private function formatAddress(&$address, $extraLines) {
        $same = true;
        if ($extraLines) {
            $address['geoloc'] = $extraLines . "\n" . $address['geoloc'];
        }
        $address['geocodedPostalText'] = $this->getPostalAddress($address['geoloc']);

        $arrayGeoloc = explode("\n", $this->normalizeForComparison($address['geoloc']));
        $arrayText   = explode("\n", $this->normalizeForComparison($address['text']));
        $countGeoloc = count($arrayGeoloc);
        $countText   = count($arrayText);
        $country     = isset($address['country']) ? strtoupper($address['country']) : '';

        $totalDistance = 0;
        // The texts differ if the input has more lines than the geocoded text,
        // or at least two fewer, or exactly one fewer while already ending
        // with the country (so the missing line is not just the country).
        if (($countText > $countGeoloc) || ($countText < $countGeoloc - 1)
            || (($countText == $countGeoloc - 1)
                && ($arrayText[$countText - 1] == $country))) {
            $same = false;
        } else {
            for ($i = 0; $i < $countGeoloc && $i < $countText; ++$i) {
                $lineDistance = levenshtein($arrayText[$i], trim($arrayGeoloc[$i]));
                $totalDistance += $lineDistance;
                if ($lineDistance > self::MAX_LINE_DISTANCE || $totalDistance > self::MAX_TOTAL_DISTANCE) {
                    $same = false;
                    break;
                }
            }
        }

        if ($same) {
            unset($address['geoloc'], $address['geocodedPostalText']);
        } else {
            $address['geoloc'] = str_replace("\n", "\r\n", $address['geoloc']);
            $address['geocodedPostalText'] = str_replace("\n", "\r\n", $address['geocodedPostalText']);
        }
        $address['text'] = str_replace("\n", "\r\n", $address['text']);
        $address['postalText'] = str_replace("\n", "\r\n", $address['postalText']);
    }

    // Returns the address formatted for postal use.
    // The main rules are (cf AFNOR XPZ 10-011):
    // -everything in upper case;
    // -if there are more than 38 characters in a line, split it;
    // -if there are more than 32 characters in the description of the "street", use abbreviations.
    // In practice, every line longer than 32 characters has its known words
    // abbreviated and is re-wrapped so that no output line exceeds 38
    // characters (unless a single word is itself longer than that).
    private function getPostalAddress($text) {
        static $abbreviations = array(
            "IMPASSE"   => "IMP",
            "RUE"       => "R",
            "AVENUE"    => "AV",
            "BOULEVARD" => "BVD",
            "ROUTE"     => "R",
            "STREET"    => "ST",
            "ROAD"      => "RD",
        );

        $postalLines = array();
        foreach (explode("\n", strtoupper($text)) as $line) {
            if (strlen($line) <= self::POSTAL_REFORMAT_THRESHOLD) {
                $postalLines[] = $line;
                continue;
            }

            $wrapped = "";
            $count = 0; // Length of the output line currently being built.
            foreach (explode(" ", $line) as $word) {
                if (isset($abbreviations[$word])) {
                    $word = $abbreviations[$word];
                }
                $wordLength = strlen($word);
                if ($count == 0) {
                    // First word of an output line: always placed, even when
                    // it is too long, rather than emitting an empty line.
                    $wrapped .= $word;
                    $count = $wordLength;
                } elseif ($count + 1 + $wordLength <= self::MAX_POSTAL_LINE_LENGTH) {
                    // The "+ 1" accounts for the separating space.
                    $wrapped .= " " . $word;
                    $count += 1 + $wordLength;
                } else {
                    $wrapped .= "\n" . $word;
                    $count = $wordLength;
                }
            }
            $postalLines[] = $wrapped;
        }

        return implode("\n", $postalLines);
    }

    // Trims the name of the real country if it contains an ISO 3166-1 non-country
    // item. For that purpose, we compare the last but one line of the address with
    // all non-country items of ISO 3166-1 (rows of geoloc_countries whose
    // belongsTo is set); on a match, the last line of the text is dropped.
    private function getTextToGeocode($address)
    {
        $textLines = explode("\n", $address['text']);
        $countLines = count($textLines);
        // Without at least two lines there is no "last but one" line to test.
        if ($countLines < 2) {
            return $address['text'];
        }

        $res = XDB::iterator('SELECT  country, countryFR
                                FROM  geoloc_countries
                               WHERE  belongsTo IS NOT NULL');
        $countries = array();
        foreach ($res as $item) {
            $countries[] = $item[0];
            $countries[] = $item[1];
        }

        $needle = strtoupper(trim($textLines[$countLines - 2]));
        foreach ($countries as $country) {
            if (strtoupper($country) == $needle) {
                return implode("\n", array_slice($textLines, 0, -1));
            }
        }

        return $address['text'];
    }
}
423
424 // vim:set et sw=4 sts=4 sws=4 foldmethod=marker enc=utf-8:
425 ?>