| 1 | <?php |
| 2 | |
| 3 | require_once(dirname(__FILE__).'/exalead.class.php'); |
| 4 | |
/**
 * Percent-encodes every literal plus sign in a string.
 *
 * Used on the refine links returned by Exalead so that a '+' is not read
 * back as an encoded space when the link is followed.
 *
 * @param string $string Text (typically an href) that may contain '+'.
 * @return string The same text with each '+' replaced by '%2B'.
 */
function convert_url($string)
{
    $search = array('+');
    $replacement = array('%2B');

    return str_replace($search, $replacement, $string);
}
| 9 | |
// Default query used to dump the whole indexed database. The Exalead
// constructor copies it into $query_all when no override is passed, and
// get_db_dump() then runs it.
$GLOBALS['query_all'] = 'a*';
| 11 | |
/**
 * Client for an Exalead search CGI.
 *
 * Builds the request URL, fetches the answer in the "xml2" format and feeds
 * it to PHP's event-based XML parser. Each XML element is routed to a
 * startXxx()/endXxx() handler pair that fills an ExaleadData object ($data).
 * The handlers may be overridden in subclasses.
 *
 * Element and attribute names are matched in upper case because the parser
 * is created with its default options (case folding enabled).
 *
 * NOTE(review): every endXxx() handler stores its scratch object in a
 * container and then calls clear() on that same object. With PHP 5+ object
 * handles the stored value is only preserved if the container copies what it
 * receives - confirm against the classes in exalead.class.php.
 */
class Exalead
{
    // Handle created by createParser() and released by freeParser().
    var $parserId;

    // ExaleadData instance receiving everything extracted from the responses.
    var $data;

    // Scratch objects, one per element type: filled by the startXxx() handler
    // and handed to their container by the matching endXxx() handler.
    var $currentGroup;
    // Stack of the CATEGORY elements currently open (categories can nest).
    var $currentCategories = array();
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    // Base URL of the Exalead product. Request parameters are appended with
    // '&', so it must already contain its own query string.
    var $base_cgi = '';

    // Query used by get_db_dump() to dump the indexed database.
    var $query_all = '';

    /**** Constructor *********/

    /**
     * Creates the result container and the scratch objects, then stores the
     * connection settings.
     *
     * @param string $base_cgi           Base URL of the Exalead CGI.
     * @param string $override_query_all Dump query replacing
     *                                   $GLOBALS['query_all'] when not empty.
     */
    function __construct($base_cgi = '', $override_query_all = '')
    {
        $this->data = new ExaleadData();
        $this->currentGroup = new ExaleadGroup();
        $this->currentCategories = array();
        $this->currentSpelling = new ExaleadSpelling();
        $this->currentHit = new ExaleadHit();
        $this->currentHitField = new ExaleadHitField();
        $this->currentHitGroup = new ExaleadHitGroup();
        $this->currentHitCategory = new ExaleadHitCategory();
        $this->currentAction = new ExaleadAction();
        $this->currentTextSegment = new ExaleadTextSegment();
        $this->currentQuery = new ExaleadQuery();
        $this->currentQueryTerm = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword = new ExaleadKeyword();

        $this->base_cgi = $base_cgi;
        if (!empty($override_query_all)) {
            $this->query_all = $override_query_all;
        } else {
            $this->query_all = $GLOBALS['query_all'];
        }
    }

    /**
     * Legacy constructor name, kept for code that calls it explicitly.
     *
     * PHP 8 no longer treats a method named after its class as a
     * constructor, so the real work lives in __construct().
     *
     * @param string $base_cgi           See __construct().
     * @param string $override_query_all See __construct().
     */
    function Exalead($base_cgi = '', $override_query_all = '')
    {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    /**
     * Replaces the base URL of the Exalead CGI.
     *
     * @param string $base_cgi New base URL.
     */
    function set_base_cgi($base_cgi)
    {
        $this->base_cgi = $base_cgi;
    }

    /**
     * Runs the search described by the current HTTP request, if any.
     *
     * A non-empty $_REQUEST[$varname] starts a new search; otherwise a
     * $_REQUEST['_C'] context continues an existing one.
     *
     * @param string $varname Name of the request variable holding the query.
     * @return bool True if a request was attempted, false otherwise.
     */
    function query($varname = 'query')
    {
        if (!empty($_REQUEST[$varname])) {
            $query = $_REQUEST[$varname];
            // Only undo magic-quotes escaping when PHP actually applied it.
            // The feature was removed in PHP 5.4, and stripping slashes
            // unconditionally eats backslashes typed by the user.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
                $query = stripslashes($query);
            }
            $this->first_query($query);
            return true;
        }

        if (isset($_REQUEST['_C'])) {
            $this->handle_request();
            return true;
        }

        return false;
    }

    /**
     * Sends the first request of a search.
     *
     * @param string $query  Search query.
     * @param int    $offset Sent as the _s parameter when greater than zero.
     * @return bool False if no base URL is configured or the response could
     *              not be fetched, true once the response has been parsed.
     */
    function first_query($query, $offset = 0)
    {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi."&_q=".urlencode($query)."&_f=xml2";
        if ($offset > 0) {
            $query_exa .= "&_s=".((int) $offset);
        }

        return $this->_fetchAndParse($query_exa);
    }

    /**
     * Fetches all the results of an indexed database by running $query_all.
     */
    function get_db_dump()
    {
        $this->first_query($this->query_all);
    }

    /**
     * Continues a search from the Exalead context found in $_REQUEST['_C'],
     * forwarding the optional _c, _s, _z and _sf request parameters.
     *
     * @return bool False if no base URL or context is available or the
     *              response could not be fetched, true once it is parsed.
     */
    function handle_request()
    {
        if (empty($this->base_cgi)) {
            return false;
        }
        // _C is the Exalead context; there is nothing to continue without it.
        if (empty($_REQUEST['_C'])) {
            return false;
        }

        // Only spaces are re-encoded in the context, as before.
        // NOTE(review): presumably a full urlencode() broke the context
        // value - confirm before tightening this.
        $query_exa = $this->base_cgi.'&_C='.str_replace(' ', '%20', $_REQUEST['_C']).'&_f=xml2';
        if (!empty($_REQUEST['_c'])) {
            $query_exa .= "&_c=".urlencode($_REQUEST['_c']);
        }
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= "&_s=".((int) $_REQUEST['_s']);
        }
        // Encode user-supplied values so they cannot inject extra parameters
        // into the outgoing URL; non-scalar values (arrays) are skipped.
        foreach (array('_z', '_sf') as $name) {
            if (isset($_REQUEST[$name]) && is_scalar($_REQUEST[$name])) {
                $query_exa .= '&'.$name.'='.urlencode((string) $_REQUEST[$name]);
            }
        }

        return $this->_fetchAndParse($query_exa);
    }

    /**
     * Downloads $url and parses the body as an Exalead XML answer.
     *
     * @access private
     * @param string $url Complete request URL.
     * @return bool False if the download failed, true otherwise.
     */
    function _fetchAndParse($url)
    {
        $xml_response = file_get_contents($url);
        if ($xml_response === false) {
            // Report the failure to the caller rather than handing a
            // non-document to the parser, which would die() on it.
            return false;
        }

        $this->parse($xml_response);
        return true;
    }

    /******** Parser plumbing ********/

    /**
     * Creates the XML parser and registers this object's element and
     * character-data handlers on it.
     */
    function createParser()
    {
        $this->parserId = xml_parser_create();
        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
    }

    /**
     * Releases the parser created by createParser().
     */
    function freeParser()
    {
        xml_parser_free($this->parserId);
    }

    /**
     * Parses a complete XML document with the current parser.
     *
     * Terminates the script with an "XML error" message if the document is
     * not well formed.
     *
     * @param string $string Whole XML document.
     */
    function parseString($string)
    {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($this->parserId)),
                xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Entry point of the parser ***********/

    /**
     * Parses an Exalead XML answer into $data.
     *
     * @param string $string Whole XML document.
     */
    function parse($string)
    {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Element-specific handlers ******/

    // These methods may be overridden.

    /**
     * QUERY opening tag: copies its attributes into the scratch query.
     */
    function startQuery(&$attrs)
    {
        $this->currentQuery->query = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        }
        if (isset($attrs['BROWSED'])) {
            $this->currentQuery->browsed = $attrs['BROWSED'];
        }
    }

    /**
     * QUERYTERM opening tag.
     */
    function startQueryTerm(&$attrs)
    {
        $this->currentQueryTerm->level = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    /**
     * QUERYPARAMETER opening tag.
     */
    function startQueryParameter(&$attrs)
    {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
        }
    }

    /**
     * KEYWORD opening tag. Only the refine link goes through convert_url().
     */
    function startKeyword(&$attrs)
    {
        if (isset($attrs['NAME'])) {
            $this->currentKeyword->name = $attrs['NAME'];
        }
        $this->currentKeyword->display = utf8_decode($attrs['DISPLAY']);
        $this->currentKeyword->count = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF'])) {
            $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        }
        if (isset($attrs['EXCLUDEHREF'])) {
            $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentKeyword->reset_href = $attrs['RESETHREF'];
        }
    }

    /**
     * HITS opening tag: stores the result-set counters directly on $data.
     */
    function startHits(&$attrs)
    {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->data->interrupted = $attrs['INTERRUPTED'];
        }
        $this->data->last = $attrs['LAST'];
        $this->data->end = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    /**
     * HIT opening tag.
     */
    function startHit(&$attrs)
    {
        $this->currentHit->url = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    /**
     * HITGROUP opening tag.
     */
    function startHitGroup(&$attrs)
    {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid = $attrs['GID'];
    }

    /**
     * HITCATEGORY opening tag.
     */
    function startHitCategory(&$attrs)
    {
        $this->currentHitCategory->name = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref = $attrs['CREF'];
        $this->currentHitCategory->gid = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) {
            $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
        }
    }

    /**
     * ACTION opening tag.
     */
    function startAction(&$attrs)
    {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) {
            $this->currentAction->execHref = $attrs['EXECHREF'];
        }
    }

    /**
     * HITFIELD opening tag.
     */
    function startHitField(&$attrs)
    {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentHitField->value = utf8_decode($attrs['VALUE']);
        }
    }

    /**
     * TEXTSEG opening tag.
     */
    function startTextSeg(&$attrs)
    {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    /**
     * TEXTCUT opening tag: nothing to record until the element closes.
     */
    function startTextCut(&$attrs)
    {
    }

    /**
     * SPELLINGSUGGESTIONVARIANT opening tag.
     */
    function startSpellingSuggestionVariant(&$attrs)
    {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    /**
     * GROUP opening tag.
     */
    function startGroup(&$attrs)
    {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF'])) {
            $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentGroup->setResetHref($attrs['RESETHREF']);
        }
    }

    /**
     * CATEGORY opening tag: pushes a new category on the stack of open
     * categories. The exclude link is always derived from CREF.
     */
    function startCategory(&$attrs)
    {
        $currentCategory = new ExaleadCategory();
        $currentCategory->name = utf8_decode($attrs['NAME']);
        $currentCategory->display = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) {
            $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        }
        $currentCategory->exclude_href = '_c=-'.$currentCategory->cref;
        if (isset($attrs['RESETHREF'])) {
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];
        $this->currentCategories[] = $currentCategory;
    }

    /**
     * SEARCH opening tag: nothing to record.
     */
    function startSearch(&$attrs)
    {
    }

    /**
     * Maps an element name to the suffix shared by its startXxx() and
     * endXxx() handlers.
     *
     * @access private
     * @param string $name Element name as reported by the parser.
     * @return string|null Handler suffix, or null if the element is ignored.
     */
    function _handlerSuffix($name)
    {
        $suffixes = array(
            'QUERY' => 'Query',
            'QUERYTERM' => 'QueryTerm',
            'QUERYPARAMETER' => 'QueryParameter',
            'KEYWORD' => 'Keyword',
            'HITS' => 'Hits',
            'HIT' => 'Hit',
            'HITFIELD' => 'HitField',
            'HITGROUP' => 'HitGroup',
            'HITCATEGORY' => 'HitCategory',
            'ACTION' => 'Action',
            'TEXTSEG' => 'TextSeg',
            'TEXTCUT' => 'TextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'SpellingSuggestionVariant',
            // categories used to refine a search
            'GROUP' => 'Group',
            'CATEGORY' => 'Category',
            'SEARCH' => 'Search',
        );

        return isset($suffixes[$name]) ? $suffixes[$name] : null;
    }

    /**
     * Parser callback for opening tags: routes the attributes to the
     * startXxx() handler of the element; unknown elements are ignored.
     *
     * @param mixed  $parser Parser that triggered the callback (unused).
     * @param string $name   Element name.
     * @param array  $attrs  Attributes of the element, keyed by name.
     */
    function startElement($parser, $name, $attrs)
    {
        $suffix = $this->_handlerSuffix($name);
        if ($suffix !== null) {
            $handler = 'start'.$suffix;
            $this->$handler($attrs);
        }
    }

    /**
     * QUERY closing tag: publishes the scratch query on $data.
     */
    function endQuery()
    {
        $this->data->query = $this->currentQuery;
        $this->currentQuery->clear();
    }

    /**
     * QUERYTERM closing tag: attaches the term to the scratch query.
     */
    function endQueryTerm()
    {
        $this->currentQuery->addTerm($this->currentQueryTerm);
        $this->currentQueryTerm->clear();
    }

    /**
     * QUERYPARAMETER closing tag: attaches the parameter to the scratch query.
     */
    function endQueryParameter()
    {
        $this->currentQuery->addParameter($this->currentQueryParameter);
        $this->currentQueryParameter->clear();
    }

    /**
     * KEYWORD closing tag: adds the keyword to $data.
     */
    function endKeyword()
    {
        $this->data->addKeyword($this->currentKeyword);
        $this->currentKeyword->clear();
    }

    /**
     * HITS closing tag: nothing to do.
     */
    function endHits()
    {
    }

    /**
     * HIT closing tag: adds the hit to $data.
     */
    function endHit()
    {
        $this->data->addHit($this->currentHit);
        $this->currentHit->clear();
    }

    /**
     * HITGROUP closing tag: attaches the group to the scratch hit.
     */
    function endHitGroup()
    {
        $this->currentHit->addHitGroup($this->currentHitGroup);
        $this->currentHitGroup->clear();
    }

    /**
     * HITCATEGORY closing tag: attaches the category to the scratch hit group.
     */
    function endHitCategory()
    {
        $this->currentHitGroup->addHitCategory($this->currentHitCategory);
        $this->currentHitCategory->clear();
    }

    /**
     * ACTION closing tag: attaches the action to the scratch hit.
     */
    function endAction()
    {
        $this->currentHit->addAction($this->currentAction);
        $this->currentAction->clear();
    }

    /**
     * HITFIELD closing tag: attaches the field to the scratch hit.
     */
    function endHitField()
    {
        $this->currentHit->addHitField($this->currentHitField);
        $this->currentHitField->clear();
    }

    /**
     * TEXTSEG closing tag: attaches the text segment to the scratch hit field.
     */
    function endTextSeg()
    {
        $this->currentHitField->addTextSegment($this->currentTextSegment);
        $this->currentTextSegment->clear();
    }

    /**
     * TEXTCUT closing tag: flags the scratch hit field as containing a cut.
     */
    function endTextCut()
    {
        $this->currentHitField->setHasTextCut(true);
    }

    /**
     * SPELLINGSUGGESTIONVARIANT closing tag: adds the suggestion to $data.
     */
    function endSpellingSuggestionVariant()
    {
        $this->data->addSpelling($this->currentSpelling);
        $this->currentSpelling->clear();
    }

    /**
     * GROUP closing tag: adds the group to $data.
     */
    function endGroup()
    {
        $this->data->addGroup($this->currentGroup);
        $this->currentGroup->clear();
    }

    /**
     * CATEGORY closing tag: pops the category that just closed and attaches
     * it to its parent.
     */
    function endCategory()
    {
        if (count($this->currentCategories) == 1) {
            // Only one category is open, so its parent is the GROUP element.
            $this->currentGroup->addCategory(array_pop($this->currentCategories));
        } else {
            // Nested category: after the pop, the new last entry of the
            // stack is the enclosing category. It is taken by reference so
            // that the entry kept on the stack is the one that is modified.
            $category = array_pop($this->currentCategories);
            end($this->currentCategories);
            $parentCategory = &$this->currentCategories[key($this->currentCategories)];
            $parentCategory->addCategory($category);
        }
    }

    /**
     * SEARCH closing tag: nothing to do.
     */
    function endSearch()
    {
    }

    /**
     * Parser callback for closing tags: calls the endXxx() handler of the
     * element; unknown elements are ignored.
     *
     * @param mixed  $parser Parser that triggered the callback (unused).
     * @param string $name   Element name.
     */
    function endElement($parser, $name)
    {
        $suffix = $this->_handlerSuffix($name);
        if ($suffix !== null) {
            $handler = 'end'.$suffix;
            $this->$handler();
        }
    }

    /**
     * Parser callback for character data: appends the decoded text to the
     * scratch text segment, whichever element it was found in.
     *
     * @param mixed  $parser Parser that triggered the callback (unused).
     * @param string $text   Chunk of character data.
     */
    function parsePCData($parser, $text)
    {
        $this->currentTextSegment->append(utf8_decode($text));
    }
}
| 494 | |
| 495 | ?> |