0337d704 |
1 | <?php |
2 | |
34265dc0 |
3 | require_once(dirname(__FILE__).'/exalead.class.php'); |
0337d704 |
4 | |
d44937f2 |
/**
 * Percent-encode literal plus signs in a URL fragment.
 *
 * Only '+' is rewritten (to '%2B'); every other character is returned
 * unchanged.
 *
 * @param string $string text that may contain '+' characters
 * @return string the text with each '+' replaced by '%2B'
 */
function convert_url($string)
{
    $plus = '+';
    $encoded_plus = '%2B';

    return str_replace($plus, $encoded_plus, $string);
}
9 | |
// Default "match everything" query. The Exalead constructor copies it into
// $this->query_all unless an override is passed, and get_db_dump() runs it.
$GLOBALS['query_all'] = 'a*';
11 | |
d44937f2 |
12 | class Exalead |
13 | { |
14 | |
// Parser handle returned by xml_parser_create(); set in createParser().
var $parserId;

// ExaleadData instance that the end*() handlers add parsed results to.
var $data;

// Scratch objects: each one is filled by the matching start*() handler and
// handed off (then clear()ed) by the matching end*() handler.
var $currentGroup;
// Stack of ExaleadCategory objects: pushed by startCategory(), popped by endCategory().
var $currentCategories = array();
var $currentSpelling;
var $currentHit;
var $currentHitField;
var $currentHitGroup;
var $currentHitCategory;
var $currentAction;
// Also receives every chunk of character data, via parsePCData().
var $currentTextSegment;
var $currentQuery;
var $currentQueryTerm;
var $currentQueryParameter;
var $currentKeyword;

// Base URL of the Exalead product
var $base_cgi = '';

// Query to dump indexed database
var $query_all= '';
38 | |
39 | /**** Constructeur *********/ |
40 | |
41 | |
/**
 * Constructor.
 *
 * Creates the result container and one scratch object per XML element type,
 * then stores the base URL and the "dump everything" query.
 *
 * @param string $base_cgi           base URL of the Exalead CGI; may be set
 *                                   later with set_base_cgi()
 * @param string $override_query_all query used by get_db_dump(); when empty,
 *                                   $GLOBALS['query_all'] is used instead
 */
function __construct($base_cgi = '', $override_query_all = '')
{
    $this->data                  = new ExaleadData();
    $this->currentGroup          = new ExaleadGroup();
    $this->currentCategories     = array();
    $this->currentSpelling       = new ExaleadSpelling();
    $this->currentHit            = new ExaleadHit();
    $this->currentHitField       = new ExaleadHitField();
    $this->currentHitGroup       = new ExaleadHitGroup();
    $this->currentHitCategory    = new ExaleadHitCategory();
    $this->currentAction         = new ExaleadAction();
    $this->currentTextSegment    = new ExaleadTextSegment();
    $this->currentQuery          = new ExaleadQuery();
    $this->currentQueryTerm      = new ExaleadQueryTerm();
    $this->currentQueryParameter = new ExaleadQueryParameter();
    $this->currentKeyword        = new ExaleadKeyword();

    // Base URL of the Exalead product
    $this->base_cgi = $base_cgi;

    if (!empty($override_query_all)) {
        $this->query_all = $override_query_all;
    } else {
        $this->query_all = $GLOBALS['query_all'];
    }
}

/**
 * Legacy class-named constructor.
 *
 * PHP 8 no longer treats a method named after the class as a constructor, so
 * the real work lives in __construct(). This method is kept so that PHP 4
 * runtimes and existing `parent::Exalead(...)` calls keep working. It is
 * declared after __construct() on purpose: PHP 5 raises E_STRICT when the
 * two are declared in the opposite order.
 *
 * @param string $base_cgi           see __construct()
 * @param string $override_query_all see __construct()
 */
function Exalead($base_cgi = '', $override_query_all = '')
{
    $this->__construct($base_cgi, $override_query_all);
}
67 | |
68 | /**** Fonctions d'interface avec le cgi d'Exalead Corporate ******/ |
69 | |
/**
 * Replace the base URL that first_query() and handle_request() build their
 * requests from.
 *
 * @param string $base_cgi base URL of the Exalead CGI
 */
function set_base_cgi($base_cgi)
{
    $this->base_cgi = $base_cgi;
}
74 | |
75 | //retourne vrai si une requete a été faite, faux sinon |
/**
 * Run whichever search the current HTTP request asks for.
 *
 * A non-empty $_REQUEST[$varname] starts a fresh search via first_query();
 * otherwise the presence of $_REQUEST['_C'] continues an existing one via
 * handle_request().
 *
 * @param string $varname name of the request variable holding the query text
 * @return bool true when one of the two request forms was recognised (the
 *              outcome of the backend call itself is not reflected here),
 *              false when neither variable is present
 */
function query($varname = 'query')
{
    $has_query_text = !empty($_REQUEST[$varname]);
    if ($has_query_text) {
        // NOTE(review): stripslashes() is applied unconditionally; this looks
        // like it assumes magic_quotes_gpc-style escaping of request data. Confirm.
        $query_text = stripslashes($_REQUEST[$varname]);
        $this->first_query($query_text);
        return true;
    }

    if (isset($_REQUEST['_C'])) {
        $this->handle_request();
        return true;
    }

    return false;
}
90 | |
91 | //à appeler pour faire la première requête |
/**
 * Send a fresh search to the Exalead CGI and parse the XML answer into
 * $this->data.
 *
 * The request is "<base_cgi>?_q=<url-encoded query>&_f=xml2", plus
 * "&_s=<offset>" when the offset is positive.
 *
 * @param string $query  search expression
 * @param int    $offset value sent as the `_s` parameter; it is cast to int
 *                       so that nothing but a number can reach the URL
 * @return false|null false when no base URL is configured or the HTTP fetch
 *                    failed; nothing (null) once the response was parsed
 */
function first_query($query, $offset = 0)
{
    if (empty($this->base_cgi)) {
        return false;
    }

    $query_exa = $this->base_cgi . "?_q=" . urlencode($query) . "&_f=xml2";

    $offset = (int) $offset;
    if ($offset > 0) {
        $query_exa .= "&_s=" . $offset;
    }

    $xml_response = file_get_contents($query_exa);
    if ($xml_response === false) {
        // The fetch failed: report it the same way as a missing base URL
        // instead of feeding `false` to the XML parser, whose error path
        // terminates the script.
        return false;
    }

    $this->parse($xml_response);
}
107 | |
108 | //pour recuperer tous les résultats d'une base indexée |
/**
 * Fetch every result of the indexed database by running the configured
 * "dump" query ($this->query_all) through first_query().
 */
function get_db_dump()
{
    $dump_query = $this->query_all;
    $this->first_query($dump_query);
}
113 | |
/**
 * Continue an existing search: forward the Exalead navigation parameters
 * found in $_REQUEST to the CGI and parse the XML answer into $this->data.
 *
 * Forwarded parameters: `_C` (Exalead context, mandatory), `_c`, `_s`
 * (cast to int), `_z` and `_sf`. `_c`, `_z` and `_sf` are URL-encoded because
 * they come straight from the client and must not be able to inject extra
 * parameters into the backend request.
 *
 * @return false|null false when no base URL is configured, `_C` is missing
 *                    or the HTTP fetch failed; nothing (null) once the
 *                    response was parsed
 */
function handle_request()
{
    if (empty($this->base_cgi)) {
        return false;
    }
    if (empty($_REQUEST['_C'])) {
        return false; // _C is the Exalead context
    }

    // Only spaces are re-encoded in the context value, as in the original code.
    // NOTE(review): first_query() starts its query string with '?' but this
    // one is appended with '&'; left unchanged because the intent cannot be
    // confirmed from this file. Verify against the expected base_cgi format.
    $query_exa = $this->base_cgi . '&_C=' . str_replace(' ', '%20', $_REQUEST['_C']) . '&_f=xml2';

    if (!empty($_REQUEST['_c'])) {
        $query_exa .= "&_c=" . urlencode($_REQUEST['_c']);
    }
    if (!empty($_REQUEST['_s'])) {
        $query_exa .= "&_s=" . ((int) $_REQUEST['_s']);
    }
    if (isset($_REQUEST['_z'])) {
        $query_exa .= "&_z=" . urlencode($_REQUEST['_z']);
    }
    if (isset($_REQUEST['_sf'])) {
        $query_exa .= "&_sf=" . urlencode($_REQUEST['_sf']);
    }

    $xml_response = file_get_contents($query_exa);
    if ($xml_response === false) {
        // The fetch failed: do not hand `false` to the XML parser, whose
        // error path terminates the script.
        return false;
    }

    $this->parse($xml_response);
}
134 | |
135 | /******** Fonctions annexes relatives au parser ********/ |
136 | |
/**
 * Create the XML parser and register this object's startElement(),
 * endElement() and parsePCData() methods as its callbacks.
 *
 * The handle is stored in $this->parserId.
 */
function createParser()
{
    $this->parserId = xml_parser_create();

    $on_open  = array(&$this, "startElement");
    $on_close = array(&$this, "endElement");
    $on_text  = array(&$this, "parsePCData");

    xml_set_element_handler($this->parserId, $on_open, $on_close);
    xml_set_character_data_handler($this->parserId, $on_text);
}
143 | |
/**
 * Release the XML parser created by createParser().
 */
function freeParser()
{
    $parser = $this->parserId;
    xml_parser_free($parser);
}
148 | |
/**
 * Feed a complete XML document to the parser held in $this->parserId.
 *
 * The document is passed as the final chunk. On a parse error the script is
 * terminated with die() and a message giving the error text and line number.
 *
 * @param string $string the whole XML document
 */
function parseString($string)
{
    $parsed = xml_parse($this->parserId, $string, true);
    if ($parsed) {
        return;
    }

    $error_code = xml_get_error_code($this->parserId);
    $error_text = xml_error_string($error_code);
    $error_line = xml_get_current_line_number($this->parserId);

    die(sprintf("XML error: %s at line %d", $error_text, $error_line));
}
157 | |
158 | |
159 | |
160 | /******** Méthode qui lance le parser ***********/ |
161 | |
/**
 * Parse one XML response: create a parser, run it over the document, then
 * free it. The element handlers fill $this->data as a side effect.
 *
 * @param string $string XML document returned by the Exalead CGI
 */
function parse($string)
{
    $this->createParser();

    $this->parseString($string);

    $this->freeParser();
}
168 | |
169 | /********* fonctions spécifiques à chaque balise ******/ |
170 | |
171 | //Ces méthodes peuvent être surchargées |
172 | |
/**
 * Opening QUERY tag: copy its attributes onto the scratch query object.
 *
 * QUERY (passed through utf8_decode), CONTEXT and TIME are read
 * unconditionally; INTERRUPTED and BROWSED only when present.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startQuery(&$attrs)
{
    $this->currentQuery->query   = utf8_decode($attrs['QUERY']);
    $this->currentQuery->context = $attrs['CONTEXT'];
    $this->currentQuery->time    = $attrs['TIME'];

    if (isset($attrs['INTERRUPTED'])) {
        $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
    }
    if (isset($attrs['BROWSED'])) {
        $this->currentQuery->browsed = $attrs['BROWSED'];
    }
}
181 | |
/**
 * Opening QUERYTERM tag: store LEVEL and the utf8_decode()d REGEXP on the
 * scratch query-term object.
 *
 * startElement() calls this as startQueryTerm(); PHP method names are
 * case-insensitive, so the capitalised declaration still matches.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function StartQueryTerm(&$attrs)
{
    $this->currentQueryTerm->level  = $attrs['LEVEL'];
    $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
}
187 | |
/**
 * Opening QUERYPARAMETER tag: store NAME and, when present, the
 * utf8_decode()d VALUE on the scratch query-parameter object.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startQueryParameter(&$attrs)
{
    $this->currentQueryParameter->name = $attrs['NAME'];

    if (isset($attrs['VALUE'])) {
        $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
    }
}
193 | |
/**
 * Opening KEYWORD tag: copy its attributes onto the scratch keyword object.
 *
 * DISPLAY (utf8_decode()d), COUNT and AUTOMATIC are read unconditionally;
 * NAME, REFINEHREF, EXCLUDEHREF and RESETHREF only when present. Only
 * REFINEHREF goes through convert_url(); the other two links are stored as
 * received.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startKeyword(&$attrs)
{
    if (isset($attrs['NAME'])) {
        $this->currentKeyword->name = $attrs['NAME'];
    }

    $this->currentKeyword->display   = utf8_decode($attrs['DISPLAY']);
    $this->currentKeyword->count     = $attrs['COUNT'];
    $this->currentKeyword->automatic = $attrs['AUTOMATIC'];

    if (isset($attrs['REFINEHREF'])) {
        $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
    }
    if (isset($attrs['EXCLUDEHREF'])) {
        $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
    }
    if (isset($attrs['RESETHREF'])) {
        $this->currentKeyword->reset_href = $attrs['RESETHREF'];
    }
}
204 | |
/**
 * Opening HITS tag: copy the result-set counters straight onto $this->data.
 *
 * NMATCHES, NHITS, LAST, END and START are read unconditionally;
 * INTERRUPTED only when present.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startHits(&$attrs)
{
    $this->data->nmatches = $attrs['NMATCHES'];
    $this->data->nhits    = $attrs['NHITS'];

    if (isset($attrs['INTERRUPTED'])) {
        $this->data->interrupted = $attrs['INTERRUPTED'];
    }

    $this->data->last  = $attrs['LAST'];
    $this->data->end   = $attrs['END'];
    $this->data->start = $attrs['START'];
}
0337d704 |
214 | |
d44937f2 |
/**
 * Opening HIT tag: store URL and SCORE on the scratch hit object.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startHit(&$attrs)
{
    $this->currentHit->url   = $attrs['URL'];
    $this->currentHit->score = $attrs['SCORE'];
}
220 | |
/**
 * Opening HITGROUP tag: store the utf8_decode()d TITLE and the GID on the
 * scratch hit-group object.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startHitGroup(&$attrs)
{
    $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
    $this->currentHitGroup->gid   = $attrs['GID'];
}
226 | |
/**
 * Opening HITCATEGORY tag: copy its attributes onto the scratch hit-category
 * object.
 *
 * NAME, DISPLAY (utf8_decode()d), CREF and GID are read unconditionally;
 * BROWSEHREF only when present.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startHitCategory(&$attrs)
{
    $this->currentHitCategory->name    = $attrs['NAME'];
    $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
    $this->currentHitCategory->cref    = $attrs['CREF'];
    $this->currentHitCategory->gid     = $attrs['GID'];

    if (isset($attrs['BROWSEHREF'])) {
        $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
    }
}
235 | |
/**
 * Opening ACTION tag: store DISPLAY, KIND and, when present, EXECHREF on the
 * scratch action object. DISPLAY is stored as received (no utf8_decode here).
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startAction(&$attrs)
{
    $this->currentAction->display = $attrs['DISPLAY'];
    $this->currentAction->kind    = $attrs['KIND'];

    if (isset($attrs['EXECHREF'])) {
        $this->currentAction->execHref = $attrs['EXECHREF'];
    }
}
242 | |
/**
 * Opening HITFIELD tag: store NAME and, when present, the utf8_decode()d
 * VALUE on the scratch hit-field object.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startHitField(&$attrs)
{
    $this->currentHitField->name = $attrs['NAME'];

    if (isset($attrs['VALUE'])) {
        $this->currentHitField->value = utf8_decode($attrs['VALUE']);
    }
}
248 | |
/**
 * Opening TEXTSEG tag: pass the HIGHLIGHTED attribute to the scratch text
 * segment via setHighlighted().
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startTextSeg(&$attrs)
{
    $highlighted = $attrs['HIGHLIGHTED'];
    $this->currentTextSegment->setHighlighted($highlighted);
}
/**
 * Opening TEXTCUT tag: intentionally a no-op hook; the closing tag is what
 * endTextCut() reacts to.
 *
 * @param array $attrs attributes of the element (unused)
 */
function startTextCut(&$attrs)
{
}
254 | |
/**
 * Opening SPELLINGSUGGESTIONVARIANT tag: pass DISPLAY to setDisplay() and
 * QUERY to setQueryHref() on the scratch spelling object.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startSpellingSuggestionVariant(&$attrs)
{
    $display = $attrs['DISPLAY'];
    $this->currentSpelling->setDisplay($display);

    $query_href = $attrs['QUERY'];
    $this->currentSpelling->setQueryHref($query_href);
}
260 | |
/**
 * Opening GROUP tag: push its attributes into the scratch group object
 * through its setters.
 *
 * GID and TITLE are utf8_decode()d; CLIPPED, COUNT and BROWSED are passed as
 * received; CLIPHREF and RESETHREF are forwarded only when present.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startGroup(&$attrs)
{
    $this->currentGroup->setGid(utf8_decode($attrs['GID']));
    $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
    $this->currentGroup->setClipped($attrs['CLIPPED']);
    $this->currentGroup->setCount($attrs['COUNT']);
    $this->currentGroup->setBrowsed($attrs['BROWSED']);

    if (isset($attrs['CLIPHREF'])) {
        $this->currentGroup->setClipHref($attrs['CLIPHREF']);
    }
    if (isset($attrs['RESETHREF'])) {
        $this->currentGroup->setResetHref($attrs['RESETHREF']);
    }
}
271 | |
/**
 * Opening CATEGORY tag: build a new ExaleadCategory from the attributes and
 * push it on the $this->currentCategories stack.
 *
 * Unlike the other handlers this one allocates a fresh object per element,
 * because CATEGORY elements can be nested (see endCategory()).
 * `exclude_href` is not read from the XML: it is built as '_c=-' followed by
 * the CREF value. REFINEHREF (through convert_url()) and RESETHREF are
 * stored only when present.
 *
 * @param array $attrs attributes of the element, keyed by upper-case name
 */
function startCategory(&$attrs)
{
    $currentCategory = new ExaleadCategory();

    $currentCategory->name      = utf8_decode($attrs['NAME']);
    $currentCategory->display   = utf8_decode($attrs['DISPLAY']);
    $currentCategory->count     = $attrs['COUNT'];
    $currentCategory->automatic = $attrs['AUTOMATIC'];
    $currentCategory->cref      = $attrs['CREF'];

    if (isset($attrs['REFINEHREF'])) {
        $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
    }

    $currentCategory->exclude_href = '_c=-' . $currentCategory->cref;

    if (isset($attrs['RESETHREF'])) {
        $currentCategory->reset_href = $attrs['RESETHREF'];
    }

    $currentCategory->gid    = $attrs['GID'];
    $currentCategory->gcount = $attrs['GCOUNT'];

    $this->currentCategories[] = $currentCategory;
}
289 | |
/**
 * Opening SEARCH tag: intentionally a no-op hook.
 *
 * @param array $attrs attributes of the element (unused)
 */
function startSearch(&$attrs)
{
}
291 | |
/**
 * Opening-tag callback registered in createParser().
 *
 * Routes each known element name to its start*() method, passing the
 * attribute array along; unknown element names are ignored.
 *
 * @param mixed  $parser parser handle (unused)
 * @param string $name   element name as reported by the parser; matched
 *                       against upper-case names
 * @param array  $attrs  attributes of the element
 */
function startElement($parser, $name, $attrs)
{
    // Element name => handler method.
    $handlers = array(
        'QUERY'                     => 'startQuery',
        'QUERYTERM'                 => 'startQueryTerm',
        'QUERYPARAMETER'            => 'startQueryParameter',
        'KEYWORD'                   => 'startKeyword',
        'HITS'                      => 'startHits',
        'HIT'                       => 'startHit',
        'HITFIELD'                  => 'startHitField',
        'HITGROUP'                  => 'startHitGroup',
        'HITCATEGORY'               => 'startHitCategory',
        'ACTION'                    => 'startAction',
        'TEXTSEG'                   => 'startTextSeg',
        'TEXTCUT'                   => 'startTextCut',
        'SPELLINGSUGGESTIONVARIANT' => 'startSpellingSuggestionVariant',
        'GROUP'                     => 'startGroup',
        'CATEGORY'                  => 'startCategory',
        'SEARCH'                    => 'startSearch',
    );

    if (isset($handlers[$name])) {
        $handler = $handlers[$name];
        $this->$handler($attrs);
    }
}
346 | |
/**
 * Closing QUERY tag: store the scratch query object as $this->data->query,
 * then call clear() on the scratch object.
 *
 * NOTE(review): with PHP 5+ object-handle semantics the assignment makes
 * $this->data->query and $this->currentQuery the same object, so clear()
 * also affects the stored query; PHP 4 copied the object on assignment.
 * Confirm which behaviour the deployment relies on.
 */
function endQuery()
{
    $this->data->query = $this->currentQuery;

    $this->currentQuery->clear();
}
/**
 * Closing QUERYTERM tag: add the scratch term to the scratch query with
 * addTerm(), then call clear() on the scratch term.
 *
 * NOTE(review): assumes addTerm() keeps a copy rather than the same object
 * handle; confirm in exalead.class.php.
 */
function endQueryTerm()
{
    $this->currentQuery->addTerm($this->currentQueryTerm);

    $this->currentQueryTerm->clear();
}
/**
 * Closing QUERYPARAMETER tag: add the scratch parameter to the scratch query
 * with addParameter(), then call clear() on the scratch parameter.
 *
 * NOTE(review): assumes addParameter() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endQueryParameter()
{
    $this->currentQuery->addParameter($this->currentQueryParameter);

    $this->currentQueryParameter->clear();
}
/**
 * Closing KEYWORD tag: add the scratch keyword to $this->data with
 * addKeyword(), then call clear() on the scratch keyword.
 *
 * NOTE(review): assumes addKeyword() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endKeyword()
{
    $this->data->addKeyword($this->currentKeyword);

    $this->currentKeyword->clear();
}
/**
 * Closing HITS tag: intentionally a no-op hook; the counters were already
 * stored by startHits().
 */
function endHits()
{
}
/**
 * Closing HIT tag: add the scratch hit to $this->data with addHit(), then
 * call clear() on the scratch hit.
 *
 * NOTE(review): assumes addHit() keeps a copy rather than the same object
 * handle; confirm in exalead.class.php.
 */
function endHit()
{
    $this->data->addHit($this->currentHit);

    $this->currentHit->clear();
}
/**
 * Closing HITGROUP tag: add the scratch hit group to the scratch hit with
 * addHitGroup(), then call clear() on the scratch hit group.
 *
 * NOTE(review): assumes addHitGroup() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endHitGroup()
{
    $this->currentHit->addHitGroup($this->currentHitGroup);

    $this->currentHitGroup->clear();
}
/**
 * Closing HITCATEGORY tag: add the scratch hit category to the scratch hit
 * group with addHitCategory(), then call clear() on the scratch hit category.
 *
 * NOTE(review): assumes addHitCategory() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endHitCategory()
{
    $this->currentHitGroup->addHitCategory($this->currentHitCategory);

    $this->currentHitCategory->clear();
}
/**
 * Closing ACTION tag: add the scratch action to the scratch hit with
 * addAction(), then call clear() on the scratch action.
 *
 * NOTE(review): assumes addAction() keeps a copy rather than the same object
 * handle; confirm in exalead.class.php.
 */
function endAction()
{
    $this->currentHit->addAction($this->currentAction);

    $this->currentAction->clear();
}
/**
 * Closing HITFIELD tag: add the scratch hit field to the scratch hit with
 * addHitField(), then call clear() on the scratch hit field.
 *
 * NOTE(review): assumes addHitField() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endHitField()
{
    $this->currentHit->addHitField($this->currentHitField);

    $this->currentHitField->clear();
}
/**
 * Closing TEXTSEG tag: add the scratch text segment to the scratch hit field
 * with addTextSegment(), then call clear() on the scratch text segment.
 *
 * NOTE(review): assumes addTextSegment() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endTextSeg()
{
    $this->currentHitField->addTextSegment($this->currentTextSegment);

    $this->currentTextSegment->clear();
}
/**
 * Closing TEXTCUT tag: flag the scratch hit field as containing a text cut
 * by calling setHasTextCut(true).
 */
function endTextCut()
{
    $has_cut = true;
    $this->currentHitField->setHasTextCut($has_cut);
}
/**
 * Closing SPELLINGSUGGESTIONVARIANT tag: add the scratch spelling object to
 * $this->data with addSpelling(), then call clear() on the scratch object.
 *
 * NOTE(review): assumes addSpelling() keeps a copy rather than the same
 * object handle; confirm in exalead.class.php.
 */
function endSpellingSuggestionVariant()
{
    $this->data->addSpelling($this->currentSpelling);

    $this->currentSpelling->clear();
}
/**
 * Closing GROUP tag: add the scratch group to $this->data with addGroup(),
 * then call clear() on the scratch group.
 *
 * NOTE(review): assumes addGroup() keeps a copy rather than the same object
 * handle; confirm in exalead.class.php.
 */
function endGroup()
{
    $this->data->addGroup($this->currentGroup);

    $this->currentGroup->clear();
}
/**
 * Closing CATEGORY tag: pop the innermost open category off the stack and
 * attach it to its parent.
 *
 * When it was the only entry on the stack, its parent is the enclosing
 * group (the scratch group object). Otherwise its parent is the category
 * that is on top of the stack after the pop.
 *
 * NOTE(review): an empty stack is not guarded against; it would fall into
 * the "nested" path with nothing to attach to.
 */
function endCategory()
{
    // Only one open category: the parent element is a GROUP.
    if (count($this->currentCategories) == 1) {
        $this->currentGroup->addCategory(array_pop($this->currentCategories));
        return;
    }

    // Nested category: remove it, then look up the new top of the stack.
    $category = array_pop($this->currentCategories);

    end($this->currentCategories);
    $parent_key = key($this->currentCategories);

    // Taken by reference so the stack entry itself receives the child.
    $parentCategory = &$this->currentCategories[$parent_key];
    $parentCategory->addCategory($category);
}
/**
 * Closing SEARCH tag: intentionally a no-op hook.
 */
function endSearch()
{
}
431 | |
432 | |
/**
 * Closing-tag callback registered in createParser().
 *
 * Routes each known element name to its end*() method; unknown element
 * names are ignored.
 *
 * @param mixed  $parser parser handle (unused)
 * @param string $name   element name as reported by the parser; matched
 *                       against upper-case names
 */
function endElement($parser, $name)
{
    // Element name => handler method.
    $handlers = array(
        'QUERY'                     => 'endQuery',
        'QUERYTERM'                 => 'endQueryTerm',
        'QUERYPARAMETER'            => 'endQueryParameter',
        'KEYWORD'                   => 'endKeyword',
        'HITS'                      => 'endHits',
        'HIT'                       => 'endHit',
        'HITFIELD'                  => 'endHitField',
        'HITGROUP'                  => 'endHitGroup',
        'HITCATEGORY'               => 'endHitCategory',
        'ACTION'                    => 'endAction',
        'TEXTSEG'                   => 'endTextSeg',
        'TEXTCUT'                   => 'endTextCut',
        // spelling suggestions
        'SPELLINGSUGGESTIONVARIANT' => 'endSpellingSuggestionVariant',
        // categories used to refine a search
        'GROUP'                     => 'endGroup',
        'CATEGORY'                  => 'endCategory',
        'SEARCH'                    => 'endSearch',
    );

    if (isset($handlers[$name])) {
        $handler = $handlers[$name];
        $this->$handler();
    }
}
488 | |
/**
 * Character-data callback registered in createParser().
 *
 * Every chunk of text the parser reports is utf8_decode()d and appended to
 * the scratch text segment, whichever element is currently open.
 *
 * @param mixed  $parser parser handle (unused)
 * @param string $text   chunk of character data
 */
function parsePCData($parser, $text)
{
    $decoded = utf8_decode($text);
    $this->currentTextSegment->append($decoded);
}
0337d704 |
493 | } |
494 | |
495 | ?> |