0337d704 |
1 | <?php |
2 | |
d44937f2 |
3 | require_once('exalead/exalead.class.php'); |
0337d704 |
4 | |
d44937f2 |
/**
 * Percent-encode every literal plus sign in a URL fragment.
 *
 * Only the '+' character is rewritten (to '%2B'); everything else in
 * $string is returned untouched.
 *
 * @param string $string URL or URL fragment to rewrite.
 * @return string The input with each '+' replaced by '%2B'.
 */
function convert_url($string)
{
    $plus    = '+';
    $encoded = '%2B';

    $converted = str_replace($plus, $encoded, $string);

    return $converted;
}
9 | |
// Default query string stored under the global 'query_all' key; the Exalead
// constructor falls back to it when no override is supplied.
$GLOBALS['query_all'] = 'a*';
11 | |
d44937f2 |
/**
 * Client for an Exalead search CGI and event-driven parser for its "xml2"
 * responses.
 *
 * A query is sent with first_query() / handle_request() (or query(), which
 * picks one of them from $_REQUEST). The XML answer is parsed with the expat
 * based xml_* functions; every recognised element is routed to a start*()
 * and an end*() method, which fill scratch "current*" objects and then hand
 * them over to $this->data (or to their parent scratch object).
 *
 * NOTE(review): every end*() handler passes a scratch object to an add*()
 * method (or assigns it) and then calls clear() on that same object. Under
 * PHP 5+ objects are passed by handle, so if the receiving code in
 * exalead/exalead.class.php stores the object without copying it, clear()
 * would also wipe the stored entry. Confirm against that file.
 */
class Exalead
{
    // Handle returned by xml_parser_create() for the parse in progress.
    var $parserId;

    // ExaleadData instance that receives everything parsed from a response.
    var $data;

    // Scratch objects filled by the start*() handlers and consumed by the
    // matching end*() handlers.
    var $currentGroup;
    // Stack of categories currently open (categories may be nested).
    var $currentCategories = array();
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    // Base URL of the Exalead product CGI.
    var $base_cgi = '';

    // Query used to dump the indexed database.
    var $query_all = '';

    /**** Constructor *********/

    /**
     * Create the scratch objects and record the CGI base URL.
     *
     * @param string $base_cgi           Base URL of the Exalead CGI.
     * @param string $override_query_all Query used by get_db_dump(); when
     *                                   empty, $GLOBALS['query_all'] is used.
     */
    function __construct($base_cgi = '', $override_query_all = '')
    {
        $this->data                  = new ExaleadData();
        $this->currentGroup          = new ExaleadGroup();
        $this->currentCategories     = array();
        $this->currentSpelling       = new ExaleadSpelling();
        $this->currentHit            = new ExaleadHit();
        $this->currentHitField       = new ExaleadHitField();
        $this->currentHitGroup       = new ExaleadHitGroup();
        $this->currentHitCategory    = new ExaleadHitCategory();
        $this->currentAction         = new ExaleadAction();
        $this->currentTextSegment    = new ExaleadTextSegment();
        $this->currentQuery          = new ExaleadQuery();
        $this->currentQueryTerm      = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword        = new ExaleadKeyword();

        $this->base_cgi = $base_cgi;
        if (!empty($override_query_all)) {
            $this->query_all = $override_query_all;
        } else {
            $this->query_all = $GLOBALS['query_all'];
        }
    }

    /**
     * Legacy class-named constructor.
     *
     * Kept so that engines which only recognise the class-named constructor,
     * and any code calling Exalead() explicitly, keep working. PHP 8 no
     * longer treats a method named after the class as a constructor, hence
     * the real initialisation lives in __construct().
     */
    function Exalead($base_cgi = '', $override_query_all = '')
    {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    /**
     * Replace the CGI base URL.
     *
     * @param string $base_cgi Base URL of the Exalead CGI.
     */
    function set_base_cgi($base_cgi)
    {
        $this->base_cgi = $base_cgi;
    }

    /**
     * Run a search described by the current HTTP request, if any.
     *
     * A non-empty $_REQUEST[$varname] starts a fresh query; otherwise the
     * presence of $_REQUEST['_C'] continues an existing one.
     *
     * @param string $varname Name of the request variable holding the query.
     * @return bool True if a request was attempted, false otherwise.
     */
    function query($varname = 'query')
    {
        if (!empty($_REQUEST[$varname])) {
            $raw_query = $_REQUEST[$varname];
            // Only undo magic quotes where they can exist: they were removed
            // in PHP 5.4, and stripping unconditionally would eat legitimate
            // backslashes typed by the user. The version test short-circuits
            // so the removed function is never called on modern engines.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
                $raw_query = stripslashes($raw_query);
            }
            $this->first_query($raw_query);
            return true;
        }

        if (isset($_REQUEST['_C'])) {
            $this->handle_request();
            return true;
        }

        return false;
    }

    /**
     * Send the initial query for a search and parse the answer.
     *
     * @param string $query  Search expression; it is URL-encoded here.
     * @param int    $offset Value sent as "_s" when greater than zero.
     * @return bool False when no base URL is set or the response could not
     *              be fetched, true once the response has been parsed.
     */
    function first_query($query, $offset = 0)
    {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi."?_q=".urlencode($query)."&_f=xml2";
        if ($offset > 0) {
            $query_exa .= "&_s=".$offset;
        }

        return $this->fetchAndParse($query_exa);
    }

    /**
     * Fetch all the results of an indexed database using $this->query_all.
     *
     * @return bool Result of first_query().
     */
    function get_db_dump()
    {
        return $this->first_query($this->query_all);
    }

    /**
     * Continue a search from the Exalead context carried by $_REQUEST['_C'].
     *
     * $_REQUEST['_s'], when present, is cast to int and sent as "_s".
     *
     * NOTE(review): apart from spaces, the "_C" value is inserted in the URL
     * as received from the client. It is left untouched here because the
     * expected encoding of an Exalead context is not visible in this file;
     * confirm it cannot be abused to inject extra parameters.
     *
     * @return bool False when no base URL or no context is available, or
     *              when the response could not be fetched; true once the
     *              response has been parsed.
     */
    function handle_request()
    {
        if (empty($this->base_cgi)) {
            return false;
        }
        // _C is the Exalead context.
        if (empty($_REQUEST['_C'])) {
            return false;
        }

        $query_exa = $this->base_cgi.'/_C='.str_replace(' ', '%20', $_REQUEST['_C']).'&_f=xml2';
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= "&_s=".((int) $_REQUEST['_s']);
        }

        return $this->fetchAndParse($query_exa);
    }

    /**
     * Download $url and feed the body to the parser.
     *
     * @param string $url Fully built request URL.
     * @return bool False if file_get_contents() failed, true otherwise.
     */
    function fetchAndParse($url)
    {
        $xml_response = file_get_contents($url);
        if ($xml_response === false) {
            // Nothing was downloaded: do not hand a boolean to the XML
            // parser, report the failure to the caller instead.
            return false;
        }

        $this->parse($xml_response);
        return true;
    }

    /******** Parser helpers ********/

    /**
     * Create the XML parser and register this object's callbacks on it.
     */
    function createParser()
    {
        $this->parserId = xml_parser_create();
        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
    }

    /**
     * Release the parser created by createParser().
     */
    function freeParser()
    {
        xml_parser_free($this->parserId);
    }

    /**
     * Parse a complete document with the current parser.
     *
     * Terminates the script with an "XML error" message if xml_parse()
     * reports a failure.
     *
     * @param string $string Whole XML document.
     */
    function parseString($string)
    {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                        xml_error_string(xml_get_error_code($this->parserId)),
                        xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Entry point of the parser ***********/

    /**
     * Parse an XML response: create a parser, run it, free it.
     *
     * @param string $string Whole XML document.
     */
    function parse($string)
    {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Tag-specific handlers ******/

    // These methods may be overridden by subclasses. Each start*() handler
    // receives the attribute array of the opening tag.

    function startQuery(&$attrs)
    {
        $this->currentQuery->query   = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time    = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        }
        if (isset($attrs['BROWSED'])) {
            $this->currentQuery->browsed = $attrs['BROWSED'];
        }
    }

    // Spelled like its siblings; PHP resolves method names
    // case-insensitively, so existing callers are unaffected.
    function startQueryTerm(&$attrs)
    {
        $this->currentQueryTerm->level  = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    function startQueryParameter(&$attrs)
    {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
        }
    }

    function startKeyword(&$attrs)
    {
        if (isset($attrs['NAME'])) {
            $this->currentKeyword->name = $attrs['NAME'];
        }
        $this->currentKeyword->display   = utf8_decode( $attrs['DISPLAY'] );
        $this->currentKeyword->count     = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF'])) {
            $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        }
        if (isset($attrs['EXCLUDEHREF'])) {
            $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentKeyword->reset_href = $attrs['RESETHREF'];
        }
    }

    function startHits(&$attrs)
    {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits    = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->data->interrupted = $attrs['INTERRUPTED'];
        }
        $this->data->last  = $attrs['LAST'];
        $this->data->end   = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    function startHit(&$attrs)
    {
        $this->currentHit->url   = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    function startHitGroup(&$attrs)
    {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid   = $attrs['GID'];
    }

    function startHitCategory(&$attrs)
    {
        $this->currentHitCategory->name    = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref    = $attrs['CREF'];
        $this->currentHitCategory->gid     = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) {
            $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
        }
    }

    function startAction(&$attrs)
    {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind    = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) {
            $this->currentAction->execHref = $attrs['EXECHREF'];
        }
    }

    function startHitField(&$attrs)
    {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentHitField->value = utf8_decode($attrs['VALUE']);
        }
    }

    function startTextSeg(&$attrs)
    {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    function startTextCut(&$attrs) { }

    function startSpellingSuggestionVariant(&$attrs)
    {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    function startGroup(&$attrs)
    {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF'])) {
            $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentGroup->setResetHref($attrs['RESETHREF']);
        }
    }

    /**
     * Open a category and push it on the stack of open categories, so that
     * endCategory() can attach it to its parent.
     */
    function startCategory(&$attrs)
    {
        $currentCategory            = new ExaleadCategory();
        $currentCategory->name      = utf8_decode($attrs['NAME']);
        $currentCategory->display   = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count     = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref      = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) {
            $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        }
        // The exclusion link is built from the category reference rather
        // than read from the response.
        $currentCategory->exclude_href = '_c=-'.$currentCategory->cref;
        if (isset($attrs['RESETHREF'])) {
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid    = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];

        $this->currentCategories[] = $currentCategory;
    }

    function startSearch(&$attrs) { }

    /**
     * Map an element name, as reported by the XML parser, to the suffix
     * shared by its start*() / end*() handlers.
     *
     * @param string $name Element name.
     * @return string|null Handler suffix, or null for an unhandled element.
     */
    function tagHandlerSuffix($name)
    {
        static $suffixes = array(
            'QUERY'                     => 'Query',
            'QUERYTERM'                 => 'QueryTerm',
            'QUERYPARAMETER'            => 'QueryParameter',
            'KEYWORD'                   => 'Keyword',
            'HITS'                      => 'Hits',
            'HIT'                       => 'Hit',
            'HITFIELD'                  => 'HitField',
            'HITGROUP'                  => 'HitGroup',
            'HITCATEGORY'               => 'HitCategory',
            'ACTION'                    => 'Action',
            'TEXTSEG'                   => 'TextSeg',
            'TEXTCUT'                   => 'TextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'SpellingSuggestionVariant',
            // categories used to refine a search
            'GROUP'                     => 'Group',
            'CATEGORY'                  => 'Category',
            'SEARCH'                    => 'Search',
        );

        return isset($suffixes[$name]) ? $suffixes[$name] : null;
    }

    /**
     * Opening-tag callback: forward the attributes to the start*() handler
     * of the element; unhandled elements are ignored.
     */
    function startElement($parser, $name, $attrs)
    {
        $suffix = $this->tagHandlerSuffix($name);
        if ($suffix === null) {
            return;
        }

        // Dispatch through $this so that overridden handlers are honoured.
        $handler = 'start'.$suffix;
        $this->$handler($attrs);
    }

    function endQuery()
    {
        $this->data->query = $this->currentQuery;
        $this->currentQuery->clear();
    }

    function endQueryTerm()
    {
        $this->currentQuery->addTerm($this->currentQueryTerm);
        $this->currentQueryTerm->clear();
    }

    function endQueryParameter()
    {
        $this->currentQuery->addParameter($this->currentQueryParameter);
        $this->currentQueryParameter->clear();
    }

    function endKeyword()
    {
        $this->data->addKeyword($this->currentKeyword);
        $this->currentKeyword->clear();
    }

    function endHits()
    {
    }

    function endHit()
    {
        $this->data->addHit($this->currentHit);
        $this->currentHit->clear();
    }

    function endHitGroup()
    {
        $this->currentHit->addHitGroup($this->currentHitGroup);
        $this->currentHitGroup->clear();
    }

    function endHitCategory()
    {
        $this->currentHitGroup->addHitCategory($this->currentHitCategory);
        $this->currentHitCategory->clear();
    }

    function endAction()
    {
        $this->currentHit->addAction($this->currentAction);
        $this->currentAction->clear();
    }

    function endHitField()
    {
        $this->currentHit->addHitField($this->currentHitField);
        $this->currentHitField->clear();
    }

    function endTextSeg()
    {
        $this->currentHitField->addTextSegment($this->currentTextSegment);
        $this->currentTextSegment->clear();
    }

    function endTextCut()
    {
        $this->currentHitField->setHasTextCut(true);
    }

    function endSpellingSuggestionVariant()
    {
        $this->data->addSpelling($this->currentSpelling);
        $this->currentSpelling->clear();
    }

    function endGroup()
    {
        $this->data->addGroup($this->currentGroup);
        $this->currentGroup->clear();
    }

    /**
     * Close the innermost open category and attach it to its parent: the
     * current group when no other category is open, otherwise the category
     * that is now on top of the stack.
     */
    function endCategory()
    {
        $category = array_pop($this->currentCategories);

        if (count($this->currentCategories) == 0) {
            // The parent element is a Group element.
            $this->currentGroup->addCategory($category);
            return;
        }

        // Locate the last remaining entry and update it in place.
        end($this->currentCategories);
        $parentKey = key($this->currentCategories);
        $this->currentCategories[$parentKey]->addCategory($category);
    }

    function endSearch() { }

    /**
     * Closing-tag callback: call the end*() handler of the element;
     * unhandled elements are ignored.
     */
    function endElement($parser, $name)
    {
        $suffix = $this->tagHandlerSuffix($name);
        if ($suffix === null) {
            return;
        }

        $handler = 'end'.$suffix;
        $this->$handler();
    }

    /**
     * Character-data callback: append the text, converted with
     * utf8_decode(), to the current text segment.
     */
    function parsePCData($parser, $text)
    {
        $this->currentTextSegment->append(utf8_decode($text));
    }
}
485 | |
486 | ?> |