0337d704 |
1 | <?php |
2 | |
34265dc0 |
3 | require_once(dirname(__FILE__).'/exalead.class.php'); |
0337d704 |
4 | |
d44937f2 |
/**
 * Escape literal plus signs in an Exalead href fragment.
 *
 * Every '+' character is replaced by its percent-encoded form '%2B';
 * all other characters are left untouched.
 *
 * @param string $string href fragment to escape
 * @return string the fragment with every '+' replaced by '%2B'
 */
function convert_url($string)
{
    $plusSign    = array('+');
    $encodedPlus = array('%2B');

    return str_replace($plusSign, $encodedPlus, $string);
}
9 | |
// Default "match everything" query; the Exalead constructor copies it into
// $this->query_all when no override is supplied, and get_db_dump() sends it.
$GLOBALS['query_all'] = 'a*';
11 | |
d44937f2 |
/**
 * Client and SAX-style parser for the XML ("xml2") output of an Exalead CGI.
 *
 * The class builds the backend URL from the incoming request, downloads the
 * XML answer and feeds it to an expat parser. Element handlers fill scratch
 * objects ($current*) and, when an element closes, hand a copy of the scratch
 * object to its parent container before resetting it. The final result is
 * available in $data.
 *
 * The Exalead* value classes come from exalead.class.php; this class only
 * relies on the properties and methods it calls on them (clear(), add*(),
 * set*(), append()).
 */
class Exalead
{
    /** XML parser handle created by createParser(). */
    public $parserId;

    /** ExaleadData instance that accumulates the parsed response. */
    public $data;

    // Scratch objects, one per element type, filled while the element is open.
    public $currentGroup;
    /** Stack of the ExaleadCategory objects that are currently open. */
    public $currentCategories = array();
    public $currentSpelling;
    public $currentHit;
    public $currentHitField;
    public $currentHitGroup;
    public $currentHitCategory;
    public $currentAction;
    public $currentTextSegment;
    public $currentQuery;
    public $currentQueryTerm;
    public $currentQueryParameter;
    public $currentKeyword;

    /** Base URL of the Exalead product CGI. */
    public $base_cgi = '';

    /** Query used to dump the whole indexed database. */
    public $query_all = '';

    /**
     * XML element name => suffix of the start<Suffix>() / end<Suffix>()
     * handler methods. Element names are upper-case because the parser is
     * created with its default settings (case folding enabled).
     */
    private static $elementHandlers = array(
        'QUERY'                     => 'Query',
        'QUERYTERM'                 => 'QueryTerm',
        'QUERYPARAMETER'            => 'QueryParameter',
        'KEYWORD'                   => 'Keyword',
        'HITS'                      => 'Hits',
        'HIT'                       => 'Hit',
        'HITFIELD'                  => 'HitField',
        'HITGROUP'                  => 'HitGroup',
        'HITCATEGORY'               => 'HitCategory',
        'ACTION'                    => 'Action',
        'TEXTSEG'                   => 'TextSeg',
        'TEXTCUT'                   => 'TextCut',
        'SPELLINGSUGGESTIONVARIANT' => 'SpellingSuggestionVariant',
        'GROUP'                     => 'Group',
        'CATEGORY'                  => 'Category',
        'SEARCH'                    => 'Search',
    );

    /**** Constructor *********/

    /**
     * Create the result container and all scratch objects.
     *
     * @param string $base_cgi           base URL of the Exalead CGI
     * @param string $override_query_all replacement for the global "dump
     *                                   everything" query; empty keeps
     *                                   $GLOBALS['query_all']
     */
    public function __construct($base_cgi = '', $override_query_all = '')
    {
        $this->data                  = new ExaleadData();
        $this->currentGroup          = new ExaleadGroup();
        $this->currentCategories     = array();
        $this->currentSpelling       = new ExaleadSpelling();
        $this->currentHit            = new ExaleadHit();
        $this->currentHitField       = new ExaleadHitField();
        $this->currentHitGroup       = new ExaleadHitGroup();
        $this->currentHitCategory    = new ExaleadHitCategory();
        $this->currentAction         = new ExaleadAction();
        $this->currentTextSegment    = new ExaleadTextSegment();
        $this->currentQuery          = new ExaleadQuery();
        $this->currentQueryTerm      = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword        = new ExaleadKeyword();

        // Base URL of the Exalead product CGI.
        $this->base_cgi = $base_cgi;
        if (!empty($override_query_all)) {
            $this->query_all = $override_query_all;
        } else {
            $this->query_all = $GLOBALS['query_all'];
        }
    }

    /**
     * Legacy PHP 4 style constructor name.
     *
     * PHP 8 no longer treats a method named after the class as a constructor,
     * so the real work lives in __construct(); this wrapper is kept for code
     * that still calls Exalead() explicitly (e.g. from a subclass).
     *
     * @param string $base_cgi           see __construct()
     * @param string $override_query_all see __construct()
     */
    public function Exalead($base_cgi = '', $override_query_all = '')
    {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    /**
     * Change the base URL of the Exalead CGI.
     *
     * @param string $base_cgi new base URL
     */
    public function set_base_cgi($base_cgi)
    {
        $this->base_cgi = $base_cgi;
    }

    /**
     * Run the search described by the current HTTP request, if any.
     *
     * A non-empty $_REQUEST[$varname] starts a fresh query; otherwise a
     * request carrying an Exalead context ('_C') is forwarded.
     *
     * @param string $varname name of the request variable holding the query
     * @return bool true if a request was attempted, false otherwise
     */
    public function query($varname = 'query')
    {
        if (!empty($_REQUEST[$varname])) {
            $user_query = $_REQUEST[$varname];
            // Only undo magic quotes where they can exist (PHP < 5.4) and are
            // enabled; stripping unconditionally would eat real backslashes.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
                $user_query = stripslashes($user_query);
            }
            $this->first_query($user_query);
            return true;
        }

        if (isset($_REQUEST['_C'])) {
            $this->handle_request();
            return true;
        }

        return false;
    }

    /**
     * Send an initial query to Exalead and parse the answer into $this->data.
     *
     * @param string $query  the search expression
     * @param int    $offset index of the first hit to return (sent as '_s'
     *                       only when greater than zero)
     * @return false|null false when no base URL is configured or the fetch
     *                    failed; nothing (null) otherwise
     */
    public function first_query($query, $offset = 0)
    {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi.$this->querySeparator().'_q='.urlencode($query).'&_f=xml2';
        if ($offset > 0) {
            $query_exa .= '&_s='.((int) $offset);
        }

        if ($this->fetchAndParse($query_exa) === false) {
            return false;
        }
    }

    /**
     * Fetch every result of the indexed database using $this->query_all.
     */
    public function get_db_dump()
    {
        $this->first_query($this->query_all);
    }

    /**
     * Forward a follow-up request (refine, paging, ...) to Exalead.
     *
     * Reads the Exalead context '_C' and the optional '_c', '_s' and '_z'
     * parameters from $_REQUEST.
     *
     * @return false|null false when no base URL is configured, no context
     *                    was supplied or the fetch failed; nothing (null)
     *                    otherwise
     */
    public function handle_request()
    {
        if (empty($this->base_cgi)) {
            return false;
        }
        // _C is the Exalead context.
        if (empty($_REQUEST['_C'])) {
            return false;
        }

        // NOTE(review): '_C' is forwarded with only its spaces escaped, as
        // before; confirm with the Exalead side whether it can be fully
        // url-encoded, since other reserved characters pass through verbatim.
        $query_exa = $this->base_cgi.$this->querySeparator()
                   .'_C='.str_replace(' ', '%20', $_REQUEST['_C']).'&_f=xml2';
        if (!empty($_REQUEST['_c'])) {
            $query_exa .= '&_c='.urlencode($_REQUEST['_c']);
        }
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= '&_s='.((int) $_REQUEST['_s']);
        }
        if (isset($_REQUEST['_z'])) {
            // Untrusted request data: encode it before it enters the URL.
            $query_exa .= '&_z='.urlencode($_REQUEST['_z']);
        }

        if ($this->fetchAndParse($query_exa) === false) {
            return false;
        }
    }

    /**
     * Character that must precede the first parameter appended to base_cgi:
     * '?' when the base URL has no query string yet, '&' otherwise.
     *
     * @return string '?' or '&'
     */
    private function querySeparator()
    {
        return (strpos($this->base_cgi, '?') === false) ? '?' : '&';
    }

    /**
     * Download $url and parse the body as an Exalead XML response.
     *
     * @param string $url complete backend URL
     * @return bool false when the download failed, true once parsed
     */
    private function fetchAndParse($url)
    {
        $xml_response = file_get_contents($url);
        if ($xml_response === false) {
            // Nothing to parse; avoid a misleading "XML error" from the parser.
            return false;
        }
        $this->parse($xml_response);
        return true;
    }

    /******** Parser helper functions ********/

    /**
     * Create the XML parser and register this object's handlers on it.
     */
    public function createParser()
    {
        $this->parserId = xml_parser_create();
        xml_set_element_handler($this->parserId, array($this, 'startElement'), array($this, 'endElement'));
        xml_set_character_data_handler($this->parserId, array($this, 'parsePCData'));
    }

    /**
     * Release the XML parser created by createParser().
     */
    public function freeParser()
    {
        xml_parser_free($this->parserId);
    }

    /**
     * Parse a complete XML document with the current parser.
     *
     * Terminates the script with an "XML error" message when the document is
     * not well-formed.
     *
     * @param string $string the whole XML document
     */
    public function parseString($string)
    {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                        xml_error_string(xml_get_error_code($this->parserId)),
                        xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Method that runs the parser ***********/

    /**
     * Parse an Exalead XML response; results accumulate in $this->data.
     *
     * @param string $string the XML document
     */
    public function parse($string)
    {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Per-element handlers ******/

    // These methods may be overridden by subclasses.

    /** Handle <Query>: copy its attributes into the scratch query. */
    public function startQuery(&$attrs)
    {
        $this->currentQuery->query   = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time    = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        }
        if (isset($attrs['BROWSED'])) {
            $this->currentQuery->browsed = $attrs['BROWSED'];
        }
    }

    /** Handle <QueryTerm>: copy its attributes into the scratch term. */
    public function startQueryTerm(&$attrs)
    {
        $this->currentQueryTerm->level  = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    /** Handle <QueryParameter>: record its name and optional value. */
    public function startQueryParameter(&$attrs)
    {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
        }
    }

    /** Handle <Keyword>: copy its attributes into the scratch keyword. */
    public function startKeyword(&$attrs)
    {
        if (isset($attrs['NAME'])) {
            $this->currentKeyword->name = $attrs['NAME'];
        }
        $this->currentKeyword->display   = utf8_decode($attrs['DISPLAY']);
        $this->currentKeyword->count     = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF'])) {
            // Only the refine link gets its '+' signs escaped.
            $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        }
        if (isset($attrs['EXCLUDEHREF'])) {
            $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentKeyword->reset_href = $attrs['RESETHREF'];
        }
    }

    /** Handle <Hits>: store the result-set counters directly on $data. */
    public function startHits(&$attrs)
    {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits    = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->data->interrupted = $attrs['INTERRUPTED'];
        }
        $this->data->last  = $attrs['LAST'];
        $this->data->end   = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    /** Handle <Hit>: record the hit URL and score. */
    public function startHit(&$attrs)
    {
        $this->currentHit->url   = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    /** Handle <HitGroup>: record its title and group id. */
    public function startHitGroup(&$attrs)
    {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid   = $attrs['GID'];
    }

    /** Handle <HitCategory>: copy its attributes into the scratch object. */
    public function startHitCategory(&$attrs)
    {
        $this->currentHitCategory->name    = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref    = $attrs['CREF'];
        $this->currentHitCategory->gid     = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) {
            $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
        }
    }

    /** Handle <Action>: record its display text, kind and optional link. */
    public function startAction(&$attrs)
    {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind    = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) {
            $this->currentAction->execHref = $attrs['EXECHREF'];
        }
    }

    /** Handle <HitField>: record its name and optional value. */
    public function startHitField(&$attrs)
    {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentHitField->value = utf8_decode($attrs['VALUE']);
        }
    }

    /** Handle <TextSeg>: record whether the segment is highlighted. */
    public function startTextSeg(&$attrs)
    {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    /** Handle <TextCut>: nothing to do on opening (see endTextCut()). */
    public function startTextCut(&$attrs) { }

    /** Handle <SpellingSuggestionVariant>: record display text and query. */
    public function startSpellingSuggestionVariant(&$attrs)
    {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    /** Handle <Group>: copy its attributes into the scratch group. */
    public function startGroup(&$attrs)
    {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF'])) {
            $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentGroup->setResetHref($attrs['RESETHREF']);
        }
    }

    /**
     * Handle <Category>: build a new category and push it on the stack of
     * open categories (categories can be nested, see endCategory()).
     */
    public function startCategory(&$attrs)
    {
        $currentCategory            = new ExaleadCategory();
        $currentCategory->name      = utf8_decode($attrs['NAME']);
        $currentCategory->display   = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count     = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref      = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) {
            $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        }
        // The exclude link is derived from the category reference rather
        // than read from the XML.
        $currentCategory->exclude_href = '_c=-'.$currentCategory->cref;
        if (isset($attrs['RESETHREF'])) {
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid    = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];
        $this->currentCategories[] = $currentCategory;
    }

    /** Handle <Search>: nothing to record. */
    public function startSearch(&$attrs) { }

    /**
     * Opening-tag callback registered on the XML parser: dispatch to the
     * matching start<Element>() method; unknown elements are ignored.
     *
     * @param mixed  $parser the XML parser
     * @param string $name   element name
     * @param array  $attrs  element attributes
     */
    public function startElement($parser, $name, $attrs)
    {
        if (isset(self::$elementHandlers[$name])) {
            $handler = 'start'.self::$elementHandlers[$name];
            $this->$handler($attrs);
        }
    }

    // The end* handlers below store a *copy* of the scratch object before
    // clearing it: objects are handles, so storing the scratch object itself
    // and then calling clear() on it would also wipe what was just stored.
    // The copy goes through a local variable so it can also be handed to an
    // add*() method that declares its parameter by reference.
    // NOTE(review): the copy is shallow; this presumes clear() replaces the
    // scratch object's fields rather than mutating nested objects in place.

    /** Handle </Query>: store the finished query on $data. */
    public function endQuery()
    {
        $finishedQuery     = clone $this->currentQuery;
        $this->data->query = $finishedQuery;
        $this->currentQuery->clear();
    }

    /** Handle </QueryTerm>: attach the term to the current query. */
    public function endQueryTerm()
    {
        $finishedTerm = clone $this->currentQueryTerm;
        $this->currentQuery->addTerm($finishedTerm);
        $this->currentQueryTerm->clear();
    }

    /** Handle </QueryParameter>: attach the parameter to the current query. */
    public function endQueryParameter()
    {
        $finishedParameter = clone $this->currentQueryParameter;
        $this->currentQuery->addParameter($finishedParameter);
        $this->currentQueryParameter->clear();
    }

    /** Handle </Keyword>: add the keyword to $data. */
    public function endKeyword()
    {
        $finishedKeyword = clone $this->currentKeyword;
        $this->data->addKeyword($finishedKeyword);
        $this->currentKeyword->clear();
    }

    /** Handle </Hits>: nothing to do. */
    public function endHits()
    {
    }

    /** Handle </Hit>: add the hit to $data. */
    public function endHit()
    {
        $finishedHit = clone $this->currentHit;
        $this->data->addHit($finishedHit);
        $this->currentHit->clear();
    }

    /** Handle </HitGroup>: attach the group to the current hit. */
    public function endHitGroup()
    {
        $finishedHitGroup = clone $this->currentHitGroup;
        $this->currentHit->addHitGroup($finishedHitGroup);
        $this->currentHitGroup->clear();
    }

    /** Handle </HitCategory>: attach the category to the current hit group. */
    public function endHitCategory()
    {
        $finishedHitCategory = clone $this->currentHitCategory;
        $this->currentHitGroup->addHitCategory($finishedHitCategory);
        $this->currentHitCategory->clear();
    }

    /** Handle </Action>: attach the action to the current hit. */
    public function endAction()
    {
        $finishedAction = clone $this->currentAction;
        $this->currentHit->addAction($finishedAction);
        $this->currentAction->clear();
    }

    /** Handle </HitField>: attach the field to the current hit. */
    public function endHitField()
    {
        $finishedHitField = clone $this->currentHitField;
        $this->currentHit->addHitField($finishedHitField);
        $this->currentHitField->clear();
    }

    /** Handle </TextSeg>: attach the text segment to the current hit field. */
    public function endTextSeg()
    {
        $finishedSegment = clone $this->currentTextSegment;
        $this->currentHitField->addTextSegment($finishedSegment);
        $this->currentTextSegment->clear();
    }

    /** Handle </TextCut>: flag the current hit field as containing a cut. */
    public function endTextCut()
    {
        $this->currentHitField->setHasTextCut(true);
    }

    /** Handle </SpellingSuggestionVariant>: add the suggestion to $data. */
    public function endSpellingSuggestionVariant()
    {
        $finishedSpelling = clone $this->currentSpelling;
        $this->data->addSpelling($finishedSpelling);
        $this->currentSpelling->clear();
    }

    /** Handle </Group>: add the refinement group to $data. */
    public function endGroup()
    {
        $finishedGroup = clone $this->currentGroup;
        $this->data->addGroup($finishedGroup);
        $this->currentGroup->clear();
    }

    /**
     * Handle </Category>: pop the closing category and attach it to its
     * parent, which is the current group when it was the only open category
     * and the enclosing open category otherwise.
     */
    public function endCategory()
    {
        // Is the parent element a Group element?
        if (count($this->currentCategories) == 1) {
            $this->currentGroup->addCategory(array_pop($this->currentCategories));
        } else {
            $category = array_pop($this->currentCategories);
            // The new top of the stack is the enclosing category.
            end($this->currentCategories);
            $parentCategory = &$this->currentCategories[key($this->currentCategories)];
            $parentCategory->addCategory($category);
        }
    }

    /** Handle </Search>: nothing to do. */
    public function endSearch() { }

    /**
     * Closing-tag callback registered on the XML parser: dispatch to the
     * matching end<Element>() method; unknown elements are ignored.
     *
     * @param mixed  $parser the XML parser
     * @param string $name   element name
     */
    public function endElement($parser, $name)
    {
        if (isset(self::$elementHandlers[$name])) {
            $handler = 'end'.self::$elementHandlers[$name];
            $this->$handler();
        }
    }

    /**
     * Character-data callback: append the text to the scratch text segment.
     *
     * @param mixed  $parser the XML parser
     * @param string $text   chunk of character data
     */
    public function parsePCData($parser, $text)
    {
        $this->currentTextSegment->append(utf8_decode($text));
    }
}
491 | |
492 | ?> |