26f7da0c28d5de1bc74fa0caee6368afbd97dddc
[platal.git] / include / exalead / exalead.parser.inc.php
1 <?php
2
3 require_once('exalead/exalead.class.php');
4
// Escapes every literal '+' of $string as '%2B' and returns the result.
// The class below applies it to the REFINEHREF attributes sent by Exalead.
function convert_url($string)
{
    $plus = '+';
    $escapedPlus = '%2B';
    $converted = str_replace($plus, $escapedPlus, $string);

    return $converted;
}
9
// Default "match everything" query, used to dump a whole indexed database.
// The Exalead constructor falls back to this value when it is not given an
// explicit override.
$GLOBALS['query_all'] = 'a*';
11
// Event-driven parser for the XML ("xml2") answers of an Exalead search CGI.
//
// The object fetches an answer over HTTP, feeds it to PHP's XML parser
// extension and fills $data through the start*/end* handlers below. Those
// handlers may be overridden by subclasses.
//
// NOTE(review): every end* handler hands the shared current* object to a
// collector (or assigns it directly, see endQuery) and then clear()s it.
// Under PHP 5+ object-handle semantics this is only safe if the collectors
// declared in exalead.class.php (not visible here) copy what they receive
// -- confirm.
class Exalead
{
    // Handle returned by xml_parser_create(); valid between createParser()
    // and freeParser().
    var $parserId;

    // ExaleadData instance receiving the parsed answer.
    var $data;

    // Scratch objects filled while the matching element is open.
    var $currentGroup;
    // Stack of the CATEGORY elements that are currently open.
    var $currentCategories = array();
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    // Base URL of the Exalead product
    var $base_cgi = '';

    // Query to dump indexed database
    var $query_all = '';

    /**** Constructor *********/

    // $base_cgi:           base URL of the Exalead CGI ('' leaves the object
    //                      unable to query until set_base_cgi() is called).
    // $override_query_all: query used by get_db_dump(); when empty, the
    //                      global default $GLOBALS['query_all'] is used.
    function __construct($base_cgi = '', $override_query_all = '')
    {
        $this->data                  = new ExaleadData();
        $this->currentGroup          = new ExaleadGroup();
        $this->currentCategories     = array();
        $this->currentSpelling       = new ExaleadSpelling();
        $this->currentHit            = new ExaleadHit();
        $this->currentHitField       = new ExaleadHitField();
        $this->currentHitGroup       = new ExaleadHitGroup();
        $this->currentHitCategory    = new ExaleadHitCategory();
        $this->currentAction         = new ExaleadAction();
        $this->currentTextSegment    = new ExaleadTextSegment();
        $this->currentQuery          = new ExaleadQuery();
        $this->currentQueryTerm      = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword        = new ExaleadKeyword();

        // Base URL of the Exalead product
        $this->base_cgi = $base_cgi;
        if (!empty($override_query_all)) {
            $this->query_all = $override_query_all;
        } else {
            $this->query_all = $GLOBALS['query_all'];
        }
    }

    // Old-style constructor name, kept so that existing code calling
    // Exalead() explicitly (e.g. parent::Exalead() in a subclass) still
    // works. PHP 8 no longer treats it as a constructor, which is why the
    // real initialisation lives in __construct().
    function Exalead($base_cgi = '', $override_query_all = '')
    {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    function set_base_cgi($base_cgi)
    {
        $this->base_cgi = $base_cgi;
    }

    // Runs the search described by the current HTTP request, if any.
    // Returns true if a request was actually made, false otherwise
    // (nothing asked for, or the underlying call reported a failure).
    function query($varname = 'query')
    {
        if (!empty($_REQUEST[$varname])) {
            // NOTE(review): stripslashes() presumes the request value was
            // escaped beforehand (e.g. by magic quotes) -- confirm this is
            // still wanted.
            return $this->first_query(stripslashes($_REQUEST[$varname])) !== false;
        }

        if (isset($_REQUEST['_C'])) {
            return $this->handle_request() !== false;
        }

        return false;
    }

    // To be called for the first request of a search.
    // $query is the user query, $offset the index of the first wanted hit.
    // Returns false when no base URL is configured or the answer could not
    // be fetched; returns nothing otherwise.
    function first_query($query, $offset = 0)
    {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi."?_q=".urlencode($query)."&_f=xml2";
        if ($offset > 0) {
            // Cast like handle_request() does for _s, so that only a number
            // can end up in the URL.
            $query_exa .= "&_s=".((int) $offset);
        }

        $xml_response = file_get_contents($query_exa);
        if ($xml_response === false) {
            // Fetch failed (PHP already raised a warning): report it instead
            // of letting the parser die() on an empty document.
            return false;
        }
        $this->parse($xml_response);
    }

    // Fetches every result of an indexed database.
    function get_db_dump()
    {
        $this->first_query($this->query_all);
    }

    // Follows up on a previous search using the Exalead context found in
    // $_REQUEST['_C'] (and the optional start offset $_REQUEST['_s']).
    // Returns false when no base URL or context is available or the answer
    // could not be fetched; returns nothing otherwise.
    function handle_request()
    {
        if (empty($this->base_cgi)) {
            return false;
        }
        if (empty($_REQUEST['_C'])) {
            return false; // _C is the Exalead context
        }

        // NOTE(review): only spaces are escaped; the rest of the context is
        // embedded as received -- confirm the CGI expects it raw.
        $query_exa = $this->base_cgi.'/_C='.str_replace(' ', '%20', $_REQUEST['_C']).'&_f=xml2';
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= "&_s=".((int) $_REQUEST['_s']);
        }

        $xml_response = file_get_contents($query_exa);
        if ($xml_response === false) {
            // Same handling as in first_query().
            return false;
        }
        $this->parse($xml_response);
    }

    /******** Helper functions around the XML parser ********/

    function createParser()
    {
        $this->parserId = xml_parser_create();
        // $this is passed by reference, as in the original code, so that the
        // callbacks are bound to this very instance.
        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
    }

    function freeParser()
    {
        xml_parser_free($this->parserId);
    }

    // Parses $string as one complete document; terminates the script with
    // an error message if the document is not well-formed.
    function parseString($string)
    {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                        xml_error_string(xml_get_error_code($this->parserId)),
                        xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Method that runs the parser ***********/

    function parse($string)
    {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Tag-specific functions ******/

    // These methods may be overridden.

    function startQuery(&$attrs)
    {
        $this->currentQuery->query   = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time    = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        }
        if (isset($attrs['BROWSED'])) {
            $this->currentQuery->browsed = $attrs['BROWSED'];
        }
    }

    function startQueryTerm(&$attrs)
    {
        $this->currentQueryTerm->level  = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    function startQueryParameter(&$attrs)
    {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
        }
    }

    function startKeyword(&$attrs)
    {
        if (isset($attrs['NAME'])) {
            $this->currentKeyword->name = $attrs['NAME'];
        }
        $this->currentKeyword->display   = utf8_decode( $attrs['DISPLAY'] );
        $this->currentKeyword->count     = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF'])) {
            $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        }
        if (isset($attrs['EXCLUDEHREF'])) {
            $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentKeyword->reset_href = $attrs['RESETHREF'];
        }
    }

    function startHits(&$attrs)
    {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits    = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->data->interrupted = $attrs['INTERRUPTED'];
        }
        $this->data->last  = $attrs['LAST'];
        $this->data->end   = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    function startHit(&$attrs)
    {
        $this->currentHit->url   = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    function startHitGroup(&$attrs)
    {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid   = $attrs['GID'];
    }

    function startHitCategory(&$attrs)
    {
        $this->currentHitCategory->name    = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref    = $attrs['CREF'];
        $this->currentHitCategory->gid     = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) {
            $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
        }
    }

    function startAction(&$attrs)
    {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind    = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) {
            $this->currentAction->execHref = $attrs['EXECHREF'];
        }
    }

    function startHitField(&$attrs)
    {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentHitField->value = utf8_decode($attrs['VALUE']);
        }
    }

    function startTextSeg(&$attrs)
    {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    function startTextCut(&$attrs) { }

    function startSpellingSuggestionVariant(&$attrs)
    {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    function startGroup(&$attrs)
    {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF'])) {
            $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentGroup->setResetHref($attrs['RESETHREF']);
        }
    }

    // Pushes a new category on the stack of open categories; endCategory()
    // later attaches it to its parent.
    function startCategory(&$attrs)
    {
        $currentCategory = new ExaleadCategory();
        $currentCategory->name      = utf8_decode($attrs['NAME']);
        $currentCategory->display   = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count     = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref      = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) {
            $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        }
        $currentCategory->exclude_href = '_c=-'.$currentCategory->cref;
        if (isset($attrs['RESETHREF'])) {
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid    = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];
        $this->currentCategories[] = $currentCategory;
    }

    function startSearch(&$attrs) { }

    // Maps an element name, as reported by the XML parser, to the suffix
    // shared by its start<Suffix>() and end<Suffix>() handlers. Returns null
    // for elements this class does not handle. Keeping one table guarantees
    // that startElement() and endElement() always know the same tags.
    function tagHandlerSuffix($name)
    {
        static $suffixes = array(
            'QUERY'                     => 'Query',
            'QUERYTERM'                 => 'QueryTerm',
            'QUERYPARAMETER'            => 'QueryParameter',
            'KEYWORD'                   => 'Keyword',
            'HITS'                      => 'Hits',
            'HIT'                       => 'Hit',
            'HITFIELD'                  => 'HitField',
            'HITGROUP'                  => 'HitGroup',
            'HITCATEGORY'               => 'HitCategory',
            'ACTION'                    => 'Action',
            'TEXTSEG'                   => 'TextSeg',
            'TEXTCUT'                   => 'TextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'SpellingSuggestionVariant',
            // categories used to refine a search
            'GROUP'                     => 'Group',
            'CATEGORY'                  => 'Category',
            'SEARCH'                    => 'Search',
        );

        return isset($suffixes[$name]) ? $suffixes[$name] : null;
    }

    // XML parser callback for an opening tag: forwards the attributes to
    // the matching start*() handler; unknown tags are ignored.
    function startElement($parser, $name, $attrs)
    {
        $suffix = $this->tagHandlerSuffix($name);
        if ($suffix !== null) {
            $handler = 'start'.$suffix;
            $this->$handler($attrs);
        }
    }

    function endQuery()
    {
        $this->data->query = $this->currentQuery;
        $this->currentQuery->clear();
    }

    function endQueryTerm()
    {
        $this->currentQuery->addTerm($this->currentQueryTerm);
        $this->currentQueryTerm->clear();
    }

    function endQueryParameter()
    {
        $this->currentQuery->addParameter($this->currentQueryParameter);
        $this->currentQueryParameter->clear();
    }

    function endKeyword()
    {
        $this->data->addKeyword($this->currentKeyword);
        $this->currentKeyword->clear();
    }

    function endHits()
    {
    }

    function endHit()
    {
        $this->data->addHit($this->currentHit);
        $this->currentHit->clear();
    }

    function endHitGroup()
    {
        $this->currentHit->addHitGroup($this->currentHitGroup);
        $this->currentHitGroup->clear();
    }

    function endHitCategory()
    {
        $this->currentHitGroup->addHitCategory($this->currentHitCategory);
        $this->currentHitCategory->clear();
    }

    function endAction()
    {
        $this->currentHit->addAction($this->currentAction);
        $this->currentAction->clear();
    }

    function endHitField()
    {
        $this->currentHit->addHitField($this->currentHitField);
        $this->currentHitField->clear();
    }

    function endTextSeg()
    {
        $this->currentHitField->addTextSegment($this->currentTextSegment);
        $this->currentTextSegment->clear();
    }

    function endTextCut()
    {
        $this->currentHitField->setHasTextCut(true);
    }

    function endSpellingSuggestionVariant()
    {
        $this->data->addSpelling($this->currentSpelling);
        $this->currentSpelling->clear();
    }

    function endGroup()
    {
        $this->data->addGroup($this->currentGroup);
        $this->currentGroup->clear();
    }

    // Pops the category being closed and attaches it to its parent: the
    // current group when it was the only open category, otherwise the
    // category that is now on top of the stack.
    function endCategory()
    {
        // Is the parent element a Group element?
        if (count($this->currentCategories) == 1) {
            $this->currentGroup->addCategory(array_pop($this->currentCategories));
        } else {
            $category = array_pop($this->currentCategories);
            // Move the internal pointer to the last entry so that key()
            // designates the enclosing (parent) category.
            end($this->currentCategories);
            $parentCategory = &$this->currentCategories[key($this->currentCategories)];
            $parentCategory->addCategory($category);
        }
    }

    function endSearch() { }

    // XML parser callback for a closing tag: calls the matching end*()
    // handler; unknown tags are ignored.
    function endElement($parser, $name)
    {
        $suffix = $this->tagHandlerSuffix($name);
        if ($suffix !== null) {
            $handler = 'end'.$suffix;
            $this->$handler();
        }
    }

    // XML parser callback for character data: the text is appended to the
    // current text segment.
    // NOTE(review): this is done for every element, not only inside TEXTSEG
    // -- confirm that is intended.
    function parsePCData($parser, $text)
    {
        $this->currentTextSegment->append(utf8_decode($text));
    }
}
485
486 ?>