f1e0c6d93fa13848f46e019d97b7bcb02c75720b
[platal.git] / include / exalead / exalead.parser.inc.php
1 <?php
2
3 require_once(dirname(__FILE__).'/exalead.class.php');
4
/**
 * Escape literal plus signs in a URL fragment.
 *
 * Every '+' in $string is replaced by its percent-encoded form '%2B';
 * all other characters are returned untouched.
 *
 * @param string $string URL (or URL fragment) to convert
 * @return string the converted URL
 */
function convert_url($string)
{
    $search  = array('+');
    $replace = array('%2B');

    return str_replace($search, $replace, $string);
}
9
// Default query used to dump the whole indexed database. The Exalead
// constructor copies this value into its $query_all property when no
// override is passed as its second argument.
10 $GLOBALS['query_all'] = 'a*';
11
/**
 * Client for an Exalead search CGI.
 *
 * Builds the request URL, fetches the "xml2" response and walks it with an
 * expat (SAX) parser, accumulating the result in $data.
 *
 * Every recognised XML element is dispatched to a start<Tag>() / end<Tag>()
 * method. Those per-tag methods may be overridden in subclasses.
 */
class Exalead
{
    // Handle returned by xml_parser_create() while a parse is in progress.
    var $parserId;

    // ExaleadData object that accumulates the parsed response.
    var $data;

    // Scratch objects filled while the matching element is open. When the
    // element closes, a copy is handed to the parent object and the scratch
    // object is cleared for reuse.
    var $currentGroup;
    // Stack of the currently open (possibly nested) CATEGORY elements.
    var $currentCategories = array();
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    // Base URL of the Exalead CGI.
    var $base_cgi = '';

    // Query used to dump the whole indexed database.
    var $query_all = '';

    /**** Constructors *********/

    /**
     * @param string $base_cgi           base URL of the Exalead CGI
     * @param string $override_query_all "dump everything" query; when empty,
     *                                   $GLOBALS['query_all'] is used
     */
    function __construct($base_cgi = '', $override_query_all = '')
    {
        $this->data                  = new ExaleadData();
        $this->currentGroup          = new ExaleadGroup();
        $this->currentCategories     = array();
        $this->currentSpelling       = new ExaleadSpelling();
        $this->currentHit            = new ExaleadHit();
        $this->currentHitField       = new ExaleadHitField();
        $this->currentHitGroup       = new ExaleadHitGroup();
        $this->currentHitCategory    = new ExaleadHitCategory();
        $this->currentAction         = new ExaleadAction();
        $this->currentTextSegment    = new ExaleadTextSegment();
        $this->currentQuery          = new ExaleadQuery();
        $this->currentQueryTerm      = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword        = new ExaleadKeyword();

        $this->base_cgi = $base_cgi;
        if (!empty($override_query_all)) {
            $this->query_all = $override_query_all;
        } else {
            // Guarded so the class can be loaded without the file-level default.
            $this->query_all = isset($GLOBALS['query_all']) ? $GLOBALS['query_all'] : '';
        }
    }

    /**
     * PHP 4-style constructor, kept so existing code calling
     * parent::Exalead() / $this->Exalead() keeps working.
     *
     * PHP 8 no longer treats a method named after its class as a
     * constructor, so the real initialisation lives in __construct().
     * It is declared after __construct() on purpose: that order avoids the
     * "redefining constructor" strict notice on old PHP 5 versions.
     */
    function Exalead($base_cgi = '', $override_query_all = '')
    {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    /**
     * Change the base URL of the Exalead CGI.
     *
     * @param string $base_cgi
     */
    function set_base_cgi($base_cgi)
    {
        $this->base_cgi = $base_cgi;
    }

    /**
     * Run a search driven by the current HTTP request.
     *
     * A non-empty $_REQUEST[$varname] starts a new search; otherwise the
     * presence of $_REQUEST['_C'] (an Exalead context) continues one.
     *
     * @param string $varname name of the request variable holding the query
     * @return bool true if the request asked for a search, false otherwise
     */
    function query($varname = 'query')
    {
        if (!empty($_REQUEST[$varname])) {
            $query = $_REQUEST[$varname];
            // Only undo magic_quotes_gpc escaping where it can actually be
            // active. The feature was removed in PHP 5.4; stripping slashes
            // unconditionally would eat legitimate backslashes.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
                $query = stripslashes($query);
            }
            $this->first_query($query);
            return true;
        } elseif (isset($_REQUEST['_C'])) {
            $this->handle_request();
            return true;
        }
        return false;
    }

    /**
     * Send a fresh query to Exalead and parse the answer into $data.
     *
     * @param string $query  search expression
     * @param int    $offset result offset, sent as "_s" when positive
     * @return bool false if no base URL is configured or the response could
     *              not be fetched, true once the response has been parsed
     */
    function first_query($query, $offset = 0)
    {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi."?_q=".urlencode($query)."&_f=xml2";
        if ($offset > 0) {
            $query_exa .= "&_s=".((int) $offset);
        }

        return $this->fetchAndParse($query_exa);
    }

    /**
     * Fetch every result of the indexed database, using $query_all.
     */
    function get_db_dump()
    {
        $this->first_query($this->query_all);
    }

    /**
     * Continue an existing search from the Exalead parameters found in the
     * request: _C (context, mandatory), and optionally _c, _s and _z.
     *
     * @return bool false if no base URL or no context is available or the
     *              response could not be fetched, true once it is parsed
     */
    function handle_request()
    {
        if (empty($this->base_cgi)) {
            return false;
        }
        if (empty($_REQUEST['_C'])) {
            return false; // _C is the Exalead context
        }

        // Start the query string with '?' unless the base URL already has
        // one; first_query() appends '?_q=' to the very same base URL.
        $separator = (strpos($this->base_cgi, '?') === false) ? '?' : '&';

        // NOTE(review): only spaces are re-escaped in the context, as in the
        // original code; other reserved characters pass through unchanged.
        // Confirm what Exalead expects before switching to rawurlencode().
        $query_exa = $this->base_cgi.$separator.'_C='.str_replace(' ', '%20', $_REQUEST['_C']).'&_f=xml2';
        if (!empty($_REQUEST['_c'])) {
            $query_exa .= "&_c=".urlencode($_REQUEST['_c']);
        }
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= "&_s=".((int) $_REQUEST['_s']);
        }
        if (isset($_REQUEST['_z'])) {
            // Escaped so a request value cannot inject extra parameters.
            $query_exa .= "&_z=".urlencode($_REQUEST['_z']);
        }

        return $this->fetchAndParse($query_exa);
    }

    /**
     * Download $url and feed the body to the XML parser.
     *
     * @param string $url fully built Exalead request URL
     * @return bool false when the download failed, true otherwise
     */
    function fetchAndParse($url)
    {
        $xml_response = file_get_contents($url);
        if ($xml_response === false) {
            // file_get_contents() has already raised a warning; do not hand
            // a non-document to the parser, which would die() on it.
            return false;
        }
        $this->parse($xml_response);
        return true;
    }

    /******** Parser plumbing ********/

    /**
     * Create the expat parser and register this object's handlers on it.
     */
    function createParser()
    {
        $this->parserId = xml_parser_create();
        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
    }

    /**
     * Release the parser created by createParser().
     */
    function freeParser()
    {
        // Before PHP 8 the parser is a resource that must be freed; since
        // PHP 8 it is an object released when the last reference goes away.
        if (is_resource($this->parserId)) {
            xml_parser_free($this->parserId);
        }
        $this->parserId = null;
    }

    /**
     * Parse a complete XML document; terminates the script with die() on a
     * parse error.
     *
     * @param string $string XML document
     */
    function parseString($string)
    {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                        xml_error_string(xml_get_error_code($this->parserId)),
                        xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Entry point of the parser ***********/

    /**
     * Parse an Exalead XML response into $data.
     *
     * @param string $string XML document
     */
    function parse($string)
    {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Per-element handlers (may be overridden) ******/

    /** QUERY: record query text, context, time and optional flags. */
    function startQuery(&$attrs)
    {
        $this->currentQuery->query   = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time    = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        if (isset($attrs['BROWSED']))     $this->currentQuery->browsed     = $attrs['BROWSED'];
    }

    /** QUERYTERM: record level and regexp of a query term. */
    function startQueryTerm(&$attrs)
    {
        $this->currentQueryTerm->level  = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    /** QUERYPARAMETER: record the name and optional value. */
    function startQueryParameter(&$attrs)
    {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
    }

    /** KEYWORD: record display data and the refine/exclude/reset links. */
    function startKeyword(&$attrs)
    {
        if (isset($attrs['NAME'])) $this->currentKeyword->name = $attrs['NAME'];
        $this->currentKeyword->display   = utf8_decode($attrs['DISPLAY']);
        $this->currentKeyword->count     = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF']))  $this->currentKeyword->refine_href  = convert_url($attrs['REFINEHREF']);
        if (isset($attrs['EXCLUDEHREF'])) $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        if (isset($attrs['RESETHREF']))   $this->currentKeyword->reset_href   = $attrs['RESETHREF'];
    }

    /** HITS: record the result-set counters directly on $data. */
    function startHits(&$attrs)
    {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits    = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) $this->data->interrupted = $attrs['INTERRUPTED'];
        $this->data->last  = $attrs['LAST'];
        $this->data->end   = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    /** HIT: record URL and score of one result. */
    function startHit(&$attrs)
    {
        $this->currentHit->url   = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    /** HITGROUP: record title and group id. */
    function startHitGroup(&$attrs)
    {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid   = $attrs['GID'];
    }

    /** HITCATEGORY: record the category attached to a hit. */
    function startHitCategory(&$attrs)
    {
        $this->currentHitCategory->name    = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref    = $attrs['CREF'];
        $this->currentHitCategory->gid     = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
    }

    /** ACTION: record display text, kind and optional exec link. */
    function startAction(&$attrs)
    {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind    = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) $this->currentAction->execHref = $attrs['EXECHREF'];
    }

    /** HITFIELD: record the name and optional value. */
    function startHitField(&$attrs)
    {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) $this->currentHitField->value = utf8_decode($attrs['VALUE']);
    }

    /** TEXTSEG: record whether the segment is highlighted. */
    function startTextSeg(&$attrs)
    {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    /** TEXTCUT: nothing to do on open; see endTextCut(). */
    function startTextCut(&$attrs) { }

    /** SPELLINGSUGGESTIONVARIANT: record display text and query link. */
    function startSpellingSuggestionVariant(&$attrs)
    {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    /** GROUP: record the attributes of a refinement group. */
    function startGroup(&$attrs)
    {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF']))  $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        if (isset($attrs['RESETHREF'])) $this->currentGroup->setResetHref($attrs['RESETHREF']);
    }

    /**
     * CATEGORY: build a new category and push it on the stack of open
     * categories (categories can be nested).
     */
    function startCategory(&$attrs)
    {
        $currentCategory = new ExaleadCategory();
        $currentCategory->name      = utf8_decode($attrs['NAME']);
        $currentCategory->display   = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count     = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref      = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        $currentCategory->exclude_href = '_c=-'.$currentCategory->cref;
        if (isset($attrs['RESETHREF'])) {
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid    = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];
        $this->currentCategories[] = $currentCategory;
    }

    /** SEARCH: nothing to do. */
    function startSearch(&$attrs) { }

    /**
     * expat "element opened" callback: dispatch to the start<Tag>() method.
     * Element names arrive upper-cased (expat case folding is on by default).
     */
    function startElement($parser, $name, $attrs)
    {
        $handlers = array(
            'QUERY'                     => 'startQuery',
            'QUERYTERM'                 => 'startQueryTerm',
            'QUERYPARAMETER'            => 'startQueryParameter',
            'KEYWORD'                   => 'startKeyword',
            'HITS'                      => 'startHits',
            'HIT'                       => 'startHit',
            'HITFIELD'                  => 'startHitField',
            'HITGROUP'                  => 'startHitGroup',
            'HITCATEGORY'               => 'startHitCategory',
            'ACTION'                    => 'startAction',
            'TEXTSEG'                   => 'startTextSeg',
            'TEXTCUT'                   => 'startTextCut',
            'SPELLINGSUGGESTIONVARIANT' => 'startSpellingSuggestionVariant',
            'GROUP'                     => 'startGroup',
            'CATEGORY'                  => 'startCategory',
            'SEARCH'                    => 'startSearch',
        );

        if (isset($handlers[$name])) {
            $method = $handlers[$name];
            $this->$method($attrs);
        }
    }

    // The end*() handlers below store a *clone* of the scratch object before
    // clearing it. Since PHP 5, objects are passed as handles, so storing the
    // scratch object itself and then calling clear() on it would also wipe
    // the stored entry (unless the add*() methods copy internally, which is
    // not visible from this file). The clone is held in a local variable so
    // it can also be passed to by-reference parameters.

    /** QUERY closed: store the query on $data. */
    function endQuery()
    {
        $query = clone $this->currentQuery;
        $this->data->query = $query;
        $this->currentQuery->clear();
    }

    /** QUERYTERM closed: attach the term to the current query. */
    function endQueryTerm()
    {
        $term = clone $this->currentQueryTerm;
        $this->currentQuery->addTerm($term);
        $this->currentQueryTerm->clear();
    }

    /** QUERYPARAMETER closed: attach the parameter to the current query. */
    function endQueryParameter()
    {
        $parameter = clone $this->currentQueryParameter;
        $this->currentQuery->addParameter($parameter);
        $this->currentQueryParameter->clear();
    }

    /** KEYWORD closed: store the keyword on $data. */
    function endKeyword()
    {
        $keyword = clone $this->currentKeyword;
        $this->data->addKeyword($keyword);
        $this->currentKeyword->clear();
    }

    /** HITS closed: nothing to do. */
    function endHits()
    {
    }

    /** HIT closed: store the hit on $data. */
    function endHit()
    {
        $hit = clone $this->currentHit;
        $this->data->addHit($hit);
        $this->currentHit->clear();
    }

    /** HITGROUP closed: attach the group to the current hit. */
    function endHitGroup()
    {
        $hitGroup = clone $this->currentHitGroup;
        $this->currentHit->addHitGroup($hitGroup);
        $this->currentHitGroup->clear();
    }

    /** HITCATEGORY closed: attach the category to the current hit group. */
    function endHitCategory()
    {
        $hitCategory = clone $this->currentHitCategory;
        $this->currentHitGroup->addHitCategory($hitCategory);
        $this->currentHitCategory->clear();
    }

    /** ACTION closed: attach the action to the current hit. */
    function endAction()
    {
        $action = clone $this->currentAction;
        $this->currentHit->addAction($action);
        $this->currentAction->clear();
    }

    /** HITFIELD closed: attach the field to the current hit. */
    function endHitField()
    {
        $hitField = clone $this->currentHitField;
        $this->currentHit->addHitField($hitField);
        $this->currentHitField->clear();
    }

    /** TEXTSEG closed: attach the text segment to the current hit field. */
    function endTextSeg()
    {
        $segment = clone $this->currentTextSegment;
        $this->currentHitField->addTextSegment($segment);
        $this->currentTextSegment->clear();
    }

    /** TEXTCUT closed: flag the current hit field as containing a cut. */
    function endTextCut()
    {
        $this->currentHitField->setHasTextCut(true);
    }

    /** SPELLINGSUGGESTIONVARIANT closed: store the suggestion on $data. */
    function endSpellingSuggestionVariant()
    {
        $spelling = clone $this->currentSpelling;
        $this->data->addSpelling($spelling);
        $this->currentSpelling->clear();
    }

    /** GROUP closed: store the group (with its categories) on $data. */
    function endGroup()
    {
        $group = clone $this->currentGroup;
        $this->data->addGroup($group);
        $this->currentGroup->clear();
    }

    /**
     * CATEGORY closed: pop it from the stack and attach it to its parent,
     * which is the enclosing category if one is still open, otherwise the
     * current group.
     */
    function endCategory()
    {
        $depth = count($this->currentCategories);
        if ($depth == 0) {
            // Closing tag without a matching startCategory(): nothing to attach.
            return;
        }

        $category = array_pop($this->currentCategories);
        if ($depth == 1) {
            // The parent element is a GROUP.
            $this->currentGroup->addCategory($category);
        } else {
            // The stack only ever grows by [] and shrinks by array_pop(), so
            // its keys are 0..n-1 and the parent is the new last entry.
            $this->currentCategories[$depth - 2]->addCategory($category);
        }
    }

    /** SEARCH closed: nothing to do. */
    function endSearch() { }

    /**
     * expat "element closed" callback: dispatch to the end<Tag>() method.
     */
    function endElement($parser, $name)
    {
        $handlers = array(
            'QUERY'                     => 'endQuery',
            'QUERYTERM'                 => 'endQueryTerm',
            'QUERYPARAMETER'            => 'endQueryParameter',
            'KEYWORD'                   => 'endKeyword',
            'HITS'                      => 'endHits',
            'HIT'                       => 'endHit',
            'HITFIELD'                  => 'endHitField',
            'HITGROUP'                  => 'endHitGroup',
            'HITCATEGORY'               => 'endHitCategory',
            'ACTION'                    => 'endAction',
            'TEXTSEG'                   => 'endTextSeg',
            'TEXTCUT'                   => 'endTextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'endSpellingSuggestionVariant',
            // categories used to refine the search
            'GROUP'                     => 'endGroup',
            'CATEGORY'                  => 'endCategory',
            'SEARCH'                    => 'endSearch',
        );

        if (isset($handlers[$name])) {
            $method = $handlers[$name];
            $this->$method();
        }
    }

    /**
     * expat character-data callback: append the text, converted from UTF-8
     * by utf8_decode(), to the current text segment.
     */
    function parsePCData($parser, $text)
    {
        $this->currentTextSegment->append(utf8_decode($text));
    }
}
491
492 ?>