drop url modifier \o/
[platal.git] / include / exalead / exalead.parser.inc.php
CommitLineData
0337d704 1<?php
2
34265dc0 3require_once(dirname(__FILE__).'/exalead.class.php');
0337d704 4
/**
 * Percent-encode literal plus signs in a URL fragment.
 *
 * Every '+' in $string is replaced by '%2B'; all other characters are
 * returned untouched.
 *
 * @param string $string URL (or URL fragment) to convert.
 * @return string The input with each '+' replaced by '%2B'.
 */
function convert_url($string)
{
    $literalPlus = '+';
    $encodedPlus = '%2B';

    return str_replace($literalPlus, $encodedPlus, $string);
}
9
// Default query used to dump the indexed database. The Exalead constructor
// copies this value into its query_all property when no override is given.
$GLOBALS['query_all'] = 'a*';
11
/**
 * Client and event-based XML handler for the Exalead search CGI.
 *
 * The class builds a request URL from the configured base URL, fetches the
 * XML response with file_get_contents(), and feeds it to an expat parser whose
 * element callbacks fill $this->data (an ExaleadData instance).
 *
 * The start*() / end*() element methods may be overridden by subclasses.
 */
class Exalead
{
    // XML parser handle created by createParser().
    var $parserId;

    // ExaleadData instance receiving the parsed response.
    var $data;

    // Scratch objects: each one is filled while the matching XML element is
    // open and handed over to its parent by the corresponding end*() method.
    var $currentGroup;
    // Stack of ExaleadCategory objects for the <Category> elements currently open.
    var $currentCategories = array();
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    // Base URL of the Exalead product CGI.
    var $base_cgi = '';

    // Query used to dump the indexed database.
    var $query_all = '';

    /**** Constructor ****/

    /**
     * Initialise the result container and all scratch objects.
     *
     * @param string $base_cgi           Base URL of the Exalead CGI. Request
     *                                   parameters are appended with '&', so it
     *                                   presumably already carries a query string.
     * @param string $override_query_all Dump query to use instead of
     *                                   $GLOBALS['query_all']; ignored when empty.
     */
    function __construct($base_cgi = '', $override_query_all = '')
    {
        $this->data                  = new ExaleadData();
        $this->currentGroup          = new ExaleadGroup();
        $this->currentCategories     = array();
        $this->currentSpelling       = new ExaleadSpelling();
        $this->currentHit            = new ExaleadHit();
        $this->currentHitField       = new ExaleadHitField();
        $this->currentHitGroup       = new ExaleadHitGroup();
        $this->currentHitCategory    = new ExaleadHitCategory();
        $this->currentAction         = new ExaleadAction();
        $this->currentTextSegment    = new ExaleadTextSegment();
        $this->currentQuery          = new ExaleadQuery();
        $this->currentQueryTerm      = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword        = new ExaleadKeyword();

        $this->base_cgi  = $base_cgi;
        $this->query_all = !empty($override_query_all)
            ? $override_query_all
            : $GLOBALS['query_all'];
    }

    /**
     * Old-style constructor name, kept so that existing code calling
     * $this->Exalead(...) or parent::Exalead(...) keeps working. PHP 8 no
     * longer treats a method named after the class as the constructor, hence
     * the real work lives in __construct().
     */
    function Exalead($base_cgi = '', $override_query_all = '')
    {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ****/

    /**
     * Set the base URL of the Exalead CGI.
     *
     * @param string $base_cgi
     */
    function set_base_cgi($base_cgi)
    {
        $this->base_cgi = $base_cgi;
    }

    /**
     * Run the search described by the current HTTP request, if any.
     *
     * A non-empty string in $_REQUEST[$varname] starts a new search;
     * otherwise a '_C' request parameter continues an existing one.
     *
     * @param string $varname Name of the request parameter holding the query.
     * @return bool True if a request was sent and its response handled,
     *              false if there was nothing to do or the request failed.
     */
    function query($varname = 'query')
    {
        if (!empty($_REQUEST[$varname]) && is_string($_REQUEST[$varname])) {
            $query = $_REQUEST[$varname];
            // Request data is only backslash-escaped when magic_quotes_gpc is
            // on. That feature was removed in PHP 5.4, where stripping
            // slashes would eat legitimate backslashes from the query.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
                $query = stripslashes($query);
            }
            // "!== false" so that an override returning nothing counts as success.
            return $this->first_query($query) !== false;
        }

        if (isset($_REQUEST['_C'])) {
            return $this->handle_request() !== false;
        }

        return false;
    }

    /**
     * Send a fresh query to Exalead and parse the response into $this->data.
     *
     * @param string $query  Search expression.
     * @param int    $offset Value sent as the '_s' parameter when greater than 0.
     * @return bool False when no base URL is configured or the response could
     *              not be fetched, true once the response has been parsed.
     */
    function first_query($query, $offset = 0)
    {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi . '&_q=' . urlencode($query) . '&_f=xml2';
        if ($offset > 0) {
            // Cast like handle_request() does for '_s', so that nothing but a
            // number can end up in the URL.
            $query_exa .= '&_s=' . ((int) $offset);
        }

        return $this->fetchAndParse($query_exa);
    }

    /**
     * Fetch every result of the indexed database using the dump query.
     *
     * @return bool Same as first_query().
     */
    function get_db_dump()
    {
        return $this->first_query($this->query_all);
    }

    /**
     * Continue an existing search from the Exalead parameters of the current
     * HTTP request ('_C' context, plus optional '_c', '_s', '_z' and '_sf').
     *
     * @return bool False when no base URL is configured, when '_C' is missing
     *              or not a string, or when the response could not be
     *              fetched; true once the response has been parsed.
     */
    function handle_request()
    {
        if (empty($this->base_cgi)) {
            return false;
        }
        // _C is the Exalead context.
        if (empty($_REQUEST['_C']) || !is_string($_REQUEST['_C'])) {
            return false;
        }

        // NOTE(review): only spaces are escaped in the context, exactly as
        // before; confirm against the backend whether full percent-encoding
        // (rawurlencode) is acceptable before tightening this.
        $query_exa = $this->base_cgi
            . '&_C=' . str_replace(' ', '%20', $_REQUEST['_C'])
            . '&_f=xml2';

        if (!empty($_REQUEST['_c']) && is_scalar($_REQUEST['_c'])) {
            $query_exa .= '&_c=' . urlencode((string) $_REQUEST['_c']);
        }
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= '&_s=' . ((int) $_REQUEST['_s']);
        }
        // '_z' and '_sf' come straight from the client: encode them so they
        // cannot smuggle extra parameters into the backend URL.
        if (isset($_REQUEST['_z']) && is_scalar($_REQUEST['_z'])) {
            $query_exa .= '&_z=' . urlencode((string) $_REQUEST['_z']);
        }
        if (isset($_REQUEST['_sf']) && is_scalar($_REQUEST['_sf'])) {
            $query_exa .= '&_sf=' . urlencode((string) $_REQUEST['_sf']);
        }

        return $this->fetchAndParse($query_exa);
    }

    /**
     * Download $url and parse its body as an Exalead XML response.
     *
     * @param string $url Complete request URL.
     * @return bool False when file_get_contents() fails (it has already raised
     *              a warning), true once the body has been parsed.
     */
    function fetchAndParse($url)
    {
        $xml_response = file_get_contents($url);
        if ($xml_response === false) {
            // Do not hand a failed fetch to the XML parser: it would abort
            // the script with an unrelated "XML error" message.
            return false;
        }

        $this->parse($xml_response);
        return true;
    }

    /**** Parser plumbing ****/

    /**
     * Create the XML parser and register this object's element and
     * character-data callbacks on it.
     */
    function createParser()
    {
        $this->parserId = xml_parser_create();
        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
    }

    /**
     * Release the parser created by createParser().
     */
    function freeParser()
    {
        xml_parser_free($this->parserId);
    }

    /**
     * Parse a complete XML document with the current parser.
     *
     * Terminates the script with an "XML error" message when the document is
     * not well-formed.
     *
     * @param string $string Whole XML document.
     */
    function parseString($string)
    {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($this->parserId)),
                xml_get_current_line_number($this->parserId)));
        }
    }

    /**** Entry point of the parser ****/

    /**
     * Parse an Exalead XML response into $this->data.
     *
     * @param string $string Whole XML document.
     */
    function parse($string)
    {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /**** Per-element handlers (may be overridden) ****/

    /**
     * Opening <Query>: copy its attributes into the scratch query.
     */
    function startQuery(&$attrs)
    {
        $this->currentQuery->query   = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time    = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        }
        if (isset($attrs['BROWSED'])) {
            $this->currentQuery->browsed = $attrs['BROWSED'];
        }
    }

    /**
     * Opening <QueryTerm>: copy its attributes into the scratch query term.
     */
    function startQueryTerm(&$attrs)
    {
        $this->currentQueryTerm->level  = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    /**
     * Opening <QueryParameter>: copy its attributes into the scratch parameter.
     */
    function startQueryParameter(&$attrs)
    {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
        }
    }

    /**
     * Opening <Keyword>: copy its attributes into the scratch keyword.
     * Only the refine link goes through convert_url().
     */
    function startKeyword(&$attrs)
    {
        if (isset($attrs['NAME'])) {
            $this->currentKeyword->name = $attrs['NAME'];
        }
        $this->currentKeyword->display   = utf8_decode($attrs['DISPLAY']);
        $this->currentKeyword->count     = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF'])) {
            $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        }
        if (isset($attrs['EXCLUDEHREF'])) {
            $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentKeyword->reset_href = $attrs['RESETHREF'];
        }
    }

    /**
     * Opening <Hits>: store the result counters directly on $this->data.
     */
    function startHits(&$attrs)
    {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits    = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) {
            $this->data->interrupted = $attrs['INTERRUPTED'];
        }
        $this->data->last  = $attrs['LAST'];
        $this->data->end   = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    /**
     * Opening <Hit>: copy its attributes into the scratch hit.
     */
    function startHit(&$attrs)
    {
        $this->currentHit->url   = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    /**
     * Opening <HitGroup>: copy its attributes into the scratch hit group.
     */
    function startHitGroup(&$attrs)
    {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid   = $attrs['GID'];
    }

    /**
     * Opening <HitCategory>: copy its attributes into the scratch hit category.
     */
    function startHitCategory(&$attrs)
    {
        $this->currentHitCategory->name    = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref    = $attrs['CREF'];
        $this->currentHitCategory->gid     = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) {
            $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
        }
    }

    /**
     * Opening <Action>: copy its attributes into the scratch action.
     */
    function startAction(&$attrs)
    {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind    = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) {
            $this->currentAction->execHref = $attrs['EXECHREF'];
        }
    }

    /**
     * Opening <HitField>: copy its attributes into the scratch hit field.
     */
    function startHitField(&$attrs)
    {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) {
            $this->currentHitField->value = utf8_decode($attrs['VALUE']);
        }
    }

    /**
     * Opening <TextSeg>: record the highlight flag on the scratch text segment.
     */
    function startTextSeg(&$attrs)
    {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    /**
     * Opening <TextCut>: nothing to record; see endTextCut().
     */
    function startTextCut(&$attrs)
    {
    }

    /**
     * Opening <SpellingSuggestionVariant>: copy its attributes into the
     * scratch spelling suggestion.
     */
    function startSpellingSuggestionVariant(&$attrs)
    {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    /**
     * Opening <Group>: copy its attributes into the scratch group.
     */
    function startGroup(&$attrs)
    {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF'])) {
            $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        }
        if (isset($attrs['RESETHREF'])) {
            $this->currentGroup->setResetHref($attrs['RESETHREF']);
        }
    }

    /**
     * Opening <Category>: build a new ExaleadCategory from the attributes and
     * push it on the stack of open categories.
     */
    function startCategory(&$attrs)
    {
        $category = new ExaleadCategory();
        $category->name      = utf8_decode($attrs['NAME']);
        $category->display   = utf8_decode($attrs['DISPLAY']);
        $category->count     = $attrs['COUNT'];
        $category->automatic = $attrs['AUTOMATIC'];
        $category->cref      = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) {
            $category->refine_href = convert_url($attrs['REFINEHREF']);
        }
        // The exclude link is not read from the XML: it is derived from cref.
        $category->exclude_href = '_c=-' . $category->cref;
        if (isset($attrs['RESETHREF'])) {
            $category->reset_href = $attrs['RESETHREF'];
        }
        $category->gid    = $attrs['GID'];
        $category->gcount = $attrs['GCOUNT'];

        $this->currentCategories[] = $category;
    }

    /**
     * Opening <Search>: nothing to record.
     */
    function startSearch(&$attrs)
    {
    }

    /**
     * Map an element name, as reported by the XML parser, to the suffix
     * shared by its start*() and end*() handler methods.
     *
     * @param string $name Element name (upper-case, as the handlers receive it).
     * @return string|null Handler suffix, or null for elements that are ignored.
     */
    function handlerSuffix($name)
    {
        static $suffixes = array(
            'QUERY'                     => 'Query',
            'QUERYTERM'                 => 'QueryTerm',
            'QUERYPARAMETER'            => 'QueryParameter',
            'KEYWORD'                   => 'Keyword',
            'HITS'                      => 'Hits',
            'HIT'                       => 'Hit',
            'HITFIELD'                  => 'HitField',
            'HITGROUP'                  => 'HitGroup',
            'HITCATEGORY'               => 'HitCategory',
            'ACTION'                    => 'Action',
            'TEXTSEG'                   => 'TextSeg',
            'TEXTCUT'                   => 'TextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'SpellingSuggestionVariant',
            // categories used to refine a search
            'GROUP'                     => 'Group',
            'CATEGORY'                  => 'Category',
            'SEARCH'                    => 'Search',
        );

        return isset($suffixes[$name]) ? $suffixes[$name] : null;
    }

    /**
     * XML parser callback for an opening tag: dispatch to the matching
     * start*() method; unknown elements are ignored.
     */
    function startElement($parser, $name, $attrs)
    {
        $suffix = $this->handlerSuffix($name);
        if ($suffix !== null) {
            $method = 'start' . $suffix;
            $this->$method($attrs);
        }
    }

    // NOTE(review): every end*() handler below hands its scratch object to
    // the parent and then calls clear() on that same object. On PHP 5+ objects
    // are passed by handle, so unless the receiving side copies them (not
    // visible in this file) the stored object is the one being cleared --
    // confirm against exalead.class.php.

    /**
     * Closing </Query>: publish the scratch query on $this->data.
     */
    function endQuery()
    {
        $this->data->query = $this->currentQuery;
        $this->currentQuery->clear();
    }

    /**
     * Closing </QueryTerm>: attach the scratch term to the scratch query.
     */
    function endQueryTerm()
    {
        $this->currentQuery->addTerm($this->currentQueryTerm);
        $this->currentQueryTerm->clear();
    }

    /**
     * Closing </QueryParameter>: attach the scratch parameter to the scratch query.
     */
    function endQueryParameter()
    {
        $this->currentQuery->addParameter($this->currentQueryParameter);
        $this->currentQueryParameter->clear();
    }

    /**
     * Closing </Keyword>: add the scratch keyword to $this->data.
     */
    function endKeyword()
    {
        $this->data->addKeyword($this->currentKeyword);
        $this->currentKeyword->clear();
    }

    /**
     * Closing </Hits>: nothing to do.
     */
    function endHits()
    {
    }

    /**
     * Closing </Hit>: add the scratch hit to $this->data.
     */
    function endHit()
    {
        $this->data->addHit($this->currentHit);
        $this->currentHit->clear();
    }

    /**
     * Closing </HitGroup>: attach the scratch hit group to the scratch hit.
     */
    function endHitGroup()
    {
        $this->currentHit->addHitGroup($this->currentHitGroup);
        $this->currentHitGroup->clear();
    }

    /**
     * Closing </HitCategory>: attach the scratch hit category to the scratch hit group.
     */
    function endHitCategory()
    {
        $this->currentHitGroup->addHitCategory($this->currentHitCategory);
        $this->currentHitCategory->clear();
    }

    /**
     * Closing </Action>: attach the scratch action to the scratch hit.
     */
    function endAction()
    {
        $this->currentHit->addAction($this->currentAction);
        $this->currentAction->clear();
    }

    /**
     * Closing </HitField>: attach the scratch hit field to the scratch hit.
     */
    function endHitField()
    {
        $this->currentHit->addHitField($this->currentHitField);
        $this->currentHitField->clear();
    }

    /**
     * Closing </TextSeg>: attach the scratch text segment to the scratch hit field.
     */
    function endTextSeg()
    {
        $this->currentHitField->addTextSegment($this->currentTextSegment);
        $this->currentTextSegment->clear();
    }

    /**
     * Closing </TextCut>: flag the scratch hit field as containing a text cut.
     */
    function endTextCut()
    {
        $this->currentHitField->setHasTextCut(true);
    }

    /**
     * Closing </SpellingSuggestionVariant>: add the scratch suggestion to $this->data.
     */
    function endSpellingSuggestionVariant()
    {
        $this->data->addSpelling($this->currentSpelling);
        $this->currentSpelling->clear();
    }

    /**
     * Closing </Group>: add the scratch group to $this->data.
     */
    function endGroup()
    {
        $this->data->addGroup($this->currentGroup);
        $this->currentGroup->clear();
    }

    /**
     * Closing </Category>: pop the innermost open category and attach it to
     * its parent -- the enclosing category if one is still open, otherwise
     * the scratch group.
     */
    function endCategory()
    {
        $category = array_pop($this->currentCategories);

        if (count($this->currentCategories) == 0) {
            // No category left open: the parent element is the <Group>.
            $this->currentGroup->addCategory($category);
            return;
        }

        // The parent is whatever is now on top of the stack; take it by
        // reference so the stacked object itself receives the child.
        end($this->currentCategories);
        $parentCategory = &$this->currentCategories[key($this->currentCategories)];
        $parentCategory->addCategory($category);
    }

    /**
     * Closing </Search>: nothing to do.
     */
    function endSearch()
    {
    }

    /**
     * XML parser callback for a closing tag: dispatch to the matching end*()
     * method; unknown elements are ignored.
     */
    function endElement($parser, $name)
    {
        $suffix = $this->handlerSuffix($name);
        if ($suffix !== null) {
            $method = 'end' . $suffix;
            $this->$method();
        }
    }

    /**
     * XML parser callback for character data: append the text, decoded with
     * utf8_decode(), to the scratch text segment.
     */
    function parsePCData($parser, $text)
    {
        $this->currentTextSegment->append(utf8_decode($text));
    }
}
494
495?>