Not used, not up to date, and it just clutters the repository. Trash it.
[platal.git] / include / exalead / exalead.parser.inc.php
1 <?php
2
3 require_once('exalead.class.php');
4
/**
 * Percent-encodes every literal "+" of a string as "%2B".
 *
 * Used on the REFINEHREF attributes returned by Exalead before they are
 * stored, so that a "+" is not later read back as an encoded space.
 *
 * @param string $string text (typically a URL fragment) to convert
 * @return string the input with each "+" replaced by "%2B"
 */
function convert_url($string)
{
    $plus = '+';
    $encodedPlus = '%2B';

    return str_replace($plus, $encodedPlus, $string);
}
8
// Default "match everything" query; the Exalead constructor copies it into
// its query_all property unless the caller passes an override.
$GLOBALS["query_all"] = "a*";
10
/**
 * Client and SAX-style response parser for the Exalead Corporate search CGI.
 *
 * The class builds a request URL from $base_cgi, fetches the "xml2" answer
 * with file_get_contents() and walks it with PHP's XML parser. Each known
 * tag is routed to a start<Tag>() / end<Tag>() pair that fills the
 * "current*" scratch objects and, on the closing tag, attaches a copy of
 * the finished object to its parent (ultimately $this->data).
 *
 * Notes:
 *  - Tag and attribute names are matched in upper case, which is what the
 *    XML parser delivers with its default case folding.
 *  - Textual attributes are converted from UTF-8 to ISO-8859-1 with
 *    utf8_decode(); that function is deprecated as of PHP 8.2.
 *  - The start*() / end*() methods may be overridden by subclasses.
 *  - Requires PHP 5 or later (uses "clone").
 */
class Exalead {

    /** XML parser handle created by createParser(). */
    var $parserId;

    /** ExaleadData instance receiving the parsed response. */
    var $data;

    // Scratch objects describing the element currently being parsed.
    var $currentGroup;
    var $currentCategories = array(); // stack of open (possibly nested) CATEGORY elements
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    /** Base URL of the Exalead CGI. */
    var $base_cgi = '';

    /** Query used to dump the whole indexed database. */
    var $query_all = '';

    /**** Constructor *********/

    /**
     * @param string $base_cgi           base URL of the Exalead CGI
     * @param string $override_query_all "dump everything" query; when empty,
     *                                   $GLOBALS['query_all'] is used instead
     */
    function __construct($base_cgi = '', $override_query_all = '') {
        $this->data = new ExaleadData();
        $this->currentGroup = new ExaleadGroup();
        $this->currentCategories = array();
        $this->currentSpelling = new ExaleadSpelling();
        $this->currentHit = new ExaleadHit();
        $this->currentHitField = new ExaleadHitField();
        $this->currentHitGroup = new ExaleadHitGroup();
        $this->currentHitCategory = new ExaleadHitCategory();
        $this->currentAction = new ExaleadAction();
        $this->currentTextSegment = new ExaleadTextSegment();
        $this->currentQuery = new ExaleadQuery();
        $this->currentQueryTerm = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword = new ExaleadKeyword();

        $this->base_cgi = $base_cgi;
        if (!empty($override_query_all)) {
            $this->query_all = $override_query_all;
        } else {
            $this->query_all = $GLOBALS['query_all'];
        }
    }

    /**
     * PHP 4-style constructor name, kept so that existing code calling
     * parent::Exalead(...) or $this->Exalead(...) keeps working. A method
     * named after its class is no longer a constructor on PHP 8, hence
     * the real work lives in __construct().
     */
    function Exalead($base_cgi = '', $override_query_all = '') {
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    /** Sets the base URL of the Exalead CGI. */
    function set_base_cgi($base_cgi) {
        $this->base_cgi = $base_cgi;
    }

    /**
     * Runs a search according to the current HTTP request.
     *
     * A non-empty $_REQUEST[$varname] starts a new search; otherwise a "_C"
     * request parameter continues an existing Exalead context.
     *
     * @param string $varname name of the request parameter holding the query
     * @return bool true if a request was attempted, false otherwise
     */
    function query($varname = 'query') {
        if (!empty($_REQUEST[$varname])) {
            $query = $_REQUEST[$varname];
            // Backslashes are only injected by magic_quotes_gpc, which was
            // removed in PHP 5.4; stripping unconditionally would eat
            // backslashes the user really typed. The version test also keeps
            // get_magic_quotes_gpc() from being called where it is
            // deprecated or missing.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
                $query = stripslashes($query);
            }
            $this->first_query($query);
            return true;
        }
        if (isset($_REQUEST['_C'])) {
            $this->handle_request();
            return true;
        }
        return false;
    }

    /**
     * Sends the initial query of a search and parses the answer.
     *
     * @param string $query  user query
     * @param int    $offset index of the first result to fetch (sent as "_s" when > 0)
     * @return bool false if no base URL is set or the response could not be
     *              fetched, true once the response has been parsed
     */
    function first_query($query, $offset = 0) {
        if (empty($this->base_cgi)) {
            return false;
        }

        $query_exa = $this->base_cgi . "?_q=" . urlencode($query) . "&_f=xml2";
        if ($offset > 0) {
            // Cast like handle_request() does, so that only a number reaches the URL.
            $query_exa .= "&_s=" . ((int) $offset);
        }

        return $this->_fetchAndParse($query_exa);
    }

    /**
     * Fetches every result of the indexed database using $this->query_all.
     *
     * @return bool same as first_query()
     */
    function get_db_dump() {
        return $this->first_query($this->query_all);
    }

    /**
     * Continues a search from the Exalead context found in $_REQUEST['_C'],
     * optionally starting at result $_REQUEST['_s'].
     *
     * @return bool false if the base URL or the context is missing or the
     *              response could not be fetched, true once parsed
     */
    function handle_request() {
        if (empty($this->base_cgi)) {
            return false;
        }
        if (empty($_REQUEST['_C'])) { // _C is the Exalead context
            return false;
        }

        // NOTE(review): the context is appended as "/_C=..." (not "?_C=...")
        // and only spaces are escaped; presumably this is the form the
        // Exalead CGI expects - confirm before changing.
        $query_exa = $this->base_cgi . '/_C=' . str_replace(' ', '%20', $_REQUEST['_C']) . '&_f=xml2';
        if (!empty($_REQUEST['_s'])) {
            $query_exa .= "&_s=" . ((int) $_REQUEST['_s']);
        }

        return $this->_fetchAndParse($query_exa);
    }

    /**
     * Downloads $url and feeds the body to parse().
     *
     * @param string $url complete request URL
     * @return bool false when nothing could be fetched (parse() would
     *              otherwise die() on the empty document), true otherwise
     */
    function _fetchAndParse($url) {
        $xml_response = file_get_contents($url);
        if ($xml_response === false || $xml_response === '') {
            return false;
        }
        $this->parse($xml_response);
        return true;
    }

    /******** Parser helpers ********/

    /** Creates the XML parser and registers this object's callbacks on it. */
    function createParser() {
        $this->parserId = xml_parser_create();
        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
    }

    /** Releases the XML parser created by createParser(). */
    function freeParser() {
        xml_parser_free($this->parserId);
    }

    /**
     * Parses a complete XML document; terminates the script with die() and
     * an "XML error" message if the document is not well-formed.
     *
     * @param string $string whole XML document
     */
    function parseString($string) {
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($this->parserId)),
                xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Entry point of the parser ***********/

    /**
     * Parses an Exalead XML response into $this->data.
     *
     * @param string $string whole XML document
     */
    function parse($string) {
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Per-tag handlers (may be overridden) ******/

    function startQuery(&$attrs) {
        $this->currentQuery->query = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time = $attrs['TIME'];
        if (isset($attrs['INTERRUPTED'])) $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        if (isset($attrs['BROWSED'])) $this->currentQuery->browsed = $attrs['BROWSED'];
    }

    function startQueryTerm(&$attrs) {
        $this->currentQueryTerm->level = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    function startQueryParameter(&$attrs) {
        $this->currentQueryParameter->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
    }

    function startKeyword(&$attrs) {
        if (isset($attrs['NAME'])) $this->currentKeyword->name = $attrs['NAME'];
        $this->currentKeyword->display = utf8_decode($attrs['DISPLAY']);
        $this->currentKeyword->count = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        if (isset($attrs['REFINEHREF'])) $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        if (isset($attrs['EXCLUDEHREF'])) $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        if (isset($attrs['RESETHREF'])) $this->currentKeyword->reset_href = $attrs['RESETHREF'];
    }

    function startHits(&$attrs) {
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits = $attrs['NHITS'];
        if (isset($attrs['INTERRUPTED'])) $this->data->interrupted = $attrs['INTERRUPTED'];
        $this->data->last = $attrs['LAST'];
        $this->data->end = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    function startHit(&$attrs) {
        $this->currentHit->url = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    function startHitGroup(&$attrs) {
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid = $attrs['GID'];
    }

    function startHitCategory(&$attrs) {
        $this->currentHitCategory->name = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref = $attrs['CREF'];
        $this->currentHitCategory->gid = $attrs['GID'];
        if (isset($attrs['BROWSEHREF'])) $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
    }

    function startAction(&$attrs) {
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind = $attrs['KIND'];
        if (isset($attrs['EXECHREF'])) $this->currentAction->execHref = $attrs['EXECHREF'];
    }

    function startHitField(&$attrs) {
        $this->currentHitField->name = $attrs['NAME'];
        if (isset($attrs['VALUE'])) $this->currentHitField->value = utf8_decode($attrs['VALUE']);
    }

    function startTextSeg(&$attrs) {
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    function startTextCut(&$attrs) {}

    function startSpellingSuggestionVariant(&$attrs) {
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    function startGroup(&$attrs) {
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if (isset($attrs['CLIPHREF'])) $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        if (isset($attrs['RESETHREF'])) $this->currentGroup->setResetHref($attrs['RESETHREF']);
    }

    /**
     * Opens a CATEGORY: a fresh ExaleadCategory is pushed on the
     * $currentCategories stack; endCategory() attaches it to its parent.
     */
    function startCategory(&$attrs) {
        $currentCategory = new ExaleadCategory();
        $currentCategory->name = utf8_decode($attrs['NAME']);
        $currentCategory->display = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref = $attrs['CREF'];
        if (isset($attrs['REFINEHREF'])) $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        $currentCategory->exclude_href = '_c=-' . $currentCategory->cref;
        if (isset($attrs['RESETHREF'])) {
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];
        $this->currentCategories[] = $currentCategory;
    }

    function startSearch(&$attrs) {}

    /**
     * Maps an (upper-cased) tag name to the suffix of its start/end handler
     * methods, or null for tags this parser ignores.
     */
    function _handlerSuffix($name) {
        static $suffixes = array(
            // query description
            'QUERY'                     => 'Query',
            'QUERYTERM'                 => 'QueryTerm',
            'QUERYPARAMETER'            => 'QueryParameter',
            // keywords
            'KEYWORD'                   => 'Keyword',
            // results
            'HITS'                      => 'Hits',
            'HIT'                       => 'Hit',
            'HITFIELD'                  => 'HitField',
            'HITGROUP'                  => 'HitGroup',
            'HITCATEGORY'               => 'HitCategory',
            'ACTION'                    => 'Action',
            'TEXTSEG'                   => 'TextSeg',
            'TEXTCUT'                   => 'TextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'SpellingSuggestionVariant',
            // refinement categories
            'GROUP'                     => 'Group',
            'CATEGORY'                  => 'Category',
            'SEARCH'                    => 'Search',
        );
        return isset($suffixes[$name]) ? $suffixes[$name] : null;
    }

    /** XML parser callback: dispatches an opening tag to start<Tag>($attrs). */
    function startElement($parser, $name, $attrs) {
        $suffix = $this->_handlerSuffix($name);
        if ($suffix !== null) {
            $handler = 'start' . $suffix;
            $this->$handler($attrs);
        }
    }

    // The end*() handlers below hand a *copy* of the finished scratch object
    // to its parent before clearing the scratch object for reuse. Since
    // PHP 5 objects are passed as handles, so handing over the scratch
    // object itself and then calling clear() on it would also wipe what was
    // just stored. The copy goes through a local variable so that it can be
    // passed even if the receiving method takes its argument by reference.

    function endQuery() {
        $this->data->query = clone $this->currentQuery;
        $this->currentQuery->clear();
    }

    function endQueryTerm() {
        $term = clone $this->currentQueryTerm;
        $this->currentQuery->addTerm($term);
        $this->currentQueryTerm->clear();
    }

    function endQueryParameter() {
        $parameter = clone $this->currentQueryParameter;
        $this->currentQuery->addParameter($parameter);
        $this->currentQueryParameter->clear();
    }

    function endKeyword() {
        $keyword = clone $this->currentKeyword;
        $this->data->addKeyword($keyword);
        $this->currentKeyword->clear();
    }

    function endHits() {
    }

    function endHit() {
        $hit = clone $this->currentHit;
        $this->data->addHit($hit);
        $this->currentHit->clear();
    }

    function endHitGroup() {
        $hitGroup = clone $this->currentHitGroup;
        $this->currentHit->addHitGroup($hitGroup);
        $this->currentHitGroup->clear();
    }

    function endHitCategory() {
        $hitCategory = clone $this->currentHitCategory;
        $this->currentHitGroup->addHitCategory($hitCategory);
        $this->currentHitCategory->clear();
    }

    function endAction() {
        $action = clone $this->currentAction;
        $this->currentHit->addAction($action);
        $this->currentAction->clear();
    }

    function endHitField() {
        $hitField = clone $this->currentHitField;
        $this->currentHit->addHitField($hitField);
        $this->currentHitField->clear();
    }

    function endTextSeg() {
        $segment = clone $this->currentTextSegment;
        $this->currentHitField->addTextSegment($segment);
        $this->currentTextSegment->clear();
    }

    function endTextCut() {
        $this->currentHitField->setHasTextCut(true);
    }

    function endSpellingSuggestionVariant() {
        $spelling = clone $this->currentSpelling;
        $this->data->addSpelling($spelling);
        $this->currentSpelling->clear();
    }

    function endGroup() {
        $group = clone $this->currentGroup;
        $this->data->addGroup($group);
        $this->currentGroup->clear();
    }

    /**
     * Closes a CATEGORY: pops it from the stack and attaches it either to
     * the current group (it was the only open category) or to the category
     * that is now on top of the stack (its parent).
     */
    function endCategory() {
        if (count($this->currentCategories) == 1) {
            // No other category is open: the parent element is the GROUP.
            $this->currentGroup->addCategory(array_pop($this->currentCategories));
        } else {
            $category = array_pop($this->currentCategories);
            // Move the internal pointer to the new last entry, i.e. the parent.
            end($this->currentCategories);
            $parentCategory = &$this->currentCategories[key($this->currentCategories)];
            $parentCategory->addCategory($category);
        }
    }

    function endSearch() {
    }

    /** XML parser callback: dispatches a closing tag to end<Tag>(). */
    function endElement($parser, $name) {
        $suffix = $this->_handlerSuffix($name);
        if ($suffix !== null) {
            $handler = 'end' . $suffix;
            $this->$handler();
        }
    }

    /**
     * XML parser callback for character data: appends the text, converted
     * with utf8_decode(), to the current text segment.
     */
    function parsePCData($parser, $text) {
        $this->currentTextSegment->append(utf8_decode($text));
    }

}
473
474 ?>