0337d704 |
1 | <?php |
2 | |
3 | require_once('exalead.class.php'); |
4 | |
/**
 * Percent-encodes every literal '+' of a string as '%2B'.
 *
 * No other character is touched; in this file it is applied to the
 * REFINEHREF attributes read from the XML response.
 *
 * @param string $string text to convert
 * @return string the text with each '+' replaced by '%2B'
 */
function convert_url($string){
    $plusSign = '+';
    $encodedPlus = '%2B';

    return str_replace($plusSign, $encodedPlus, $string);
}
8 | |
// Default query string used to dump the indexed database; the Exalead
// constructor falls back to it when no override is supplied.
$GLOBALS['query_all'] = 'a*';
10 | |
/**
 * Client for the Exalead Corporate search CGI.
 *
 * The class builds a request URL from $base_cgi, downloads the response with
 * file_get_contents() and feeds it to an expat (SAX) parser. The element
 * callbacks fill a set of reusable "current*" scratch objects and hand them
 * over to $data when the matching end tag is reached.
 *
 * The Exalead* helper classes instantiated here are not defined in this file
 * (presumably they come from exalead.class.php, which is required above).
 *
 * NOTE(review): attribute values and character data are passed through
 * utf8_decode(), which is deprecated as of PHP 8.2.
 */
class Exalead{

    // Handle returned by xml_parser_create(); set by createParser().
    var $parserId;

    // ExaleadData instance receiving everything extracted from the response.
    var $data;

    // Scratch objects describing the element currently being parsed.
    var $currentGroup;
    // Stack of the Category elements that are currently open (innermost last).
    var $currentCategories = array();
    var $currentSpelling;
    var $currentHit;
    var $currentHitField;
    var $currentHitGroup;
    var $currentHitCategory;
    var $currentAction;
    var $currentTextSegment;
    var $currentQuery;
    var $currentQueryTerm;
    var $currentQueryParameter;
    var $currentKeyword;

    // Base URL of the Exalead product.
    var $base_cgi = '';

    // Query used to dump the indexed database.
    var $query_all= '';

    /**** Constructor *********/

    /**
     * Creates the result container and all scratch objects.
     *
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after the class as a constructor.
     *
     * @param string $base_cgi           base URL of the Exalead product
     * @param string $override_query_all query used by get_db_dump(); when
     *                                   empty, $GLOBALS['query_all'] is used
     */
    function __construct($base_cgi = '', $override_query_all = ''){
        $this->data = new ExaleadData();
        $this->currentGroup = new ExaleadGroup();
        $this->currentCategories = array();
        $this->currentSpelling = new ExaleadSpelling();
        $this->currentHit = new ExaleadHit();
        $this->currentHitField = new ExaleadHitField();
        $this->currentHitGroup = new ExaleadHitGroup();
        $this->currentHitCategory = new ExaleadHitCategory();
        $this->currentAction = new ExaleadAction();
        $this->currentTextSegment = new ExaleadTextSegment();
        $this->currentQuery = new ExaleadQuery();
        $this->currentQueryTerm = new ExaleadQueryTerm();
        $this->currentQueryParameter = new ExaleadQueryParameter();
        $this->currentKeyword = new ExaleadKeyword();

        // Base URL of the Exalead product.
        $this->base_cgi = $base_cgi;
        if(!empty($override_query_all)){
            $this->query_all = $override_query_all;
        }
        else{
            $this->query_all = $GLOBALS['query_all'];
        }
    }

    /**
     * Former PHP 4 style constructor, kept as a plain method so that code
     * calling it explicitly (for instance a subclass constructor) still works.
     *
     * @param string $base_cgi           see __construct()
     * @param string $override_query_all see __construct()
     */
    function Exalead($base_cgi = '', $override_query_all = ''){
        $this->__construct($base_cgi, $override_query_all);
    }

    /**** Interface with the Exalead Corporate CGI ******/

    /**
     * Replaces the base URL used to build the requests.
     *
     * @param string $base_cgi base URL of the Exalead product
     */
    function set_base_cgi($base_cgi){
        $this->base_cgi = $base_cgi;
    }

    /**
     * Runs a search driven by the current HTTP request.
     *
     * A non-empty $_REQUEST[$varname] starts a new search; otherwise a
     * '_C' request variable continues an existing one.
     *
     * @param string $varname name of the request variable holding the query
     * @return bool true if one of the two cases applied, false otherwise
     */
    function query($varname = 'query'){
        if(!empty($_REQUEST[$varname])){
            // NOTE(review): stripslashes() looks like a magic-quotes
            // workaround; without magic quotes it removes backslashes the
            // user actually typed - confirm before changing.
            $this->first_query(stripslashes($_REQUEST[$varname]));
            return true;
        }
        if(isset($_REQUEST['_C'])){
            $this->handle_request();
            return true;
        }
        return false;
    }

    /**
     * Sends the first request of a search and parses the response.
     *
     * The script is terminated by parseString() if the response is not
     * well-formed XML.
     *
     * @param string $query  search query, url-encoded into the '_q' parameter
     * @param int    $offset value of the '_s' parameter, sent only when > 0
     * @return false|null false when no base URL is configured, nothing otherwise
     */
    function first_query($query, $offset = 0){
        if(empty($this->base_cgi)) return false;

        $query_exa = $this->base_cgi."?_q=".urlencode($query)."&_f=xml2";
        if($offset > 0){
            $query_exa .= "&_s=".$offset;
        }

        $xml_response = file_get_contents($query_exa);
        $this->parse($xml_response);
    }

    /**
     * Fetches all the results of an indexed database by running $query_all.
     */
    function get_db_dump(){
        $this->first_query($this->query_all);
    }

    /**
     * Continues a search from the Exalead context found in $_REQUEST['_C'],
     * forwarding $_REQUEST['_s'] (cast to int) when it is not empty.
     *
     * @return false|null false when the base URL or the context is missing,
     *                    nothing otherwise
     */
    function handle_request(){
        if(empty($this->base_cgi)) return false;
        if(empty($_REQUEST['_C'])) return false;// _C is the Exalead context

        // NOTE(review): the context comes straight from the request and only
        // its spaces are escaped before it is appended to the URL.
        $context = str_replace(' ', '%20', $_REQUEST['_C']);
        $query_exa = $this->base_cgi.'/_C='.$context.'&_f=xml2';
        if(!empty($_REQUEST['_s'])){
            $query_exa .= "&_s=".((int) $_REQUEST['_s']);
        }
        $xml_response = file_get_contents($query_exa);
        $this->parse($xml_response);
    }

    /******** Parser helpers ********/

    /**
     * Creates the expat parser and registers this object's callbacks.
     */
    function createParser(){
        $this->parserId = xml_parser_create();
        // Objects are handles, so array($this, ...) is enough for a callback.
        xml_set_element_handler($this->parserId, array($this, "startElement"), array($this, "endElement"));
        xml_set_character_data_handler($this->parserId, array($this, "parsePCData"));
    }

    /**
     * Releases the parser created by createParser().
     */
    function freeParser(){
        xml_parser_free($this->parserId);
    }

    /**
     * Parses a complete document in one call.
     *
     * Terminates the script with an "XML error" message when expat reports
     * a failure.
     *
     * @param string $string the whole XML document
     */
    function parseString($string){
        if (!xml_parse($this->parserId, $string, true)) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($this->parserId)),
                xml_get_current_line_number($this->parserId)));
        }
    }

    /******** Entry point of the parser ***********/

    /**
     * Creates a parser, parses $string and frees the parser.
     *
     * @param string $string the whole XML document
     */
    function parse($string){
        $this->createParser();
        $this->parseString($string);
        $this->freeParser();
    }

    /********* Tag specific start handlers ******/

    // These methods may be overridden. Each one receives the attributes of
    // the opening tag, keyed by upper-case attribute name.

    function startQuery(&$attrs){
        $this->currentQuery->query = utf8_decode($attrs['QUERY']);
        $this->currentQuery->context = $attrs['CONTEXT'];
        $this->currentQuery->time = $attrs['TIME'];
        if(isset($attrs['INTERRUPTED'])) $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
        if(isset($attrs['BROWSED'])) $this->currentQuery->browsed = $attrs['BROWSED'];
    }

    function startQueryTerm(&$attrs){
        $this->currentQueryTerm->level = $attrs['LEVEL'];
        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
    }

    function startQueryParameter(&$attrs){
        $this->currentQueryParameter->name = $attrs['NAME'];
        if(isset($attrs['VALUE'])) $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
    }

    function startKeyword(&$attrs){
        if(isset($attrs['NAME'])) $this->currentKeyword->name = $attrs['NAME'];
        $this->currentKeyword->display = utf8_decode( $attrs['DISPLAY'] );
        $this->currentKeyword->count = $attrs['COUNT'];
        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
        // Only the refine link goes through convert_url().
        if(isset($attrs['REFINEHREF'])) $this->currentKeyword->refine_href = convert_url($attrs['REFINEHREF']);
        if(isset($attrs['EXCLUDEHREF'])) $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
        if(isset($attrs['RESETHREF'])) $this->currentKeyword->reset_href = $attrs['RESETHREF'];
    }

    function startHits(&$attrs){
        $this->data->nmatches = $attrs['NMATCHES'];
        $this->data->nhits = $attrs['NHITS'];
        if(isset($attrs['INTERRUPTED'])) $this->data->interrupted = $attrs['INTERRUPTED'];
        $this->data->last = $attrs['LAST'];
        $this->data->end = $attrs['END'];
        $this->data->start = $attrs['START'];
    }

    function startHit(&$attrs){
        $this->currentHit->url = $attrs['URL'];
        $this->currentHit->score = $attrs['SCORE'];
    }

    function startHitGroup(&$attrs){
        $this->currentHitGroup->title = utf8_decode($attrs['TITLE']);
        $this->currentHitGroup->gid = $attrs['GID'];
    }

    function startHitCategory(&$attrs){
        $this->currentHitCategory->name = $attrs['NAME'];
        $this->currentHitCategory->display = utf8_decode($attrs['DISPLAY']);
        $this->currentHitCategory->cref = $attrs['CREF'];
        $this->currentHitCategory->gid = $attrs['GID'];
        if(isset($attrs['BROWSEHREF'])) $this->currentHitCategory->browsehref = $attrs['BROWSEHREF'];
    }

    function startAction(&$attrs){
        $this->currentAction->display = $attrs['DISPLAY'];
        $this->currentAction->kind = $attrs['KIND'];
        if(isset($attrs['EXECHREF']))$this->currentAction->execHref = $attrs['EXECHREF'];
    }

    function startHitField(&$attrs){
        $this->currentHitField->name = $attrs['NAME'];
        if(isset($attrs['VALUE'])) $this->currentHitField->value = utf8_decode($attrs['VALUE']);
    }

    function startTextSeg(&$attrs){
        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
    }

    // Nothing to record when a TextCut opens; see endTextCut().
    function startTextCut(&$attrs){}

    function startSpellingSuggestionVariant(&$attrs){
        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
        $this->currentSpelling->setQueryHref($attrs['QUERY']);
    }

    function startGroup(&$attrs){
        $this->currentGroup->setGid(utf8_decode($attrs['GID']));
        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
        $this->currentGroup->setClipped($attrs['CLIPPED']);
        $this->currentGroup->setCount($attrs['COUNT']);
        $this->currentGroup->setBrowsed($attrs['BROWSED']);
        if(isset($attrs['CLIPHREF'])) $this->currentGroup->setClipHref($attrs['CLIPHREF']);
        if(isset($attrs['RESETHREF'])) $this->currentGroup->setResetHref($attrs['RESETHREF']);
    }

    /**
     * Builds a new ExaleadCategory and pushes it on the stack of open
     * categories; endCategory() attaches it to its parent.
     */
    function startCategory(&$attrs){
        $currentCategory = new ExaleadCategory();
        $currentCategory->name = utf8_decode($attrs['NAME']);
        $currentCategory->display = utf8_decode($attrs['DISPLAY']);
        $currentCategory->count = $attrs['COUNT'];
        $currentCategory->automatic = $attrs['AUTOMATIC'];
        $currentCategory->cref = $attrs['CREF'];
        if(isset($attrs['REFINEHREF'])) $currentCategory->refine_href = convert_url($attrs['REFINEHREF']);
        // The exclude link is derived from the category reference.
        $currentCategory->exclude_href = '_c=-'.$currentCategory->cref;
        if(isset($attrs['RESETHREF'])){
            $currentCategory->reset_href = $attrs['RESETHREF'];
        }
        $currentCategory->gid = $attrs['GID'];
        $currentCategory->gcount = $attrs['GCOUNT'];
        $this->currentCategories[] = $currentCategory;
    }

    // Hook for subclasses; the base class ignores the Search element.
    function startSearch(&$attrs){}

    /**
     * expat callback for opening tags: forwards the attributes to the
     * start*() method registered for the element; other elements are ignored.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $name   element name as reported by expat
     * @param array  $attrs  attributes of the element
     */
    function startElement($parser, $name, $attrs) {
        static $handlers = array(
            // query parameters
            'QUERY'                     => 'startQuery',
            'QUERYTERM'                 => 'startQueryTerm',
            'QUERYPARAMETER'            => 'startQueryParameter',
            // keywords
            'KEYWORD'                   => 'startKeyword',
            // results
            'HITS'                      => 'startHits',
            'HIT'                       => 'startHit',
            'HITFIELD'                  => 'startHitField',
            'HITGROUP'                  => 'startHitGroup',
            'HITCATEGORY'               => 'startHitCategory',
            'ACTION'                    => 'startAction',
            'TEXTSEG'                   => 'startTextSeg',
            'TEXTCUT'                   => 'startTextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'startSpellingSuggestionVariant',
            // categories used to refine the search
            'GROUP'                     => 'startGroup',
            'CATEGORY'                  => 'startCategory',
            'SEARCH'                    => 'startSearch',
        );

        if(isset($handlers[$name])){
            $method = $handlers[$name];
            $this->$method($attrs);
        }
    }

    /********* Tag specific end handlers ******/

    // NOTE(review): apart from endQuery() and endCategory(), the handlers
    // below pass a scratch object to an add*() method and clear() it right
    // away. Objects are handles since PHP 5, so this is only safe if the
    // add*() methods (not visible in this file) store a copy - confirm.

    function endQuery(){
        // Store a copy: assigning the object itself would make the clear()
        // below reset the stored query as well.
        $this->data->query = clone $this->currentQuery;
        $this->currentQuery->clear();
    }
    function endQueryTerm(){
        $this->currentQuery->addTerm($this->currentQueryTerm);
        $this->currentQueryTerm->clear();
    }
    function endQueryParameter(){
        $this->currentQuery->addParameter($this->currentQueryParameter);
        $this->currentQueryParameter->clear();
    }
    function endKeyword(){
        $this->data->addKeyword($this->currentKeyword);
        $this->currentKeyword->clear();
    }
    // Hook for subclasses; nothing to do when the Hits element closes.
    function endHits(){
    }
    function endHit(){
        $this->data->addHit($this->currentHit);
        $this->currentHit->clear();
    }
    function endHitGroup(){
        $this->currentHit->addHitGroup($this->currentHitGroup);
        $this->currentHitGroup->clear();
    }
    function endHitCategory(){
        $this->currentHitGroup->addHitCategory($this->currentHitCategory);
        $this->currentHitCategory->clear();
    }
    function endAction(){
        $this->currentHit->addAction($this->currentAction);
        $this->currentAction->clear();
    }
    function endHitField(){
        $this->currentHit->addHitField($this->currentHitField);
        $this->currentHitField->clear();
    }
    function endTextSeg(){
        $this->currentHitField->addTextSegment($this->currentTextSegment);
        $this->currentTextSegment->clear();
    }
    function endTextCut(){
        $this->currentHitField->setHasTextCut(true);
    }
    function endSpellingSuggestionVariant(){
        $this->data->addSpelling($this->currentSpelling);
        $this->currentSpelling->clear();
    }
    function endGroup(){
        $this->data->addGroup($this->currentGroup);
        $this->currentGroup->clear();
    }

    /**
     * Pops the category that just closed and attaches it to its parent:
     * the current group when it was the only open category, the enclosing
     * category otherwise.
     */
    function endCategory(){
        // Is the parent element a Group element?
        if(count($this->currentCategories) == 1){
            $this->currentGroup->addCategory(array_pop($this->currentCategories));
        }
        else{
            $category = array_pop($this->currentCategories);
            // After the pop, the last entry of the stack is the parent.
            end($this->currentCategories);
            $parentCategory = &$this->currentCategories[key($this->currentCategories)];
            $parentCategory->addCategory($category);
        }
    }

    // Hook for subclasses; the base class ignores the Search element.
    function endSearch(){
    }

    /**
     * expat callback for closing tags: calls the end*() method registered
     * for the element; other elements are ignored.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $name   element name as reported by expat
     */
    function endElement($parser, $name) {
        static $handlers = array(
            'QUERY'                     => 'endQuery',
            'QUERYTERM'                 => 'endQueryTerm',
            'QUERYPARAMETER'            => 'endQueryParameter',
            'KEYWORD'                   => 'endKeyword',
            'HITS'                      => 'endHits',
            'HIT'                       => 'endHit',
            'HITFIELD'                  => 'endHitField',
            'HITGROUP'                  => 'endHitGroup',
            'HITCATEGORY'               => 'endHitCategory',
            'ACTION'                    => 'endAction',
            'TEXTSEG'                   => 'endTextSeg',
            'TEXTCUT'                   => 'endTextCut',
            // spelling suggestions
            'SPELLINGSUGGESTIONVARIANT' => 'endSpellingSuggestionVariant',
            // categories used to refine the search
            'GROUP'                     => 'endGroup',
            'CATEGORY'                  => 'endCategory',
            'SEARCH'                    => 'endSearch',
        );

        if(isset($handlers[$name])){
            $method = $handlers[$name];
            $this->$method();
        }
    }

    /**
     * expat callback for character data: every chunk, whatever element it
     * belongs to, is appended to the current text segment.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $text   chunk of character data
     */
    function parsePCData($parser, $text){
        $this->currentTextSegment->append(utf8_decode($text));
    }

}
473 | |
474 | ?> |