Some classes to parse Exalead XML
author x2000coic <x2000coic>
Sun, 14 Nov 2004 17:14:30 +0000 (17:14 +0000)
committer x2000coic <x2000coic>
Sun, 14 Nov 2004 17:14:30 +0000 (17:14 +0000)
include/exalead/exalead.class.php [new file with mode: 0644]
include/exalead/exalead.parser.inc.php [new file with mode: 0644]

diff --git a/include/exalead/exalead.class.php b/include/exalead/exalead.class.php
new file mode 100644 (file)
index 0000000..c4cb70e
--- /dev/null
@@ -0,0 +1,241 @@
+<?php
+
+
+/**
+ * Root container for one parsed result document.
+ *
+ * The four list properties are filled through the add*() methods below; the
+ * scalar properties are public and are assigned directly by whoever builds
+ * the object (the Exalead parser class does so from the HITS attributes).
+ */
+class ExaleadData{
+
+  var $query;
+  var $groups = array();
+  var $hits = array();
+  var $spellings = array();
+  var $keywords = array();
+
+  var $nhits = "";
+  var $nmatches = "";
+  var $estimated = false;
+  // Exalead::startHits() assigns this when the HITS element carries an
+  // INTERRUPTED attribute. Declaring it here gives it a defined default and
+  // stops that assignment from creating an undeclared property on the fly.
+  var $interrupted = false;
+  var $last = "";
+  var $start = "";
+  var $end = "";
+
+  function ExaleadData(){}
+
+  // Stores the query description, replacing any previous one.
+  function setQuery($query){$this->query = $query;}
+  // Each add*() appends its argument to the end of the matching list.
+  function addHit($hit){$this->hits[] = $hit;}
+  function addGroup($group){$this->groups[] = $group;}
+  function addSpelling($spelling){$this->spellings[] = $spelling;}
+  function addKeyword($keyword){$this->keywords[] = $keyword;} 
+}
+
+/**
+ * One keyword entry: identifying name, display label, count, the
+ * "automatic" marker and the three navigation links.
+ *
+ * All properties are public and are assigned directly by the code that
+ * builds the object.
+ */
+class ExaleadKeyword{
+  var $name = "";
+  var $display = "";
+  var $count = "";
+  var $automatic = "";
+  var $refine_href = "";
+  var $exclude_href = "";
+  var $reset_href = "";
+
+  function ExaleadKeyword(){}
+
+  // Puts every property back to its declared default.
+  // The assignments must go through $this: bare "$name = ..." only creates
+  // locals that vanish on return, leaving the object untouched, so values
+  // from one keyword would leak into the next one built from this instance.
+  function clear(){
+    $this->name = "";
+    $this->display = "";
+    $this->count = "";
+    $this->automatic = "";
+    $this->refine_href = "";
+    $this->exclude_href = "";
+    $this->reset_href = "";
+  }
+}
+
+/**
+ * A titled group of categories together with its count, two flags
+ * (clipped / browsed) and two links (clip / reset).
+ */
+class ExaleadGroup{
+
+  var $categories = array();
+  var $title = "";
+  var $clipped = false;
+  var $count = "";
+  var $browsed = false;
+  var $clip_href = "";
+  var $reset_href = "";
+
+  function ExaleadGroup(){
+  }
+
+  // Appends one category to the end of the list.
+  function addCategory($category){
+    $this->categories[] = $category;
+  }
+
+  // Plain setters: each one overwrites a single property.
+  function setTitle($title){
+    $this->title = $title;
+  }
+
+  function setClipped($clipped){
+    $this->clipped = $clipped;
+  }
+
+  function setCount($count){
+    $this->count = $count;
+  }
+
+  function setBrowsed($browsed){
+    $this->browsed = $browsed;
+  }
+
+  function setClipHref($clip_href){
+    $this->clip_href = $clip_href;
+  }
+
+  function setResetHref($reset_href){
+    $this->reset_href = $reset_href;
+  }
+
+  // Restores every property to the default it was declared with.
+  function clear(){
+    $this->categories = array();
+    $this->clipped = $this->browsed = false;
+    $this->title = $this->count = $this->clip_href = $this->reset_href = "";
+  }
+
+}
+
+/**
+ * One spelling suggestion: the text to show and the link that runs it.
+ */
+class ExaleadSpelling{
+
+  var $display = "";
+  var $query_href = "";
+
+  function ExaleadSpelling(){
+  }
+
+  // Overwrites the text shown for the suggestion.
+  function setDisplay($display){
+    $this->display = $display;
+  }
+
+  // Overwrites the link associated with the suggestion.
+  function setQueryHref($query_href){
+    $this->query_href = $query_href;
+  }
+
+  // Empties both properties again.
+  function clear(){
+    $this->display = $this->query_href = "";
+  }
+}
+
+/**
+ * One category entry. It has no setters: every property is public and is
+ * assigned directly by the code that builds the object.
+ */
+class ExaleadCategory{
+  var $name = "";
+  var $display = "";
+  var $count = "";
+  var $automatic = false;
+  var $refine_href = "";
+  var $exclude_href = "";
+  var $reset_href = "";
+  var $cref = "";
+  var $gid = "";
+  var $gcount = "";
+
+  function ExaleadCategory(){
+  }
+
+  // Restores every property to the default it was declared with:
+  // false for the automatic flag, the empty string for everything else.
+  function clear(){
+    $this->automatic = false;
+    $this->name = $this->display = $this->count = "";
+    $this->refine_href = $this->exclude_href = $this->reset_href = "";
+    $this->cref = $this->gid = $this->gcount = "";
+  }
+
+}
+
+/**
+ * One result entry: its url and score plus three growing lists
+ * (hit fields, hit groups, actions).
+ */
+class ExaleadHit{
+  var $hitfields = array();
+  var $hitgroups = array();
+  var $actions = array();
+  var $score = "";
+  var $url = "";
+
+  function ExaleadHit(){
+  }
+
+  // Each add*() appends its argument to the end of the matching list.
+  function addHitField($hitfield){
+    $this->hitfields[] = $hitfield;
+  }
+
+  function addHitGroup($hitgroup){
+    $this->hitgroups[] = $hitgroup;
+  }
+
+  function addAction($action){
+    $this->actions[] = $action;
+  }
+
+  // Empties the three lists and blanks score and url.
+  function clear(){
+    $this->score = $this->url = "";
+    $this->hitfields = array();
+    $this->hitgroups = array();
+    $this->actions = array();
+  }
+}
+
+/**
+ * One named field of a hit: an optional plain value, a list of text
+ * segments and a flag telling whether a text cut was recorded.
+ */
+class ExaleadHitField{
+  var $text_segments = array();
+  var $has_text_cut = false;
+  var $name = "";
+  var $value = "";
+
+  function ExaleadHitField(){
+  }
+
+  // Appends one segment to the end of the segment list.
+  function addTextSegment($text_segment){
+    $this->text_segments[] = $text_segment;
+  }
+
+  // Overwrites the text-cut flag.
+  function setHasTextCut($has_text_cut){
+    $this->has_text_cut = $has_text_cut;
+  }
+
+  // Restores every property to the default it was declared with.
+  function clear(){
+    $this->name = $this->value = "";
+    $this->has_text_cut = false;
+    $this->text_segments = array();
+  }
+}
+
+/**
+ * A run of text that is built up piece by piece, with a flag saying
+ * whether it is highlighted.
+ */
+class ExaleadTextSegment{
+  var $text = "";
+  var $highlighted = false;
+
+  function ExaleadTextSegment(){}
+
+  // Overwrites the highlighted flag with the given value, unchanged.
+  function setHighlighted($highlighted){
+    $this->highlighted = $highlighted;
+  }
+
+  // Concatenates $text onto whatever has been collected so far.
+  function append($text){
+    $this->text = $this->text . $text;
+  }
+
+  // Drops the collected text and resets the flag.
+  function clear(){
+    $this->highlighted = false;
+    $this->text = "";
+  }
+}
+
+/**
+ * Description of the query: its text, context and time, two flags, and
+ * the lists of parameters and terms attached to it.
+ */
+class ExaleadQuery{
+  var $query_parameters = array();
+  var $query_terms = array();
+  var $query = "";
+  var $context = "";
+  var $time = "";
+  var $interrupted = false;
+  var $browsed = false;
+
+  function ExaleadQuery(){
+  }
+
+  // Appends one parameter to the end of the parameter list.
+  function addParameter($parameter){
+    $this->query_parameters[] = $parameter;
+  }
+
+  // Appends one term to the end of the term list.
+  function addTerm($term){
+    $this->query_terms[] = $term;
+  }
+
+  // Restores every property to the default it was declared with.
+  function clear(){
+    $this->interrupted = $this->browsed = false;
+    $this->query = $this->context = $this->time = "";
+    $this->query_terms = array();
+    $this->query_parameters = array();
+  }
+}
+
+/**
+ * A single name/value pair attached to a query. Both properties are
+ * public and are assigned directly.
+ */
+class ExaleadQueryParameter{
+  var $name = "";
+  var $value = "";
+
+  function ExaleadQueryParameter(){
+  }
+
+  // Blanks both the name and the value.
+  function clear(){
+    $this->name = $this->value = "";
+  }
+}
+
+/**
+ * A single query term, described by a level and a regular expression.
+ * Both properties are public and are assigned directly.
+ */
+class ExaleadQueryTerm{
+  var $level = "";
+  var $regexp = "";
+
+  function ExaleadQueryTerm(){
+  }
+
+  // Blanks both the level and the regular expression.
+  function clear(){
+    $this->level = $this->regexp = "";
+  }
+}
+
+
+?>
diff --git a/include/exalead/exalead.parser.inc.php b/include/exalead/exalead.parser.inc.php
new file mode 100644 (file)
index 0000000..d6e6d21
--- /dev/null
@@ -0,0 +1,309 @@
+<?php
+
+require_once('exalead.class.php');
+
+
+/**
+ * Event-driven XML parser that fills an ExaleadData tree.
+ *
+ * Usage: create an instance, call parse($xmlString), then read $data.
+ *
+ * For every recognised element the matching start*() handler copies the
+ * attributes into a "current" scratch object, and the matching end*()
+ * handler hands that object to its parent container. Element and attribute
+ * names are compared in upper case, which is how a parser created with
+ * xml_parser_create() reports them by default (case folding enabled).
+ */
+class Exalead{
+
+  // Parser handle; set by createParser() and released by freeParser().
+  var $parserId;
+
+  // ExaleadData instance that receives everything that is parsed.
+  var $data;
+
+  // Scratch objects, one per element type, filled between the start and
+  // end events of the element they describe.
+  var $currentGroup;
+  var $currentCategory;
+  var $currentSpelling;
+  var $currentHit;
+  var $currentHitField;
+  var $currentTextSegment;
+  var $currentQuery;
+  var $currentQueryTerm;
+  var $currentQueryParameter;
+  var $currentKeyword;
+
+
+/****    Constructor     *********/
+
+
+  // Constructor entry point for engines that no longer treat a method named
+  // after the class as the constructor; it simply delegates to Exalead() so
+  // that $data and the scratch objects are always created.
+  function __construct(){
+     $this->Exalead();
+  }
+
+  // Creates the result container and one empty scratch object of each type.
+  function Exalead(){
+     $this->data = new ExaleadData();
+     $this->currentGroup = new ExaleadGroup();
+     $this->currentCategory = new ExaleadCategory();
+     $this->currentSpelling = new ExaleadSpelling();
+     $this->currentHit = new ExaleadHit();
+     $this->currentHitField = new ExaleadHitField();
+     $this->currentTextSegment = new ExaleadTextSegment();
+     $this->currentQuery = new ExaleadQuery();
+     $this->currentQueryTerm = new ExaleadQueryTerm();
+     $this->currentQueryParameter = new ExaleadQueryParameter();
+     $this->currentKeyword = new ExaleadKeyword();
+  }
+
+
+
+/********      Helper functions around the XML parser     ********/
+
+  // Creates the parser and registers this object's startElement(),
+  // endElement() and parsePCData() methods as its callbacks.
+  function createParser(){
+    $this->parserId = xml_parser_create(); 
+    xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
+    xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
+  }
+  
+  // Intentionally empty: the handlers are always the ones wired up in
+  // createParser(); both arguments are ignored.
+  function setElementHandler($stratElement, $endElement){
+  }
+  
+  // Releases the parser created by createParser().
+  function freeParser(){
+    xml_parser_free($this->parserId);
+  }
+  
+  // Parses $string as one complete document (final chunk).
+  // On a parse error the script is terminated with the error text and the
+  // line number reported by the parser.
+  function parseString($string){
+    if (!xml_parse($this->parserId, $string, true)) {
+       die(sprintf("XML error: %s at line %d",
+             xml_error_string(xml_get_error_code($this->parserId)),
+             xml_get_current_line_number($this->parserId)));
+    }
+  }
+
+
+
+/********        Method that runs the parser           ***********/
+
+  // Convenience wrapper: create the parser, parse $string, free the parser.
+  // The result is available in $this->data afterwards.
+  function parse($string){
+    $this->createParser();
+    $this->parseString($string);
+    $this->freeParser();
+  }
+
+/*********      Element-specific functions     ******/
+
+// These methods may be overridden in a subclass.
+// Each start*() receives the attribute array of the element (upper-case
+// keys). Attributes guarded by isset() are optional; the others are read
+// unconditionally.
+
+  function startQuery(&$attrs){
+     $this->currentQuery->query = utf8_decode($attrs['QUERY']);
+     $this->currentQuery->context = $attrs['CONTEXT'];
+     $this->currentQuery->time = $attrs['TIME'];
+     if(isset($attrs['INTERRUPTED'])) $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
+     if(isset($attrs['BROWSED'])) $this->currentQuery->browsed = $attrs['BROWSED'];
+  }
+  
+  // Spelled with a lower-case "s" like every other handler; method names
+  // are case-insensitive, so existing calls keep working.
+  function startQueryTerm(&$attrs){
+     $this->currentQueryTerm->level = $attrs['LEVEL'];
+     $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
+  }
+
+  function startQueryParameter(&$attrs){
+     $this->currentQueryParameter->name = $attrs['NAME'];
+     if(isset($attrs['VALUE'])) $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
+  }
+
+  function startKeyword(&$attrs){
+     if(isset($attrs['NAME'])) $this->currentKeyword->name = $attrs['NAME'];
+     $this->currentKeyword->display = utf8_decode( $attrs['DISPLAY'] );
+     $this->currentKeyword->count = $attrs['COUNT'];
+     $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
+     if(isset($attrs['REFINEHREF'])) $this->currentKeyword->refine_href = $attrs['REFINEHREF'];
+     if(isset($attrs['EXCLUDEHREF'])) $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
+     if(isset($attrs['RESETHREF'])) $this->currentKeyword->reset_href = $attrs['RESETHREF'];
+  }
+
+  // The HITS attributes are stored directly on the result container.
+  function startHits(&$attrs){
+     $this->data->nmatches = $attrs['NMATCHES'];
+     $this->data->nhits = $attrs['NHITS'];
+     if(isset($attrs['INTERRUPTED'])) $this->data->interrupted = $attrs['INTERRUPTED'];
+     $this->data->last = $attrs['LAST'];
+     $this->data->end = $attrs['END'];
+     $this->data->start = $attrs['START'];
+  }
+  
+  function startHit(&$attrs){
+     $this->currentHit->url = $attrs['URL'];
+     $this->currentHit->score = $attrs['SCORE'];
+  }
+  
+  function startHitField(&$attrs){
+     $this->currentHitField->name = $attrs['NAME'];
+     if(isset($attrs['VALUE'])) $this->currentHitField->value = utf8_decode($attrs['VALUE']);
+  }
+
+  function startTextSeg(&$attrs){
+    // parsePCData() appends every piece of character data to the pending
+    // segment, including whitespace that sits between elements. Start from
+    // an empty segment so only the text inside this TEXTSEG is kept.
+    $this->currentTextSegment->clear();
+    $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
+  }
+
+  // Nothing to record when a TEXTCUT opens; see endTextCut().
+  function startTextCut(&$attrs){}
+
+  function startSpellingSuggestionVariant(&$attrs){
+     $this->currentSpelling->setDisplay($attrs['DISPLAY']);
+     $this->currentSpelling->setQueryHref($attrs['QUERY']);
+  }
+
+  function startGroup(&$attrs){
+     $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
+     $this->currentGroup->setClipped($attrs['CLIPPED']);
+     $this->currentGroup->setCount($attrs['COUNT']);
+     $this->currentGroup->setBrowsed($attrs['BROWSED']);
+     if(isset($attrs['CLIPHREF'])) $this->currentGroup->setClipHref($attrs['CLIPHREF']);
+     if(isset($attrs['RESETHREF'])) $this->currentGroup->setResetHref($attrs['RESETHREF']);
+  }
+
+  function startCategory(&$attrs){
+     $this->currentCategory->name = $attrs['NAME'];
+     $this->currentCategory->display = utf8_decode($attrs['DISPLAY']);
+     $this->currentCategory->count = $attrs['COUNT'];
+     $this->currentCategory->automatic = $attrs['AUTOMATIC'];
+     // The refine link is rebuilt: the first four characters of the
+     // attribute are dropped and '_c=%2B' is put in front of the rest.
+     if(isset($attrs['REFINEHREF'])) $this->currentCategory->refine_href = '_c=%2B'.substr($attrs['REFINEHREF'],4);
+     if(isset($attrs['EXCLUDEHREF'])) $this->currentCategory->exclude_href = $attrs['EXCLUDEHREF'];
+     if(isset($attrs['RESETHREF'])) $this->currentCategory->reset_href = $attrs['RESETHREF'];
+     $this->currentCategory->cref = $attrs['CREF'];
+     $this->currentCategory->gid = $attrs['GID'];
+     $this->currentCategory->gcount = $attrs['GCOUNT'];
+  }
+
+  // Hook for the SEARCH element; does nothing by default.
+  function startSearch(&$attrs){}
+
+  // Start-tag callback: routes the event to the handler for $name.
+  // Elements with any other name are ignored.
+  function startElement($parser, $name, $attrs) {
+   // query description
+   if($name == 'QUERY'){
+     $this->startQuery($attrs);
+   }
+   elseif($name == 'QUERYTERM'){
+     $this->startQueryTerm($attrs);
+   }
+   elseif($name == 'QUERYPARAMETER'){
+     $this->startQueryParameter($attrs);
+   }
+   // keywords
+   elseif($name == 'KEYWORD'){
+     $this->startKeyword($attrs);
+   }
+   // results
+   elseif($name == 'HITS'){
+     $this->startHits($attrs);
+   }
+   elseif($name == 'HIT'){
+     $this->startHit($attrs);
+   }
+   elseif($name == 'HITFIELD'){
+     $this->startHitField($attrs);
+   }
+   elseif($name == 'TEXTSEG'){
+     $this->startTextSeg($attrs);
+   }
+   elseif($name == 'TEXTCUT'){
+     $this->startTextCut($attrs);
+   }
+   // spelling suggestions
+   elseif($name == 'SPELLINGSUGGESTIONVARIANT'){
+     $this->startSpellingSuggestionVariant($attrs);
+   }
+   // groups and categories used for refining
+   elseif($name == 'GROUP'){
+     $this->startGroup($attrs);
+   }
+   elseif($name == 'CATEGORY'){
+     $this->startCategory($attrs);
+   }
+   elseif($name == 'SEARCH'){
+     $this->startSearch($attrs);
+   }
+  }
+
+  // Each end*() below hands the finished scratch object to its parent and
+  // then replaces the scratch slot with a brand-new instance. Resetting the
+  // same object in place is only safe where objects are copied on
+  // assignment; where they are shared by handle it would wipe the object
+  // that has just been stored.
+
+  function endQuery(){
+     $this->data->query = $this->currentQuery;
+     $this->currentQuery = new ExaleadQuery();
+  }
+  function endQueryTerm(){
+     $this->currentQuery->addTerm($this->currentQueryTerm);
+     $this->currentQueryTerm = new ExaleadQueryTerm();
+  }
+  function endQueryParameter(){
+     $this->currentQuery->addParameter($this->currentQueryParameter);
+     $this->currentQueryParameter = new ExaleadQueryParameter();
+  }
+  function endKeyword(){
+     $this->data->addKeyword($this->currentKeyword);
+     $this->currentKeyword = new ExaleadKeyword();
+  }
+  // Hook for the end of HITS; does nothing by default.
+  function endHits(){
+  }
+  function endHit(){
+     $this->data->addHit($this->currentHit);
+     $this->currentHit = new ExaleadHit();
+  }
+  function endHitField(){
+     $this->currentHit->addHitField($this->currentHitField);
+     $this->currentHitField = new ExaleadHitField();
+  }
+  function endTextSeg(){
+     $this->currentHitField->addTextSegment($this->currentTextSegment);
+     $this->currentTextSegment = new ExaleadTextSegment();
+  }
+  // A TEXTCUT only flags the hit field that is currently being built.
+  function endTextCut(){
+     $this->currentHitField->setHasTextCut(true);
+  }
+  function endSpellingSuggestionVariant(){
+     $this->data->addSpelling($this->currentSpelling);
+     $this->currentSpelling = new ExaleadSpelling();
+  }
+  function endGroup(){
+     $this->data->addGroup($this->currentGroup);
+     $this->currentGroup = new ExaleadGroup();
+  }
+  function endCategory(){
+     $this->currentGroup->addCategory($this->currentCategory);
+     $this->currentCategory = new ExaleadCategory();
+  }
+  // Hook for the end of SEARCH; does nothing by default.
+  function endSearch(){
+  }
+
+  
+  // End-tag callback: routes the event to the handler for $name.
+  // Elements with any other name are ignored.
+  function endElement($parser, $name) {
+   if($name == 'QUERY'){
+     $this->endQuery();
+   }
+   elseif($name == 'QUERYTERM'){
+     $this->endQueryTerm();
+   }
+   elseif($name == 'QUERYPARAMETER'){
+     $this->endQueryParameter();
+   }
+   elseif($name == 'KEYWORD'){
+     $this->endKeyword();
+   }
+   elseif($name == 'HITS'){
+     $this->endHits();
+   }
+   elseif($name == 'HIT'){
+     $this->endHit();
+   }
+   elseif($name == 'HITFIELD'){
+     $this->endHitField();
+   }
+   elseif($name == 'TEXTSEG'){
+     $this->endTextSeg();
+   }
+   elseif($name == 'TEXTCUT'){
+     $this->endTextCut();
+   }
+   // spelling suggestions
+   elseif($name == 'SPELLINGSUGGESTIONVARIANT'){
+     $this->endSpellingSuggestionVariant();
+   }
+   // groups and categories used for refining
+   elseif($name == 'GROUP'){
+     $this->endGroup();
+   }
+   elseif($name == 'CATEGORY'){
+     $this->endCategory();
+   }
+   elseif($name == 'SEARCH'){
+     $this->endSearch();
+   }
+  }
+
+  // Character-data callback: appends the decoded text to the pending text
+  // segment. It is invoked for all character data, wherever it occurs, and
+  // may be invoked several times for one run of text.
+  function parsePCData($parser, $text){
+    $this->currentTextSegment->append(utf8_decode($text));
+  }
+
+}
+
+?>