From ce78ed0204cf36d398c780f3111793aa12650ac9 Mon Sep 17 00:00:00 2001
From: x2000coic
Date: Sun, 14 Nov 2004 17:14:30 +0000
Subject: [PATCH] Some classes to parse Exalead XML

---
NOTE(review): in this copy the first added line of each hunk starts
mid-statement, so the opening of both new files (presumably the "<?php" tag
and the first class header) is missing. Those remnant lines are kept verbatim.
The hunk sizes and the diffstat count the lines present in this copy; the blob
ids on the "index" lines were not recomputed.

 include/exalead/exalead.class.php      | 252 ++++++++++++++++++++++++++++++++
 include/exalead/exalead.parser.inc.php | 252 ++++++++++++++++++++++++++++++++
 2 files changed, 504 insertions(+)
 create mode 100644 include/exalead/exalead.class.php
 create mode 100644 include/exalead/exalead.parser.inc.php

diff --git a/include/exalead/exalead.class.php b/include/exalead/exalead.class.php
new file mode 100644
index 0000000..c4cb70e
--- /dev/null
+++ b/include/exalead/exalead.class.php
@@ -0,0 +1,252 @@
+query = $query;}
+    function addHit($hit){$this->hits[] = $hit;}
+    function addGroup($group){$this->groups[] = $group;}
+    function addSpelling($spelling){$this->spellings[] = $spelling;}
+    function addKeyword($keyword){$this->keywords[] = $keyword;}
+}
+
+/**
+ * One keyword of the result set. All fields are plain strings that the
+ * parser's startKeyword() handler copies from the KEYWORD element's attributes.
+ */
+class ExaleadKeyword{
+    var $name = "";
+    var $display = "";
+    var $count = "";
+    var $automatic = "";
+    var $refine_href = "";
+    var $exclude_href = "";
+    var $reset_href = "";
+
+    function ExaleadKeyword(){}
+
+    /**
+     * Resets every field to its initial empty string.
+     * The assignments have to go through $this: bare variable names would
+     * only create throw-away locals and leave the object unchanged.
+     */
+    function clear(){
+        $this->name = "";
+        $this->display = "";
+        $this->count = "";
+        $this->automatic = "";
+        $this->refine_href = "";
+        $this->exclude_href = "";
+        $this->reset_href = "";
+    }
+}
+
+/**
+ * A group of categories, filled from a GROUP element and from the CATEGORY
+ * elements that are closed while that group is the current one.
+ */
+class ExaleadGroup{
+
+    var $categories = array();
+    var $title = "";
+    var $clipped = false;
+    var $count = "";
+    var $browsed = false;
+    var $clip_href = "";
+    var $reset_href = "";
+
+    function ExaleadGroup(){}
+
+    // Appends one category to the group.
+    function addCategory($category){
+        $this->categories[] = $category;
+    }
+
+    function setTitle($title){$this->title = $title;}
+    function setClipped($clipped){$this->clipped = $clipped;}
+    function setCount($count){$this->count = $count;}
+    function setBrowsed($browsed){$this->browsed = $browsed;}
+    function setClipHref($clip_href){$this->clip_href = $clip_href;}
+    function setResetHref($reset_href){$this->reset_href = $reset_href;}
+
+    // Restores the initial state: no categories, empty strings, false flags.
+    function clear(){
+        $this->categories = array();
+        $this->title = "";
+        $this->clipped = false;
+        $this->count = "";
+        $this->browsed = false;
+        $this->clip_href = "";
+        $this->reset_href = "";
+    }
+
+}
+
+// A spelling suggestion: a display text and the value of the QUERY attribute.
+class ExaleadSpelling{
+
+    var $display = "";
+    var $query_href = "";
+
+    function ExaleadSpelling(){}
+
+    function setDisplay($display){$this->display = $display;}
+    function setQueryHref($query_href){$this->query_href = $query_href;}
+
+    // Resets both fields to empty strings.
+    function clear(){
+        $this->display = "";
+        $this->query_href = "";
+    }
+}
+
+// One category of a group; the parser's startCategory() handler fills the fields.
+class ExaleadCategory{
+    var $name = "";
+    var $display = "";
+    var $count = "";
+    var $automatic = false;
+    var $refine_href = "";
+    var $exclude_href = "";
+    var $reset_href = "";
+    var $cref = "";
+    var $gid = "";
+    var $gcount = "";
+
+    function ExaleadCategory(){}
+
+    // Resets every field to its initial value.
+    function clear(){
+        $this->name = "";
+        $this->display = "";
+        $this->count = "";
+        $this->automatic = false;
+        $this->refine_href = "";
+        $this->exclude_href = "";
+        $this->reset_href = "";
+        $this->cref = "";
+        $this->gid = "";
+        $this->gcount = "";
+    }
+
+}
+
+// One search hit: its URL and score plus the lists attached to it.
+class ExaleadHit{
+    var $hitfields = array();
+    var $hitgroups = array();
+    var $actions = array();
+    var $score = "";
+    var $url = "";
+
+    function ExaleadHit(){}
+
+    function addHitField($hitfield){$this->hitfields[] = $hitfield;}
+    function addHitGroup($hitgroup){$this->hitgroups[] = $hitgroup;}
+    function addAction($action){$this->actions[] = $action;}
+
+    // Empties the three lists and blanks score and url.
+    function clear(){
+        $this->hitfields = array();
+        $this->hitgroups = array();
+        $this->actions = array();
+        $this->score = "";
+        $this->url = "";
+    }
+}
+
+// A named field of a hit, holding a value and/or a list of text segments.
+class ExaleadHitField{
+    var $text_segments = array();
+    var $has_text_cut = false;
+    var $name = "";
+    var $value = "";
+
+    function ExaleadHitField(){}
+
+    function addTextSegment($text_segment){$this->text_segments[] = $text_segment;}
+    function setHasTextCut($has_text_cut){$this->has_text_cut = $has_text_cut;}
+
+    // Drops the segments and restores the default field values.
+    function clear(){
+        $this->text_segments = array();
+        $this->has_text_cut = false;
+        $this->name = "";
+        $this->value = "";
+    }
+}
+
+// A run of text inside a hit field, together with its highlighted flag.
+class ExaleadTextSegment{
+    var $text = "";
+    var $highlighted = false;
+
+    function ExaleadTextSegment(){
+    }
+
+    function setHighlighted($highlighted){$this->highlighted = $highlighted;}
+
+    // Concatenates $text onto the text collected so far.
+    function append($text){
+        $this->text .= $text;
+    }
+
+    // Resets to an empty, non-highlighted segment.
+    function clear(){
+        $this->text = "";
+        $this->highlighted = false;
+    }
+}
+
+// The QUERY element's attributes plus the parameters and terms collected for it.
+class ExaleadQuery{
+    var $query_parameters = array();
+    var $query_terms = array();
+    var $query = "";
+    var $context = "";
+    var $time = "";
+    var $interrupted = false;
+    var $browsed = false;
+
+    function ExaleadQuery(){}
+
+    function addParameter($parameter){$this->query_parameters[] = $parameter;}
+    function addTerm($term){$this->query_terms[] = $term;}
+
+    // Empties both lists and restores the default field values.
+    function clear(){
+        $this->query_parameters = array();
+        $this->query_terms = array();
+        $this->query = "";
+        $this->context = "";
+        $this->time = "";
+        $this->interrupted = false;
+        $this->browsed = false;
+    }
+}
+
+// A name/value pair attached to the query.
+class ExaleadQueryParameter{
+
+    var $name = "";
+    var $value = "";
+
+    function ExaleadQueryParameter(){}
+
+    function clear(){
+        $this->name = "";
+        $this->value = "";
+    }
+}
+
+// A query term: a level and a regular expression, both kept as strings.
+class ExaleadQueryTerm{
+
+    var $level = "";
+    var $regexp = "";
+
+    function ExaleadQueryTerm(){}
+
+    function clear(){
+        $this->regexp = "";
+        $this->level = "";
+    }
+}
+
+
+?>
diff --git a/include/exalead/exalead.parser.inc.php b/include/exalead/exalead.parser.inc.php
new file mode 100644
index 0000000..d6e6d21
--- /dev/null
+++ b/include/exalead/exalead.parser.inc.php
@@ -0,0 +1,252 @@
+data = new ExaleadData();
+        $this->currentGroup = new ExaleadGroup();
+        $this->currentCategory = new ExaleadCategory();
+        $this->currentSpelling = new ExaleadSpelling();
+        $this->currentHit = new ExaleadHit();
+        $this->currentHitField = new ExaleadHitField();
+        $this->currentTextSegment = new ExaleadTextSegment();
+        $this->currentQuery = new ExaleadQuery();
+        $this->currentQueryTerm = new ExaleadQueryTerm();
+        $this->currentQueryParameter = new ExaleadQueryParameter();
+        $this->currentKeyword = new ExaleadKeyword();
+    }
+
+
+
+/******** Parser housekeeping helpers ********/
+
+    // Creates the XML parser and registers this object's callbacks on it.
+    function createParser(){
+        $this->parserId = xml_parser_create();
+        xml_set_element_handler($this->parserId, array(&$this, "startElement"), array(&$this, "endElement"));
+        xml_set_character_data_handler($this->parserId, array(&$this, "parsePCData"));
+    }
+
+    // Placeholder with an empty body; the handlers are wired in createParser().
+    function setElementHandler($stratElement, $endElement){
+    }
+
+    // Releases the parser created by createParser().
+    function freeParser(){
+        xml_parser_free($this->parserId);
+    }
+
+    // Parses $string as one complete document; stops the script with the
+    // XML error text and line number when xml_parse() reports a failure.
+    function parseString($string){
+        if (!xml_parse($this->parserId, $string, true)) {
+            die(sprintf("XML error: %s at line %d",
+                xml_error_string(xml_get_error_code($this->parserId)),
+                xml_get_current_line_number($this->parserId)));
+        }
+    }
+
+
+
+/******** Entry point: runs the parser ***********/
+
+    // Parses one XML string; the handlers accumulate the result in $this->data.
+    function parse($string){
+        $this->createParser();
+        $this->parseString($string);
+        $this->freeParser();
+    }
+
+/********* Handlers specific to each element ******/
+
+// These methods may be overridden.
+
+    // QUERY: copies the query attributes; INTERRUPTED and BROWSED are optional.
+    function startQuery(&$attrs){
+        $this->currentQuery->query = utf8_decode($attrs['QUERY']);
+        $this->currentQuery->context = $attrs['CONTEXT'];
+        $this->currentQuery->time = $attrs['TIME'];
+        if(isset($attrs['INTERRUPTED'])) $this->currentQuery->interrupted = $attrs['INTERRUPTED'];
+        if(isset($attrs['BROWSED'])) $this->currentQuery->browsed = $attrs['BROWSED'];
+    }
+
+    function startQueryTerm(&$attrs){
+        $this->currentQueryTerm->level = $attrs['LEVEL'];
+        $this->currentQueryTerm->regexp = utf8_decode($attrs['REGEXP']);
+    }
+
+    function startQueryParameter(&$attrs){
+        $this->currentQueryParameter->name = $attrs['NAME'];
+        if(isset($attrs['VALUE'])) $this->currentQueryParameter->value = utf8_decode($attrs['VALUE']);
+    }
+
+    function startKeyword(&$attrs){
+        if(isset($attrs['NAME'])) $this->currentKeyword->name = $attrs['NAME'];
+        $this->currentKeyword->display = utf8_decode( $attrs['DISPLAY'] );
+        $this->currentKeyword->count = $attrs['COUNT'];
+        $this->currentKeyword->automatic = $attrs['AUTOMATIC'];
+        if(isset($attrs['REFINEHREF'])) $this->currentKeyword->refine_href = $attrs['REFINEHREF'];
+        if(isset($attrs['EXCLUDEHREF'])) $this->currentKeyword->exclude_href = $attrs['EXCLUDEHREF'];
+        if(isset($attrs['RESETHREF'])) $this->currentKeyword->reset_href = $attrs['RESETHREF'];
+    }
+
+    // HITS: the result-set counters are stored directly on the data object.
+    function startHits(&$attrs){
+        $this->data->nmatches = $attrs['NMATCHES'];
+        $this->data->nhits = $attrs['NHITS'];
+        if(isset($attrs['INTERRUPTED'])) $this->data->interrupted = $attrs['INTERRUPTED'];
+        $this->data->last = $attrs['LAST'];
+        $this->data->end = $attrs['END'];
+        $this->data->start = $attrs['START'];
+    }
+
+    function startHit(&$attrs){
+        $this->currentHit->url = $attrs['URL'];
+        $this->currentHit->score = $attrs['SCORE'];
+    }
+
+    function startHitField(&$attrs){
+        $this->currentHitField->name = $attrs['NAME'];
+        if(isset($attrs['VALUE'])) $this->currentHitField->value = utf8_decode($attrs['VALUE']);
+    }
+
+    function startTextSeg(&$attrs){
+        $this->currentTextSegment->setHighlighted($attrs['HIGHLIGHTED']);
+    }
+    function startTextCut(&$attrs){}
+
+    // NOTE(review): DISPLAY is stored without utf8_decode() here, unlike the
+    // other display/title attributes - confirm whether that is intended.
+    function startSpellingSuggestionVariant(&$attrs){
+        $this->currentSpelling->setDisplay($attrs['DISPLAY']);
+        $this->currentSpelling->setQueryHref($attrs['QUERY']);
+    }
+
+    function startGroup(&$attrs){
+        $this->currentGroup->setTitle(utf8_decode($attrs['TITLE']));
+        $this->currentGroup->setClipped($attrs['CLIPPED']);
+        $this->currentGroup->setCount($attrs['COUNT']);
+        $this->currentGroup->setBrowsed($attrs['BROWSED']);
+        if(isset($attrs['CLIPHREF'])) $this->currentGroup->setClipHref($attrs['CLIPHREF']);
+        if(isset($attrs['RESETHREF'])) $this->currentGroup->setResetHref($attrs['RESETHREF']);
+    }
+
+    function startCategory(&$attrs){
+        $this->currentCategory->name = $attrs['NAME'];
+        $this->currentCategory->display = utf8_decode($attrs['DISPLAY']);
+        $this->currentCategory->count = $attrs['COUNT'];
+        $this->currentCategory->automatic = $attrs['AUTOMATIC'];
+        // The first four characters of REFINEHREF are replaced by '_c=%2B'.
+        if(isset($attrs['REFINEHREF'])) $this->currentCategory->refine_href = '_c=%2B'.substr($attrs['REFINEHREF'],4);
+        if(isset($attrs['EXCLUDEHREF'])) $this->currentCategory->exclude_href = $attrs['EXCLUDEHREF'];
+        if(isset($attrs['RESETHREF'])) $this->currentCategory->reset_href = $attrs['RESETHREF'];
+        $this->currentCategory->cref = $attrs['CREF'];
+        $this->currentCategory->gid = $attrs['GID'];
+        $this->currentCategory->gcount = $attrs['GCOUNT'];
+    }
+
+    function startSearch(&$attrs){}
+
+    // Element-start callback: dispatches on the element name, which is
+    // compared in upper case; elements without a handler are ignored.
+    function startElement($parser, $name, $attrs) {
+        switch ($name) {
+            // query parameters
+            case 'QUERY':          $this->startQuery($attrs); break;
+            case 'QUERYTERM':      $this->startQueryTerm($attrs); break;
+            case 'QUERYPARAMETER': $this->startQueryParameter($attrs); break;
+            // keywords
+            case 'KEYWORD':        $this->startKeyword($attrs); break;
+            // results
+            case 'HITS':           $this->startHits($attrs); break;
+            case 'HIT':            $this->startHit($attrs); break;
+            case 'HITFIELD':       $this->startHitField($attrs); break;
+            case 'TEXTSEG':        $this->startTextSeg($attrs); break;
+            case 'TEXTCUT':        $this->startTextCut($attrs); break;
+            // spelling suggestions
+            case 'SPELLINGSUGGESTIONVARIANT': $this->startSpellingSuggestionVariant($attrs); break;
+            // categories used to refine the search
+            case 'GROUP':          $this->startGroup($attrs); break;
+            case 'CATEGORY':       $this->startCategory($attrs); break;
+            case 'SEARCH':         $this->startSearch($attrs); break;
+        }
+    }
+
+    // The end* handlers below hand the finished object to its parent and then
+    // start from a fresh instance instead of calling clear() on the one just
+    // stored: where objects are passed by handle (PHP 5 and later), clear()
+    // would also wipe the object that was stored a moment ago.
+    function endQuery(){
+        $this->data->query = $this->currentQuery;
+        $this->currentQuery = new ExaleadQuery();
+    }
+    function endQueryTerm(){
+        $this->currentQuery->addTerm($this->currentQueryTerm);
+        $this->currentQueryTerm = new ExaleadQueryTerm();
+    }
+    function endQueryParameter(){
+        $this->currentQuery->addParameter($this->currentQueryParameter);
+        $this->currentQueryParameter = new ExaleadQueryParameter();
+    }
+    function endKeyword(){
+        $this->data->addKeyword($this->currentKeyword);
+        $this->currentKeyword = new ExaleadKeyword();
+    }
+    function endHits(){
+    }
+    function endHit(){
+        $this->data->addHit($this->currentHit);
+        $this->currentHit = new ExaleadHit();
+    }
+    function endHitField(){
+        $this->currentHit->addHitField($this->currentHitField);
+        $this->currentHitField = new ExaleadHitField();
+    }
+    function endTextSeg(){
+        $this->currentHitField->addTextSegment($this->currentTextSegment);
+        $this->currentTextSegment = new ExaleadTextSegment();
+    }
+    function endTextCut(){
+        $this->currentHitField->setHasTextCut(true);
+    }
+    function endSpellingSuggestionVariant(){
+        $this->data->addSpelling($this->currentSpelling);
+        $this->currentSpelling = new ExaleadSpelling();
+    }
+    function endGroup(){
+        $this->data->addGroup($this->currentGroup);
+        $this->currentGroup = new ExaleadGroup();
+    }
+    function endCategory(){
+        $this->currentGroup->addCategory($this->currentCategory);
+        $this->currentCategory = new ExaleadCategory();
+    }
+    function endSearch(){
+    }
+
+
+    // Element-end callback: same dispatch as startElement(), on the closing tag.
+    function endElement($parser, $name) {
+        switch ($name) {
+            case 'QUERY':          $this->endQuery(); break;
+            case 'QUERYTERM':      $this->endQueryTerm(); break;
+            case 'QUERYPARAMETER': $this->endQueryParameter(); break;
+            case 'KEYWORD':        $this->endKeyword(); break;
+            case 'HITS':           $this->endHits(); break;
+            case 'HIT':            $this->endHit(); break;
+            case 'HITFIELD':       $this->endHitField(); break;
+            case 'TEXTSEG':        $this->endTextSeg(); break;
+            case 'TEXTCUT':        $this->endTextCut(); break;
+            // spelling suggestions
+            case 'SPELLINGSUGGESTIONVARIANT': $this->endSpellingSuggestionVariant(); break;
+            // categories used to refine the search
+            case 'GROUP':          $this->endGroup(); break;
+            case 'CATEGORY':       $this->endCategory(); break;
+            case 'SEARCH':         $this->endSearch(); break;
+        }
+    }
+
+    // Character-data callback: decodes the text and appends it to the current
+    // text segment, whatever element is open at that point.
+    function parsePCData($parser, $text){
+        $this->currentTextSegment->append(utf8_decode($text));
+    }
+
+}
+
+?>
-- 
2.1.4