From 94f6c381d91fe4eedbc4acb58fd44a119a09b091 Mon Sep 17 00:00:00 2001 From: x2003bruneau Date: Thu, 15 Mar 2007 15:09:05 +0000 Subject: [PATCH] Close #132 and add soundex search for quick search git-svn-id: svn+ssh://murphy/home/svn/platal/trunk@1574 839d8a87-29fc-0310-9880-83ba4fa771e5 --- ChangeLog | 2 + bin/cron/recherche.php | 35 ------- bin/search.rebuild_db.php | 7 +- include/globals.inc.php.in | 2 +- include/user.func.inc.php | 24 ++++- include/xorg.misc.inc.php | 62 +++++++----- modules/search/classes.inc.php | 213 ++++++++++++++++++++-------------------- templates/search/quick.form.tpl | 10 +- upgrade/0.9.14/06_search.sql | 4 + 9 files changed, 182 insertions(+), 177 deletions(-) delete mode 100644 bin/cron/recherche.php create mode 100644 upgrade/0.9.14/06_search.sql diff --git a/ChangeLog b/ChangeLog index 0051eae..cf56834 100644 --- a/ChangeLog +++ b/ChangeLog @@ -15,6 +15,7 @@ New: * Search: - Search Engine IE7 compatible -FRU + - Add soundex search for quick search -FRU * Xnetgrp: - Direct access to the group forum (if exists) -FRU @@ -49,6 +50,7 @@ Bug/Wish: - #638: Sort trombi by promo then name -FRU * Search: + - #132: Better french soundex -FRU - #173: Show only countries where there is a user -FRU * Xnet: diff --git a/bin/cron/recherche.php b/bin/cron/recherche.php deleted file mode 100644 index 52eb2eb..0000000 --- a/bin/cron/recherche.php +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/php5 -q -next()) { - XDB::execute( - "INSERT INTO recherche_soundex (matricule,nom1_soundex,nom2_soundex,nom3_soundex,prenom1_soundex,prenom2_soundex,promo) - VALUES ({?},{?},{?},{?},{?},{?},{?})", - $matricule, soundex_fr($nom1), soundex_fr($nom2), soundex_fr($nom3), soundex_fr($prenom1), soundex_fr($prenom2), $promo); -} - -// vim:set et sw=4 sts=4 sws=4 foldmethod=marker enc=utf-8: -?> diff --git a/bin/search.rebuild_db.php b/bin/search.rebuild_db.php index 0509c92..829d032 100755 --- a/bin/search.rebuild_db.php +++ b/bin/search.rebuild_db.php @@ -24,9 +24,12 @@ require('./connect.db.inc.php'); require('user.func.inc.php'); ini_set('memory_limit', "16M"); -XDB::execute('DELETE FROM search_name'); +$globals->debug = 0; // Do not store backtraces -$res = XDB::iterRow('SELECT auth_user_md5.user_id, nom, prenom, nom_usage, profile_nick FROM auth_user_md5 LEFT JOIN auth_user_quick USING(user_id)'); +XDB::execute('DELETE FROM search_name'); +$res = XDB::iterRow('SELECT auth_user_md5.user_id, nom, prenom, nom_usage, profile_nick + FROM auth_user_md5 + LEFT JOIN auth_user_quick USING(user_id)'); $i = 0; $muls = array(1, 1, 1, 0.2); while ($tmp = $res->next()) { diff --git a/include/globals.inc.php.in b/include/globals.inc.php.in index fa8646a..82d2500 100644 --- a/include/globals.inc.php.in +++ b/include/globals.inc.php.in @@ -46,7 +46,7 @@ class PlatalGlobals $this->session = $sess; $base = empty($_SERVER['HTTPS']) ? 'http://' : 'https://'; - $this->baseurl = trim($base.$_SERVER['SERVER_NAME'].dirname($_SERVER['PHP_SELF']), '/'); + $this->baseurl = @trim($base.$_SERVER['SERVER_NAME'].dirname($_SERVER['PHP_SELF']), '/'); $this->spoolroot = dirname(dirname(__FILE__)); $this->read_config(); diff --git a/include/user.func.inc.php b/include/user.func.inc.php index 6def026..ac14221 100644 --- a/include/user.func.inc.php +++ b/include/user.func.inc.php @@ -682,7 +682,8 @@ function set_user_details($uid, $details) { // }}} // {{{ function _user_reindex -function _user_reindex($uid, $keys, $muls) { +function _user_reindex($uid, $keys, $muls) +{ foreach ($keys as $i => $key) { if ($key == '') { continue; @@ -693,10 +694,29 @@ function _user_reindex($uid, $keys, $muls) { while ($toks) { $token = strtolower(replace_accent(array_pop($toks) . $token)); $score = ($toks ? 0 : 10 + $first) * $muls[$i]; - XDB::execute("REPLACE INTO search_name (token, uid, score) VALUES({?}, {?}, {?})", $token, $uid, $score); + XDB::execute("REPLACE INTO search_name (token, uid, soundex, score) + VALUES ({?}, {?}, {?}, {?})", + $token, $uid, soundex_fr($token), $score); $first = 0; } } + $res = XDB::query("SELECT nom_ini, nom, nom_usage, prenom_ini, prenom, promo, matricule + FROM auth_user_md5 + WHERE user_id = {?}", $uid); + if (!$res->numRows()) { + unset($res); + return; + } + $array = $res->fetchOneRow(); + $promo = intval(array_pop($array)); + $mat = array_shift($array); + array_walk($array, 'soundex_fr'); + XDB::execute("REPLACE INTO recherche_soundex + SET matricule = {?}, nom1_soundex = {?}, nom2_soundex= {?}, nom3_soundex = {?}, + prenom1_soundex = {?}, prenom2_soundex= {?}, promo = {?}", + $mat, $array[0], $array[1], $array[2], $array[3], $array[4], $promo); + unset($res); + unset($array); } // }}} diff --git a/include/xorg.misc.inc.php b/include/xorg.misc.inc.php index 0b3b239..86a8785 100644 --- a/include/xorg.misc.inc.php +++ b/include/xorg.misc.inc.php @@ -169,43 +169,50 @@ function make_username($prenom,$nom) { C'est une bonne démonstration de la force des expressions régulières compatible Perl. trouvé sur http://expreg.com/voirsource.php?id=40&type=Chaines%20de%20caract%E8res */ function soundex_fr($sIn) -{ +{ + static $convVIn, $convVOut, $convGuIn, $convGuOut, $accents; + if (!isset($convGuIn)) { + global $uc_convert; + $convGuIn = array( 'GUI', 'GUE', 'GA', 'GO', 'GU', 'SC', 'CA', 'CO', 'CU', 'QU', 'Q', 'CC', 'CK', 'G', 'ST', 'PH'); + $convGuOut = array( 'KI', 'KE', 'KA', 'KO', 'KU', 'SK', 'KA', 'KO', 'KU', 'K', 'K', 'K', 'K', 'J', 'T', 'F'); + $convVIn = array( '/E?(AU)/', '/([EA])?I([NM])([^EAIOUY]|$)/', '/[AE]O?[NM]([^AEIOUY]|$)/', + '/[EA][IY]([NM]?[^NM]|$)/', '/(^|[^OEUIA])(OEU|OE|EU)([^OEUIA]|$)/', '/OI/', + '/(ILLE?|I)/', '/O(U|W)/', '/O[NM]($|[^EAOUIY])/', + '/([^AEIOUY])[^AEIOUYLKTP]([UAO])([^AEIOUY])/', '/([^AEIOUY]|^)([AUO])[^AEIOUYKTP]([^AEIOUY])/', '/^KN/', + '/^PF/', '/(SC|S|C)H/', '/^C|C$/', + '/C/', '/Z$/', '/(?!^)Z+/'); + $convVOut = array( 'O', '1\3', 'A\1', + 'E\1', '\1\2', 'O', + 'Y', 'U', 'O\1', + '\1\2\3', '\1\2\3', 'N', + 'F', '9', 'K', + 'S', 'SE', 'S'); + $accents = $uc_convert; + $accents['Ç'] = 'S'; + $accents['¿'] = 'E'; + } // Si il n'y a pas de mot, on sort immédiatement if ( $sIn === '' ) return ' '; // On met tout en minuscule $sIn = strtoupper( $sIn ); // On supprime les accents - global $uc_convert; - $accents = $uc_convert; - $accents['Ç'] = 'S'; - $accents['¿'] = 'E'; $sIn = strtr( $sIn, $accents); // On supprime tout ce qui n'est pas une lettre $sIn = preg_replace( '`[^A-Z]`', '', $sIn ); // Si la chaîne ne fait qu'un seul caractère, on sort avec. if ( strlen( $sIn ) === 1 ) return $sIn . ' '; // on remplace les consonnances primaires - $convIn = array( 'GUI', 'GUE', 'GA', 'GO', 'GU', 'CA', 'CO', 'CU', 'Q', 'CC', 'CK' ); - $convOut = array( 'KI', 'KE', 'KA', 'KO', 'K', 'KA', 'KO', 'KU', 'K', 'K', 'K' ); - $sIn = str_replace( $convIn, $convOut, $sIn ); - // on remplace les voyelles sauf le Y et sauf la première par A - $sIn = preg_replace( '`(?search->result_fields = ' - u.user_id, u.promo, u.matricule, u.matricule_ax, - if(u.nom_usage=\'\', u.nom, u.nom_usage) AS NomSortKey, - u.nom_usage,u.date, - u.deces!=0 AS dcd,u.deces, - u.perms IN (\'admin\',\'user\', \'disabled\') AS inscrit, - u.perms != \'pending\' AS wasinscrit, - FIND_IN_SET(\'femme\', u.flags) AS sexe, - a.alias AS forlife, - ad0.text AS app0text, ad0.url AS app0url, ai0.type AS app0type, - ad1.text AS app1text, ad1.url AS app1url, ai1.type AS app1type, - es.label AS secteur, ef.fonction_fr AS fonction, - IF(n.nat=\'\',n.pays,n.nat) AS nat, n.a2 AS iso3166, - COUNT(em.email) > 0 AS actif,'; + u.user_id, u.promo, u.matricule, u.matricule_ax, + if(u.nom_usage=\'\', u.nom, u.nom_usage) AS NomSortKey, + u.nom_usage,u.date, + u.deces!=0 AS dcd,u.deces, + u.perms IN (\'admin\',\'user\', \'disabled\') AS inscrit, + u.perms != \'pending\' AS wasinscrit, + FIND_IN_SET(\'femme\', u.flags) AS sexe, + a.alias AS forlife, + ad0.text AS app0text, ad0.url AS app0url, ai0.type AS app0type, + ad1.text AS app1text, ad1.url AS app1url, ai1.type AS app1type, + es.label AS secteur, ef.fonction_fr AS fonction, + IF(n.nat=\'\',n.pays,n.nat) AS nat, n.a2 AS iso3166, + COUNT(em.email) > 0 AS actif,'; // hide private information if not logged if (S::logged()) - $globals->search->result_fields .=' - q.profile_web AS web, - q.profile_mobile AS mobile, - q.profile_freetext AS freetext, - adr.city, gp.pays AS countrytxt, gr.name AS region, - e.entreprise,'; + $globals->search->result_fields .=' + q.profile_web AS web, + q.profile_mobile AS mobile, + q.profile_freetext AS freetext, + adr.city, gp.pays AS countrytxt, gr.name AS region, + e.entreprise,'; else - $globals->search->result_fields .=" - IF(q.profile_web_pub='public', q.profile_web, '') AS web, - IF(q.profile_mobile_pub='public', q.profile_mobile, '') AS mobile, - IF(q.profile_freetext_pub='public', q.profile_freetext, '') AS freetext, - IF(adr.pub='public', adr.city, '') AS city, - IF(adr.pub='public', gp.pays, '') AS countrytxt, - IF(adr.pub='public', gr.name, '') AS region, - IF(e.pub='public', e.entreprise, '') AS entreprise,"; + $globals->search->result_fields .=" + IF(q.profile_web_pub='public', q.profile_web, '') AS web, + IF(q.profile_mobile_pub='public', q.profile_mobile, '') AS mobile, + IF(q.profile_freetext_pub='public', q.profile_freetext, '') AS freetext, + IF(adr.pub='public', adr.city, '') AS city, + IF(adr.pub='public', gp.pays, '') AS countrytxt, + IF(adr.pub='public', gr.name, '') AS region, + IF(e.pub='public', e.entreprise, '') AS entreprise,"; $globals->search->result_where_statement = ' - LEFT JOIN applis_ins AS ai0 ON (u.user_id = ai0.uid AND ai0.ordre = 0) - LEFT JOIN applis_def AS ad0 ON (ad0.id = ai0.aid) - LEFT JOIN applis_ins AS ai1 ON (u.user_id = ai1.uid AND ai1.ordre = 1) - LEFT JOIN applis_def AS ad1 ON (ad1.id = ai1.aid) - LEFT JOIN entreprises AS e ON (e.entrid = 0 AND e.uid = u.user_id) - LEFT JOIN emploi_secteur AS es ON (e.secteur = es.id) - LEFT JOIN fonctions_def AS ef ON (e.fonction = ef.id) - LEFT JOIN geoloc_pays AS n ON (u.nationalite = n.a2) - LEFT JOIN adresses AS adr ON (u.user_id = adr.uid AND FIND_IN_SET(\'active\',adr.statut)) - LEFT JOIN geoloc_pays AS gp ON (adr.country = gp.a2) - LEFT JOIN geoloc_region AS gr ON (adr.country = gr.a2 AND adr.region = gr.region) - LEFT JOIN emails AS em ON (em.uid = u.user_id AND em.flags = \'active\')'; + LEFT JOIN applis_ins AS ai0 ON (u.user_id = ai0.uid AND ai0.ordre = 0) + LEFT JOIN applis_def AS ad0 ON (ad0.id = ai0.aid) + LEFT JOIN applis_ins AS ai1 ON (u.user_id = ai1.uid AND ai1.ordre = 1) + LEFT JOIN applis_def AS ad1 ON (ad1.id = ai1.aid) + LEFT JOIN entreprises AS e ON (e.entrid = 0 AND e.uid = u.user_id) + LEFT JOIN emploi_secteur AS es ON (e.secteur = es.id) + LEFT JOIN fonctions_def AS ef ON (e.fonction = ef.id) + LEFT JOIN geoloc_pays AS n ON (u.nationalite = n.a2) + LEFT JOIN adresses AS adr ON (u.user_id = adr.uid AND FIND_IN_SET(\'active\',adr.statut)) + LEFT JOIN geoloc_pays AS gp ON (adr.country = gp.a2) + LEFT JOIN geoloc_region AS gr ON (adr.country = gr.a2 AND adr.region = gr.region) + LEFT JOIN emails AS em ON (em.uid = u.user_id AND em.flags = \'active\')'; // }}} // {{{ function display_lines() @@ -117,7 +117,7 @@ class ThrowError class SField { // {{{ properties - + /** le nom du champ dans le formulaire HTML */ var $fieldFormName; /** champs de la bdd correspondant à ce champ sous forme d'un tableau */ @@ -153,7 +153,7 @@ class SField // }}} // {{{ function get_where_statement() - + /** récupérer la clause correspondant au champ dans la clause WHERE de la requête * on parcourt l'ensemble des champs de la bdd de $fieldDbName et on associe * à chacun d'entre eux une clause spécifique @@ -169,7 +169,7 @@ class SField // }}} // {{{ function get_order_statement() - + /** récupérer la clause correspondant au champ dans la clause ORDER BY de la requête * utilisé par exemple pour placer d'abord le nom égal à la requête avant les approximations */ function get_order_statement() @@ -208,7 +208,7 @@ class SField class QuickSearch extends SField { // {{{ properties - + /** stores tokens */ var $strings; /** stores numerical ranges */ @@ -216,7 +216,7 @@ class QuickSearch extends SField // }}} // {{{ constructor - + function QuickSearch($_fieldFormName) { $this->fieldFormName = $_fieldFormName; @@ -231,62 +231,67 @@ class QuickSearch extends SField function isempty() { - return empty($this->strings) && empty($this->ranges); + return empty($this->strings) && empty($this->ranges); } // }}} // {{{ function get_request() - + function get_request() { - parent::get_request(); - $s = replace_accent(trim($this->value)); - $s = preg_replace('!\d+!', ' ', $s); + parent::get_request(); + $s = replace_accent(trim($this->value)); + $s = preg_replace('!\d+!', ' ', $s); $s = str_replace('*','%',$s); - $this->strings = preg_split("![^a-zA-Z%]+!",$s, -1, PREG_SPLIT_NO_EMPTY); - - $s = trim($this->value); - $s = preg_replace('! *- *!', '-', $s); - $s = preg_replace('!([<>]) *!', ' \1', $s); - $s = preg_replace('![^0-9\-><]!', ' ', $s); - $s = preg_replace('![<>\-] !', '', $s); - $ranges = preg_split('! +!', $s, -1, PREG_SPLIT_NO_EMPTY); - $this->ranges=Array(); - foreach ($ranges as $r) { - if (preg_match('!^([<>]\d{4}|\d{4}(-\d{4})?)$!', $r)) $this->ranges[] = $r; - } + $this->strings = preg_split("![^a-zA-Z%]+!",$s, -1, PREG_SPLIT_NO_EMPTY); + + $s = trim($this->value); + $s = preg_replace('! *- *!', '-', $s); + $s = preg_replace('!([<>]) *!', ' \1', $s); + $s = preg_replace('![^0-9\-><]!', ' ', $s); + $s = preg_replace('![<>\-] !', '', $s); + $ranges = preg_split('! +!', $s, -1, PREG_SPLIT_NO_EMPTY); + $this->ranges=Array(); + foreach ($ranges as $r) { + if (preg_match('!^([<>]\d{4}|\d{4}(-\d{4})?)$!', $r)) $this->ranges[] = $r; + } } // }}} // {{{ function get_where_statement() - + function get_where_statement() { - $where = Array(); - foreach ($this->strings as $i => $s) { - $t = str_replace('*', '%', $s).'%'; - $t = str_replace('%%', '%', $t); - $where[] = "sn$i.token LIKE '$t'"; - } - - $wherep = Array(); - foreach ($this->ranges as $r) { - if (preg_match('!^\d{4}$!', $r)) { - $wherep[] = "u.promo=$r"; - } elseif (preg_match('!^(\d{4})-(\d{4})$!', $r, $matches)) { - $p1=min(intval($matches[1]), intval($matches[2])); - $p2=max(intval($matches[1]), intval($matches[2])); - $wherep[] = "(u.promo>=$p1 AND u.promo<=$p2)"; - } elseif (preg_match('!^<(\d{4})!', $r, $matches)) { - $wherep[] = "u.promo<={$matches[1]}"; - } elseif (preg_match('!^>(\d{4})!', $r, $matches)) { - $wherep[] = "u.promo>={$matches[1]}"; - } - } - if (!empty($wherep)) { + $where = Array(); + foreach ($this->strings as $i => $s) { + if (Env::i('with_soundex')) { + $t = soundex_fr($s); + $where[] = "sn$i.soundex = '$t'"; + } else { + $t = str_replace('*', '%', $s).'%'; + $t = str_replace('%%', '%', $t); + $where[] = "sn$i.token LIKE '$t'"; + } + } + + $wherep = Array(); + foreach ($this->ranges as $r) { + if (preg_match('!^\d{4}$!', $r)) { + $wherep[] = "u.promo=$r"; + } elseif (preg_match('!^(\d{4})-(\d{4})$!', $r, $matches)) { + $p1=min(intval($matches[1]), intval($matches[2])); + $p2=max(intval($matches[1]), intval($matches[2])); + $wherep[] = "(u.promo>=$p1 AND u.promo<=$p2)"; + } elseif (preg_match('!^<(\d{4})!', $r, $matches)) { + $wherep[] = "u.promo<={$matches[1]}"; + } elseif (preg_match('!^>(\d{4})!', $r, $matches)) { + $wherep[] = "u.promo>={$matches[1]}"; + } + } + if (!empty($wherep)) { $where[] = '('.join(' OR ',$wherep).')'; } - return join(" AND ", $where); + return join(" AND ", $where); } // }}} @@ -294,14 +299,14 @@ class QuickSearch extends SField function get_select_statement() { $join = ""; - foreach ($this->strings as $i => $s) { + foreach ($this->strings as $i => $s) { $join .= "INNER JOIN search_name AS sn$i ON (u.user_id = sn$i.uid)\n"; } return $join; } // }}} // {{{ function get_order_statement() - + function get_order_statement() { return false; @@ -309,11 +314,11 @@ class QuickSearch extends SField // }}} // {{{ function get_score_statement - + function get_score_statement() { $sum = array('0'); - foreach ($this->strings as $i => $s) { + foreach ($this->strings as $i => $s) { $sum[] .= "SUM(sn$i.score + IF('$s'=sn$i.token,5,0))"; } return join('+', $sum).' AS score'; @@ -330,7 +335,7 @@ class QuickSearch extends SField class NumericSField extends SField { // {{{ constructor - + /** constructeur * (récupère la requête de l'utilisateur pour ce champ) */ function NumericSField($_fieldFormName) @@ -341,7 +346,7 @@ class NumericSField extends SField // }}} // {{{ function get_request() - + /** récupère la requête de l'utilisateur et échoue s'il ne s'agit pas d'un entier */ function get_request() { @@ -353,7 +358,7 @@ class NumericSField extends SField new ThrowError('Un champ numérique contient des caractères alphanumériques.'); } } - + // }}} } @@ -363,7 +368,7 @@ class NumericSField extends SField class RefSField extends SField { // {{{ properties - + var $refTable; var $refAlias; var $refCondition; @@ -385,7 +390,7 @@ class RefSField extends SField // }}} // {{{ function get_request() - + function get_request() { parent::get_request(); if ($this->value=='00' || $this->value=='0') { @@ -446,8 +451,8 @@ class RefSField extends SField // {{{ class RefSFieldMultipleTable class MapSField extends RefSField { - var $mapId; - + var $mapId; + function MapSField($_fieldFormName, $_fieldDbName='', $_refTable, $_refAlias, $_refCondition, $_mapId=false) { if ($_mapId === false) @@ -478,10 +483,10 @@ class MapSField extends RefSField class RefWithSoundexSField extends RefSField { // {{{ function compare() - + function compare() { - return "='".soundex_fr($this->value)."'"; + return "='".soundex_fr($this->value)."'"; } // }}} @@ -495,7 +500,7 @@ class RefWithSoundexSField extends RefSField class StringSField extends SField { // {{{ function get_request() - + /** récupère la requête de l'utilisateur et échoue si la chaîne contient des caractères * interdits */ function get_request() @@ -562,7 +567,7 @@ class StringSField extends SField class NameSField extends StringSField { // {{{ function get_single_where_statement() - + function get_single_where_statement($field) { $regexp = strtr(addslashes($this->value), '-*', '_%'); @@ -571,7 +576,7 @@ class NameSField extends StringSField // }}} // {{{ function get_order_statement() - + function get_order_statement() { if ($this->value!='' && $this->fieldResultName!='') { @@ -609,7 +614,7 @@ class StringWithSoundexSField extends StringSField class PromoSField extends SField { // {{{ properties - + /** opérateur de comparaison (<,>,=) de la promo utilisé pour ce champ de formulaire */ var $compareField; @@ -692,7 +697,7 @@ class PromoSField extends SField class SFieldGroup { // {{{ properties - + /** tableau des classes correspondant aux champs groupés */ var $fields; /** type de groupe : ET ou OU */ @@ -754,7 +759,7 @@ class SFieldGroup { return $f->get_url(); } - + // }}} // {{{ function get_select_statement() diff --git a/templates/search/quick.form.tpl b/templates/search/quick.form.tpl index f0eaec2..4bdf86b 100644 --- a/templates/search/quick.form.tpl +++ b/templates/search/quick.form.tpl @@ -37,11 +37,9 @@ - - mettre les fiches modifiées récemment en premier - {if $smarty.request.nonins} -
Chercher uniquement des non inscrits - {/if} + Activer la recherche par proximité sonore +
Mettre les fiches modifiées récemment en premier +
Chercher uniquement des non inscrits {else} @@ -49,7 +47,7 @@
{/if} - + diff --git a/upgrade/0.9.14/06_search.sql b/upgrade/0.9.14/06_search.sql new file mode 100644 index 0000000..e47a7c4 --- /dev/null +++ b/upgrade/0.9.14/06_search.sql @@ -0,0 +1,4 @@ +alter table search_name add column soundex char(4) not null; +alter table search_name add key soundex (soundex); + +# vim:set syntax=mysql: -- 2.1.4