# Recovered from a unified diff whose line breaks were lost in extraction:
#   X-Git-Url: http://git.polytechnique.org/?a=blobdiff_plain;f=modules%2Ffusionax%2Fformation.pl;h=d10ce65f617213d5fc2d4e9eab46d2e4508fa9c7;hb=0efb08e688d7ba0de0a0fbf2116cd4d10791d598;hp=69a8923ce9b5bde23d2d123e65fdb1999d900650;hpb=023c46fb7f3015fc87744c5a8293342c2e569acd;p=platal.git
#   modules/fusionax/formation.pl, index 69a8923..d10ce65, hunk @@ -1632,3 +1632,33 @@
# Below is the post-image of that hunk: the unchanged context lines first,
# then the section the diff adds.

# Context lines of the hunk (end of the preceding section), left untouched.
close(FILE);
close(OUT);

# Formations_MD clean-up pass.
#
# Reads spool/fusionax/Formations_MD.txt as UTF-8, normalises every line and
# writes the result to spool/fusionax/Formations_MD_out.txt (UTF-8 as well).
#
# NOTE(review): the spool directory is derived from $0, so this only yields a
# sensible path when the script is invoked with a path ending in
# "modules/fusionax/formation.pl"; otherwise the substitution is a no-op and
# "spool/fusionax/" is appended to the script name itself.
$path = $0;
$path =~ s/modules\/fusionax\/formation\.pl//;
$path .= "spool/fusionax/";
$in = $path . "Formations_MD.txt";
$out = $path . "Formations_MD_out.txt";
# Include the resolved path and the OS error so a failure can be diagnosed.
open(FILE, "<:encoding(UTF-8)", $in) || die ("Formations_MD.txt failed to open ($in): $!");
open(OUT, ">:encoding(UTF-8)", $out) || die ("Formations_MD_out.txt failed to open ($out): $!");

# The readline operator was missing here ("while ()"), which loops forever
# without ever reading a line; iterate over the input handle instead.
while (<FILE>)
{
    # Strip the carriage return of CRLF line endings ($ matches before "\n").
    s/\r$//;
    # Dates removal: on "FO" records keep only the tag, the 8-character second
    # field and the third field; every following tab-separated column is
    # dropped. The replacement uses $1 (a "\1" there is a sed-ism).
    s/^(FO\t\w{8}\t(\w|\.|'|&| )+)\t.*$/$1/;
    # Trailing tabs and spaces removal.
    s/(\t| )*$//;
    # No space on either side of a tab, and runs of spaces collapsed to one.
    s/( \t|\t )/\t/g;
    s/ +/ /g;

    # Removes diploma. The dots are escaped so that only the literal
    # abbreviations match; each statement removes the first occurrence only.
    s/Doct\. de l'Ec\. polytechnique//;
    s/Doct\. de l'Ec\. Polytechnique//;
    s/Etudiante en Master de l'Ec\. polytechnique//;
    s/Etudiant en Master de l'Ec\. polytechnique//;

    print OUT $_;
}

close(FILE);
# Buffered write errors only surface when the output handle is closed.
close(OUT) || die ("Formations_MD_out.txt failed to close ($out): $!");