
use XML::XPath;
use XML::XPath::XMLParser;



# Recursively collects the regular files whose name ends in ".xml" below a
# directory.
#
# Parameters:
#   $path     - directory to scan. One trailing "/" is dropped before the
#               entry names are appended, so results never contain "//".
#   $allFiles - optional array reference used as accumulator. Matching paths
#               are pushed onto it; a new empty array is used when it is
#               not supplied.
#
# Returns the accumulator array reference (always defined, possibly empty).
# A directory that cannot be opened is reported with warn() and skipped, so
# one unreadable sub-directory does not abort the whole scan.
sub arborescence {
    my ($path, $allFiles) = @_;
    $path     = '' unless defined $path;
    $allFiles = [] unless defined $allFiles;

    # Open the path exactly as given, and strip the trailing "/" only from
    # the prefix used to build entry paths. This keeps "/" itself scannable
    # (the stripped prefix is then the empty string, giving "/name").
    my $dir = $path;
    $path =~ s{/$}{};

    my $dh;
    unless (opendir($dh, $dir)) {
        warn "repertoire $dir: $!\n";
        return $allFiles;
    }
    # Read every entry and close the handle before recursing, so that only
    # one directory handle is open at any time whatever the tree depth.
    my @files = readdir($dh);
    closedir($dh);

    foreach my $file (@files) {
        next if $file eq '.' || $file eq '..';    # skip self and parent
        my $full = $path."/".$file;

        if (-d $full) {
            # Sub-directory: descend, sharing the same accumulator.
            arborescence($full, $allFiles);
        }
        elsif (-f $full && $full =~ /\.xml$/) {
            # Keep regular XML files only.
            push @$allFiles, $full;
        }
    }
    return $allFiles;
}




#programme principal


# Entity text => replacement character, used to decode entities that are
# still present in the extracted description text.
#
# Accented letters are written as code points rather than as literal
# characters so that the values do not depend on the encoding this source
# file is saved in.
#
# Numeric references follow the character number: 232 is e-grave, 233 is
# e-acute and 234 is e-circumflex.
our %entite = (
    "&#232;"     => "\x{e8}",    # e-grave
    "&#233;"     => "\x{e9}",    # e-acute
    "&#234;"     => "\x{ea}",    # e-circumflex
    "&amp;#39;"  => "'",
    "&amp;#34;"  => "\"",
    "&#39;"      => "'",
    "&#34;"      => "\"",
    "&nbsp;"     => " ",
    "&lt;"       => "<",
);

    
# Numeric section code => section name, laid out as "code name" pairs.
# The code is searched for in input file names, and the name is used to
# build the per-section output file name "sortie<name>.xml".
our %rubriques = qw(
    3208     une
    3210     international
    3214     europe
    3224     societe
    3232     opinions
    3234     economie
    3236     medias
    3238     rendez-vous
    3242     sports
    3244     environnement-sciences
    3246     culture
    3260     livres
    3404     examens_2008
    3476     cinema
    3546     voyages
    651865   technologies
    823353   politique
    987718   municipales-cantonales_2008
);




    # Command-line arguments: the directory to scan, then the path of the
    # global output file.
    my $dirName = shift;
    my $FSORTIE = shift;
    die "usage: $0 repertoire fichier_sortie\n"
        unless defined $dirName && defined $FSORTIE;

    # (Re)create one output file per section, holding only the XML
    # declaration; the extraction pass appends to these files afterwards.
    foreach my $rubrique (values %rubriques) {
        my $output = "sortie".$rubrique.".xml";
        open(my $fileout, ">:encoding(UTF-8)", $output)
            or die "fichier $output: $!";
        print {$fileout} "<?xml version=\"1.0\" encoding =\"utf-8\"?>\n";
        # Close the handle that was just opened, and check it: buffered
        # write errors only surface here.
        close($fileout) or die "fichier $output: $!";
    }

    # Global output file. The bareword handle is kept because the rest of
    # the program prints to FSORTIE. It gets the same UTF-8 layer as the
    # per-section files so both kinds of output are encoded alike.
    open(FSORTIE, ">:encoding(UTF-8)", $FSORTIE)
        or die "fichier $FSORTIE: $!";

    # Walk the directory tree and collect the XML file names.
    my $allFilesName = [];    # array reference filled by arborescence()
    $allFilesName = arborescence($dirName, $allFilesName);
 

## version 2
# Extraction pass. For every XML file collected in $allFilesName, take the
# first <description> that follows each <item>/<title>, clean it up, then:
#   - append it, wrapped in <fichier>...</fichier>, to the output file of
#     every section whose numeric code occurs in the input file name;
#   - add it to $contenu unless that text is already present there.
# $contenu is written to the global output FSORTIE at the very end.

# One alternation over all entity keys, longest first so that a key which is
# a prefix of another can never shadow it. Decoding in a single pass means
# already-substituted text is never scanned again and the result does not
# depend on hash ordering.
my $entite_re = join '|',
    map  { quotemeta($_) }
    sort { length($b) <=> length($a) || $a cmp $b }
    keys %entite;

my $contenu = '';

foreach my $fName (@{ $allFilesName || [] }) {

    my $xp      = XML::XPath->new(filename => $fName);
    my $nodeset = $xp->find('//item/title/following-sibling::description[1]');

    # Open, once per input file, the output of every section whose code
    # occurs in the file name. Each entry is [ file name, handle ].
    my @sorties;
    foreach my $code (sort keys %rubriques) {
        next unless $fName =~ /\Q$code\E/;
        my $output = "sortie".$rubriques{$code}.".xml";
        open(my $sortie, ">>:encoding(UTF-8)", $output)
            or die "fichier $output: $!";
        push @sorties, [ $output, $sortie ];
    }

    foreach my $node ($nodeset->get_nodelist) {
        my $temp = XML::XPath->getNodeText($node);
        $temp = '' unless defined $temp;

        # Decode the known entities with their mapped character.
        $temp =~ s/($entite_re)/$entite{$1}/g if length $entite_re;

        # Remove the markup that is useless inside <description>:
        # <p>, <img ...> and <a ...> tags, opening or closing.
        $temp =~ s/<\/?(p|img |a\W)[^>]*>//g;
        chomp($temp);

        # Keep each text only once in the global content. The comparison is
        # a literal substring search: the text may contain characters that
        # are special in a regular expression.
        if (index($contenu, $temp) < 0) {
            $contenu .= $temp."\n";
        }

        # The element goes to the section files opened above for it.
        foreach my $sortie (@sorties) {
            print { $sortie->[1] } "<fichier>\n".$temp."\n</fichier>\n";
        }
    }

    foreach my $sortie (@sorties) {
        close($sortie->[1]) or die "fichier $sortie->[0]: $!";
    }
}
## end of version 2

print FSORTIE $contenu;
close(FSORTIE) or die "fichier de sortie: $!";
