#!/usr/bin/perl

# This program takes a directory and a date (year) as arguments, walks the directory
# recursively and processes certain files met during that tree traversal.
# In the program defined here, the processing consists of cleaning the text files whose
# names identify a rubric, appending them to one file per rubric, and finally
# concatenating all rubric files into leMonde-ph2-all-rub-<date>.txt

# Example call: perl parcoursRepertoire.pl monrepertoire/ AAAA

use strict;
use warnings;

# Main script.
#
# Usage: perl parcoursRepertoire.pl <directory> <date>
#
# Steps:
#   1. create one working file per rubric in the current directory, each
#      starting with a <RUBRIQUE="..."> header line;
#   2. let Show_Dir walk <directory>, which appends cleaned articles to
#      those rubric files;
#   3. concatenate the rubric files, in rubric order, into
#      leMonde-ph2-all-rub-<date>.txt.

# Without a directory the walk below could only fail with an obscure
# "can't open" message, so stop early with a usage line instead.
die "usage: $0 <directory> <date>\n" unless defined $ARGV[0];

# Drop one trailing backslash from the directory argument.
$ARGV[0] =~ s/\\$//;

# A missing date keeps producing "leMonde-ph2-all-rub-.txt", as before.
my $date   = defined $ARGV[1] ? $ARGV[1] : '';
my $sortie = "leMonde-ph2-all-rub-" . $date . ".txt";

# Rubric names; each one is also the name of that rubric's working file.
# They are package variables because Show_Dir refers to them by name.
our $rub1  = "ALAUNE";
our $rub2  = "INTERNATIONAL";
our $rub3  = "EUROPE";
our $rub4  = "FRANCE-SOCIETE";
our $rub5  = "OPINIONS";
our $rub6  = "ECONOMIE";
our $rub7  = "MEDIAS";
our $rub8  = "RENDEZ-VOUS";
our $rub9  = "SPORTS";
our $rub10 = "ENVIRONNEMENT-SCIENCES";
our $rub11 = "CULTURE";
our $rub12 = "LIVRES";
our $rub13 = "EXAMENS";
our $rub14 = "CINEMA";
our $rub15 = "VOYAGES";
our $rub16 = "TECHNOLOGIE";
our $rub17 = "POLITIQUE";
our $rub18 = "MUNICIPALES-2007";

# Same rubrics in output order, so the steps below can loop over them.
my @rubriques = (
    $rub1,  $rub2,  $rub3,  $rub4,  $rub5,  $rub6,
    $rub7,  $rub8,  $rub9,  $rub10, $rub11, $rub12,
    $rub13, $rub14, $rub15, $rub16, $rub17, $rub18,
);

# Step 1: (re)create every rubric file with its header line.
for my $rub (@rubriques) {
    open(my $fh, '>', $rub) or die "can't create $rub: $!\n";
    print {$fh} "<RUBRIQUE=\"$rub\">\n";
    close($fh) or die "can't write $rub: $!\n";
}

# Step 2: walk the tree; matching files are appended to the rubric files.
Show_Dir($ARGV[0]);

# Step 3: concatenate the rubric files in Perl rather than through
# "cat ... > file", so the date argument never reaches a shell.
open(my $out, '>', $sortie) or die "can't create $sortie: $!\n";
binmode($out);
for my $rub (@rubriques) {
    open(my $in, '<', $rub) or die "can't read $rub: $!\n";
    binmode($in);
    while (my $line = <$in>) {
        print {$out} $line;
    }
    close($in);
}
close($out) or die "can't write $sortie: $!\n";


# Recursively walk a directory and pass every recognised rubric page to
# nettoieFile together with the rubric file it belongs to.
#
# Parameter:
#   $path - directory to scan; sub-directories are scanned recursively.
#
# Reads the package variables $rub1 .. $rub18 (rubric file names), which
# must be set before the first call.
#
# Dies when a directory cannot be opened. Returns nothing useful.
sub Show_Dir {
    my ($path) = @_;

    # File-name fragment identifying a page => rubric file it feeds.
    # Fragments are compared as literal text (no regex), so the "." in
    # ".txt" only matches a real dot.
    my @rubrique_for = (
        [ '0,2-3208,1-0,0.txt',     $rub1  ],
        [ '0,2-3210,1-0,0.txt',     $rub2  ],
        [ '0,2-3214,1-0,0.txt',     $rub3  ],
        [ '0,2-3224,1-0,0.txt',     $rub4  ],
        [ '0,2-3232,1-0,0.txt',     $rub5  ],
        [ '0,2-3234,1-0,0.txt',     $rub6  ],
        [ '0,2-3236,1-0,0.txt',     $rub7  ],
        [ '0,2-3238,1-0,0.txt',     $rub8  ],
        [ '0,2-3242,1-0,0.txt',     $rub9  ],
        [ '0,2-3244,1-0,0.txt',     $rub10 ],
        [ '0,2-3246,1-0,0.txt',     $rub11 ],
        [ '0,2-3260,1-0,0.txt',     $rub12 ],
        [ '0,2-3404,1-0,0.txt',     $rub13 ],
        [ '0,2-3476,1-0,0.txt',     $rub14 ],
        [ '0,2-3546,1-0,0.txt',     $rub15 ],
        [ '0,2-651865,1-0,0.txt',   $rub16 ],
        [ '0,57-0,64-823353,0.txt', $rub17 ],
        [ '0,57-0,64-987718,0.txt', $rub18 ],
    );

    # Lexical handle: each recursion level owns its own directory handle.
    opendir(my $dh, $path) or die "can't open $path: $!\n";
    my @entries = readdir($dh);
    closedir($dh);

    foreach my $entry (@entries) {
        next if $entry =~ /^\.\.?$/;    # skip "." and ".."
        my $file = $path . "/" . $entry;

        if (-d $file) {
            Show_Dir($file);            # recursive call
        }
        elsif (-f $file) {
            # The fragment is searched anywhere in the full path, and every
            # table entry is tried, exactly like the former chain of ifs.
            foreach my $pair (@rubrique_for) {
                my ($fragment, $rub) = @$pair;
                nettoieFile($file, $rub) if index($file, $fragment) >= 0;
            }
        }
        # anything else (neither directory nor plain file) is ignored
    }

    return;
}

# Clean one text page and append the result to a rubric file.
#
# Parameters:
#   $file - path of the page to clean
#   $rub  - path of the rubric file that receives the cleaned lines
#
# For every line: a few HTML entities are decoded, known navigation and
# boilerplate fragments are removed, runs of spaces are squeezed to one
# space, and lines left empty (or holding only spaces) are dropped.
#
# The cleaned lines are appended directly to $rub; no scratch file in the
# current directory and no shell command are involved.
#
# Returns 1 on success. An unreadable $file is reported with a warning and
# skipped (returns nothing). Dies if $rub cannot be written.
sub nettoieFile {
    my ($file, $rub) = @_;

    # Fragments deleted from every line, applied in this order.
    my @boilerplate = (
        qr/&#91;/,
        qr/&#93;/,
        qr/Retournez en haut de la page/,
        qr/IFRAME: navigation_basse/,
        qr/Pour visualiser le Desk il faut avoir un navigateur qui affiche des/,
        qr/frames\. Le document dans cet frame se trouve ici\./,
        qr/IFRAME: gab_url_copyright/,
        qr/\(presidentielle2007_index_articles\&\)/,
        qr/\(index_articles\&\)/,
        qr/\(europe_articles\&\)/,
        qr/\(amerique_articles\&\)/,
        qr/_________________________________________________________________/,
        qr/\(fl-sq\.gif\)/,
        qr/Réagissez à cet article/,
        qr/Classez cet article/,
        qr/Citez cet article sur votre blog/,
        qr/Recommandez cet article/,
        qr/Imprimez cet article/,
        qr/Envoyez cet article par e-mail/,
        qr/Cliquez pour agrandir l\'image/,
        qr/\(1024_lire\.gif\)/,
        qr/1 \| 2 \| 3 \| suivant \(fl-rg\.gif\)/,
        qr/IFRAME: pangora/,
    );

    my $in;
    unless (open($in, '<', $file)) {
        warn "can't read $file: $!\n";
        return;
    }
    open(my $out, '>>', $rub) or die "can't append to $rub: $!\n";

    while (my $line = <$in>) {
        # Doubly-encoded quotes first, before the bare &#38; is decoded below.
        $line =~ s/&#38;#39;/\'/g;
        $line =~ s/&#233;/é/g;
        $line =~ s/&#234;/ê/g;
        $line =~ s/&#38;#34;/\"/g;

        foreach my $noise (@boilerplate) {
            $line =~ s/$noise//g;
        }

        $line =~ s/\^//g;
        $line =~ s/¤/§/g;
        $line =~ s/&#38;/&/g;
        $line =~ s/[ ]+/ /g;

        # Keep only lines that still hold something besides spaces.
        print {$out} $line unless $line =~ /^[ ]*$/;
    }

    close($in);
    close($out) or die "can't write $rub: $!\n";
    return 1;
}
