#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Time::HiRes qw(gettimeofday tv_interval);
# ---------------------------------------------------------------------------
# Setup: command-line arguments, input files and shared state.
#
# Usage: <script> <tagged_corpus_file> <patterns_file>
#   $ARGV[0]  tagged corpus, one token per line (read line by line below)
#   $ARGV[1]  pattern file, one space-separated tag pattern per line
#
# Dies with an explicit message when an argument is missing or a file
# cannot be opened, instead of silently producing an empty report.
# ---------------------------------------------------------------------------
binmode STDOUT, ":utf8";

# Start time, used for the execution-time line written at the end of the report.
my $timepg = [gettimeofday];

die "Usage: $0 <talismane_file> <patterns_file>\n" unless @ARGV >= 2;
my $filetalismane = $ARGV[0];
my $filetermino   = $ARGV[1];

open my $fileT, "<:encoding(UTF-8)", $filetalismane
	or die "Cannot open '$filetalismane' for reading: $!\n";
open my $fileTer, "<:encoding(UTF-8)", $filetermino
	or die "Cannot open '$filetermino' for reading: $!\n";

# The pattern file is small enough to be slurped: one raw line per pattern.
my @TERMINO = <$fileTer>;
close $fileTer;

my %dicotmp       = ();   # tokens of the sentence being read: position => [columns]
my %dicoPatron    = ();   # pattern => { term => number of occurrences }
my $nbTerme       = 0;    # total number of matches, all patterns together
my $nbMotInPhrase = 0;    # number of tokens stored so far for the current sentence
# ---------------------------------------------------------------------------
# Pattern preparation.
#
# Each usable line of the pattern file is turned ONCE into a compiled regex
# (the original code rebuilt it for every sentence). A pattern such as
# "A B" becomes  [^ ]+/\bA [^ ]+/\bB , i.e. a run of "form/TAG" tokens.
#
#  * Surrounding whitespace (newline, CR of CRLF files) is trimmed, and the
#    trimmed text is used as the key in %dicoPatron.
#  * Blank lines are skipped: an empty pattern would match at every
#    whitespace of every sentence and count empty "terms".
#  * The trailing (?=\s) forces the LAST tag to be matched in full; without
#    it the last tag was only prefix-matched (e.g. "P" also matched "PONCT")
#    while the inner tags, followed by a literal space, were matched exactly.
#  * The whole expression is a lookahead, so the match is zero-length and
#    /g can find overlapping terms.
#
# NOTE: pattern tokens are interpolated as regular expressions, so a regex
# metacharacter inside a tag (for instance the "+" of a tag like "P+D") must
# be escaped in the pattern file.
# ---------------------------------------------------------------------------
my @patronsCompiles;
foreach my $patron (@TERMINO) {
	(my $clePatron = $patron) =~ s/\A\s+|\s+\z//g;
	next if $clePatron eq '';
	(my $motif = $clePatron) =~ s{([^ ]+)}{[^ ]+/\\b$1}g;
	push @patronsCompiles, [ $clePatron, qr/(?=\s($motif)(?=\s))/ ];
}

# ---------------------------------------------------------------------------
# Corpus reading.
#
# Lines look like "<index>\t<col0>\t<col1>\t<col2>...". Tokens are stored
# until a line whose first column after the index starts with "§" is met;
# that line closes the sentence and triggers the extraction.
# NOTE(review): tokens read after the last "§" line are never processed.
# ---------------------------------------------------------------------------
while (my $ligne = <$fileT>) {
	# Skip marker lines, comment lines and empty lines.
	next if $ligne =~ /££/ or $ligne =~ /^##/ or $ligne =~ /^$/;

	if ($ligne !~ /^\d+\t§/) {
		# Ordinary token line. Lines that do not have the expected
		# "<index>\t<rest>" shape are ignored instead of being stored
		# with an undefined capture.
		next unless $ligne =~ /^\d+\t(.+)$/;
		my @listereste = split(/\t/, $1);
		$nbMotInPhrase++;
		$dicotmp{$nbMotInPhrase} = \@listereste;
	}
	else {
		# End of sentence: rebuild it as " form/TAG form/TAG ... ".
		# The leading and trailing spaces let the regexes anchor on \s.
		my $phrase = " ";
		for my $i (1 .. $nbMotInPhrase) {
			my ($mot, $pos) = @{ $dicotmp{$i} }[0, 2];
			$mot = '' unless defined $mot;
			$pos = '' unless defined $pos;   # line with fewer than 3 columns
			$phrase .= $mot . "/" . $pos . " ";
		}

		# Term extraction: try every pattern on the sentence.
		foreach my $entree (@patronsCompiles) {
			my ($clePatron, $regex) = @$entree;
			while ($phrase =~ /$regex/g) {
				my $terme = $1;
				$terme =~ s/\/[^ ]+//g;      # drop the "/TAG" part of each token
				$dicoPatron{$clePatron}->{$terme}++;
				$nbTerme++;
			}
		}

		# Get ready for the next sentence.
		%dicotmp       = ();
		$nbMotInPhrase = 0;
	}
}
close($fileT);
# ---------------------------------------------------------------------------
# Report: writes sortie_bao3.txt in the current directory.
#
# Layout: total number of matches, then for each pattern the terms found,
# most frequent first, as "<count>\t<term>", and finally the execution time.
# Patterns are listed in sorted order and equally frequent terms are ordered
# alphabetically, so two runs on the same input produce the same file
# (except for the timing line).
# Dies if the file cannot be opened or if the final flush/close fails.
# ---------------------------------------------------------------------------
my $fichierSortie = "sortie_bao3.txt";
open my $fileResu, ">:encoding(UTF-8)", $fichierSortie
	or die "Cannot open '$fichierSortie' for writing: $!\n";

print {$fileResu} "$nbTerme éléments trouvés\n";

foreach my $patron (sort keys %dicoPatron) {
	my $compteDe = $dicoPatron{$patron};   # term => count for this pattern
	print {$fileResu} "\nType de pattern: ".$patron." \n\n";

	my @termesTries = sort {
		$compteDe->{$b} <=> $compteDe->{$a}    # highest count first
			or $a cmp $b                        # stable order for ties
	} keys %$compteDe;

	foreach my $terme (@termesTries) {
		print {$fileResu} $compteDe->{$terme}."\t".$terme."\n";
	}
}

print {$fileResu} "\nScript execution time: " . tv_interval($timepg) . " seconds.";

# Buffered write errors only show up at close time, so check it.
close($fileResu)
	or die "Error while writing '$fichierSortie': $!\n";
