#!/usr/bin/perl

use strict;
use warnings;

# Command-line driver.
#
# Arguments:
#   $ARGV[0]  tab-separated input file; only lines with exactly three
#             non-empty columns are used. Column 1 is kept as the token and
#             column 3 as its tag.
#   $ARGV[1]  pattern file, one tag pattern per line (interpolated as a regex
#             by extract_pos_patterns).
#
# For every pattern an output file "<input minus extension>_<pattern>.txt" is
# created (truncated if it exists); extract_pos_patterns appends to it.
my $guide = "Couldn't run the program: perl $0 file.cnr file";
die $guide unless @ARGV == 2;

open(my $INPUT, "<:encoding(UTF-8)", $ARGV[0]) or die "Couldn't open $ARGV[0]: $!";

open(my $PATTERNS, "<:encoding(UTF-8)", $ARGV[1]) or die "Couldn't open $ARGV[1]: $!";
my @POS_LIST = <$PATTERNS>;
close($PATTERNS);

# Strip line endings in place (the loop variable aliases the array element),
# then drop blank lines: an empty pattern would create a stray "<base>_.txt"
# file and an empty regex.
foreach my $POS (@POS_LIST) {
	chomp $POS;
	$POS =~ s/\r//;
}
@POS_LIST = grep { /\S/ } @POS_LIST;

my @TOKENS = (); # Tokens of the sentence being read
my @TAGS = ();   # Tags of the sentence being read (parallel to @TOKENS)

my $ctr_pos = 0; # Number of extracted pattern occurrences

# Output base name: the input path without its final extension, whatever its
# length (a fixed substr(..., 0, -4) only works for three-letter extensions).
# The look-behind keeps dot-files and directory separators intact.
my $base = $ARGV[0];
$base =~ s{(?<=[^/\\])\.[^./\\]*\z}{};

# Create (or truncate) one output file per pattern
my %FILE_LIST = ();
foreach my $POS (@POS_LIST) {
	# Build the file name: remove the literal text "[^\s]+" from the pattern
	# and turn spaces into hyphens.
	my $pattern = $POS;
	$pattern =~ s/\Q[^\s]+\E//g;
	$pattern =~ s/ /-/g;
	my $file = $base."_".$pattern.".txt";

	open(my $OUTPUT, ">:encoding(UTF-8)", $file) or die "Couldn't open $file: $!";
	close($OUTPUT) or die "Couldn't close $file: $!";

	$FILE_LIST{$POS} = $file;
}

while (my $line = <$INPUT>) {
	# Normalise the line ending before validating, so that a field made only
	# of "\r" is not accepted as a non-empty column.
	chomp $line;
	$line =~ s/\r//;
	next if $line !~ /^[^\t]+\t[^\t]+\t[^\t]+$/;

	if ($line !~ /PCTFORTE/) {
		my ($token, undef, $tag) = split(/\t/, $line);
		push(@TOKENS, $token);
		push(@TAGS, $tag);
	} else {
		# A line containing PCTFORTE closes the current sentence; the line
		# itself is not stored.
		extract_pos_patterns();
		@TOKENS = ();
		@TAGS = ();
	}
}

# Flush the last sentence when the input does not end with a PCTFORTE line;
# otherwise its tokens would be silently discarded.
if (@TAGS) {
	extract_pos_patterns();
	@TOKENS = ();
	@TAGS = ();
}

close($INPUT);

print "NOMBRE DE PATRONS EXTRAITS : $ctr_pos\n";
exit;

# -----------------------------------------------------------------

# Scans the sentence currently buffered in the file-scope arrays @TAGS and
# @TOKENS for every pattern of @POS_LIST and appends each matching token
# sequence (tokens joined by spaces, one match per line) to the file
# registered for that pattern in %FILE_LIST. Increments $ctr_pos once per
# match. Takes no arguments and returns nothing useful.
#
# Each pattern is interpolated as a regular expression and matched against
# the tags joined by single spaces; an invalid regex makes the program die.
#
# NOTE(review): matches are not anchored to tag boundaries, so a match may
# start or end in the middle of a tag, and matches overlapping a previous
# one are not reported. Confirm this is intended.
sub extract_pos_patterns {
	return unless @TAGS;

	# Tags are separated by exactly one space, so the number of spaces before
	# an offset is the index of the tag (and token) at that offset.
	my $TAGS_line = join(" ", @TAGS);

	foreach my $POS_line (@POS_LIST) {
		# Work on a cleaned copy so the lookup key matches the one stored in
		# %FILE_LIST, whether or not the list entry still has a line ending.
		my $pattern = $POS_line;
		chomp $pattern;
		$pattern =~ s/\r//;
		next if $pattern eq '';

		my $file = $FILE_LIST{$pattern};
		next unless defined $file;

		my $regex = qr/$pattern/;
		my $OUTPUT; # opened on the first match only

		while ($TAGS_line =~ /$regex/g) {
			# @- / @+ give the match offsets without the cost of $`.
			my ($start, $end) = ($-[0], $+[0]);

			# A zero-length match covers no tag at all.
			next if $start == $end;

			my $prefix = substr($TAGS_line, 0, $start);
			my $matched = substr($TAGS_line, $start, $end - $start);

			# Count the tags actually covered by the match, not the spaces
			# of the pattern source: they differ as soon as the pattern has
			# optional or alternative parts.
			my $extr_from = ($prefix =~ tr/ //);
			my $extr_to = $extr_from + ($matched =~ tr/ //);
			$extr_to = $#TOKENS if $extr_to > $#TOKENS;

			my @RSLT_TOKENS = @TOKENS[$extr_from..$extr_to];

			if (!defined $OUTPUT) {
				open($OUTPUT, ">>:encoding(UTF-8)", $file) or die "Couldn't open $file: $!";
			}
			print $OUTPUT "@RSLT_TOKENS\n";

			$ctr_pos++;
		}

		if (defined $OUTPUT) {
			# Buffered write errors only surface when the handle is closed.
			close($OUTPUT) or die "Couldn't close $file: $!";
		}
	}
}