#!/usr/bin/perl

use strict;
use warnings;

# Message shown when the script is not called with exactly one argument
# (the path of the input file).
my $guide = "Couldn't run the program: perl $0 file.cnr";
die $guide unless @ARGV == 1;

# Read the whole input file (decoded as UTF-8) into memory: the extraction
# loop needs to look at the line following the current one.
my $input_path = $ARGV[0];
open(my $INPUT, "<:encoding(UTF-8)", $input_path)
    or die "Couldn't open $input_path: $!";   # name the file, like the output-side message
my @DATA = <$INPUT>;
close($INPUT);

my %PATTERN_LIST = ();  # Extracted patterns: "FORM FORM" sequence => occurrence count
my $ctr_pos = 0;        # Total number of pattern occurrences extracted

# The pattern searched for is NOUN ADJECTIVE: a line whose third
# tab-separated field starts with NC, immediately followed by a line whose
# third field starts with ADJ. The first field of each line is the form kept.
while (@DATA) {
    my $line = shift @DATA;
    chomp $line;
    $line =~ s/\r//;    # drop the carriage return of CRLF-terminated input

    # The current line must carry the NC tag.
    next unless $line =~ /^([^\t]+)\t[^\t]+\tNC/;
    my $noun = $1;

    # Peek at the following line without consuming it, so that it is still
    # examined as a potential pattern start on the next iteration.
    my $next_line = $DATA[0];
    next unless defined $next_line;   # the NC line was the last line of the file
    next unless $next_line =~ /^([^\t]+)\t[^\t]+\tADJ/;
    my $adjective = $1;

    $PATTERN_LIST{"$noun $adjective"}++;
    $ctr_pos++;
}

# Build the output file name: the input path without its final extension,
# followed by "_NC-ADJ.txt". Only a real extension is removed, so an input
# name without one (or with one that is not three characters long) is no
# longer truncated blindly.
my $file = $ARGV[0];
$file =~ s/\.[^.\/\\]+\z//;
$file = $file."_NC-ADJ.txt";

open(my $OUTPUT, ">:encoding(UTF-8)", $file) or die "Couldn't open $file: $!";

# One line per distinct pattern, "count<TAB>pattern", most frequent first.
# Ties are broken alphabetically so the output is reproducible between runs.
foreach my $pattern (sort {
        $PATTERN_LIST{$b} <=> $PATTERN_LIST{$a}
            or
        $a cmp $b
    } keys %PATTERN_LIST) {
    print $OUTPUT "$PATTERN_LIST{$pattern}\t$pattern\n";
}

# Buffered write errors only surface when the handle is closed.
close($OUTPUT) or die "Couldn't close $file: $!";

# Summary: total number of pattern occurrences extracted.
print "NOMBRE DE PATRONS EXTRAITS : $ctr_pos\n";
exit;