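# Builds a word list from a text file and writes it out, one word per line.
# Two ways of sorting: alphabetically (the default) and by frequency
# (most frequent first).
# 1st argument: name of input file
# 2nd argument: name of output file
# 3rd argument (optional): string of flag letters, e.g. "fs":
#     f            follow each word by its count in the corpus
#     s            sort by frequency, not alphabetically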


# ---------------------------------------------------------------------
# Command-line handling and file setup.
#
# Sets the package globals used by the rest of the script:
#   $TheFile         path of the input file  (1st argument)
#   $TheMainOutFile  path of the output file (2nd argument)
#   $Options         flag string (3rd argument, '' when absent)
#   $PrintFreqs      the string 'TRUE' or 'FALSE'
#   INFILE, OUTFILE  open filehandles for reading / writing
# ---------------------------------------------------------------------
if ( scalar(@ARGV) < 2 )
{
    # Usage errors belong on STDERR and must produce a non-zero exit
    # status so that a calling shell or script can detect the failure.
    print STDERR "Not enough arguments given to operate; goodbye.\n";
    exit 1;
}

$TheFile        = $ARGV[0];
$TheMainOutFile = $ARGV[1];

# $ARGV[2] (scalar element), not @ARGV[2] (one-element slice).
# Default to the empty string so later pattern matches never see undef.
$Options = defined( $ARGV[2] ) ? $ARGV[2] : '';

# The flag is kept as the strings 'TRUE'/'FALSE' because the output
# stage compares it with "eq" against the string TRUE.
if ( $Options =~ m/f/ )
{
    $PrintFreqs = 'TRUE';
}
else
{
    $PrintFreqs = 'FALSE';
}

# Open the input first: if it is missing we must not have already
# truncated (or created) the output file.
# Three-argument open keeps characters in the file names (leading '>',
# '<', trailing '|', surrounding blanks) from being read as open modes.
open( INFILE, '<', $TheFile )
    or die "The file $TheFile could not be found: $!";
open( OUTFILE, '>', $TheMainOutFile )
    or die "The output file $TheMainOutFile could not be opened for writing: $!";

# Running totals. $WordCount and $LineCount are updated while reading;
# $Counter and $LongestWordlength are initialised here but not referenced
# elsewhere in the visible code.
$WordCount         = 0;
$LineCount         = 0;
$Counter           = 0;
$LongestWordlength = 0;



# ---------------------------------------------------------------------
# Read INFILE line by line and tally every word in %Frequency
# (key: cleaned, lower-cased word; value: number of occurrences).
# Also maintains $LineCount (lines read) and $WordCount (tokens seen).
# ---------------------------------------------------------------------
while ( defined( $TheLine = <INFILE> ) )
{
    chomp($TheLine);
    $LineCount++;

    # split ' ' breaks on any run of whitespace (blanks and tabs) and
    # discards leading whitespace, so no empty tokens are produced.
    # Every token is visited: the loop must not stop at a token that is
    # false in boolean context (such as "0"), which would silently drop
    # the remaining words of the line.
    foreach my $Token ( split( ' ', $TheLine ) )
    {
        $WordCount++;
        $Word = lc($Token);

        # NOTE(review): each substitution removes at most ONE character
        # (one trailing non-word character, then the first non-word
        # character anywhere, then the first digit). Tokens such as
        # "--word--" or "abc123" are therefore only partly cleaned.
        # Left unchanged because the intended rule is not stated -
        # confirm whether /g or a leading anchor was meant.
        $Word =~ s/\W$//;
        $Word =~ s/\W//;
        $Word =~ s/\d//;

        # A token made only of punctuation/digits can clean up to
        # nothing; do not record the empty string as a word.
        next if $Word eq '';

        $Frequency{$Word}++;
    }
}

# The input is fully consumed; release the handle.
close(INFILE);


# ---------------------------------------------------------------------
# Order the distinct words into @SortedList and report how many there
# are on STDOUT.
#   flag "s" present : by descending frequency
#   otherwise        : alphabetically (plain string sort)
# ---------------------------------------------------------------------
print "finished reading.\n";

if ( $Options =~ /s/ )
{
    # Words with equal counts are ordered alphabetically so that the
    # result does not depend on the arbitrary key order of the hash.
    @SortedList = sort {
        $Frequency{$b} <=> $Frequency{$a}
            or
        $a cmp $b
    } keys(%Frequency);
}
else
{
    @SortedList = sort keys(%Frequency);
}

# scalar(@list) is the element count; $#list is the index of the last
# element, i.e. one less (and -1 for an empty list).
$NumberOfWords = scalar(@SortedList);
print "\nNumber of words: ", $NumberOfWords;

# ---------------------------------------------------------------------
# Write the word list to OUTFILE in the order held by @SortedList.
# Each entry is written as "\n" followed by the word, so the file begins
# with an empty line and has no newline after the last entry; this
# layout is kept as-is for anything that already consumes these files.
# When $PrintFreqs is the string 'TRUE', a tab and the word's count
# follow the word.
# ---------------------------------------------------------------------
foreach my $Entry (@SortedList)
{
    print OUTFILE "\n", $Entry;

    if ( $PrintFreqs eq 'TRUE' )
    {
        print OUTFILE "\t", $Frequency{$Entry};
    }
}

# Output is buffered, so a failed write (disk full, handle never opened)
# may only become visible when the handle is closed - check it.
close(OUTFILE)
    or die "Could not finish writing the output file: $!";






