# 1st argument: name of input file
# 2nd argument: name of output file
# 3rd argument: v	for verbose output
#				r	for input list is reverse alphabetized (right to left)

$True =1; $False = 0;

if ( $#ARGV >=  1 )
{ 
            $TheFile = $ARGV[0];
            $TheMainOutFile = ">" . $ARGV[1];
} 

 

else 
{           print  "Not enough arguments given to operate; goodbye."; 
            exit;
}


open (OUTFILE, $TheMainOutFile);
open (INFILE, $TheFile) or die "The file $TheFile could not be found";

$True = 1; $False = 0;
if ( @ARGV > 2 )
{ 
	if ($ARGV[2] =~ /v/ ) 
	{
		$Verbose = $True;
		print "\n it's verbose\n";
	} 
		else 
	{ $Verbose = $False; 
	}


	if ($ARGV[2] =~ /r/ ) 
	{
		$Reverse = $True;	
	}
	else { $Reverse = $False; }

}
$WordCount=0;

%Frequency;

$LineCount =0;
$Counter = 0;
$LongestWordlength =0;
while (<INFILE>)	
{
	
	chomp ; 
	@Words = split (/\s/);
	if ($#Words >= 1) 
	{
		$Word = $Words[1]; # the first is being ignored -- it's presumably a line number.		
	} 
	else
	{
		$Word = $Words[0];
	}
	if ($Reverse == $False) { 	$Word .= "#"; } else { $Word = "#".$Word;}
	$LineCount =  $LineCount + 1;
	push @AlphabetizedList, $Word;
 }
 
print "finished reading.\n";
$NumberOfWords =  $#AlphabetizedList;
print "\nNumber of words: ", $NumberOfWords;
 

$Carat = 0;
$MaxLength   = 5;
$PreviousWord = "#"; 
for ($i = 0; $i < $NumberOfWords; $i++)
{
	$Word = $AlphabetizedList[$i];
	print OUTFILE "\n\n$Word (Previous word: $PreviousWord)";
	

	if ( $Reverse == $False )
	{
 		for ($j = 0 ; $j < length ($PreviousWord) && (  $j < length ($Word) ); $j++)
		{			
				if (  (substr $PreviousWord, $j, 1) ne (substr $Word, $j, 1) )
				{
 					$CommonPrefix = substr $Word, 0, $j;
					last;
				}
			}
	}
	else 
	{																		# REVERSE, for suffixes
		for ($j = 1 ; $j <= length ($PreviousWord) && (  $j <= length ($Word) ); $j++)		
		{
		
		if (  LetterFromEnd ($PreviousWord, $j) ne LetterFromEnd ($Word, $j) )
		{			
 			$CommonSuffix = substr $Word, length ($Word) - $j +1, $j-1;		
			last;
		}
		}
	}
	

	
	#Clear ZelligCount past the length of Common part:
	if ($Reverse == $False) 
	{
		while ( scalar (@ZelligCount) > length ($Word) ) 
		{ 
	 		pop @ZelligCount; 
		}
	}else
	{
		while ( scalar (@ZelligCount) > length ($CommonSuffix) ) 
		{
	 		shift @ZelligCount; 
		}
	}
	




	if ( $Reverse == $False )	#Normal, forward direction....
	{
		$StartPoint = length ($CommonPrefix)+1;
		$EndPoint = (length ($Word)) - 1;
		
		for ( $j = $EndPoint; $j >= $StartPoint; $j-- )
		{	$TempCount = 1;
			$CurrentLetterInThisPosition = substr $Word, $j, 1;
			$PrefixConsidered = substr $Word,0, $j;
			
			if ($Verbose)  {print OUTFILE "\n$PrefixConsidered  First successor variant: $CurrentLetterInThisPosition" ;}
			for ( $k = $i+1 ; $k < $NumberOfWords ; $k++ )
			{
				$TempWord = $AlphabetizedList[$k];
				if  (length $TempWord < $j )
				{
					
					if ( $Verbose )  { print OUTFILE " \n\t1. After $TempWord, we quit."; }
					last;		
				}    
				if ( (substr $TempWord, 0, $j) ne (substr $Word, 0, $j) ) 
				{	
					
					if ( $Verbose ) {print OUTFILE " \n\t2. $TempWord doesn't agree with $Word in $j places; we'll quit.";}
					last;		
				}
				if  ( $CurrentLetterInThisPosition eq substr $TempWord, $j, 1 ) 
				{ 
					next; 
				} 
				else
				{	$CurrentLetterInThisPosition = substr $TempWord, $j, 1;
					$TempCount++;
					if ( $Verbose ) {print OUTFILE " \n\t3. After $TempWord, we increment count in column $j to $TempCount."; }
				}
			}
			$ZelligCount[$j] = $TempCount;	
			print OUTFILE " Count: $TempCount " if ($VerboseFlag);		

		}
	} # end of forward case
	else 
	{				#reverse case:

		if ($Verbose) {print OUTFILE "\tCommon suffix = $CommonSuffix"; 
					   print OUTFILE "; Predecessor count for common suffix: ", "@ZelligCount";}
		
		$LengthOfCommonSuffix = length ($CommonSuffix);		
		
		for ($j = $LengthOfCommonSuffix+2; $j <= length($ Word ); $j++)	 	
		{	
			$TempCount = 1;
			$CurrentLetterInThisPosition = LetterFromEnd ($Word , $j);
			$SuffixConsidered = substr ($Word, length($Word)-$j+1);
			$p = length($Word) - $j +1; #p is the letter whose pred. variants we are counting
			if ($Verbose)  {print OUTFILE "\n$SuffixConsidered  First pred variant: $CurrentLetterInThisPosition" ;}
			
			
			for ( $k = $i+1 ; $k < $NumberOfWords ; $k++ )
			{	
				$TempWord = $AlphabetizedList[$k];
				if  (length $TempWord < $j )
				{
					if ( $Verbose )  { print OUTFILE " \n\t1. After $TempWord, we quit."; }			
					last;		
				}    
				
				
				if ( (substr $TempWord, length($TempWord) - $j+1) ne (substr $Word, length($Word) - $j+1 ) )				
				{	
					
					if ( $Verbose ) {print OUTFILE " \n\t2. $TempWord doesn't agree with $Word in final $j places; we'll quit.";}					
					last;		
				}
				if  ( $CurrentLetterInThisPosition eq LetterFromEnd( $TempWord, $j) ) 
				{	
					next; 
				} 
				else
				{	$CurrentLetterInThisPosition = LetterFromEnd( $TempWord, $j) ;
					$TempCount++;
					if ( $Verbose ) {print OUTFILE "\n\t3. After $TempWord, we increment count in position $p to $TempCount."; 									 
									 print OUTFILE " New letter in this position: $CurrentLetterInThisPosition";}
				}	
			}			
						
			unshift @ZelligCount, $TempCount;	
			#if ($Verbose)  { print OUTFILE "\nPredecessor count: "; for ( $n = 0; $n < length ($Word)-1; $n++) {print OUTFILE $n,":", $ZelligCount[$n], "   " ; } }
		
		}
		
		 

	}	# end of reverse case
	


	if ( $Reverse == $False ) 
	{
		
		
		print OUTFILE "\n";
		for ( $n = 0; $n < length ($Word)-1; $n++) {print OUTFILE substr ($Word,0,$n+1),":", $ZelligCount[$n+1], " " ; }
		
		for ($w = 1; $w < $EndPoint and $w < ( length $Word ) ; $w++)
		{					   
				if (  ( $ZelligCount[$w] < $ZelligCount[$w+1] ) and ( $ZelligCount[$w+1] > $ZelligCount[$w+2] ) )
				{
					$Prefix = substr $Word, 0, $w+1;
					print OUTFILE "\nBreak after: ", $Prefix;
					$Prefixes{$Prefix}++;
	
					$Suffix = substr $Word, $w+1;
					print OUTFILE " $Suffix";
					$Suffixes{$Suffix}++;
	
						
					
				}  
		}
	}	#end of forward case
	else 
	{
		print OUTFILE "\n";		
		for ( $n = 0; $n < length ($Word)-1; $n++) {print OUTFILE substr ($Word,$n+1),":", $ZelligCount[$n], " " ; }

		
		for ($w = 2; $w < length ($Word) - 2  ; $w++)
		{
				if (  ( $ZelligCount[$w] > $ZelligCount[$w+1] ) and ( $ZelligCount[$w] > $ZelligCount[$w-1] ) )
				{
					$Suffix = substr $Word, $w+1;
					print OUTFILE "\nBreak before: ", $Suffix;
					$Suffixes{$Suffix}++;
					
				}  
		}

	
	

	}	#end of reverse case

 	$PreviousWord = $Word;
}


print OUTFILE "\n\n\nResults:\n";

if ( $Reverse == $False ) 
	{	
		$a=0;
		@SortedList = sort {    $Prefixes{ $b } <=> $Prefixes { $a }    }  keys(%Prefixes) ;	
		@SortedList2 = sort {    $Suffixes{ $b } <=> $Suffixes { $a }    }  keys(%Suffixes) ;	
		while (@SortedList)
		{
			$Found = shift @SortedList;
			print OUTFILE $a++, "\t",  $Found, "\t", $Prefixes{$Found}, "\n";
		}
		$a=0;
		print OUTFILE "\n\n\nSuffixes:\n\n";
		while (@SortedList2)
		{
			$Found = shift @SortedList2;
			print OUTFILE $a++, "\t",  $Found, "\t", $Suffixes{$Found}, "\n";
		}


	 
	}	#end of forward case
	else 
	{	
		$a=0;
		@SortedList = sort {    $Suffixes{ $b } <=> $Suffixes { $a }    }  keys(%Suffixes) ;	
		while (@SortedList)
		{
			$Found = shift @SortedList;
			print OUTFILE $a++, "\t",  $Found, "\t", $Suffixes{$Found}, "\n";

		}	

	}	#end of reverse case




print "...all done!";
close OUTFILE;

sub Letter ()
{	my $Word = @_[0];
	$N= @_[1];
	if ($N >= 0)
	{
		return substr ($Word, $N, 1);
	}
	else
	{
		return substr ($Word, length ($Word) + $N, 1 ) ;
	}
}

sub LetterFromEnd()
{
	my $Word = @_[0];
	$N= @_[1];
	if ($N >= 0)
	{
		return substr ($Word, length ($Word) - $N , 1);
	}
}
