# Usage: run with the arguments: [annotated XML file] [desired pattern: one tag per argument]

from typing import List
import re
import sys
from pathlib import Path


def extract(corpus_file: str, patron: List[str]) -> None:
    """Write every run of consecutive corpus lines that matches a tag pattern.

    The corpus is read line by line and matching is line-based: a sliding
    window holding the last ``len(patron)`` stripped lines is compared with
    ``patron``. Position *i* of the window must start with an ``<element>``
    whose ``type`` data is ``patron[i]``. For each full match, one line of
    the form ``string/TAG string/TAG `` (trailing space included) is written
    to the result file.

    The result file is ``resultats/BAO3_py_<tags joined by "_">.txt`` for
    patterns of 2 to 4 tags and ``resultats/BAO3_py.txt`` otherwise. The
    ``resultats`` directory is created when it does not exist. Both files
    are handled as UTF-8.

    Args:
        corpus_file: Path of the annotated corpus to scan.
        patron: Tags expected on consecutive lines. Each tag is inserted
            verbatim into a regular expression, so regex syntax inside a
            tag is interpreted as such.

    Raises:
        ValueError: If ``patron`` is empty.
    """
    if not patron:
        raise ValueError("patron must contain at least one tag")

    # Same naming scheme as before: tags appear in the file name only for
    # patterns of 2 to 4 tags.
    if 2 <= len(patron) <= 4:
        nom = "BAO3_py_" + "_".join(patron) + ".txt"
    else:
        nom = "BAO3_py.txt"
    fichier = Path("resultats") / nom
    # Without this, opening the result file fails on a fresh working directory.
    fichier.parent.mkdir(parents=True, exist_ok=True)

    # One compiled pattern per position, built once instead of on every line.
    # Raw strings avoid the invalid "\/" escape; "/" needs no escaping.
    regexes = [
        re.compile(
            rf'<element><data type="type">{tag}</data>'
            r'<data type="lemma">[^<]+?</data>'
            r'<data type="string">([^<]+?)</data></element>'
        )
        for tag in patron
    ]

    # Placeholder entries can never match, so nothing is written until the
    # window has been filled with real corpus lines.
    window = ["----"] * len(patron)

    # Read the corpus with the same encoding as the output rather than the
    # locale default, so accented forms survive on any platform.
    with open(corpus_file, encoding="UTF-8") as corpus, \
            open(fichier, "w", encoding="UTF-8") as sortie:
        for line in corpus:
            window.pop(0)
            window.append(line.strip())

            matches = [rx.match(text) for rx, text in zip(regexes, window)]
            if all(matches):
                terme = "".join(
                    f"{m.group(1)}/{tag} " for m, tag in zip(matches, patron)
                )
                sortie.write(f"{terme}\n")





if __name__ == '__main__':
    # Expect the corpus path followed by at least one tag of the pattern;
    # exit with a usage message instead of an IndexError traceback otherwise.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} CORPUS_FILE TAG [TAG ...]")
    corpus_file = sys.argv[1]
    patron = sys.argv[2:]
    extract(corpus_file, patron)
    
