#!/usr/bin/perl
#  IM2GraphML.pl

<<DOC; 
B. Habert
lun mar 20 20:50:06 CET 2006

               IM2GraphML.pl :


Format d'entrée : 

<c c1="ibn_Nc" c2="b_Nc"  fc1="27" fc2="33" cf="5" dm="5.60" im="9.56"/>
<c c1="ontologie_Nc" c2="théorie_Nc"  fc1="43" fc2="132" cf="5" dm="11.00" im="6.88"/>
<c c1="bien_Nc" c2="bien_Nc"  fc1="83" fc2="83" cf="84" dm="10.08" im="10.68"/>
<c c1="1_Nc" c2="philosophie_Nc"  fc1="22" fc2="855" cf="5" dm="11.40" im="5.16"/>
<c c1="philosophie_Nc" c2="branche_Nc"  fc1="855" fc2="6" cf="6" dm="6.67" im="7.29"/>
<c c1="ontologie_Nc" c2="ontologique_A"  fc1="43" fc2="14" cf="5" dm="13.00" im="10.12"/>
<c c1="o_Nc" c2="origine_Nc"  fc1="100" fc2="60" cf="5" dm="10.00" im="6.80"/>

Exemple : IM2GraphML.pl PHILOLynx-dump-normaliseCordialLemmeCatBreve.ArticleEgalFenetre.CoocMaxDistance20MinCooc5.IM 8 9.0  > PHILOLynx-dump-normaliseCordialLemmeCatBreve.ArticleEgalFenetre.CoocMaxDistance20MinCooc5.IM.DMMAx8IMMin9.0.gml

Puis :
xsltproc GraphML2Pajek.xsl PHILOLynx-dump-normaliseCordialLemmeCatBreve.ArticleEgalFenetre.CoocMaxDistance20MinCooc5.IM.DMMAx8IMMin9.0.gml |AjouteCR > PHILOCordialDMMax8IMMin9.net


Format de sortie :

GraphML

<?xml version="1.0" encoding="iso-8859-1"?>
<graphml>
 <key id="d0" for="node" attr.name="nom" attr.type="string"/>
 <key id="d1" for="edge" attr.name="poids" attr.type="double"/>
 <graph edgedefault="undirected">
    <node id="1"><data key="d0">José_Np</data></node>
    <node id="2"><data key="d0">Gil_Np</data></node>
    <edge source="1" target="2"><data key="d1">14.42</data></edge>
    <node id="3"><data key="d0">jacques_Nc</data></node>

A faire :

Bugs et problèmes :

DOC



use strict ;
use warnings ;
use Scalar::Util qw(looks_like_number) ;

# ---------------------------------------------------------------------------
# Main program.
#
# Reads the co-occurrence file named by the first argument line by line and
# writes a GraphML document to STDOUT.  A line is kept only when its "dm"
# attribute is <= the second argument (distance ceiling) and its "im"
# attribute is >= the third argument (IM floor).  For each kept line:
#   - each of c1 / c2 gets a <node> the first time it is seen, numbered
#     sequentially from 1;
#   - one <edge> is emitted between the two nodes, carrying "im" as data.
# Lines that do not match the expected <c .../> layout are ignored.
#
# Node names are copied verbatim from the input attributes into element
# content; this assumes the input is already XML-escaped -- TODO confirm.
# ---------------------------------------------------------------------------
my $ChaineUsage = "Usage : IM2GraphML.pl <fichier IM><distance plafond><IM plancher>\n" ;
if (@ARGV != 3) {die $ChaineUsage ; }
my ($FichierEntree, $DistancePlafond, $IMPlancher) = @ARGV ;

# Both thresholds are used in numeric comparisons below; reject anything
# Perl would silently numify (e.g. "9,0" would otherwise be read as 9).
if (!looks_like_number($DistancePlafond) || !looks_like_number($IMPlancher)) {
  die $ChaineUsage ;
}

my $Trace       = 0 ;   # set to 1 to get a debugging trace on STDERR
my $PourWindows = 0 ;   # set to a non-zero value to emit CRLF line endings
# File-scoped so that the header/footer subs defined later in this file see it.
my $FinLigne    = ($PourWindows == 0) ? "\n" : "\r\n" ;

my %Cooccurrent2Numero = () ;   # co-occurrent name -> node id already emitted
my $NumeroCooccurrent  = 1 ;    # next free node id

# Open the input before printing anything, so that an unreadable file is
# reported instead of silently producing an empty graph.
open(my $Entree, '<', $FichierEntree)
  or die "IM2GraphML.pl: cannot open '$FichierEntree': $!\n" ;

imprimeEnTeteGraphML() ;

# Example of an input line:
# <c c1="philosophie_Nc" c2="ontologie_Nc"  fc1="855" fc2="43" cf="6" dm="8.00" im="4.45"/>
while (my $Ligne = <$Entree>) {
  # [^"]+ keeps each capture inside its own attribute value, where a greedy
  # .+ could run across attribute boundaries.
  my ($Cooc1, $Cooc2, $DistanceMoyenne, $IM) =
    $Ligne =~ /.+ c1=\"([^\"]+)\" c2=\"([^\"]+)\" +fc1.+ dm=\"([0-9.]+)\" im=\"([0-9.]+).+/
    or next ;

  # The trace goes to STDERR so that it never ends up inside the XML output.
  if ($Trace == 1) {print STDERR "$Ligne\t$Cooc1\t$Cooc2\t$DistanceMoyenne\t$IM$FinLigne" ; }

  next unless $DistanceMoyenne <= $DistancePlafond && $IM >= $IMPlancher ;
  if ($Trace == 1) {print STDERR "===$FinLigne" ; }

  # Declare a node for each co-occurrent not met before.  When c1 and c2 are
  # the same name, only one node is created and the edge is a self-loop.
  for my $Cooc ($Cooc1, $Cooc2) {
    next if exists $Cooccurrent2Numero{$Cooc} ;
    $Cooccurrent2Numero{$Cooc} = $NumeroCooccurrent++ ;
    print "    <node id=\"$Cooccurrent2Numero{$Cooc}\"><data key=\"d0\">$Cooc</data></node>$FinLigne" ;
  }

  print "    <edge source=\"$Cooccurrent2Numero{$Cooc1}\" target=\"$Cooccurrent2Numero{$Cooc2}\"><data key=\"d1\">$IM</data></edge>$FinLigne" ;
}
close($Entree) ;
imprimeEnPiedGraphML() ;


# Prints the opening part of the GraphML document on the currently selected
# output handle: the XML declaration, the <graphml> root, the two <key>
# declarations (d0 = node attribute "nom", d1 = edge attribute "poids") and
# the opening <graph> tag.  Every line is terminated by the file-level
# $FinLigne.  Takes no arguments; returns the result of print.
sub imprimeEnTeteGraphML {
  my @LignesEnTete = (
    '<?xml version="1.0" encoding="iso-8859-1"?>',
    '<graphml>',
    ' <key id="d0" for="node" attr.name="nom" attr.type="string"/>',
    ' <key id="d1" for="edge" attr.name="poids" attr.type="double"/>',
    ' <graph edgedefault="undirected">',
  ) ;
  # Build one string so the output does not depend on print's list separator.
  my $EnTete = join('', map { $_ . $FinLigne } @LignesEnTete) ;
  print $EnTete ;
}

# Prints the closing part of the GraphML document on the currently selected
# output handle: the </graph> and </graphml> end tags, each followed by the
# file-level $FinLigne.  Takes no arguments; returns the result of print.
sub imprimeEnPiedGraphML {
  my $Pied = ' </graph>' . $FinLigne . '</graphml>' . $FinLigne ;
  print $Pied ;
}
