##
## PERL script: create an XML file from the textFile, the indexFile
## and the soundFile of SoundIndex
##
## The text file is read as raw bytes and converted from MacRoman to
## ISO-8859-1.  Each line of the index file that has four
## whitespace-separated fields (sound start, sound end, text start offset,
## text end offset) produces one <S> element on standard output.
##

use strict;
use warnings;

my $textFile  = "TEXT.txt";
my $indexFile = "INDEX.txt";
my $soundFile = "SOUND.aif";

# Unicode code points of the MacRoman bytes 0x80..0xFF, in byte order.
# Entries <= 0xFF are emitted as the single ISO-8859-1 byte; larger entries
# have no ISO-8859-1 equivalent and are emitted as XML numeric character
# references so the output stays valid for the declared encoding.
my @MACROMAN_CODEPOINTS = (
    # 0x80 - 0x8F
    0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
    0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
    # 0x90 - 0x9F
    0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
    0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
    # 0xA0 - 0xAF
    0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
    0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
    # 0xB0 - 0xBF   (0xBA is INTEGRAL, U+222B)
    0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
    0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
    # 0xC0 - 0xCF   (0xCA is NO-BREAK SPACE, i.e. 0xA0, not a line feed)
    0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
    # 0xD0 - 0xDF   (0xDE/0xDF are overridden below)
    0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
    0x00FF, 0x0178, 0x2044, 0x00A4, 0x2039, 0x203A, 0xFB01, 0xFB02,
    # 0xE0 - 0xEF
    0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
    0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
    # 0xF0 - 0xFF   (0xF0 is overridden below)
    0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
    0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
);

# Bytes whose replacement is plain text rather than a converted character:
# the fi/fl ligatures become the two ASCII letters, and 0xF0 is dropped.
my %PLAIN_REPLACEMENT_FOR = (
    "\xde" => "fi",
    "\xdf" => "fl",
    "\xf0" => "",
);

# MacRoman byte => replacement string, built once from the tables above.
my %LATIN1_FOR_MACROMAN;
for my $offset (0 .. $#MACROMAN_CODEPOINTS) {
    my $mac_byte  = pack('C', 0x80 + $offset);
    my $codepoint = $MACROMAN_CODEPOINTS[$offset];

    if (exists $PLAIN_REPLACEMENT_FOR{$mac_byte}) {
        $LATIN1_FOR_MACROMAN{$mac_byte} = $PLAIN_REPLACEMENT_FOR{$mac_byte};
    }
    elsif ($codepoint <= 0xFF) {
        $LATIN1_FOR_MACROMAN{$mac_byte} = pack('C', $codepoint);
    }
    else {
        $LATIN1_FOR_MACROMAN{$mac_byte} = sprintf('&#x%04X;', $codepoint);
    }
}

# Run the conversion when executed as a script; when the file is loaded
# with require/do, only the subroutines are defined.
main() unless caller;

# Read the index and text files and print the XML document on STDOUT.
# Dies if either input file cannot be opened.
sub main {
    open(my $index_fh, '<', $indexFile) or die "Can't open ficINDEX: $!\n";
    open(my $text_fh,  '<', $textFile)  or die "Can't open ficTEXT: $!\n";

    # Slurp the whole transcription; index offsets address this string.
    my $text = do { local $/; <$text_fh> };
    $text = "" unless defined $text;
    close($text_fh);

    print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n\n";
    print "<TEXT>\n";
    print "<HEADER>\n";
    print "<SOUNDFILE href=\"$soundFile\" />\n";
    print "</HEADER>\n";
    print "<BODY>\n";

    # $num counts every index line, including those that do not match, so
    # sentence ids follow the line numbers of the index file.
    my $num = 0;
    while (my $line = <$index_fh>) {
        $num++;
        chomp($line);
        next unless $line =~ /^(.*)\s(.*)\s(.*)\s(.*)$/;

        my ($startSon, $endSon, $startText, $endText) = ($1, $2, $3, $4);

        # Sound positions may use a comma where XML consumers expect a dot.
        $startSon =~ s/,/\./g;
        $endSon   =~ s/,/\./g;

        my $s = substr($text, $startText, $endText - $startText);
        $s = "" unless defined $s;    # offsets beyond the end of the text

        # Escape markup first: the encoding conversion may add character
        # references whose '&' must not be escaped again.
        $s = XMLEntities($s);
        $s = MacRoman2isoLatin($s);

        print "<S id=\"s$num\">";
        print "<TRANSCR>$s</TRANSCR>";
        print "<AUDIO start=\"$startSon\" end=\"$endSon\"></AUDIO>";
        print "</S>\n";
    }

    print "</BODY>\n";
    print "</TEXT>\n";

    close($index_fh);
    return;
}

# Escape the XML markup characters & < > " in a string.
#   argument: the raw string
#   returns:  the escaped copy (the argument itself is not modified)
sub XMLEntities {
    my $line = shift;
    return $line unless defined $line;

    # '&' must be handled first, otherwise the ampersands introduced by the
    # other replacements would be escaped a second time.
    $line =~ s/&/&amp;/g;
    $line =~ s/</&lt;/g;
    $line =~ s/>/&gt;/g;
    $line =~ s/"/&quot;/g;
    return $line;
}

# Convert a MacRoman byte string for use in the ISO-8859-1 XML output.
#   argument: string of MacRoman bytes
#   returns:  converted string; bytes below 0x80 are left unchanged, bytes
#             with an ISO-8859-1 equivalent are replaced by that byte, and
#             other characters become numeric character references
#             (see %PLAIN_REPLACEMENT_FOR for the three exceptions).
#             An undefined argument yields the empty string.
sub MacRoman2isoLatin {
    my $line = shift;
    return "" unless defined $line;

    $line =~ s/([\x80-\xff])/$LATIN1_FOR_MACROMAN{$1}/g;
    return $line;
}

1;