##
## Perl script: create an XML document from the text file, the index file
## and the sound file of SoundIndex
##
# Main script: read the SoundIndex text and index files and print, on
# standard output, an XML document with one <S> element per index line.

# Input files, and the sound file the generated XML refers to.
my $textFile  = "TEXT.txt";
my $indexFile = "INDEX.txt";
my $soundFile = "SOUND.aif";

# Three-argument open with lexical handles: the file name can never be
# mistaken for an open mode, and the error message names the real file.
open(my $index_fh, '<', $indexFile)
    or die "Can't open $indexFile: $!\n";
open(my $text_fh, '<', $textFile)
    or die "Can't open $textFile: $!\n";

# Slurp the whole text in one read; the index refers to it by offsets.
my $text = do { local $/; <$text_fh> };
$text = "" unless defined $text;    # an empty file yields undef in slurp mode
close($text_fh);

print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n\n";
print "<TEXT>\n";
print "<HEADER>\n";
print "<SOUNDFILE href=\"$soundFile\" />\n";
print "</HEADER>\n";
print "<BODY>\n";

# Each index line carries four fields separated by single whitespace
# characters: sound start, sound end, text start offset, text end offset.
# $num counts every line read (matching or not), so an element id always
# equals the number of its line in the index file.
my $num = 0;
while (my $line = <$index_fh>) {
    $num++;
    chomp($line);
    next unless $line =~ /^(.*)\s(.*)\s(.*)\s(.*)$/;
    my ($startSon, $endSon, $startText, $endText) = ($1, $2, $3, $4);

    # Commas in the sound positions become dots (presumably decimal
    # commas written by SoundIndex -- confirm against a real index file).
    $startSon =~ s/,/\./g;
    $endSon   =~ s/,/\./g;

    # Extract the segment of text, escape the XML special characters,
    # then convert the remaining characters with MacRoman2isoLatin.
    my $s = substr($text, $startText, $endText - $startText);
    $s = XMLEntities($s);
    $s = MacRoman2isoLatin($s);

    print "<S id=\"s$num\">";
    print "<TRANSCR>$s</TRANSCR>";
    print "<AUDIO start=\"$startSon\" end=\"$endSon\"></AUDIO>";
    print "</S>\n";
}

print "</BODY>\n";
print "</TEXT>\n";
close($index_fh);
# Escape the characters that are special in XML text and attribute values.
#
# Parameter: the string to escape.
# Returns:   a copy of the string in which & < > " are replaced by the
#            predefined XML entities &amp; &lt; &gt; &quot;.
sub XMLEntities {
    my $line = shift;
    # The ampersand must be escaped first; otherwise the '&' introduced by
    # the other entities would itself be escaped a second time.
    $line =~ s/&/&amp;/g;
    $line =~ s/</&lt;/g;
    $line =~ s/>/&gt;/g;
    $line =~ s/"/&quot;/g;
    return $line;
}
# Convert a string from Mac OS Roman to ISO Latin-1, character by character.
#
# Parameter: the string to convert (an undefined value is treated as "").
# Returns:   a new string in which every character in the range 0x80-0xFF
#            is replaced through the table below; all other characters are
#            copied unchanged.
#
# Table entries written as "\x.." are single Latin-1 characters.  The other
# entries are the replacement strings of the original table, kept exactly
# as they were; 0xF0 maps to the empty string, i.e. it is dropped.
# NOTE(review): those literal replacements are multi-byte sequences when
# this source file is saved as UTF-8, while the script declares its output
# as ISO-8859-1 -- confirm which bytes are really wanted for them.
sub MacRoman2isoLatin {
    my $line = shift;
    $line = "" unless defined $line;

    my %latin1_for = (
        "\x80" => "\xc4", "\x81" => "\xc5", "\x82" => "\xc7", "\x83" => "\xc9",
        "\x84" => "\xd1", "\x85" => "\xd6", "\x86" => "\xdc", "\x87" => "\xe1",
        "\x88" => "\xe0", "\x89" => "\xe2", "\x8a" => "\xe4", "\x8b" => "\xe3",
        "\x8c" => "\xe5", "\x8d" => "\xe7", "\x8e" => "\xe9", "\x8f" => "\xe8",

        "\x90" => "\xea", "\x91" => "\xeb", "\x92" => "\xed", "\x93" => "\xec",
        "\x94" => "\xee", "\x95" => "\xef", "\x96" => "\xf1", "\x97" => "\xf3",
        "\x98" => "\xf2", "\x99" => "\xf4", "\x9a" => "\xf6", "\x9b" => "\xf5",
        "\x9c" => "\xfa", "\x9d" => "\xf9", "\x9e" => "\xfb", "\x9f" => "\xfc",

        "\xa0" => "†",    "\xa1" => "\xb0", "\xa2" => "\xa2", "\xa3" => "\xa3",
        "\xa4" => "\xa7", "\xa5" => "•",    "\xa6" => "\xb6", "\xa7" => "\xdf",
        "\xa8" => "\xae", "\xa9" => "\xa9", "\xaa" => "™",    "\xab" => "\xb4",
        "\xac" => "\xa8", "\xad" => "≠",    "\xae" => "\xc6", "\xaf" => "\xd8",

        "\xb0" => "∞",    "\xb1" => "\xb1", "\xb2" => "≤",    "\xb3" => "≥",
        "\xb4" => "\xa5", "\xb5" => "\xb5", "\xb6" => "∂",    "\xb7" => "∑",
        "\xb8" => "∏",    "\xb9" => "π",    "\xba" => "⊲",    "\xbb" => "\xaa",
        "\xbc" => "\xba", "\xbd" => "Ω",    "\xbe" => "\xe6", "\xbf" => "\xf8",

        "\xc0" => "\xbf", "\xc1" => "\xa1", "\xc2" => "\xac", "\xc3" => "√",
        "\xc4" => "ƒ",    "\xc5" => "≈",    "\xc6" => "∆",    "\xc7" => "\xab",
        "\xc8" => "\xbb", "\xc9" => "…",
        # 0xCA is the Mac OS Roman no-break space; its Latin-1 counterpart
        # is 0xA0, not 0x0A (a line feed).
        "\xca" => "\xa0",
        "\xcb" => "\xc0",
        "\xcc" => "\xc3", "\xcd" => "\xd5", "\xce" => "Œ",    "\xcf" => "œ",

        "\xd0" => "–",    "\xd1" => "—",    "\xd2" => "“",    "\xd3" => "”",
        "\xd4" => "‘",    "\xd5" => "’",    "\xd6" => "\xf7", "\xd7" => "◊",
        "\xd8" => "\xff", "\xd9" => "Ÿ",    "\xda" => "⁄",    "\xdb" => "\xa4",
        "\xdc" => "‹",    "\xdd" => "›",    "\xde" => "fi",   "\xdf" => "fl",

        "\xe0" => "‡",    "\xe1" => "\xb7", "\xe2" => "‚",    "\xe3" => "„",
        "\xe4" => "‰",    "\xe5" => "\xc2", "\xe6" => "\xca", "\xe7" => "\xc1",
        "\xe8" => "\xcb", "\xe9" => "\xc8", "\xea" => "\xcd", "\xeb" => "\xce",
        "\xec" => "\xcf", "\xed" => "\xcc", "\xee" => "\xd3", "\xef" => "\xd4",

        "\xf0" => "",     "\xf1" => "\xd2", "\xf2" => "\xda", "\xf3" => "\xdb",
        "\xf4" => "\xd9", "\xf5" => "ı",    "\xf6" => "ˆ",    "\xf7" => "˜",
        "\xf8" => "\xaf", "\xf9" => "˘",    "\xfa" => "˙",    "\xfb" => "˚",
        "\xfc" => "\xb8", "\xfd" => "˝",    "\xfe" => "˛",    "\xff" => "ˇ",
    );

    # The table has an entry for every character from 0x80 to 0xFF, so a
    # single substitution pass converts the whole string.
    $line =~ s/([\x80-\xff])/$latin1_for{$1}/g;
    return $line;
}