#! /client/bin/perl
#
# Read lines of the form "<hebrew>TAB<greek>" in Beta code and print them
# with one or both halves transliterated.  Lines without a TAB are copied
# unchanged.
#
# Edit the body of the main loop to select the conversion; the subroutines
# whose names contain "_" do the work.  Typical usage:
#
#   Translate Greek into ISO 8859-7, leave Hebrew as is:
#       $greek = beta_iso7($greek);
#
#   Transcribe the Greek in Latin letters (ISO 8859-1 output):
#       $greek = greek_latin($greek);
#
#   Translate Hebrew into ISO 8859-8, leave Greek as is:
#       $hebrew = beta_iso8($hebrew);
#
#   Translate both into Unicode UTF-8 encoding:
#       $greek  = iso7_utf8(beta_iso7($greek));
#       $hebrew = iso8_utf8(beta_iso8($hebrew));
#
# If the software cannot process bi-directional text (which is a standard
# violation when Unicode is involved), the Hebrew text must be reversed
# BEFORE the UTF-8 step, i.e.:
#
#       $hebrew = reverse(beta_iso8($hebrew));
#       $hebrew = iso8_utf8(scalar reverse(beta_iso8($hebrew)));
#
# All 8-bit characters in this file are written as \x escapes so that the
# script does not depend on the encoding the source file is stored in.

use strict;
use warnings;

# Lookup tables, filled once by initialize_tables().
my %agv;              # Beta vowel+accent sequence -> ISO 8859-7 character
my @arch;             # Beta "#<n>" archaic letter  -> placeholder letter
my %dig;              # Beta letter -> Latin digraph (theta, phi, chi, psi)
my %mog;              # Latin vowel + Beta accent -> ISO 8859-1 accented vowel
my %utf8_for_iso7;    # ISO 8859-7 high byte -> UTF-8 byte sequence
my %utf8_for_iso8;    # ISO 8859-8 high byte -> UTF-8 byte sequence

initialize_tables();

while (my $line = <>) {
    if ($line =~ /\t/) {
        # Split at the FIRST tab only; the Greek half keeps its newline.
        my ($hebrew, $greek) = split /\t/, $line, 2;
        $greek = greek_latin($greek);
        print "$hebrew\t$greek";
    }
    else {
        print $line;
    }
}

# beta_iso7($text)
#   Convert Greek text from Beta code to ISO 8859-7 (monotonic: every
#   accent becomes a tonos, breathings and iota subscript are dropped).
#   Returns the converted string; the argument and $_ are left untouched.
sub beta_iso7 {
    my ($text) = @_;

    # koppa, sampi, and final sigma
    $text =~ s/[#]([35])/$arch[$1]/ge;    # koppa, sampi (become q, @)
    $text =~ s/S\b/J/g;                   # final sigma gets its own letter

    # accents
    $text =~ s![()|]!!g;                  # discard psili, dasia, ypogegrammeni
    $text =~ tr!:\\/=!\xB7///!;           # unify accents as tonos, ':' -> ano teleia
    $text =~ s!([*]?[AEHIOUW][/+]{1,2})!$agv{$1} ? $agv{$1} : $1!ge;
    $text =~ s![\\/=]!!g;                 # discard accents that matched no table entry

    # letters: A..W -> 0xE1..0xF9 (J is final sigma 0xF2), V stays Latin 'f'
    $text =~ tr/ABGDEZHQIKLMNCOPRJSTUFXYWV/\xE1-\xF9f/;

    # capital letters: upper case is 32 below lower case; sampi (@) and
    # final sigma (0xF2) have no capital form and are skipped
    $text =~ s/[*]([^@\xF2])/chr(ord($1) - 32)/ge;
    $text =~ s/[*]//g;                    # discard spurious asterisks

    return $text;
}

# beta_iso8($text)
#   Convert Hebrew text from Beta code to ISO 8859-8.
#   Returns the converted string; the argument and $_ are left untouched.
sub beta_iso8 {
    my ($text) = @_;

    # Make everything a letter: ) + ( $ & # / become a t e s s s b.
    $text =~ tr{)+(\$\&\#/}{atesssb};

    # K M N P C at the end of a word (but not standing alone) take their
    # final form, which is encoded here as the lower-case letter.
    $text =~ s/\B([KMNPC])\b/chr(ord($1) + 32)/ge;

    # Letters -> 0xE0..0xFA in ISO 8859-8 order; 'b' (from '/') -> 0xB7.
    $text =~ tr/aBGDHWZXtYkKLmMnNSepPcCQRsTb/\xE0-\xFA\xB7/;

    return $text;
}

# iso7_utf8($text)
#   Convert a string of ISO 8859-7 bytes to UTF-8 bytes.
#   Every assigned high byte is converted, including the punctuation
#   (e.g. 0xB7, produced by beta_iso7) that is not in the Greek block.
#   Unassigned bytes (0xAE, 0xD2, 0xFF) and ASCII pass through unchanged.
sub iso7_utf8 {
    my ($text) = @_;
    $text =~ s/([\x80-\xFF])/exists $utf8_for_iso7{$1} ? $utf8_for_iso7{$1} : $1/ge;
    return $text;
}

# iso8_utf8($text)
#   Convert a string of ISO 8859-8 bytes to UTF-8 bytes.
#   Hebrew letters and the assigned punctuation bytes (e.g. 0xB7, produced
#   by beta_iso8) are converted; unassigned bytes and ASCII pass through.
sub iso8_utf8 {
    my ($text) = @_;
    $text =~ s/([\x80-\xFF])/exists $utf8_for_iso8{$1} ? $utf8_for_iso8{$1} : $1/ge;
    return $text;
}

# greek_latin($text)
#   Convert Greek text from Beta code to a Latin-script transliteration
#   in ISO 8859-1, using the alphabet
#     a-b-g-d-e-z-e^-th-i-k-l-m-n-x-o-p-r-s-t-u/y-ph-ch-ps-o^/o~
#   (e^ = 0xEA, o^ = 0xF4, o~ = 0xF5).
#
#   The aim is to make as many words as possible look like their
#   derivatives in Western European languages.  In this spirit the vowel
#   ypsilon is given as "y" (as in "physics") except after a, e or o
#   (as in "auto...").
#
#   Accents are placed on the vowels as in the original, but never so
#   that the distinction epsilon/eta or omicron/omega is blurred.
#
#   Returns the converted string; the argument and $_ are left untouched.
sub greek_latin {
    my ($text) = @_;

    # Eta and omega first, so their circumflex cannot be confused with
    # the accents placed on epsilon and omicron later.
    $text =~ s{H[/=\\]?}{\xEA}g;    # eta, any accent        -> e-circumflex
    $text =~ s{W=}{\xF5}g;          # omega with perispomeni -> o-tilde
    $text =~ s{W[/\\]?}{\xF4}g;     # omega otherwise        -> o-circumflex

    # Breathings: psili is dropped, dasia becomes an "h" in front of the
    # run of non-blank characters that precedes it.
    $text =~ s/[)]//g;
    $text =~ s/([^ ]*)[(]/h$1/g;
    # Keep the capital marker in front of the inserted "h" so that the
    # word is capitalised as "Hellas", not "hEllas".
    $text =~ s/h[*]/*h/g;

    $text =~ s/([QFXY])/$dig{$1}/g;    # theta, phi, chi, psi
    $text =~ tr/ABGDEZIKLMNCOPRSJTU|/abgdeziklmnxoprsstyj/;
    $text =~ s/([aeo])y/$1u/g;         # ypsilon in a diphthong is "u"

    # Accented vowels.  A character with no table entry keeps its base
    # form and only loses the accent (it is not deleted).
    $text =~ s{((.)[/=\\])}{exists $mog{$1} ? $mog{$1} : $2}ge;

    $text =~ s/\bhr/rh/g;              # aspirated rho is written "rh"

    # Capitals: in ISO 8859-1 the accented letters 0xE0-0xFE (except the
    # division sign 0xF7) also have their capital 32 positions below.
    $text =~ s/[*]([a-z\xE0-\xF6\xF8-\xFE])/chr(ord($1) - 32)/ge;
    $text =~ s/[*]//g;                 # discard spurious asterisks

    return $text;
}

# initialize_tables()
#   Fill the lookup tables used by the conversion routines.  Must be
#   called once before any of them is used.
sub initialize_tables {
    %agv = (
        '*A/' => chr(0xB6),    # GREEK CAPITAL LETTER ALPHA WITH TONOS
        '*E/' => chr(0xB8),    # GREEK CAPITAL LETTER EPSILON WITH TONOS
        '*H/' => chr(0xB9),    # GREEK CAPITAL LETTER ETA WITH TONOS
        '*I/' => chr(0xBA),    # GREEK CAPITAL LETTER IOTA WITH TONOS
        '*O/' => chr(0xBC),    # GREEK CAPITAL LETTER OMICRON WITH TONOS
        '*U/' => chr(0xBE),    # GREEK CAPITAL LETTER UPSILON WITH TONOS
        '*W/' => chr(0xBF),    # GREEK CAPITAL LETTER OMEGA WITH TONOS
        'I+/' => chr(0xC0),    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
        'I/+' => chr(0xC0),    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
        '*I+' => chr(0xDA),    # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
        '*U+' => chr(0xDB),    # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
        'A/'  => chr(0xDC),    # GREEK SMALL LETTER ALPHA WITH TONOS
        'E/'  => chr(0xDD),    # GREEK SMALL LETTER EPSILON WITH TONOS
        'H/'  => chr(0xDE),    # GREEK SMALL LETTER ETA WITH TONOS
        'I/'  => chr(0xDF),    # GREEK SMALL LETTER IOTA WITH TONOS
        'U+/' => chr(0xE0),    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
        'U/+' => chr(0xE0),    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
        'I+'  => chr(0xFA),    # GREEK SMALL LETTER IOTA WITH DIALYTIKA
        'U+'  => chr(0xFB),    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
        'O/'  => chr(0xFC),    # GREEK SMALL LETTER OMICRON WITH TONOS
        'U/'  => chr(0xFD),    # GREEK SMALL LETTER UPSILON WITH TONOS
        'W/'  => chr(0xFE),    # GREEK SMALL LETTER OMEGA WITH TONOS
    );

    # Archaic letters: Beta "#3" (koppa) and "#5" (sampi).
    $arch[3] = 'q';
    $arch[5] = '@';

    %dig = (
        'Q' => 'th',
        'F' => 'ph',
        'X' => 'ch',
        'Y' => 'ps',
    );

    # Latin vowel followed by a Beta accent (/ acute, \ grave,
    # = circumflex) -> ISO 8859-1 accented vowel.  "e=" and "o=" map to
    # the acute forms because e-circumflex and o-circumflex are reserved
    # for eta and omega; y has only an acute form in ISO 8859-1.
    %mog = (
        'a/'  => "\xE1", 'a\\' => "\xE0", 'a=' => "\xE2",
        'e/'  => "\xE9", 'e\\' => "\xE8", 'e=' => "\xE9",
        'i/'  => "\xED", 'i\\' => "\xEC", 'i=' => "\xEE",
        'o/'  => "\xF3", 'o\\' => "\xF2", 'o=' => "\xF3",
        'u/'  => "\xFA", 'u\\' => "\xF9", 'u=' => "\xFB",
        'y/'  => "\xFD", 'y\\' => "\xFD", 'y=' => "\xFD",
    );

    # --- ISO 8859-7 -> UTF-8 -------------------------------------------
    # Greek block: byte - 0x30 is the low byte of U+03xx, so bytes below
    # 0xF0 land in CE 80..BF and the rest in CF 80..8E.
    for my $code (0xB4 .. 0xB6, 0xB8 .. 0xBA, 0xBC, 0xBE .. 0xD1, 0xD3 .. 0xFE) {
        $utf8_for_iso7{ chr $code }
            = $code < 0xF0
            ? "\xCE" . chr($code - 0x30)
            : "\xCF" . chr($code - 0x70);
    }
    # Bytes whose code point equals the byte value (U+00A0..U+00BF).
    for my $code (0xA0, 0xA3, 0xA6 .. 0xA9, 0xAB .. 0xAD,
                  0xB0 .. 0xB3, 0xB7, 0xBB, 0xBD) {
        $utf8_for_iso7{ chr $code } = "\xC2" . chr($code);
    }
    # Bytes that map outside both ranges.
    $utf8_for_iso7{"\xA1"} = "\xE2\x80\x98";    # U+2018 LEFT SINGLE QUOTATION MARK
    $utf8_for_iso7{"\xA2"} = "\xE2\x80\x99";    # U+2019 RIGHT SINGLE QUOTATION MARK
    $utf8_for_iso7{"\xA4"} = "\xE2\x82\xAC";    # U+20AC EURO SIGN
    $utf8_for_iso7{"\xA5"} = "\xE2\x82\xAF";    # U+20AF DRACHMA SIGN
    $utf8_for_iso7{"\xAA"} = "\xCD\xBA";        # U+037A GREEK YPOGEGRAMMENI
    $utf8_for_iso7{"\xAF"} = "\xE2\x80\x95";    # U+2015 HORIZONTAL BAR

    # --- ISO 8859-8 -> UTF-8 -------------------------------------------
    # Hebrew letters: 0xE0..0xFA -> U+05D0..U+05EA = D7 90..AA.
    for my $code (0xE0 .. 0xFA) {
        $utf8_for_iso8{ chr $code } = "\xD7" . chr($code - 0x50);
    }
    # Bytes whose code point equals the byte value.
    for my $code (0xA0, 0xA2 .. 0xA9, 0xAB .. 0xB9, 0xBB .. 0xBE) {
        $utf8_for_iso8{ chr $code } = "\xC2" . chr($code);
    }
    $utf8_for_iso8{"\xAA"} = "\xC3\x97";        # U+00D7 MULTIPLICATION SIGN
    $utf8_for_iso8{"\xBA"} = "\xC3\xB7";        # U+00F7 DIVISION SIGN
    $utf8_for_iso8{"\xDF"} = "\xE2\x80\x97";    # U+2017 DOUBLE LOW LINE
    $utf8_for_iso8{"\xFD"} = "\xE2\x80\x8E";    # U+200E LEFT-TO-RIGHT MARK
    $utf8_for_iso8{"\xFE"} = "\xE2\x80\x8F";    # U+200F RIGHT-TO-LEFT MARK

    return;
}