#!/usr/bin/perl
##################################################################
# Copyright (C) 1998-2001 Stefan Mashkevich
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License.
# Disclaimer: The program is provided "as is" and there is no
# warranty of any kind whatsoever. Your using it is at your own risk.
##################################################################
# converting transliterated text into KOI-8
#
# v0.8 S.Mashkevich 13 Dec 1998
# v0.9 S.Mashkevich 19 Jan 1999
# v1.0 S.Mashkevich 24-Feb-2001
#      different symbols for switching translit on/off allowed
# text on standard input, recoded on standard output
#
# Usage: an optional first argument of the form -sXY selects X as the
# "translit off" symbol and Y as the "translit on" symbol; both default
# to '|'.  When the two symbols are equal, that symbol toggles the state.
#
##################################################################

use strict;
use warnings;

# extra symbols that count as letters (that figure in translit patterns)
my $EXTRASYMS = q('`^~);

##################################################################
# read_patterns($fh)
#
# Reads translit patterns from the filehandle $fh.  Lines starting with
# '#' are skipped; every other line may hold any number of
# "latin cyrillic" pairs, where the replacement consists of bytes in the
# range \200-\377.
#
# Returns a two-element list: a reference to an array indexed by pattern
# length, each element being a hash from pattern to replacement, and the
# length of the longest pattern seen (0 if there were none).
##################################################################
sub read_patterns {
    my ($fh) = @_;

    my @trans;
    my $maxlen = 0;

    while (my $line = <$fh>) {
        next if $line =~ /^#/;

        # Captures come back as a flat list: orig, repl, orig, repl, ...
        my @chars = $line =~ /([\w${EXTRASYMS}:]+)\s+([\200-\377]+)/g;
        for (my $i = 0; $i < @chars; $i += 2) {
            my $orig = $chars[$i];
            my $repl = $chars[$i + 1];
            my $len  = length $orig;
            $maxlen = $len if $len > $maxlen;
            $trans[$len]{$orig} = $repl;
        }
    }

    #for ($len = $maxlen; $len > 0; $len--) {
    #    print "$len :\n";
    #    for $orig (keys %{$trans[$len]}) {print "$orig->$trans[$len]{$orig}\n"}
    #};

    return (\@trans, $maxlen);
}

##################################################################
# transliterate($text, $trans, $maxlen, $transoff, $transon)
#
# Converts $text using the pattern table $trans / $maxlen as returned by
# read_patterns().  $transoff and $transon are the one-character switch
# symbols; they are consumed and never copied to the output.  If they are
# equal, the symbol toggles transliteration, which starts switched on.
#
# At every position the longest pattern is tried first.  A ':' in a
# pattern stands for any non-letter, so the text is tried both verbatim
# and with its non-letters replaced by ':'.
#
# Returns the converted string.
##################################################################
sub transliterate {
    my ($text, $trans, $maxlen, $transoff, $transon) = @_;

    # A single toggle symbol is in effect only when both switches coincide.
    my $toggle = $transon eq $transoff ? $transon : undef;

    # The leading ':' is a sentinel non-letter, so that patterns beginning
    # with ':' can match at the very start of the text; it is stripped
    # again from the result below.
    my $in           = ':' . $text;
    my $inlength     = length $in;
    my $offset       = 0;
    my $out          = '';
    my $translitflag = 1;

  LOOP:
    while ($offset < $inlength) {

        # translit on/off switch?
        my $onesymbol = substr($in, $offset, 1);
        if (defined $toggle) {
            if ($onesymbol eq $toggle) {
                $translitflag = 1 - $translitflag;
                $offset++;
                next LOOP;
            }
        }
        else {
            if ($onesymbol eq $transoff) {
                $translitflag = 0;
                $offset++;
                next LOOP;
            }
            if ($onesymbol eq $transon) {
                $translitflag = 1;
                $offset++;
                next LOOP;
            }
        }

        # transliteration itself
        if ($translitflag) {
            for (my $len = $maxlen; $len > 0; $len--) {
                my $pattern    = substr($in, $offset, $len);
                my $patwopunct = $pattern;

                if ($patwopunct =~ s/[^\w${EXTRASYMS}]/:/g) {

                    # there are non-letters
                    my $repl = $trans->[$len]{$patwopunct};
                    next unless defined $repl;

                    my $pattorep = $pattern;
                    $pattorep =~ s/[^\w${EXTRASYMS}]//g;    # delete non-letters

                    # if last is non-letter, we may need it for the next
                    # transliteration, so do not consume it
                    my $translit = $pattern;
                    my $consumed = $len;
                    $consumed-- if $translit =~ s/[^\w${EXTRASYMS}]$//;

                    # \Q...\E: the letters are literal text, not a regex
                    # ('^' is a legal pattern character)
                    $translit =~ s/\Q$pattorep\E/$repl/;

                    $out    .= $translit;
                    $offset += $consumed;
                    next LOOP;
                }
                else {
                    my $translit = $trans->[$len]{$pattern};
                    next unless defined $translit;

                    $out    .= $translit;
                    $offset += $len;
                    next LOOP;
                }
            }
        }

        # If we did not find pattern or translitflag was off
        $out .= $onesymbol;
        $offset++;
    }

    # drop the sentinel
    return substr($out, 1);
}

##################################################################
# main()
#
# Parses the optional -sXY argument, loads the pattern table that follows
# __END__, recodes standard input and prints the result on standard
# output.
##################################################################
sub main {
    my $firstarg = shift @ARGV;

    my ($transoff, $transon);
    if (defined $firstarg && $firstarg =~ /-s(.)(.)/) {
        ($transoff, $transon) = ($1, $2);
    }

    # Test definedness, not truth, so that '0' is usable as a switch symbol.
    $transoff = '|' unless defined $transoff;
    $transon  = '|' unless defined $transon;

    # read in translit patterns
    my ($trans, $maxlen) = read_patterns(\*DATA);

    # convert: slurp all of standard input in one go
    my $in = do { local $/; <STDIN> };
    $in = '' unless defined $in;

    # output
    print transliterate($in, $trans, $maxlen, $transoff, $transon);
    return;
}

main() unless caller;

1;

##################################################################
__END__
#
# lat_chars cyr_chars, any number of pairs in line
# colon means noncharacter (space, comma etc)
a Á b  c à d Ä e Å f Æ g Ç h È i i j Ê k Ë l Ì m Í n Î o Ï p Ð q Ñ r Ò s Ó t Ô u Õ v × w Û x È y É z Ú
A á B â C ã D ä E å F æ G ç H è I I J ê K ë L ì M í N î O ï P ð Q ñ R ò S ó T ô U õ V ÷ W û X è Y é Z ú
`e Ü `E ü
ye ¤ je ¤ YE ´ JE ´ Ye ¤ Je ´
yo ÊÏ jo ÊÏ YO êï JO êï Yo êÏ Jo êÏ
yi § ji § YI · JI · Yi · Ji ·
yu À ju À YU à JU à Yu à Ju à
ya Ñ ja Ñ YA ñ JA ñ Ya ñ Ja ñ
ay ÁÊ ey ÅÊ iy ÉÊ oy ÏÊ uy ÕÊ yy ÉÊ
AY áê EY åê IY éê OY ïê UY õê YY éê
Ay áÊ Ey åÊ Iy éÊ Oy ïÊ Uy õÊ Yy éÊ
#yey ÅÊ YEY åê Yey åÊ
aya ÁÑ aye Á¤ ayo ÁÊÏ ayu ÁÀ ayi Á§
AYA áñ AYE á´ AYO áêï AYU áà AYI á·
Aya áÑ Aye ᤠAyo áÊÏ Ayu áÀ Ayi ᧠
eya ÅÑ eye Ť eyo ÅÊÏ eyu ÅÀ eyi ŧ
EYA åñ EYE å´
EYO åêï EYU åà EYI å· Eya åÑ Eye å¤ Eyo åÊÏ Eyu åÀ EYi å§ iya iÑ iye i¤ iyo iÊÏ iyu iÀ iyi i§ IYA Iñ IYE I´ IYO Iêï IYU Ià IYI I· Iya IÑ Iye I¤ Iyo IÊÏ Iyu IÀ Iyi I§ oya ÏÑ oye Ϥ oyo ÏÊÏ oyu ÏÀ oyi ϧ OYA ïñ OYE ï´ OYO ïêï OYU ïà OYI ï· Oya ïÑ Oye ï¤ Oyo ïÊÏ Oyu ïÀ Oyi ï§ uya ÕÑ uye Õ¤ uyo ÕÊÏ uyu ÕÀ uyi Õ§ UYA õñ UYE õ´ UYO õêï UYU õà UYI õ· Uya õÑ Uye õ¤ Uyo õÊÏ Uyu õÀ Uyi õ§ yya ÉÑ yye ɤ yyo ÉÊÏ yyu ÉÀ yyi ɧ YYA éñ YYE é´ YYO éêï YYU éà YYI é· Yya éÑ Yye é¤ Yyo éÊÏ Yyu éÀ Yyi é§ ajon ÁÊÏÎ AJON áêïî ajor ÁÊÏÒ AJOR áêïò ayon ÁÊÏÎ AYON áêïî ayor ÁÊÏÒ AYOR áêïò jork ÊÏÒË JORK êïòë Jork êÏÒË york ÊÏÒË YORK êïòë York êÏÒË zh Ö ZH ö Zh ö kh È KH è Kh è ts à TS ã Ts ã tsya ÔÓÑ TSYA ôóñ tsja ÔÓÑ TSJA ôóñ tst ÔÓÔ TST ôóô # tsk ÔÓË TSK ôóë dets ÄÅÔÓ DETS äåôó Dets äÅÔÓ odets ÏÄÅà ODETS ïäåã tsch ÔÝ TSCH ôý Tsch ôÝ tshch ÔÝ TSHCH ôý Tshch ôÝ ch Þ CH þ Ch Þ sh Û SH û Sh û :shem ÓÈÅÍ :SHEM óèåí :Shem óÈÅÍ shod ÓÈÏÄ SHOD óèïä Shod óÈÏÄ shozh ÓÈÏÖ SHOZH óèïö Shozh óÈÏÖ shola ÓÈÏÌÁ SHOLA óèïìá Shola óÈÏÌÁ shch Ý SHCH ý Shch ý sch Ý SCH ý Sch ý schita ÓÞÉÔÁ SCHITA óþéôá Schita óÞÉÔÁ schita: ÝÉÔÁ SCHITA: ýéôá schet ÓÞÅÔ SCHET óþåô Schet óÞÅÔ plesch ÐÌÅÝ PLESCH ðìåý Plesch ðÌÅÝ ' Ø '' ' vyud ×ÉÕÄ VYUD ÷éõä Vyud ÷ÉÕÄ vyuch ×ÉÕÞ VYUCH ÷éõþ Vyuch ÷ÉÕÞ vyuzh ×ÉÕÖ VYUZH ÷éõö Vyuzh ÷ÉÕÖ ##################################################################