:; #-*- mode: nendo; syntax: scheme -*-;; ;;; ;;; roman-lib.nnd - ローマ字と平仮名、片仮名の相互変換ライブラリ ;;; ;;; Copyright (c) 2010 Kiyoka Nishiyama ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; ;;; 1. Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; ;;; 2. Redistributions in binary form must reproduce the above copyright ;;; notice, this list of conditions and the following disclaimer in the ;;; documentation and/or other materials provided with the distribution. ;;; ;;; 3. Neither the name of the authors nor the names of its contributors ;;; may be used to endorse or promote products derived from this ;;; software without specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;;; ;;; $Id: ;;; (use srfi-1) ;; 子音が1音だけ入ったテーブル ;; このテーブルはruby-romkan から変換し、一部不足を追加した。 ;; クンレイ、日本式、ヘボン式もデータ中に併記し全てを網羅している。 (define sekka-kana->roman-alist-short '( ;; normal roman ---AZIK--- ("ぁ" "la" "xxa") ("あ" "a") ("ぃ" "li" "xxi") ("い" "i") ("ぅ" "lu" "xxu") ("う" "u" "whu") ("う゛" "vu") ("う゛ぁ" "va") ("う゛ぃ" "vi") ("う゛ぇ" "ve") ("う゛ぉ" "vo") ("ぇ" "le" "xxe") ("え" "e") ("ぉ" "lo" "xxo") ("お" "o") ("か" "ka") ("が" "ga") ("き" "ki" "kf") ("きゃ" "kya" "kga") ("きゅ" "kyu" "kgu") ("きぇ" "kye" "kge") ("きょ" "kyo" "kgo") ("ぎ" "gi") ("ぎゃ" "gya") ("ぎゅ" "gyu") ("ぎぇ" "gye") ("ぎょ" "gyo") ("く" "ku") ("ぐ" "gu") ("け" "ke") ("げ" "ge") ("こ" "ko") ("ご" "go") ("さ" "sa") ("ざ" "za" "zc") ;; AZIKザ行の特別措置 ("し" "shi" "si" "xi") ("しゃ" "sha" "sya" "xa" "xc") ;; hu の例外処理: shu は sfu になってはいけない ("しゅ" "shu" "syu" "xu") ("しぇ" "she" "sye" "xe") ("しょ" "sho" "syo" "xo") ("じ" "ji" "zi") ("じゃ" "ja" "jya" "zya") ("じゅ" "ju" "jyu" "zyu" "jf") ("じぇ" "je" "jye" "zye") ("じょ" "jo" "jyo" "zyo") ("す" "su") ("ず" "zu") ("せ" "se") ("ぜ" "ze" "zf") ("そ" "so") ("ぞ" "zo") ("た" "ta") ("だ" "da") ;; normal roman ---AZIK--- ("ち" "chi" "tyi" "ti" "ci") ("ちゃ" "cha" "tya" "ca") ;; hu の例外処理: chu は cfu になってはいけない ("ちゅ" "chu" "tyu" "cu") ("ちぇ" "che" "tye" "ce" "cf") ("ちょ" "cho" "tyo" "co") ("ぢ" "di") ("ぢゃ" "dya") ("ぢゅ" "dyu") ("ぢぇ" "dye") ("ぢょ" "dyo") ("っ" "tt" "@" ";" "ltu") ;; normal roman ---AZIK--- ("つ" "tsu" "tu") ("づ" "du") ("て" "te") ("で" "de" "df") ("と" "to") ("ど" "do") ("な" "na") ("に" "ni") ("にゃ" "nya") ("にゅ" "nyu") ("にぇ" "nye") ("にょ" "nyo") ("ぬ" "nu" "nf") ("ね" "ne") ("の" "no") ("は" "ha") ("ば" "ba") ("ぱ" "pa") ("ひ" "hi") ("ひゃ" "hya" "hga") ("ひゅ" "hyu" "hgu") ("ひぇ" "hye" "hge") ("ひょ" "hyo" "hgo") ("び" "bi") ("びゃ" "bya") ("びゅ" "byu") ("びぇ" "bye") ("びょ" "byo") ("ぴ" "pi") ("ぴゃ" "pya" "pga") ("ぴゅ" "pyu" "pgu") ("ぴぇ" "pye" "pge") ("ぴょ" "pyo" "pgo") ("ふ" "fu" "hu" "hf") ("ふぁ" "fa") ("ふぃ" "fi") ("ふぇ" "fe") ("ふぉ" "fo") ("ぶ" "bu") ("ぷ" "pu") ("へ" "he") ("べ" "be") ("ぺ" "pe") ("ほ" "ho") ("ぼ" "bo") ("ぽ" "po") ("ま" "ma") ("み" "mi") ("みゃ" "mya" "mga") ("みゅ" "myu" "mgu") ("みぇ" "mye" "mge") ("みょ" "myo" "mgo") ("む" "mu" "mf") ("め" "me") ("も" "mo") ;; normal roman ---AZIK--- ("ゃ" "lya") ("や" "ya") ("ゅ" "lyu") ("ゆ" "yu" "yf") ("ょ" "lyo") ("よ" "yo") ("ら" "ra") ("り" "ri") ("りゃ" "rya") ("りゅ" "ryu") ("りょ" "ryo") ("る" "ru") ("れ" "re") ("ろ" "ro") ("ゎ" "lwa") ("わ" "wa") ("うぃ" "wi" "whi") ("うぇ" "we" "whe") ("を" "wo") ("うぉ" "wso" "who") ;; normal roman ---AZIK--- ---AZIK(US)--- ("ん" "nn" "n" "q") ("でぃ" "dyi" "dhi" "dci") ("でぅ" "dyu" "dhu" "dcu") ("ー" "-" "^" ":" "'") ;; Ruby romkanからの不足分追加 ("てぃ" "thi" "tgi") ("てぅ" "thu" "tgu") ;; Sekkaの辞書に入っている特別なキーワード ">あん" など (">" ">") ("ゐ" "yi") ;; 撥音から始まるキーワード ("っう゛" "vvu" "@vu" ";vu" ) ("っう゛ぁ" "vva" "@va" ";va" ) ("っう゛ぃ" "vvi" "@vi" ";vi" ) ("っう゛ぇ" "vve" "@ve" ";ve" ) ("っう゛ぉ" "vvo" "@vo" ";vo" ) ("っか" "kka" "@ka" ";ka" ) ("っが" "gga" "@ga" ";ga" ) ("っき" "kki" "@ki" ";ki" ) ("っきゃ" "kkya" "@kya" ";kya") ("っきゅ" "kkyu" "@kyu" ";kyu") ("っきぇ" "kkye" "@kye" ";kye") ("っきょ" "kkyo" "@kyo" ";kyo") ("っぎ" "ggi" "@gi" ";gi" ) ("っぎゃ" "ggya" "@gya" ";gya") ("っぎゅ" "ggyu" "@gyu" ";gyu") ("っぎぇ" "ggye" "@gye" ";gye") ("っぎょ" "ggyo" "@gyo" ";gyo") ("っく" "kku" "@ku" ";ku" ) ("っぐ" "ggu" "@gu" ";gu" ) ("っけ" "kke" "@ke" ";ke" ) ("っげ" "gge" "@ge" ";ge" ) ("っこ" "kko" "@ko" ";ko" ) ("っご" "ggo" "@go" ";go" ) ("っさ" "ssa" "@sa" ";sa" ) ("っざ" "zza" "@za" ";za" ) ("っし" "sshi" "sshi" "@shi" "@shi" ";shi" ";shi") ("っしゃ" "ssha" "ssya" "@sha" "@sya" ";sha" ";sya") ("っしゅ" "sshu" "ssyu" "@shu" "@syu" ";shu" ";syu") ("っしぇ" "sshe" "ssye" "@she" "@sye" ";she" ";sye") ("っしょ" "ssho" "ssyo" "@sho" "@syo" ";sho" ";syo") ("っじ" "jji" "@ji" ";ji" ) ("っじゃ" "jja" "@ja" ";ja" ) ("っじゅ" "jju" "@ju" ";ju" ) ("っじぇ" "jje" "@je" ";je" ) ("っじょ" "jjo" "@jo" ";jo" ) ("っす" "ssu" "@su" ";su" ) ("っず" "zzu" "@zu" ";zu" ) ("っせ" "sse" "@se" ";se" ) ("っぜ" "zze" "@ze" ";ze" ) ("っそ" "sso" "@so" ";so" ) ("っぞ" "zzo" "@zo" ";zo" ) ("った" "tta" "@ta" ";ta" ) ("っだ" "dda" "@da" ";da" ) ("っち" "cchi" "ttyi" "cci" "@chi" "@tyi" "@ci" ";chi" ";tyi" ";ci") ("っちゃ" "ccha" "ttya" "cca" "@cha" "@tya" "@ca" ";cha" ";tya" ";ca") ("っちゅ" "cchu" "ttyu" "ccu" "@chu" "@tyu" "@cu" ";chu" ";tyu" ";cu") ("っちぇ" "cche" "ttye" "cce" "@che" "@tye" "@ce" ";che" ";tye" ";ce") ("っちょ" "ccho" "ttyo" "cco" "@cho" "@tyo" "@co" ";cho" ";tyo" ";co") ("っぢ" "ddi" "@di" ";di" ) ("っぢゃ" "ddya" "@dya" ";dya") ("っぢゅ" "ddyu" "@dyu" ";dyu") ("っぢぇ" "ddye" "@dye" ";dye") ("っぢょ" "ddyo" "@dyo" ";dyo") ("っつ" "ttsu" "@tsu" ";tsu") ("っづ" "ddu" "@du" ";du" ) ("って" "tte" "@te" ";te" ) ("っで" "dde" "@de" ";de" ) ("っと" "tto" "@to" ";to" ) ("っど" "ddo" "@do" ";do" ) ("っは" "hha" "@ha" ";ha" ) ("っば" "bba" "@ba" ";ba" ) ("っぱ" "ppa" "@pa" ";pa" ) ("っひ" "hhi" "@hi" ";hi" ) ("っひゃ" "hhya" "@hya" ";hya") ("っひゅ" "hhyu" "@hyu" ";hyu") ("っひぇ" "hhye" "@hye" ";hye") ("っひょ" "hhyo" "@hyo" ";hyo") ("っび" "bbi" "@bi" ";bi" ) ("っびゃ" "bbya" "@bya" ";bya") ("っびゅ" "bbyu" "@byu" ";byu") ("っびぇ" "bbye" "@bye" ";bye") ("っびょ" "bbyo" "@byo" ";byo") ("っぴ" "ppi" "@pi" ";pi" ) ("っぴゃ" "ppya" "@pya" ";pya") ("っぴゅ" "ppyu" "@pyu" ";pyu") ("っぴぇ" "ppye" "@pye" ";pye") ("っぴょ" "ppyo" "@pyo" ";pyo") ("っふ" "ffu" "hhu" "@fu" "@hu" ";fu" ";hu") ("っふぁ" "ffa" "@fa" ";fa" ) ("っふぃ" "ffi" "@fi" ";fi" ) ("っふぇ" "ffe" "@fe" ";fe" ) ("っふぉ" "ffo" "@fo" ";fo" ) ("っぶ" "bbu" "@bu" ";bu" ) ("っぷ" "ppu" "@pu" ";pu" ) ("っへ" "hhe" "@he" ";he" ) ("っべ" "bbe" "@be" ";be" ) ("っぺ" "ppe" "@pe" ";pe" ) ("っほ" "hho" "@ho" ";ho" ) ("っぼ" "bbo" "@bo" ";bo" ) ("っぽ" "ppo" "@po" ";po" ) ("っや" "yya" "@ya" ";ya" ) ("っゆ" "yyu" "@yu" ";yu" ) ("っよ" "yyo" "@yo" ";yo" ) ("っら" "rra" "@ra" ";ra" ) ("っり" "rri" "@ri" ";ri" ) ("っりゃ" "rrya" "@rya" ";rya") ("っりゅ" "rryu" "@ryu" ";ryu") ("っりぇ" "rrye" "@rye" ";rye") ("っりょ" "rryo" "@ryo" ";ryo") ("っる" "rru" "@ru" ";ru" ) ("っれ" "rre" "@re" ";re" ) ("っろ" "rro" "@ro" ";ro" ) )) ;; 子音が2音入ったテーブル (define sekka-kana->roman-alist-long `( ;; "n" 一つで "nn" を表現する件と被るのでAZIK専用拡張とする(エントリ上書き) ("にゃ" "nya" "nga") ("にゅ" "nyu" "ngu") ("にぇ" "nye" "nge") ("にょ" "nyo" "ngo") ;; 非AZIKで "cc" で 撥音「っ」から始まるかなを表現を消す可能性があるので "cc" はAZIK側だけに定義する ("ちゃ" "cha" "tya" "ca" "cc") ;; ---以下AZIK--- ;; ------ AZIK 撥音拡張 ("かん" "kz" "kn") ("きん" "kk" "kv") ("くん" "kj") ("けん" "kd") ("こん" "kl") ("さん" "sz" "sn") ("しん" "sk" "xk") ("すん" "sj") ("せん" "sd") ("そん" "sl") ("たん" "tz" "tn") ("ちん" "tk" "ck") ("つん" "tj") ("てん" "td") ("とん" "tl") ("なん" "nz") ;;("さん" "nn") "ん"になる ("にん" "nk") ("ぬん" "nj" "nv") ("ねん" "nd") ("のん" "nl") ("はん" "hz" "hn") ("ひん" "hk") ("ふん" "hj" "fj") ("へん" "hd") ("ほん" "hl") ("ふぁん" "fz" "fn") ("ふぃん" "fk") ("ふぇん" "fd") ("ふぉん" "fl") ("まん" "mz") ;;("まん" "mn") "もの"になる ("みん" "mk") ("むん" "mj" "mv") ("めん" "md") ("もん" "ml") ("やん" "yz" "yn") ("ゆん" "yj") ("よん" "yl") ("らん" "rz" "rn") ("りん" "rk") ("るん" "rj") ("れん" "rd") ("ろん" "rl" "wz") ("わん" "wn" "wz") ("うぃん" "wk") ("うぇん" "wd") ("うぉん" "wl") ;; ------ AZIK 二重母音拡張 ("かい" "kq") ("くう" "kh") ("けい" "kw") ("こう" "kp") ("さい" "sq" "sf" "sv") ("すう" "sh") ("せい" "sw" "ss") ("そう" "sp") ("たい" "tq") ("つう" "th") ("てい" "tw") ("とう" "tp") ("ない" "nq") ("ぬう" "nh") ("ねい" "nw") ("のう" "np") ("はい" "hq") ("ふう" "hh" "hh" "fh") ("へい" "hw") ("ほう" "hp") ("ふぁい" "fq" "fs") ("ふぇい" "fw") ("ふぉー" "fp") ("まい" "mq") ("むう" "mh") ("めい" "mw") ("もう" "mp") ("やい" "yq") ("ゆう" "yh" "yv") ("よう" "yp") ("らい" "rq") ("るう" "rh") ("れい" "rw") ("ろう" "rp") ("わい" "wq" "wf") ("うぉー" "wp") ;; ------ AZIK 濁音、半濁音 ("がん" "gz" "gn") ("ぎん" "gg" "gk") ("ぐん" "gj") ("げん" "gd") ("ごん" "gl") ("ざん" "zz" "zn") ("じん" "jk" "zk") ("ずん" "zj") ("ぜん" "zd") ("ぞん" "zl") ("だん" "dz" "dn") ("ぢん" "dd" "dk") ("づん" "dj") ("でん" "dd" "dv") ("どん" "dl") ("ばん" "bz" "bn") ("びん" "bb" "bk") ("ぶん" "bj") ("べん" "bd") ("ぼん" "bl") ("ぱん" "pz" "pn") ("ぴん" "pp" "pk") ("ぷん" "pj") ("ぺん" "pd") ("ぽん" "pl" "pf") ;; ------ AZIK 濁音、半濁音二重母音拡張 ("がい" "gq") ("ぐう" "gh") ("げい" "gw") ("ごう" "gp") ("ざい" "zq" "zv") ("ずう" "zh") ("ぜい" "zw" "zx") ("ぞう" "zp") ("だい" "dq") ("づう" "dh") ("でい" "dw") ("どう" "dp") ("ばい" "bq") ("ぶう" "bh") ("べい" "bw" "bx") ("ぼう" "bp") ("ぱい" "pq") ("ぷう" "ph") ("ぺい" "pw") ("ぽう" "pp" "pv") ;; ------ AZIK 特殊拡張 ("こと" "kt") ("わた" "wt") ("かも" "km") ("する" "sr") ("られ" "rr") ("ねば" "nb") ("にち" "nt") ("した" "st") ("もの" "mn") ("ため" "tm") ("たら" "tr") ("ざる" "zr") ("びと" "bt") ("だち" "dt") ("たち" "tt") ("ます" "ms") ("でも" "dm") ("なる" "nr") ("また" "mt") ("がら" "gr") ("われ" "wr") ("ひと" "ht") ("です" "ds") ("から" "kr") ("よる" "yr") ("たび" "tb") ("ごと" "gt") ;; skk-azik.elからの不足分 ("ばら" "br") ("びぇん" "byd") ("びゅう" "byh") ("びゅん" "byj") ("びょん" "byl") ("びゃん" "byn" "byz") ("びょう" "byp") ("びゃい" "byq") ("びぇい" "byw") ("ちぇん" "cd" "tyd") ("ちゅう" "ch" "tyh") ("ちゅん" "cj" "tyj") ("ちょん" "cl" "tyl") ("ちゃん" "cn" "cz" "tyn" "tyz") ("ちょう" "cp" "typ") ("ちゃい" "cq" "cv" "tyq") ("ちぇい" "cw" "cx" "tyw") ("でゅー" "dch") ("でぃん" "dck") ("どぅー" "dcp") ("でゅ" "dcu") ("だが" "dg") ("である" "dr") ("ふむ" "fm") ("ふる" "fr") ("ぎぇん" "gyd") ("ぎゅう" "gyh") ("ぎゅん" "gyj") ("ぎょん" "gyl") ("ぎゃん" "gyn" "gyz") ("ぎょう" "gyp") ("ぎゃい" "gyq") ("ぎぇい" "gyw") ("ひぇん" "hgd" "hyd") ("ひゅう" "hgh" "hyh") ("ひゅん" "hgj") ("ひょん" "hgl" "hyl") ("ひゃん" "hgn" "hgz" "hyz") ("ひょう" "hgp" "hyp") ("ひゃい" "hgq" "hyq") ("ひぇい" "hgw" "hyw") ("じぇん" "jd" "zyd") ("じゅう" "jh" "jv" "zyh") ("じゅん" "jj" "zyj") ("じょん" "jl" "zyl") ("じゃん" "jn" "jz" "zyn" "zyz") ("じょう" "jp" "zyp") ("じゃい" "jq" "zyq") ("じぇい" "jw" "zyw") ("きぇん" "kgd" "kyd") ("きゅう" "kgh" "kyh") ("きょん" "kgl" "kyl") ("きゃん" "kgn" "kgz" "kyn" "kyz") ("きょう" "kgp" "kyp") ("きゃい" "kgq" "kyq") ("きぇい" "kgw" "kyw") ("きゅん" "kyj") ("みぇん" "mgd" "myd") ("みゅう" "mgh" "myh") ("みゅん" "mgj" "myj") ("みょん" "mgl" "myl") ("みゃん" "mgn" "mgz" "myn" "myz") ("みょう" "mgp" "myp") ("みゃい" "mgq" "myq") ("みぇい" "mgw" "myw") ("まる" "mr") ("にぇん" "ngd" "nyd") ("にゅう" "ngh" "nyh") ("にゅん" "ngj" "nyj") ("にょん" "ngl" "nyl") ("にゃん" "ngn" "ngz" "nyn" "nyz") ("にょう" "ngp" "nyp") ("にゃい" "ngq" "nyq") ("にぇい" "ngw" "nyw") ("ぴぇん" "pgd" "pyd") ("ぴゅう" "pgh" "pyh") ("ぴゅん" "pgj" "pyj") ("ぴょん" "pgl" "pyl") ("ぴゃん" "pgn" "pgz" "pyn" "pyz") ("ぴょう" "pgp" "pyp") ("ぴゃい" "pgq" "pyq") ("ぴぇい" "pgw" "pyw") ("りぇん" "ryd") ("りゅう" "ryh") ("りゅん" "ryj") ("りょく" "ryk") ("りょん" "ryl") ("りゃん" "ryn" "ryz") ("りょう" "ryp") ("りゃい" "ryq") ("りぇい" "ryw") ("しも" "sm") ("しぇん" "syd" "xd") ("しゅう" "syh" "xh") ("しゅん" "syj" "xj") ("しょん" "syl" "xl") ("しょう" "syp" "xp") ("しゃい" "syq" "xq" "xv") ("しぇい" "syw" "xf" "xw") ("しゃん" "syz" "xn" "xz") ("てゅー" "tgh") ("てぃん" "tgk") ("とぅー" "tgp") ("てゅ" "tgu") ("つぁ" "tsa") ("つぇ" "tse") ("つぃ" "tsi") ("つぉ" "tso") ("う゛ぇん" "vd") ("う゛ぃん" "vk") ("う゛ぉん" "vl") ("う゛ぁん" "vn" "vz") ("う゛ぉー" "vp") ("う゛ぁい" "vq") ("う゛ぇい" "vw") ("う゛ゃ" "vya") ("う゛ぇ" "vye") ("う゛ょ" "vyo") ("う゛ゅ" "vyu") ("うぁ" "wha") ("しゅつ" "xt") ;; 辞書でカバーすれば良いのでRomanテーブルには入れない ;;("←" "xxh") ;;("↓" "xxj") ;;("↑" "xxk") ;;("→" "xxl") )) ;; ハッシュテーブル 平仮名 =>ローマ字 (define sekka-kana->roman-hash-short (alist->hash-table sekka-kana->roman-alist-short)) (define sekka-kana->roman-hash-long (alist->hash-table (append sekka-kana->roman-alist-short sekka-kana->roman-alist-long))) ;; ハッシュテーブル ローマ字 =>平仮名 (define (sekka-alist-swap alist) (append-map (lambda (x) (let ((hira (car x)) (romans (cdr x))) (map (lambda (r) (list r hira)) romans))) alist)) (define sekka-roman->kana-hash-short (alist->hash-table (sekka-alist-swap sekka-kana->roman-alist-short))) (define sekka-roman->kana-hash-long (alist->hash-table (sekka-alist-swap (append sekka-kana->roman-alist-short sekka-kana->roman-alist-long)))) ;; 平仮名->カタカナ 変換 (define (gen-hiragana->katakana str) (str.tr "あ-んぁぃぅぇぉゃゅょっー" "ア-ンァィゥェォャュョッー")) ;; カタカナ->平仮名 変換 (define (gen-katakana->hiragana str) (str.tr "ア-ンァィゥェォャュョッー" "あ-んぁぃぅぇぉゃゅょっー")) ;; カタカナの文字列かどうかを評価する (define (is-katakana str) (if (rxmatch #/^[ア-ンァィゥェォャュョッー]+$/ str) #t #f)) ;; 平仮名の文字列かどうかを評価する (define (is-hiragana str) (if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+$/ str) #t #f)) ;; 送り仮名付き平仮名文字列(例:"おこなu") かどうかを評価する (define (is-hiragana-and-okuri str) (if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+[a-z]$/ str) #t #f)) ;; 送り仮名付き漢字文字列(例:"行う") の送り仮名部分を削除する (define (drop-okuri str) (if-let1 m (rxmatch #/^([^あ-んぁぃぅぇぉゃゅょっー]+)(.+)$/ str) (rxmatch-substring m 1) str)) ;; 小文字を大文字にして返す。 "@"と";"も扱う。 (define (sekka-upcase str) (. (str.tr "@;" "`+") upcase)) ;; 大文字を小文字にして返す。 "@"と";"も扱う。 (define (sekka-downcase str) (. (str.tr "`+" "@;") downcase)) (define (gen-hiragana->roman-pattens-with-hash h hiragana) (let1 lst '() (let loop ((str hiragana)) (let ((str1 (str.slice 0 1)) (str2 (str.slice 0 2)) (str3 (str.slice 0 3))) (cond ((eq? 0 (str.size)) #f) ((hash-table-exist? h str3) (set! lst (cons (hash-table-get h str3) lst)) (loop (str.slice (str3.size) (str.size)))) ((hash-table-exist? h str2) (set! lst (cons (hash-table-get h str2) lst)) (loop (str.slice (str2.size) (str.size)))) ((hash-table-exist? h str1) (set! lst (cons (hash-table-get h str1) lst)) (loop (str.slice (str1.size) (str.size))))))) (reverse lst))) (define (gen-hiragana->roman-pattens hiragana) (delete-duplicates (list (gen-hiragana->roman-pattens-with-hash sekka-kana->roman-hash-short hiragana) (gen-hiragana->roman-pattens-with-hash sekka-kana->roman-hash-long hiragana)))) ;; if failed, return #f (define (gen-roman->hiragana-with-hash h roman-str) (let ((lst '()) (err #f)) (let loop ((str roman-str)) (let ((str1 (str.slice 0 1)) (str2 (str.slice 0 2)) (str3 (str.slice 0 3)) (str4 (str.slice 0 4))) (cond ((eq? 0 (str.size)) #f) ((hash-table-exist? h str4) (set! lst (cons (hash-table-get h str4) lst)) (loop (str.slice (str4.size) (str.size)))) ((hash-table-exist? h str3) (set! lst (cons (hash-table-get h str3) lst)) (loop (str.slice (str3.size) (str.size)))) ((hash-table-exist? h str2) (set! lst (cons (hash-table-get h str2) lst)) (loop (str.slice (str2.size) (str.size)))) ((hash-table-exist? h str1) (set! lst (cons (hash-table-get h str1) lst)) (loop (str.slice (str1.size) (str.size)))) (else (set! err #t))))) (if err #f (string-join (map (lambda (x) (car x)) (reverse lst)))))) ;; if failed, return '() ;; roman-methodには :normal か :azik を指定します。 ;; それにより、通常のローマ字かAZIK(拡張ローマ字)のどちらを優先するかを指定できます。 (define (gen-roman->hiragana roman-str roman-method) (let ((s (gen-roman->hiragana-with-hash sekka-roman->kana-hash-short roman-str)) (l (gen-roman->hiragana-with-hash sekka-roman->kana-hash-long roman-str))) (delete-duplicates (filter (lambda (x) x) (case roman-method ((:azik) (list l s)) ((:normal) (list s l)) (else (error "Error: gen-roman->hiragana got illegal roman-method."))))))) ;; if failed, return '() (define (gen-roman->katakana roman-str roman-method) (filter-map (lambda (x) (gen-hiragana->katakana x)) (gen-roman->hiragana roman-str roman-method))) ;; This function port from Gauche-0.9's util.combinations. (define (cartesian-product lol) (if (null? lol) (list '()) (let ((l (car lol)) (rest (cartesian-product (cdr lol)))) (append-map (lambda (x) (map (lambda (sub-prod) (cons x sub-prod)) rest)) l)))) (define (patterns->roman-list patterns) (uniq (sort (append-map (lambda (_pattern) (map (lambda (x) (string-join x)) (cartesian-product _pattern))) patterns)))) (define (gen-hiragana->roman-list hiragana) (patterns->roman-list (gen-hiragana->roman-pattens hiragana)))