:; #-*- mode: nendo; syntax: scheme -*-;; ;;; ;;; convert-jisyo.nnd - SKK-JISYO形式から SEKKA-JISYO形式へのコンバートロジック ;;; ;;; Copyright (c) 2010 Kiyoka Nishiyama ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; ;;; 1. Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; ;;; 2. Redistributions in binary form must reproduce the above copyright ;;; notice, this list of conditions and the following disclaimer in the ;;; documentation and/or other materials provided with the distribution. ;;; ;;; 3. Neither the name of the authors nor the names of its contributors ;;; may be used to endorse or promote products derived from this ;;; software without specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;;; ;;; $Id: ;;; (use srfi-1) (use util.list) (use sekka.util) (use sekka.roman-lib) (require "ruby-progressbar") (define (expand-hiragana-phrase-entry key) (let1 roman-list (gen-hiragana->roman-list key) (if (< 1000 (length roman-list)) (begin (sprintf " Warning: ignored entry [%s] (hiragana phrase), because too many pattens.\n" key) #f) ;; パターン数が爆発した単語は無視する (map (lambda (x) (cons (+ "=" x) key)) roman-list)))) (define (expand-okuri-nashi-entry key value) (let1 roman-list (gen-hiragana->roman-list key) (if (< 1000 (length roman-list)) (begin (sprintf " Warning: ignored entry [%s %s] , because too many pattens.\n" key value) #f) ;; パターン数が爆発した単語は無視する (append (map (lambda (x) (cons x (+ "C" key))) roman-list) (list (cons key value)))))) (define (expand-okuri-ari-entry-internal key okuri value) (let1 roman-list (gen-hiragana->roman-list key) (append (map (lambda (x) (cons (+ x (sekka-upcase okuri)) (+ "C" key okuri))) roman-list) (list (cons (+ key okuri) value))))) (define (expand-okuri-ari-entry key okuri value) (cond ((eq? "t" okuri) (append-map (lambda (x) x) (list (expand-okuri-ari-entry-internal key okuri value) (expand-okuri-ari-entry-internal key (sekka-upcase "@") value) (expand-okuri-ari-entry-internal key (sekka-upcase ";") value)))) (else (expand-okuri-ari-entry-internal key okuri value)))) (define (convert-skk-jisyo-f f) (let1 progress (ProgressBar.create (alist->hash-table `( [title . "convert "] [total . ,(file-length f)] [output . ,STDERR]))) (define (gen-sekka-entries line) (progress.increment) (let* ((line (line.sub #/\/$/ "")) (fields (split-dict-line line))) (cond ((rxmatch #/^\;/ line) ;; コメント行 #f) ((not fields) ;; フォーマットエラー #f) ((rxmatch #/\;\;/ line) ;; フォーマットエラー #f) ((and (is-hiragana (first fields)) (= 1 (length fields))) ;; 平仮名フレーズ (expand-hiragana-phrase-entry (first fields))) ((and (is-hiragana (first fields)) (rxmatch #/^\/$/ (second fields))) ;; 平仮名フレーズ (expand-hiragana-phrase-entry (first fields))) ((or (is-hiragana (first fields)) (rxmatch #/^([>#あ-んー]+)$/ (first fields))) ;; 送り仮名なしデータ (expand-okuri-nashi-entry (first fields) (second fields))) ((rxmatch #/^([>あ-んー]+)([a-z])$/ (first fields)) => (lambda (m) ;; 送り仮名ありデータ (expand-okuri-ari-entry (rxmatch-substring m 1) (rxmatch-substring m 2) (second fields)))) ((rxmatch #/[亜-瑤]+/ (first fields)) ;; 漢字が1文字でも含まれている #f) ((rxmatch #/[#]+/ (first fields)) ;; # が1文字でも含まれている #f) ((rxmatch #/^[ ]*$/ (first fields) ) ;; 空文字列 #f) ((= 1 (length fields)) ;; フィールドが1件しかない。 #f) (else (list (cons (first fields) (second fields))))))) (let* ([lines (map (lambda (line) (line.chomp)) (f.readlines))] [result '#()]) (for-each (lambda (x) (let1 entries (gen-sekka-entries x) (when entries (for-each (lambda (entry) (result.push (sprintf "%s %s" (car entry) (cdr entry)))) entries)))) lines) (to-list result))))