:; #-*- mode: nendo; syntax: scheme -*-;; ;;; ;;; convert-jisyo.nnd - SKK-JISYO形式から SEKKA-JISYO形式へのコンバートロジック ;;; ;;; Copyright (c) 2010 Kiyoka Nishiyama ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; ;;; 1. Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; ;;; 2. Redistributions in binary form must reproduce the above copyright ;;; notice, this list of conditions and the following disclaimer in the ;;; documentation and/or other materials provided with the distribution. ;;; ;;; 3. Neither the name of the authors nor the names of its contributors ;;; may be used to endorse or promote products derived from this ;;; software without specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;;; ;;; $Id: ;;; (use srfi-1) (use sekka.util) (use sekka.roman-lib) (define (expand-hiragana-phrase-entry key) (let1 roman-list (gen-hiragana->roman-list key) (if (< 1000 (length roman-list)) (begin (STDERR.printf " Warning: ignored entry [%s] (hiragana phrase), because too many pattens.\n" key) #f) ;; パターン数が爆発した単語は無視する (map (lambda (x) (cons (+ "=" x) key)) roman-list)))) (define (expand-okuri-nashi-entry key value) (let1 roman-list (gen-hiragana->roman-list key) (if (< 1000 (length roman-list)) (begin (STDERR.printf " Warning: ignored entry [%s %s] , because too many pattens.\n" key value) #f) ;; パターン数が爆発した単語は無視する (append (map (lambda (x) (cons x (+ "C" key))) roman-list) (list (cons key value)))))) (define (expand-okuri-ari-entry-internal key okuri value) (let1 roman-list (gen-hiragana->roman-list key) (append (map (lambda (x) (cons (+ x (sekka-upcase okuri)) (+ "C" key okuri))) roman-list) (list (cons (+ key okuri) value))))) (define (expand-okuri-ari-entry key okuri value) (cond ((eq? "t" okuri) (append-map (lambda (x) x) (list (expand-okuri-ari-entry-internal key okuri value) (expand-okuri-ari-entry-internal key (sekka-upcase "@") value) (expand-okuri-ari-entry-internal key (sekka-upcase ";") value)))) (else (expand-okuri-ari-entry-internal key okuri value)))) (define (convert-skk-jisyo-f f) (define total 0) (define current 0) (define (display-progress line) (set! current (+ current 1)) (when (= 0 (% current 1000)) (STDERR.printf " %7d/%7d (%3.3f%)\n" current total (* (/ current (total.to_f)) 100.0)))) (define (gen-sekka-entries line) (display-progress line) (let* ((line (line.sub #/\/$/ "")) (fields (split-dict-line line))) (cond ((rxmatch #/^\;/ line) ;; コメント行 #f) ((not fields) ;; フォーマットエラー #f) ((rxmatch #/\;\;/ line) ;; フォーマットエラー #f) ((and (is-hiragana (first fields)) (rxmatch #/^\/$/ (second fields))) ;; 平仮名フレーズ (expand-hiragana-phrase-entry (first fields))) ((or (is-hiragana (first fields)) (rxmatch #/^([>#あ-んー]+)$/ (first fields))) ;; 送り仮名なしデータ (expand-okuri-nashi-entry (first fields) (second fields))) ((rxmatch #/^([>あ-んー]+)([a-z])$/ (first fields)) => (lambda (m) ;; 送り仮名ありデータ (expand-okuri-ari-entry (rxmatch-substring m 1) (rxmatch-substring m 2) (second fields)))) ((rxmatch #/[亜-瑤]+/ (first fields)) ;; 漢字が1文字でも含まれている #f) ((rxmatch #/[#]+/ (first fields)) ;; # が1文字でも含まれている #f) (else (list (cons (first fields) (second fields))))))) (let* ([lines (map (lambda (line) (line.chomp)) (f.readlines))] [_ (set! total (lines.size))] [result '#()]) (for-each (lambda (x) (let1 entries (gen-sekka-entries x) (when entries (for-each (lambda (entry) (result.push (sprintf "%s %s" (car entry) (cdr entry)))) entries)))) lines) (to-list result)))