#!/bin/sh
:; #-*- mode: nendo; syntax: scheme -*-;;
:; # Re-exec this file under nendo. "$0" and "$@" are quoted so that script or argument paths containing spaces are passed through intact.
:; exec /usr/local/bin/nendo "$0" "$@"

(use srfi-1)
(use sekka.roman-lib)

;; NOTE(review): presumably sets the maximum length of values shown by the
;; #?= debug-print forms used in test-code -- confirm against the Nendo docs.
(debug-print-length 300)

;; Parse one dictionary line of the form
;;   (品詞 (KIND ...)) ((見出し語 (WORD COST)) (読み ...) ...)
;; and return (WORD KIND ...), i.e. the headword consed onto the kind list.
(define (analyze-line line)
  ;; Turn the bare tokens of TEXT into string literals so the Scheme reader
  ;; can parse it. Literal "(" and ")" entries are first replaced by the
  ;; placeholder kakko so they are not mistaken for list delimiters.
  ;; Parentheses, digits and spaces are left untouched by the final gsub.
  (define (quote-tokens text)
    (let* ([opened (text.gsub "\"(\"" "kakko")]
           [closed (opened.gsub "\")\"" "kakko")])
      (closed.gsub #/([^\(\)0-9 ]+)/ "\"\\1\"")))

  (let* ([form       (read-from-string (+ "("
                                          (quote-tokens line)
                                          ")"))]
         ;; form = ((品詞 KIND-LIST) DATA-ALIST)
         [kind-list  (second (first form))]
         [data-alist (second form)]
         ;; entry = ((WORD COST)) for the 見出し語 key
         [entry      (assq-ref "見出し語" data-alist)])
    (cons (caar entry) kind-list)))

;; Decide whether WORD-AND-KIND, a (WORD KIND ...) list as produced by
;; analyze-line, should be kept.
;; The result is true only when WORD is a string, passes is-hiragana, has a
;; size greater than 1, and none of the excluded kinds occurs in the kind list.
(define (phrase-is-valid? word-and-kind)
  (let ([headword (car word-and-kind)]
        [kinds    (cdr word-and-kind)]
        ;; Kinds that disqualify an entry.
        [ng-kinds '("フィラー"
                    "固有名詞"
                    "サ変接続"
                    "感動詞")])
    ;; The tests run left to right, so is-hiragana and .size are only
    ;; applied to values already known to be strings.
    (when (and (string? headword)
               (is-hiragana headword)
               (< 1 (headword.size)))
      (let ([hits (filter (lambda (ng) (memq ng kinds))
                          ng-kinds)])
        (= 0 (length hits))))))
       
;; Smoke test: run analyze-line and phrase-is-valid? over a few sample
;; dictionary lines, echoing each intermediate result through #?=.
(define (test-code)
  ;; Analyze a single sample line and debug-print both results.
  (define (check line)
    (let1 word-and-kind #?=(analyze-line line)
      #?=(phrase-is-valid? word-and-kind)))

  (for-each
   check
   '(
     "(品詞 (副詞 助詞類接続)) ((見出し語 (ありのまま 2116)) (読み アリノママ) (発音 アリノママ) )  "
     "(品詞 (副詞 一般)) ((見出し語 (やすやす 2875)) (読み ヤスヤス) (発音 ヤスヤス) )   "
     "(品詞 (副詞 一般)) ((見出し語 (とりいそぎ 2875)) (読み トリイソギ) (発音 トリイソギ) )  "
     "(品詞 (副詞 一般)) ((見出し語 (しかしながら 2875)) (読み シカシナガラ) (発音 シカシナガラ) )    "
     "(品詞 (形容詞 自立)) ((見出し語 (赤い 2074)) (読み アカイ) (発音 アカイ) (活用型 形容詞・アウオ段) )")))

;; Read FILENAME line by line and, for every entry accepted by
;; phrase-is-valid?, print two lines: a ";; WORD KINDS" comment line
;; followed by "WORD  //".
(define (grep-valid-phrase filename)
  ;; Print the two output lines for one accepted (WORD KIND ...) entry.
  (define (report entry)
    (let ([word  (car entry)]
          [kinds (cdr entry)])
      (printf ";; %s %s  \n"
              word
              (write-to-string kinds))
      (printf "%s  //\n"
              word)))

  ;; Analyze one raw input line (newline chomped off) and report it if valid.
  (define (process line)
    (let1 entry (analyze-line line.chomp)
      (when (phrase-is-valid? entry)
        (report entry))))

  (with-open filename
             (lambda (f)
               (for-each process f))))

;; Entry point. With at least one argument, the first one is taken as the
;; dictionary file to filter. With none, the built-in samples are run
;; through test-code and then a usage error is raised.
(define (main argv)
  (let1 argc (length argv)
    (if (<= 1 argc)
        (grep-valid-phrase (car (to-list argv)))
        (begin
          (test-code)
          (error "hiragana_phrase_in_ipadic.nnd requires [ipadic file(utf8)]")))))