#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# windex.rb $Revision: 1.2 $
#
# windex: registers a keyword in the word index
#   Parameters:
#     str:      keyword string
#     readname: reading (kana) of the keyword, used for grouping and sorting
#
# wikw: generates anchor(s) for a keyword from the word index
#   Parameters:
#     str: keyword string
#
# If this file is also placed in the tDiary top directory and executed as a
# CGI, it outputs the index page.
#
# Arguments when running as a CGI
#   http://(diary URL)/windex.rb?kw=(keyword string)
#   When a keyword is given, the list of diary dates related to that keyword
#   is output. If there is only one such date, a redirect to that day's
#   diary is output instead.
#
# Settings in tdiary.conf
#   @options['windex.generate_all'] = true
#   Generates the index from all diaries. This takes time, so the intended
#   use is to set it to true only when the whole index should be rebuilt,
#   and then run an update.
#
# Copyright (c) 2003 Gony
# Distributed under the GPL
#
# NOTE(review): in the copy of this file that was reviewed, the HTML tags
# inside string literals and heredocs were missing (computed anchors were
# never emitted, the redirect page contained no redirect). The markup below
# is a minimal reconstruction; confirm it against the upstream file/theme.

# "CGI" when executed directly as a script, "" when loaded as a tDiary plugin.
mode = ""
if $0 == __FILE__
  mode = "CGI"
  # Resolve a symlinked script so that tdiary.rb is looked up next to the
  # real file.
  if FileTest.symlink?(__FILE__)
    org_path = File.dirname(File.readlink(__FILE__))
  else
    org_path = File.dirname(__FILE__)
  end
  $:.unshift(org_path)
  require "pstore"
  require "tdiary"
  tdiarybase = TDiary::TDiaryBase
else
  tdiarybase = TDiaryBase
end

# tDiary base subclass that gives this file access to plugin loading and to
# the diary storage of one month.
class WITDiary < tdiarybase
  # Re-declared here so that it is callable from outside the instance
  # (presumably not public in the base class -- confirm against tDiary).
  def load_plugins
    super
  end

  # Builds a WIWordIndex from the diaries that @io yields for +date+.
  #
  # date::   Time passed to @io.transaction
  # plugin:: object responding to #anchor(yyyymmdd)
  # index::  string prepended to every anchor
  #
  # Returns the populated WIWordIndex.
  def generate_wordindex(date, plugin, index)
    wordindex = WIWordIndex.new
    @io.transaction(date) do |diaries|
      wordindex.generate(diaries, plugin, index)
    end
    wordindex
  end
end

# In-memory word index: keyword => {"readname" => String or nil,
# "anchor" => [String, ...]}, persisted per month in a PStore.
class WIWordIndex
  def initialize
    @windex = {}
    @dates = []
  end

  # Scans subtitle and body of every section of every diary for windex calls
  # and records them under the section's anchor
  # (index + plugin.anchor(yyyymmdd) + "#pNN").
  def generate(diaries, plugin, index)
    diaries.each_value do |diary|
      num_section = 1
      diary.each_section do |section|
        anchor = index +
                 plugin.anchor(diary.date.strftime("%Y%m%d")) +
                 ("#p%02d" % num_section)
        scan(section.subtitle, anchor) unless section.subtitle.nil?
        scan(section.body, anchor)
        num_section += 1
      end
    end
  end

  # Replaces the in-memory index with the merge of every root stored in
  # <dir>/windex. The directory is created if missing. For each keyword the
  # first non-nil readname found wins; anchors are concatenated.
  def load(dir)
    @windex = {}
    Dir.mkdir(dir) unless File.directory?(dir)
    PStore.new(dir + "/windex").transaction do |pstore|
      @dates = pstore.roots
      @dates.each do |root|
        pstore[root].each do |word, stored|
          merged = (@windex[word] ||= {"readname" => nil, "anchor" => []})
          if merged["readname"].nil? && stored.has_key?("readname")
            merged["readname"] = stored["readname"]
          end
          merged["anchor"].concat(stored["anchor"])
        end
      end
    end
  end

  # Stores the in-memory index under root +keyname+ in <dir>/windex,
  # creating the directory (mode 0755) if needed.
  def save(dir, keyname)
    Dir.mkdir(dir, 0755) unless File.directory?(dir)
    PStore.new(dir + "/windex").transaction do |pstore|
      pstore[keyname] = @windex
    end
  end

  # Renders the index through +page+ (any object with #generate_html(hash)).
  def generate_html(page)
    page.generate_html(@windex)
  end

  def has_key?(key)
    @windex.has_key?(key)
  end

  def [](key)
    @windex[key]
  end

  private

  # Finds every "<%= windex ... %>" call in +body+ and registers its first
  # string argument as a keyword pointing at +anchor+. A non-empty second
  # argument becomes the readname unless one is already recorded.
  def scan(body, anchor)
    body.scan(%r[<%\s*=\s*windex\s*[^(<%)]*\s*%>]).each do |call|
      # Strip the ERB wrapper so that only the argument list remains.
      argstr = call.gsub(%r[<%\s*=\s*windex\s*], "").gsub(%r[\s*%>], "")
      args = extract_string_args(argstr)
      next if args.empty?

      entry = (@windex[args[0]] ||= {"readname" => nil, "anchor" => []})
      if args.length > 1 && entry["readname"].nil? && args[1] != ""
        entry["readname"] = args[1]
      end
      entry["anchor"] << anchor
    end
  end

  # Returns the contents of the quoted string literals ('..', "..", %Q(..),
  # %q[..] etc.) found left to right in +argstr+. Stops at the first literal
  # that is not terminated.
  def extract_string_args(argstr)
    closing = {"(" => ")", "[" => "]", "{" => "}", "<" => ">"}
    opener = %r['|"|%[Qq].]
    args = []
    loop do
      pos = argstr.index(opener)
      break if pos.nil?

      delimiter = argstr[opener]
      if delimiter.length == 3
        # %Q<c> / %q<c>: the literal ends at the bracket matching <c>, or at
        # <c> itself when it is not a bracket.
        last = delimiter[2, 1]
        delimiter_end = closing.fetch(last, last)
      else
        delimiter_end = delimiter
      end

      # Drop everything up to and including the opening delimiter.
      argstr = argstr[(pos + delimiter.length)..-1]
      pos = argstr.index(delimiter_end)
      break if pos.nil?

      args << argstr[0, pos]
      # Drop the argument and its closing delimiter.
      argstr = argstr[(pos + delimiter_end.length)..-1]
    end
    args
  end
end

# Shared page skeleton for the HTML-producing page classes. Expects the
# including class to set @title and @css.
module WIPageLayout
  private

  # Wraps +content+ in the complete HTML document.
  # NOTE(review): `h` is not defined in this file; it is presumably the HTML
  # escape helper made available by tDiary -- confirm.
  def wrap_page(content)
    <<-BODY
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>#{h @title}(索引)</title>
#{@css}
</head>
<body>
<h1>#{@title} [索引]</h1>
#{content}
</body>
</html>
    BODY
  end
end

# Full index page: keywords grouped under a heading derived from the first
# character of their readname (or of the keyword when no readname is set).
class WIIndexPage
  include WIPageLayout

  def initialize(title, css)
    @title = title
    @css = css
  end

  # windex:: Hash keyword => {"readname" => ..., "anchor" => [...]}
  # Returns the HTML document as a String.
  def generate_html(windex)
    # heading => array of keywords
    subindex_to_name = {}
    windex.each_key do |name|
      readname = windex[name]["readname"]
      subindex = get_subindex(readname.nil? ? name : readname)
      (subindex_to_name[subindex] ||= []) << name
    end

    # The symbol group comes first, the remaining headings in sorted order.
    body = ""
    if subindex_to_name.has_key?("記号")
      body << generate_html_subindex(windex, subindex_to_name, "記号")
    end
    subindex_to_name.keys.sort.each do |key|
      next if key == "記号"
      body << generate_html_subindex(windex, subindex_to_name, key)
    end

    wrap_page(body)
  end

  private

  # Renders one heading with its keywords, ordered by readname and then by
  # keyword; each keyword is followed by numbered links to its anchors.
  def generate_html_subindex(windex, subindex_to_name, key)
    readname_to_name = {}
    subindex_to_name[key].each do |name|
      readname = windex[name]["readname"]
      (readname_to_name[readname.nil? ? name : readname] ||= []) << name
    end

    body = %Q[<div class="section">\n<h2>#{key}</h2>\n<ul>\n]
    readname_to_name.keys.sort.each do |readname|
      readname_to_name[readname].sort.each do |name|
        links = []
        windex[name]["anchor"].sort.each_with_index do |anchor, i|
          links << %Q[<a href="#{anchor}">#{i + 1}</a>]
        end
        body << "<li>#{name} ... " << links.join(",") << "</li>\n"
      end
    end
    body << "</ul>\n</div>\n"
    body
  end

  # Returns the heading under which +name+ is filed:
  # * "記号" for ASCII punctuation and for an empty name,
  # * the upper-cased character for other ASCII characters,
  # * the plain hiragana for kana (katakana, small, voiced and semi-voiced
  #   forms are folded),
  # * the first character itself otherwise.
  #
  # Works on characters/code points, so it expects +name+ to be a validly
  # encoded UTF-8 string (the page is served as UTF-8).
  def get_subindex(name)
    to_plainhiragana = {
      "ぁ" => "あ", "ぃ" => "い", "ぅ" => "う", "ぇ" => "え", "ぉ" => "お",
      "が" => "か", "ぎ" => "き", "ぐ" => "く", "げ" => "け", "ご" => "こ",
      "ざ" => "さ", "じ" => "し", "ず" => "す", "ぜ" => "せ", "ぞ" => "そ",
      "だ" => "た", "ぢ" => "ち", "っ" => "つ", "づ" => "つ", "で" => "て", "ど" => "と",
      "ば" => "は", "ぱ" => "は", "び" => "ひ", "ぴ" => "ひ", "ぶ" => "ふ", "ぷ" => "ふ",
      "べ" => "へ", "ぺ" => "へ", "ぼ" => "ほ", "ぽ" => "ほ",
      "ゃ" => "や", "ゅ" => "ゆ", "ょ" => "よ",
      "ゎ" => "わ", "ヴ" => "う", "ヵ" => "か", "ヶ" => "け",
    }
    # Non-ASCII look-alikes that are not in the fullwidth-forms block.
    special_to_ascii = {
      "”" => '"', "’" => "'", "−" => "-", "¥" => "\\", "‾" => "~",
    }

    topchr = name[0, 1]
    return "記号" if topchr.nil? || topchr.empty?

    # Fold fullwidth ASCII variants (U+FF01..U+FF5E) onto ASCII.
    # NOTE(review): the table in the reviewed copy mapped nearly every
    # character to itself; fullwidth keys are assumed to be the intent.
    code = topchr.ord
    if 0xFF01 <= code && code <= 0xFF5E
      topchr = [code - 0xFEE0].pack("U")
    elsif special_to_ascii.has_key?(topchr)
      topchr = special_to_ascii[topchr]
    end

    code = topchr.ord
    if code < 0x80
      topchr = topchr.upcase
      # All ASCII punctuation blocks; the last range previously stopped at
      # "{" and so missed "|", "}" and "~".
      if (0x21 <= code && code <= 0x2F) ||
         (0x3A <= code && code <= 0x40) ||
         (0x5B <= code && code <= 0x60) ||
         (0x7B <= code && code <= 0x7E)
        topchr = "記号"
      end
    else
      # Katakana (U+30A1..U+30F3) -> hiragana: same layout, 0x60 lower.
      if 0x30A1 <= code && code <= 0x30F3
        topchr = [code - 0x60].pack("U")
      end
      # Voiced / semi-voiced / small kana -> plain kana.
      topchr = to_plainhiragana.fetch(topchr, topchr)
    end
    topchr
  end
end

# Page that sends the browser to the first anchor recorded for a keyword.
class WIRedirectPage
  def initialize(key)
    @key = key
  end

  def generate_html(windex)
    anchor = windex[@key]["anchor"][0]
    <<-BODY
<html>
<head>
<meta http-equiv="refresh" content="0;url=#{anchor}">
<title>moving...</title>
</head>
<body>Wait or <a href="#{anchor}">Click here!</a></body>
</html>
    BODY
  end
end

# Page listing, by date, every anchor recorded for one keyword.
class WISinglePage
  include WIPageLayout

  def initialize(title, date_format, css, key)
    @title = title
    @date_format = date_format
    @css = css
    @key = key
  end

  def generate_html(windex)
    anchors = windex[@key]["anchor"]
    body = %Q[<div class="section">\n<h2>#{@key}</h2>\n<ul>\n]
    anchors.sort.each do |anchor|
      # The date is recovered from the first run of 8 digits in the anchor.
      str_date = anchor.scan(/\d{8}/)[0]
      date = Time.local(str_date[0..3].to_i, str_date[4..5].to_i, str_date[6..7].to_i)
      body << %Q[<li><a href="#{anchor}">#{date.strftime(@date_format)}</a></li>\n]
    end
    body << "</ul>\n</div>\n"
    wrap_page(body)
  end
end

# Page reporting that the requested keyword is not in the index.
class WIErrorPage
  include WIPageLayout

  def initialize(title, css, key)
    @title = title
    @css = css
    @key = key
  end

  def generate_html(windex)
    wrap_page("<p>キーワード「#{h @key}」は登録されていません。</p>")
  end
end

# Plugin method used inside diary text: marks +str+ as an index keyword
# (collected by WIWordIndex when the index is generated) and renders as the
# keyword itself. +readname+ is not used at render time.
def windex(str, readname = "")
  str
end

# Plugin method: renders +str+ linked to its index anchor(s). One anchor:
# the keyword itself is the link. Otherwise: the keyword followed by numbered
# links in parentheses. Unknown keywords are returned unchanged.
def wikw(str)
  return str unless @wordindex.has_key?(str)

  anchors = @wordindex[str]["anchor"]
  return %Q[<a href="#{anchors[0]}">#{str}</a>] if anchors.length == 1

  links = []
  anchors.sort.each_with_index do |anchor, i|
    links << %Q[<a href="#{anchor}">#{i + 1}</a>]
  end
  "#{str}(#{links.join(",")})"
end

if mode != "CGI"
  # Plugin mode: load the stored index for wikw and refresh it on update.
  @wordindex = WIWordIndex.new
  @wordindex.load(@cache_path + "/windex")

  add_update_proc do
    if @conf.options["windex.generate_all"] == true
      # Rebuild the index of every month listed in @years.
      tdiary = WITDiary.new(@cgi, "day.rhtml", @conf)
      @years.each_key do |year|
        @years[year.to_s].each do |month|
          date = Time::local(year, month)
          wordindex = tdiary.generate_wordindex(date, self, @conf.index)
          wordindex.save(@cache_path + "/windex", date.strftime('%Y%m'))
        end
      end
    else
      # Rebuild only the month being updated.
      wordindex = WIWordIndex.new
      wordindex.generate(@diaries, self, @conf.index)
      wordindex.save(@cache_path + "/windex", @date.strftime('%Y%m'))
    end
  end
else
  # CGI mode: choose the page from the "kw" parameter and print it.
  cgi = CGI.new
  conf = TDiary::Config.new(cgi)
  cache_path = conf.data_path + "cache"
  plugin = WITDiary.new(cgi, "day.rhtml", conf).load_plugins
  wordindex = WIWordIndex.new
  wordindex.load(cache_path + "/windex")

  if cgi.valid?('kw') == true
    key = cgi.params['kw'][0]
    if wordindex.has_key?(key)
      num_anchor = wordindex[key]["anchor"].length
      if num_anchor == 0
        page = WIErrorPage.new(conf.html_title, plugin.css_tag, key)
      elsif num_anchor == 1
        page = WIRedirectPage.new(key)
      else
        page = WISinglePage.new(conf.html_title, conf.date_format, plugin.css_tag, key)
      end
    else
      page = WIErrorPage.new(conf.html_title, plugin.css_tag, key)
    end
  else
    page = WIIndexPage.new(conf.html_title, plugin.css_tag)
  end

  body = wordindex.generate_html(page)
  header = {
    "type" => "text/html",
    "charset" => "UTF-8",
    # Content-Length is a byte count; String#size counts characters on
    # Ruby 1.9+ and would be too small for multi-byte UTF-8 output.
    "Content-Length" => body.bytesize.to_s,
    "Pragma" => "no-cache",
    "Cache-Control" => "no-cache",
    "Vary" => "User-Agent",
  }
  head = cgi.header(header)
  print head
  print body
end