#!/usr/bin/env ruby -Ke # Copyright(c) 2004 URABE, Shyouhei. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this code, to deal in the code without restriction, including without # limitation the rights to use, copy, modify, merge, publish, distribute, # sublicense, and/or sell copies of the code, and to permit persons to whom the # code is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the code. # # THE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE CODE OR THE USE OR OTHER DEALINGS IN THE # CODE. # $Id: hatena_style.rb,v 1.12 2007-02-27 06:57:14 kazuhiko Exp $ # Hatena::Diary compatible style # Works only under ruby 1.8.1 or later [ 'uri', 'net/http', 'cgi', 'pstore', 'time', ].each {|f| require f } class TDiary::HatenaDiary include TDiary::DiaryBase, TDiary::CategorizableDiary def initialize(date, title, body, modified=Time.now) init_diary @sections = [] replace date, title, body @last_modified = modified end def style 'Hatena' end def replace(date, title, body) set_date date set_title title @sections.clear append body end def append(body, author=nil) @sections.concat Hatena::Diary.parse(body, author) self end def each_section(&block) @sections.each(&block) end def to_src @sections.inject('') {|r, i| r << i.to_src } end def to_html(opt, mode=:HTML) j = 0 @sections.inject('') {|r, i| j += 1 r << '
' if mode != :CHTML r << i.convert(mode, date, j, opt) r << '
' if mode != :CHTML r } end def to_s sprintf('date=%s, title=%s, body=%s', date.strftime('%Y%m%d'), title, @sections.map {|i| '[%s]' % i}.join) end end # This is the namespace module module Hatena def Hatena.conf ObjectSpace.each_object do |diary| next unless diary.kind_of?(TDiary::TDiaryBase) return diary.instance_eval { @conf } end end Diary = Object.new API = Object.new # get a text of hatena-style, and convert it into parse tree. def Diary.parse(str, author) str.gsub(/\r(?!\n)/,"\n")\ .delete("\r")\ .gsub(/^\*/,'**')\ .split(/^\*/)\ .inject([]) {|r, i| i.empty? ? r : r << Hatena::Section.new(i, author) } end # find the cache_path from entore ruby world # could someone please tell me more eficient way to do this... def API.cache_path ret = Hatena.conf.cache_path || Hatena.conf.data_path + '/cache' unless FileTest.directory?(ret) begin Dir.mkdir(ret) rescue Errno::EEXIST ; # OK end end ret end def API.update_kw(kw) return false if File.exist?(kw) && Time.now - File.mtime(kw) < 86400 File.open(kw, IO::WRONLY|IO::CREAT) do |fp| break unless fp.flock(IO::LOCK_EX|IO::LOCK_NB) uri = ::URI.parse('http://d.hatena.ne.jp/images/keyword/keywordlist') Timeout.timeout(60) do Net::HTTP.version_1_1 Net::HTTP.new(uri.host, uri.port).start do |http| res, body = http.get(uri.request_uri, {'User-Agent' => "tDiary/#{TDIARY_VERSION}"}) fp.seek(0, IO::SEEK_SET) fp.write(body) end end end true end def API.update_db(kw, db) raise if API.update_kw kw raise unless FileTest.exist? db.path false rescue str = File.open(kw, IO::RDONLY) do |fp| fp.flock(IO::LOCK_SH) fp.read end a = str.gsub(/\\s/,' ') \ .gsub(/\\(?!\|)/,'') \ .scan(/(?:[^|]|\\\|)*[^\\](?=\||\z)/) db.transaction do db['trie'] = Trie.new(a) end true end # The trie of keywords # Keywords are chached, chache expires every day (24h) def API.keywords path = API.cache_path kw = path + '/keywordlist' db = PStore.new(path + '/keywords.pstore') if API.update_db(kw, db) || @ret.nil? db.transaction(IO::RDONLY) do @ret = db['trie'] end end return @ret end end # Deterministic finate automata class Hatena::Trie private def add(kw) h = @hash1 kw.split(//e).each do |c| unless h.has_key? c tmp = Hash.new @ary << tmp h[c] = tmp end h = h[c] end @hash2[h] = kw end def initialize(a) @ary = Array.new @hash1 = Hash.new @hash2 = Hash.new a.each {|kw| add kw } end public def match(str) ret = nil h = @hash1 a = str.split(//e) i = 0 j = 0 while c = a[i + j] if h[c] h = h[c] if @hash2[h] ret = @hash2[h] end f = false j += 1 else return ret if ret h = @hash1 # reset i += 1 j = 0 end end return ret end end # -------- # Parser Tree Nodes class Hatena::Section def initialize(str, author) t = Time.now @author = author.freeze @src = str.gsub(/^\*t\*/, '*%d*' % t.to_i)\ .gsub(/<(ins|del)>/, '<\1 datetime="%s">' % t.xmlschema) @tree = Hatena::Block.new(@src) end def convert(mode, date, i, opt) @tree.convert(mode, date, i, opt, author) end def to_src @src end def categories @tree.title.categories end def author @author end def body @tree.body.to_s end def subtitle @tree.title.to_s end def stripped_subtitle @tree.title.strip.to_s end def body_to_html @tree.body.convert(:HTML) end def subtitle_to_html @tree.title.convert(:HTML) end def stripped_subtitle_to_html @tree.title.strip.convert(:HTML) end end # Block level elements class Hatena::Block attr_reader :to_s, :title, :body def initialize(str) # Too long. Needs refactoring. if str.nil? @title = Hatena::Title.new('') # dummy @body = Hatena::Inline.new('') # dummy @to_s = '' elsif str[0] == ?* t,b = *str.split(/\n/,2) @title = Hatena::Title.new(t) @body = Hatena::BlockAndorInline.new(b, false) @to_s = t + "\n" + (b||'') else @to_s = str @title = Hatena::Title.new('') # dummy @body = Hatena::BlockAndorInline.new(str, false) end end def convert(mode, date=nil, i=nil, opt=nil, author=nil) if title_is_dummy? @body.convert(mode) else @title.convert(mode, date, i, opt, author) + "\n" + @body.convert(mode) end end def title_is_dummy? @to_s[0] == ?* end end # Section subtitle class Hatena::Title attr_reader :to_s, :categories, :strip def initialize(str) if m = /\A\*([0-9]+)\*/.match(str) @time = Time.at(Integer(m[1])) @to_s = m.post_match.freeze elsif m = /\A\*([a-zA-Z0-9_]+)\*/.match(str) @name = m[1] @to_s = m.post_match.freeze else @to_s = (str[1..-1]||'').freeze end @categories = to_s.scan(/\[(.*?)\]/).map{|a| a[0] } @strip = Hatena::Inline.new(Regexp.last_match ? Regexp.last_match.post_match : to_s) end def convert(mode, date=nil, i=nil, opt=nil, author=nil) id = ('p%02d' % (i || 0)) h = '%0.32b' % rand(0x100000000) case when date.nil? categories.map {|i| "<%=category_anchor <<'#{h}'.chomp\n#{i}\n#{h}\n%>" }.join + strip.convert(mode) when mode == :CHTML sprintf('*%s%s', @name ? %Q{ ID"=#@name"} : '', id, (opt['multi_user'] && author) ? "[#{author}]" : '', strip.convert(mode)) else sprintf('#%s">%s%s%s%s %s', @name ? %Q{ id="#@name"} : '', opt['anchor'] ? 'name="%s" ' % id : '', opt['index'], date.strftime('%Y%m%d'), @name || id, opt['section_anchor'], categories.map {|cat| "<%=category_anchor <<'#{h}'.chomp\n#{cat}\n#{h}\n%>" }.join, (opt['multi_user'] && author) ? "[#{author}]" : '', strip.convert(mode), @time ? %Q!#{@time.strftime('%H:%M')}! : '') end end end # Sequence of block level elements and/or inline level elements # or sequence of block level elements only. class Hatena::BlockAndorInline def initialize(str, allowinline = true) @elems = Array.new pbuffer = '' # paragraph buffer flush_pbuffer = lambda{ next if pbuffer.empty? if allowinline @elems.push Hatena::Inline.new(pbuffer) else @elems.push Hatena::Paragraph.new(pbuffer) end pbuffer.replace('') } lines = str.concat("\n").scan(/.*\n/) until lines.empty? case when lines[0][0] == ?- flush_pbuffer.call buffer = '' until lines.empty? break unless lines[0][0] == ?- buffer.concat lines.shift end @elems.push Hatena::Itemize.new(buffer) when lines[0][0] == ?+ flush_pbuffer.call buffer = '' until lines.empty? break unless lines[0][0] == ?+ buffer.concat lines.shift end @elems.push Hatena::Enumerate.new(buffer) when lines[0][0] == ?: flush_pbuffer.call buffer = '' until lines.empty? break unless lines[0][0] == ?: break unless lines[0].rindex(?:) != 0 buffer.concat lines.shift end @elems.push Hatena::Description.new(buffer) when lines[0] == ">>\n" flush_pbuffer.call buffer = '' nest = 0 until lines.empty? nest += 1 if lines[0] == ">>\n" nest -= 1 if lines[0] == "<<\n" buffer.concat lines.shift break if nest <= 0 end @elems.push Hatena::Quote.new(buffer) when lines[0] == ">|\n" flush_pbuffer.call buffer = '' until lines.empty? str1 = lines.shift buffer.concat str1 break if /\|<$/ =~ str1 end @elems.push Hatena::Verbatim.new(buffer) when lines[0] == ">||\n" flush_pbuffer.call buffer = '' until lines.empty? str1 = lines.shift buffer.concat str1 break if /\|\|<$/ =~ str1 end @elems.push Hatena::SuperVerbatim.new(buffer) when lines[0][0,5] == '><$/ =~ lines.shift end when lines[0][0,2] == '><' flush_pbuffer.call buffer = '' until lines.empty? str1 = lines.shift buffer.concat str1 break if /><$/ =~ str1 end @elems.push Hatena::UnParagraph.new(buffer) else pbuffer.concat lines.shift if pbuffer[-3..-1] == "\n\n\n" flush_pbuffer.call end end end flush_pbuffer.call end def convert(mode) @elems.inject('') {|r, i| r << i.convert(mode) << "\n" } end end # Itemize # extension to Hatena: nest can be more than 3 level. class Hatena::Itemize def initialize(str) @elems = Array.new lines = str.gsub(/^-/,'').scan(/.*\n/) buffer = '' until lines.empty? case when lines[0][0] == ?- until lines.empty? break unless lines[0][0] == ?- buffer.concat lines.shift end @elems.push Hatena::BlockAndorInline.new(buffer) buffer = '' when lines[0][0] == ?+ until lines.empty? break unless lines[0][0] == ?+ buffer.concat lines.shift end @elems.push Hatena::BlockAndorInline.new(buffer) buffer = '' when lines[0][0] == ?: until lines.empty? break unless lines[0][0] == ?: break unless lines[0].rindex(?:) != 0 buffer.concat lines.shift end @elems.push Hatena::BlcokAndorInline.new(buffer) buffer = '' else @elems.push Hatena::Inline.new(buffer) unless buffer.empty? buffer = lines.shift end end @elems.push Hatena::Inline.new(buffer) unless buffer.empty? end def convert(mode) template = nil if mode == :CHTML template = ["", "\n
  • %s
  • "] else template = ["", "\n
  • %s
  • "] end template[0] % @elems.inject('') {|r, i| r << template[1] % i.convert(mode) } end end # Enumerate # Extension to Hatena: nest can be more than 3 level class Hatena::Enumerate def initialize(str) @elems = Array.new lines = str.gsub(/^\+/,'').scan(/.*\n/) buffer = '' until lines.empty? case when lines[0][0] == ?- until lines.empty? break unless lines[0][0] == ?- buffer.concat lines.shift end @elems.push Hatena::BlockAndorInline.new(buffer) buffer = '' when lines[0][0] == ?+ until lines.empty? break unless lines[0][0] == ?+ buffer.concat lines.shift end @elems.push Hatena::BlockAndorInline.new(buffer) buffer = '' when lines[0][0] == ?: until lines.empty? break unless lines[0][0] == ?: break unless lines[0].rindex(?:) != 0 buffer.concat lines.shift end @elems.push Hatena::BlcokAndorInline.new(buffer) buffer = '' else @elems.push Hatena::Inline.new(buffer) unless buffer.empty? buffer = lines.shift end end @elems.push Hatena::Inline.new(buffer) unless buffer.empty? end def convert(mode) template = nil if mode == :CHTML template = ["
      %s\n
    ", "\n
  • %s
  • "] else template = ["
      %s\n
    ", "\n
  • %s
  • "] end template[0] % @elems.inject('') {|r, i| r << template[1] % i.convert(mode) } end end # Description list # Extension to hatena : term only and descriotion only are OK # :term: # ::desc # Extension to Hatena : can be combined with lists class Hatena::Description def initialize(str) @elems = Array.new str.each_line do |l| raise SyntaxError unless l[0] == ?: l = l[1..-1] buffer = '' # while l =~ /[^:]*#{URI.regexp}/o # buffer.concat Regexp.last_match.to_s # l = Regexp.last_match.post_match # end dt,dd = *l.split(/:/,2) buffer.concat dt @elems.push([ buffer.empty? ? nil : Hatena::Inline.new(buffer), (dd.nil? || dd.empty?) ? nil : Hatena::Inline.new(dd) ]) end end def convert(mode) template = nil if mode == :CHTML template = ["
    %s\n
    ", "\n
    %s
    ", "
    %s
    "] else template = ["
    %s\n
    ", "\n
    %s
    ", "
    %s
    "] end template[0] % @elems.inject('') {|r, i| r << template[1] % i[0].convert(mode) unless i[0].nil? r << template[2] % i[1].convert(mode) unless i[1].nil? r } end end # block level quote # Extension to hatena : nest can be more than 2 level. class Hatena::Quote def initialize(str) @elems = Hatena::Block.new(str[3..-4]) end def convert(mode) template = nil if mode == :CHTML template = "
    \n%s\n
    " else template = "
    \n%s\n
    " end sprintf(template,@elems.convert(mode)) end end # preformatted text class Hatena::Verbatim def initialize(str) @str = str[3..-4].freeze end def convert(mode) template = nil if mode == :CHTML template = "
    %s
    " else template = "
    %s
    " end sprintf(template,CGI.escapeHTML(@str)) end end # preformatted text class Hatena::SuperVerbatim def initialize(str) @str = str[3..-5].freeze end def convert(mode) template = nil if mode == :CHTML template = "
    %s
    " else template = "
    %s
    " end sprintf(template,CGI.escapeHTML(@str)) end end # non-paragraph blocklevel class Hatena::UnParagraph def initialize(str) @elems = Hatena::Inline.new(str[1..-3]) # 0123... # >
    # ... ... # ...
    <\n # ...-321 end def convert(mode) @elems.convert(mode) end end # paragraph # Extension to Hatena: not using
    but begins next paragraph class Hatena::Paragraph def initialize(str) @elems = Hatena::Inline.new(str.gsub(/\n\n\n/,'')) end def convert(mode) template = nil if mode == :CHTML template = "

    \n%s\n

    " else template = "

    \n%s\n

    " end sprintf(template, @elems.convert(mode)) end end # inline elements class Hatena::Inline def initialize(str) @elems = Array.new inside_a = false return if str == "\n" until str.empty? case str when /\A\[\](.*?)\[\]/m @elems.push Hatena::CDATA.new(Regexp.last_match[1]) when /\A\)\(\((.*?)\)\)\(/m, /\A\(\(\((.*?)\)\)\)/m @elems.push Hatena::CDATA.new('((') @elems.push Hatena::Inline.new(Regexp.last_match[1]) @elems.push Hatena::CDATA.new('))') when /\A\(\((.*?)\)\)/m @elems.push Hatena::Footnote.new(Regexp.last_match[1]) when /\A#{tag_regex}/o @elems.push Hatena::TAG.new(Regexp.last_match.to_s) if str.index("") == 0 inside_a = false end when /\A\[amazon:(.*?)\]/m @elems.push Hatena::AmazonSearch.new(Regexp.last_match[1], true) when /\A\[google:(.*?)\]/m @elems.push Hatena::Google.new(Regexp.last_match[1], true) when /\A\[(?:(#{gid_regex}):)?keyword:(.*?)\]/m, /\A\[\[(.*?)\]\]/m group, keyword = Regexp.last_match.captures @elems.push Hatena::Keyword.new(group, keyword, true) when /\A\[(?:(#{gid_regex}|[ad]):)?id:([a-zA-Z][-a-zA-Z0-9_]{1,30}[a-zA-Z0-9])\]/m, /\A(?:(#{gid_regex}|[ad]):)?id:([a-zA-Z][-a-zA-Z0-9_]{1,30}[a-zA-Z0-9](?::(?:[0-9]+|about))?)/ sid, id = Regexp.last_match.captures @elems.push Hatena::ID.new(sid, id, true) when /\A\[(?i:ISBN|ASIN):(.*?)(?::image(?::(?:small|large))?)?\]/m, /(?i:ISBN|ASIN):([-0-9A-Za-z]+)(?::image(?::(?:small|large))?)?/ @elems.push Hatena::Amazon.new(Regexp.last_match[1], true) when /\A\[tex:(.*?)\]/m @elems.push Hatena::TeX.new(Regexp.last_match[1]) when /\A#{gid_regex}/ @elems.push Hatena::Group.new(Regexp.last_match.to_s, true) when /\A\[((?i:https?|ftp|mailto):.+?)\]/m, /\A(#{URI.regexp})/o @elems.push Hatena::URI.new(Regexp.last_match[1]) else /.+?(?=[\[\]()<>]|(?i:https?|ftp|mailto|id|ISBN|ASIN)|[adg]:|$)/m =~ str if inside_a @elems.push Hatena::CDATA.new(Regexp.last_match.to_s) else @elems.push Hatena::Sentence.new(Regexp.last_match.to_s) end end str = Regexp.last_match.post_match end end def convert(mode) @elems.inject('') {|r, i| r << i.convert(mode) } end private # tag_regex was quoted from http://www.din.or.jp/~ohzaki/perl.htm#HTML_Tag def tag_regex /<[^"'<>]*(?:"[^"]*"[^"'<>]*|'[^']*'[^"'<>]*)*(?:>|(?=<)|$)/ end def gid_regex /g:[a-zA-Z][a-zA-Z0-9]{2,23}/ end end # String that surely doesn't contain any keywords # String that can contain keyword is a Sentence class Hatena::CDATA def initialize(str) @str = str.freeze end def convert(mode) @str end end # footnote # footnote.rb required class Hatena::Footnote def initialize(str) @str = str @heredoc = rand(0x100000000) end def convert(mode) sprintf("<%%=fn <<'%0.32b'.chomp\n%s\n%0.32b\n%%>", @heredoc, @str, @heredoc) end end # HTML tags # Disadvantanegs from hatena : is not supported # Extension to Hatena : ERB expression can be written class Hatena::TAG def initialize(str) @elems = Array.new return if /', @str, @str.delete('-')) # %= else sprintf('http://www.amazon.co.jp/exec/obidos/ASIN/%s/%s', @str, Hatena.conf['amazon.aid'] || '') end end end # Amazon search # http://d.hatena.ne.jp/hatenadiary/20040310#1078879113 class Hatena::AmazonSearch def initialize(str, tag_p) @str = str @tag_p = tag_p end def convert(mode) uri = 'http://www.amazon.co.jp/exec/obidos/external-search?mode=blended&tag=%s&encoding-string-jp=%%c6%%fc%%cb%%dc%%b8%%ec&keyword=%s' % [Hatena.conf['amazon.aid'] || '', URI.escape(@str, /[^-_.!~*'()a-zA-Z0-9]/)] return uri unless @tag_p template=nil if mode == :CHTML template = 'amazon:%s' else template = 'amazon:%s' end sprintf(template, uri, @str) end end # TeX expressoin # texdiary http://kumamushi.org/~k/texdiary/ required class Hatena::TeX def initialize(expr) @expr end def convert(mode) sprintf('<%%=eq "%s"%%>' % @expr) #%= end end # String that can contain keywords # String that cannot contain keywords is a CDATA class Hatena::Sentence def initialize(str) @elems = Array.new return if str.nil? || str.empty? if false # kw = Hatena::API.keywords.match(str) m = Regexp.new(Regexp.quote(kw)).match(str) @elems.push Hatena::CDATA.new(m.pre_match) @elems.push Hatena::Keyword.new(nil, kw, true) @elems.push Hatena::Sentence.new(m.post_match) else @elems.push Hatena::CDATA.new(str) end end def convert(mode) @elems.inject('') {|r, i| r << i.convert(mode) } end end # Local Variables: # mode: ruby # code: euc-jp-unix # End: