# -*- coding: utf-8 -*-
require "tumblr4r/version"
require 'net/http'
require 'rexml/document'
begin
  # Load the ActiveSupport entry point first, then the core extensions
  # (cattr_accessor, Symbol#to_proc on old Rubies, ...).
  require 'active_support'
  require 'active_support/core_ext'
rescue LoadError
  # A bare `rescue` does not catch LoadError, so it is named explicitly.
  # Fall back to the legacy library name.
  require 'activesupport'
end
require 'logger'
require 'cgi'

module Tumblr4r
  # Error raised when a request to the Tumblr API fails or its response
  # cannot be interpreted.
  class TumblrError < StandardError
    # Extra object attached by the raiser; inside this file it is the
    # Net::HTTP response whenever one is available.
    attr_accessor :attachment

    # @param [String] msg error message
    # @param [Object] attachment optional object describing the failure
    def initialize(msg, attachment = nil)
      super(msg)
      @attachment = attachment
    end
  end

  # Values of the "type" attribute of a post in the Tumblr XML API.
  module POST_TYPE
    REGULAR = "regular"
    PHOTO = "photo"
    QUOTE = "quote"
    LINK = "link"
    CHAT = "conversation"
    AUDIO = "audio"
    VIDEO = "video"
    ANSWER = "answer"
  end

  # Combines a connection object and a parser object to convert between
  # the Tumblr API and Ruby objects.
  # TODO: there seems to be no API for fetching only private posts.
  # * Updates made on the web seem to take a little while before they show
  #   up in the data fetched through the API.
  # * Updating from the web rounds off the seconds of the post timestamp.
  class Site
    attr_accessor :hostname, :email, :password, :name, :timezone, :title, :cname, :description, :feeds
    attr_accessor :logger

    API_READ_MAX_ALLOWED_COUNT = 50
    SLEEP_SECONDS_FOR_EVERY_FETCH = 10.0 # API manual says "Requests are rate-limited to one every 10 seconds."

    @@default_log_level = Logger::DEBUG
    cattr_accessor :default_log_level

    class << self
      # Builds a Site for +hostname+ and fetches all of its posts.
      # When a block is given every post is yielded to it.
      # TODO: unit test
      # @return [Array] the fetched posts
      def find(hostname, email=nil, password=nil, http=nil, &block)
        site = self.new(hostname, email, password, http)
        result = site.find(:all)
        if block_given?
          result.each do |post|
            yield post
          end
        else
          return result
        end
      end
    end

    # Creates the connection and parser and immediately loads the site
    # information (this issues one read request).
    # @param [String] hostname host name of the tumblelog
    # @param [String] email optional account email
    # @param [String] password optional account password
    # @param [Net::HTTP] http optional connection used instead of hostname
    # @param [Logger] logger optional logger; defaults to STDERR
    def initialize(hostname, email=nil, password=nil, http = nil, logger = nil)
      @hostname = hostname
      @email = email
      @password = password
      @logger = logger || Logger.new(STDERR)
      @logger.level = @@default_log_level
      @conn = XMLConnection.new(http || @hostname, email, password, @logger)
      @parser = XMLParser.new
      self.site_info
    end

    # @param [Symbol|Integer] id_or_type :all, id
    # @param [Hash] options :offset, :limit, :type, :filter, :tagged, :search,
    # @return [Array|Post]
    # @raise [ArgumentError] if id_or_type is neither :all nor an Integer
    def find(id_or_type, options = { })
      if id_or_type == :all
        normal_find(options)
      elsif id_or_type.kind_of?(Integer)
        xml = @conn.get({:id => id_or_type})
        posts, start, total = @parser.posts(xml)
        @logger.info("size: #{posts.size}")
        @logger.info("start: #{start}")
        @logger.info("total: #{total}")
        return posts[0]
      else
        raise ArgumentError.new("id_or_type must be :all or Integer, but was #{id_or_type}(<#{id_or_type.class}>)")
      end
    end

    # Fetches the posts matching +options+ through /api/read, page by page.
    # TODO: maybe allow passing a block that runs on every loop iteration?
    # If error information were handed to that block as well, the caller
    # could decide e.g. to retry after an error ... or maybe not.
    # @param [Hash] options :offset, :limit, :type, :filter, :tagged, :search
    # @return [Array]
    def normal_find(options)
      limit = options[:limit] && options[:limit].to_i
      offset = options[:offset].to_i
      total = self.count(options)
      params = pick_options(options, [:type, :filter, :tagged, :search])
      # The API manual does not state a wait time for this endpoint, but we
      # sleep for the same number of seconds as for the dashboard.
      fetch_pages(limit, offset, total, params) do |page_params|
        @conn.get(page_params)
      end
    end

    # Fetches posts from the dashboard, page by page.
    # @param [Hash] options :offset, :limit, :type, :filter
    # @return [Array]
    def dashboard(options = { })
      limit = options[:limit] ? options[:limit].to_i : nil
      offset = options[:offset].to_i
      params = {:likes => "1" }.merge(pick_options(options, [:type, :filter]))
      total = 1000 # Not documented, but the upper bound is probably around 1000?
      fetch_pages(limit, offset, total, params) do |page_params|
        @conn.dashboard(page_params)
      end
    end

    # Drives a paged fetch. Yields (start, num) for every page and expects
    # the block to return how many items it actually fetched.
    # @param [Integer|nil] limit maximum number of items wanted; nil means no limit
    # @param [Integer|nil] offset position of the first item; negative values fetch nothing
    # @param [Integer] max_at_once maximum number of items per page
    # @param [Integer] total total number of items available
    # @return [nil]
    def each_fetch(limit, offset, max_at_once, total, &block)
      return if offset && offset.to_i < 0

      # Initialise the start position.
      start = offset || 0
      if limit
        goal = [total - start, limit].min
      else
        goal = total - start
      end
      # Initialise the page size.
      num = [goal, max_at_once].min
      return if num < 0

      all_fetched = 0
      while all_fetched < goal
        fetched_count = yield(start, num)
        @logger.info("size: #{fetched_count}")
        @logger.info("start: #{start}")
        @logger.info("total: #{total}")
        # An empty page means there is nothing left; without this guard the
        # loop would never terminate because all_fetched stops growing.
        break if fetched_count == 0
        all_fetched += fetched_count
        # Move the start position forward.
        start += num
        # The next page size is what is still missing overall, not what is
        # missing relative to the last page only.
        num = [goal - all_fetched, max_at_once].min
      end
    end

    # Asks the API how many posts match +options+.
    # @param [Hash] options :id, :type, :filter, :tagged, :search
    # @return [Integer] the "total" attribute reported by the API
    def count(options = { })
      params = pick_options(options, [:id, :type, :filter, :tagged, :search])
      params[:num] = 1
      params[:start] = 0
      xml = @conn.get(params)
      posts, start, total = @parser.posts(xml)
      return total
    end

    # Reads the tumblelog information and stores it in this object.
    # @return [Site] self
    def site_info
      xml = @conn.get(:num => 1)
      @parser.siteinfo(self, xml)
    end

    # Writes +post+ and reads it back through the API.
    # @param [Post] post
    # @return [Post] the post as returned by the API after saving
    def save(post)
      post_id = @conn.write(post.params)
      new_post = self.find(post_id)
      return new_post
    end

    # @param [Integer|Post] post_id_or_post
    # @raise [ArgumentError] if the argument is neither a Post nor an Integer
    def delete(post_id_or_post)
      post_id = nil
      case post_id_or_post
      when Tumblr4r::Post
        post_id = post_id_or_post.post_id
      when Integer
        post_id = post_id_or_post
      else
        raise ArgumentError.new("post_id_or_post must be Tumblr4r::Post or Integer, but was #{post_id_or_post}(<#{post_id_or_post.class}>)")
      end
      return @conn.delete(post_id)
    end

    private

    # Returns a new Hash holding only those +keys+ of +options+ that have a
    # truthy value.
    def pick_options(options, keys)
      picked = { }
      keys.each do |key|
        picked[key] = options[key] if options[key]
      end
      picked
    end

    # Sleeps for whatever is left of SLEEP_SECONDS_FOR_EVERY_FETCH since
    # +last_fetched_at+; does nothing before the first fetch.
    def wait_before_next_fetch(last_fetched_at)
      return unless last_fetched_at
      sleep_secs = SLEEP_SECONDS_FOR_EVERY_FETCH - (Time.now - last_fetched_at)
      if sleep_secs > 0
        logger.debug("sleeping #{sleep_secs} secs.")
        sleep sleep_secs
      end
    end

    # Collects posts page by page. The block receives +params+ extended with
    # :start and :num and must return the XML of that page.
    # @return [Array] all posts parsed from the fetched pages
    def fetch_pages(limit, offset, total, params)
      result = []
      last_fetched_at = nil
      each_fetch(limit, offset, API_READ_MAX_ALLOWED_COUNT, total) do |start, num|
        wait_before_next_fetch(last_fetched_at)
        xml = yield(params.merge(:start => start, :num => num))
        last_fetched_at = Time.now
        posts = @parser.posts(xml).first
        result.concat(posts)
        # The total reported by the Tumblr API cannot be trusted at all: it
        # does not take the search conditions into account (so the goal is
        # unreliable too). Therefore an empty page also ends the fetch.
        # TODO: or sum up the :num values and compare that against the goal?
        break if posts.empty?
        posts.size
      end
      result
    end
  end

  # Post and its subclasses are, as a rule, plain data transfer objects;
  # no logic is implemented in them.
  class Post
    attr_accessor :post_id, # Integer
    :url, # String
    :url_with_slug, # String
    :type, # String
    :date_gmt,
    :date,
    :unix_timestamp, # Integer
    :format, # String("html"|"markdown")
    :tags, # Array
    :bookmarklet, # true|false
    :private, # Integer(0|1)
    :generator # String

    attr_accessor :liked, # Boolean
    :reblog_key # String

    @@default_generator = nil
    cattr_accessor :default_generator

    def initialize
      @generator = @@default_generator || "Tumblr4R"
      @tags = []
    end

    # @return [Hash] the parameters common to every post type, keyed by the
    #   names used when writing a post
    def params
      {"type" => @type,
        "generator" => @generator,
        "date" => @date,
        "private" => @private,
        # Array() keeps this working when tags was reset to nil.
        "tags" => Array(@tags).join(","),
        "format" => @format
      }
    end
  end

  class Regular < Post
    attr_accessor :regular_title, :regular_body

    def params
      super.merge!({"title" => @regular_title, "body" => @regular_body })
    end
  end

  # TODO: decide how Feed should be handled
  class Feed < Post
    attr_accessor :regular_body, :feed_item, :from_feed_id
    # TODO: are there feed items that have a title?
  end

  class Photo < Post
    attr_accessor :photo_caption, :photo_link_url, :photo_url, :photoset
    # TODO: what is the max-width of photo_url?
    attr_accessor :data

    # TODO: think about how to POST data.
    # Instead of holding the raw data, wrap it in a class such as
    # TumblrPostData and keep that in an ivar of each post?
    def params
      super.merge!(
        {"source" => @photo_url,
          "caption" => @photo_caption,
          "click-through-url" => @photo_link_url,
          "photoset" => @photoset,
          "data" => @data})
    end
  end

  class Quote < Post
    attr_accessor :quote_text, :quote_source

    def params
      super.merge!(
        {"quote" => @quote_text,
          "source" => @quote_source})
    end
  end

  class Link < Post
    attr_accessor :link_text, :link_url, :link_description

    def params
      super.merge!(
        {"name" => @link_text,
          "url" => @link_url,
          "description" => @link_description})
    end
  end

  class Chat < Post
    attr_accessor :conversation_title, :conversation_text
    # list of text

    def params
      super.merge!(
        {"title" => @conversation_title,
          "conversation" => @conversation_text})
    end
  end

  class Audio < Post
    attr_accessor :audio_plays, :audio_caption, :audio_player
    attr_accessor :data

    def params
      super.merge!(
        {"data" => @data,
          "caption" => @audio_caption})
    end
  end

  class Video < Post
    attr_accessor :video_caption, :video_source, :video_player
    attr_accessor :data, :title

    # TODO: title is apparently only valid when uploading to vimeo
    # TODO: is the choice between embed and the uploaded data made
    # automatically on the Tumblr side?
    def params
      super.merge!(
        {"embed" => @video_source,
          "data" => @data,
          "title" => @title,
          "caption" => @video_caption})
    end
  end

  class Answer < Post
    attr_accessor :answer, :question

    def params
      super.merge!({"question" => @question, "answer" => @answer})
    end
  end

  # Thin wrapper around the Tumblr XML API.
  # Conversion from and to Ruby objects is done by the Parser class.
  # This class has no dependency on the Parser class at all.
  class XMLConnection
    attr_accessor :logger, :group, :authenticated

    # @param [String|Net::HTTP] http_or_hostname host name of the tumblelog
    #   or a ready-made connection to it
    # @param [String] email optional account email
    # @param [String] password optional account password
    # @param [Logger] logger optional logger; defaults to STDERR
    # @raise [ArgumentError] if http_or_hostname has an unsupported type
    def initialize(http_or_hostname, email=nil, password=nil, logger = nil)
      case http_or_hostname
      when String
        @conn = Net::HTTP.new(http_or_hostname)
      when Net::HTTP
        @conn = http_or_hostname
      else
        raise ArgumentError.new("http_or_hostname must be String or Net::HTTP but is #{http_or_hostname.class}")
      end
      @email= email
      @password = password
      # Set up before authenticating so the object is complete by the time
      # any request is made.
      @group = @conn.address
      @logger = logger || Logger.new(STDERR)
      if @email && @password
        begin
          @authenticated = authenticate
        rescue TumblrError
          @authenticated = false
        end
      end
    end

    # Issues GET /api/read on the tumblelog host.
    # @param [Hash] options :id, :type, :filter, :tagged, :search, :start, :num
    # @return [String] the response body (XML)
    # @raise [TumblrError] if the response is not 200 OK
    def get(options = { })
      # Values are URL-escaped so that search words or tags containing
      # spaces, "&" and the like do not corrupt the request.
      req = "/api/read?#{build_query(options)}"
      logger.info(req)
      res = @conn.get(req)
      logger.debug(res.body)
      case res
      when Net::HTTPOK
        return res.body
      when Net::HTTPNotFound
        raise TumblrError.new("no such site(#{@conn.address})", res)
      else
        raise TumblrError.new("unexpected response #{res.inspect}", res)
      end
    end

    # Issues POST /api/dashboard on www.tumblr.com with the stored
    # credentials.
    # @param [Hash] options extra request parameters
    # @return [String] the response body (XML)
    # @raise [TumblrError] if the response is not a success
    def dashboard(options = { })
      http = Net::HTTP.new("www.tumblr.com")
      params = options.merge({"email" => @email, "password" => @password, "group" => @group})
      query_string = build_query(params)
      logger.debug("#### query_string: #{query_string}")
      response = http.post('/api/dashboard', query_string)
      logger.debug(response.body)
      case response
      when Net::HTTPSuccess
        return response.body
      else
        raise TumblrError.new(format_error(response), response)
      end
    end

    # @return true if email and password are valid
    # @raise TumblrError if email or password is invalid
    def authenticate
      http = Net::HTTP.new("www.tumblr.com")
      response = http.post('/api/authenticate',
                           "email=#{CGI.escape(@email)}&password=#{CGI.escape(@password)}")
      case response
      when Net::HTTPOK
        return true
      else
        raise TumblrError.new(format_error(response), response)
      end
    end

    # @param [Hash] options parameters of the post to write
    # @return [Integer] post_id if success
    # @raise [TumblrError] if fail
    def write(options)
      raise TumblrError.new("email or password is invalid") unless authenticated
      http = Net::HTTP.new("www.tumblr.com")
      params = options.merge({"email" => @email, "password" => @password, "group" => @group})
      query_string = build_query(params)
      logger.debug("#### query_string: #{query_string}")
      response = http.post('/api/write', query_string)
      case response
      when Net::HTTPSuccess
        return response.body.to_i
      else
        raise TumblrError.new(format_error(response), response)
      end
    end

    # @param [Integer] post_id
    # @return true if success
    # @raise [TumblrError] if fail
    def delete(post_id)
      raise TumblrError.new("email or password is invalid") unless authenticated
      http = Net::HTTP.new("www.tumblr.com")
      params = {"post-id" => post_id, "email" => @email, "password" => @password, "group" => @group}
      query_string = build_query(params)
      logger.debug("#### query_string: #{query_string}")
      response = http.post('/api/delete', query_string)
      case response
      when Net::HTTPSuccess
        logger.debug("#### response: #{response.code}: #{response.body}")
        return true
      else
        raise TumblrError.new(format_error(response), response)
      end
    end

    # Builds a multi-line description of a failed response: its inspect
    # string, every header and the body.
    # @param [Net::HTTPResponse] response
    # @return [String]
    def format_error(response)
      msg = response.inspect + "\n"
      response.each{|k,v| msg += "#{k}: #{v}\n"}
      # to_s: a response without a body must not raise while we are
      # reporting an error.
      msg += response.body.to_s
      msg
    end

    private

    # Joins +params+ into a URL-encoded "k=v&k=v" string; pairs whose value
    # is nil are left out.
    def build_query(params)
      pairs = params.reject{|k,v| v.nil? }
      pairs.map{|k,v| "#{k}=#{CGI.escape(v.to_s)}" }.join("&")
    end
  end

  # Parser for the responses of the Tumblr XML API
  class XMLParser
    # @param [Site] site container that is filled with the parsed result
    # @param [String] xml the XML of the Tumblr API response as it is
    # @return [Site] the given site
    # @raise [TumblrError] if the XML has no tumblr/tumblelog element
    def siteinfo(site, xml)
      xml_doc = REXML::Document.new(xml)
      tumblelog = REXML::XPath.first(xml_doc, "//tumblr/tumblelog")
      raise TumblrError.new("unexpected response: no tumblr/tumblelog element") unless tumblelog
      site.name = tumblelog.attributes["name"]
      site.timezone = tumblelog.attributes["timezone"]
      site.title = tumblelog.attributes["title"]
      site.cname = tumblelog.attributes["cname"]
      site.description = tumblelog.text
      # tumblelog.elements["/feeds"]}
      # TODO: feeds are postponed
      return site
    end

    # Parses the XML and builds an Array of objects.
    # @param [String] xml the whole response from the API
    # @return [Array, Integer, Integer] Array of Post subclasses, start, total
    # @raise [TumblrError] if the XML has no tumblr/posts element or contains
    #   a post of an unknown type
    def posts(xml)
      rexml_doc = REXML::Document.new(xml)
      rexml_posts = REXML::XPath.first(rexml_doc, "//tumblr/posts")
      raise TumblrError.new("unexpected response: no tumblr/posts element") unless rexml_posts
      start = rexml_posts.attributes["start"]
      total = rexml_posts.attributes["total"]
      posts = []
      rexml_posts.elements.each("//posts/post") do |rexml_post|
        post_type = rexml_post.attributes["type"]
        post = nil
        case post_type
        when POST_TYPE::REGULAR
          post = self.regular(Regular.new, rexml_post)
        when POST_TYPE::PHOTO
          post = self.photo(Photo.new, rexml_post)
        when POST_TYPE::QUOTE
          post = self.quote(Quote.new, rexml_post)
        when POST_TYPE::LINK
          post = self.link(Link.new, rexml_post)
        when POST_TYPE::CHAT
          post = self.chat(Chat.new, rexml_post)
        when POST_TYPE::AUDIO
          post = self.audio(Audio.new, rexml_post)
        when POST_TYPE::VIDEO
          post = self.video(Video.new, rexml_post)
        when POST_TYPE::ANSWER
          post = self.answer(Answer.new, rexml_post)
        else
          raise TumblrError.new("unknown post type #{post_type}")
        end
        posts << post
      end
      return posts, start.to_i, total.to_i
    end

    # Fills the attributes shared by every post type.
    # TODO: is there room to reconsider the design around here?
    # Every method has the same structure (call post(post, rexml_post)
    # first, then do its own processing), and receiving both post and
    # rexml_post as arguments does not feel quite right either.
    # @return [Post] the given post
    def post(post, rexml_post)
      post.post_id = rexml_post.attributes["id"].to_i
      post.url = rexml_post.attributes["url"]
      post.url_with_slug = rexml_post.attributes["url-with-slug"]
      post.liked = (rexml_post.attributes["liked"] == "true")
      post.reblog_key = rexml_post.attributes["reblog-key"]
      post.type = rexml_post.attributes["type"]
      # TODO: use Time or the like instead of String for the time related values?
      post.date_gmt = rexml_post.attributes["date-gmt"]
      post.date = rexml_post.attributes["date"]
      post.unix_timestamp = rexml_post.attributes["unix-timestamp"].to_i
      post.format = rexml_post.attributes["format"]
      post.tags = rexml_post.get_elements("tag").map(&:text)
      post.bookmarklet = (rexml_post.attributes["bookmarklet"] == "true")
      post
    end

    def regular(post, rexml_post)
      post = self.post(post, rexml_post)
      post.regular_title = element_text(rexml_post, "regular-title")
      post.regular_body = element_text(rexml_post, "regular-body")
      post
    end

    def photo(post, rexml_post)
      post = self.post(post, rexml_post)
      post.photo_caption = element_text(rexml_post, "photo-caption")
      post.photo_link_url = element_text(rexml_post, "photo-link-url")
      post.photo_url = element_text(rexml_post, "photo-url")
      post.photoset = []
      rexml_post.elements.each("photoset/photo") do |photo|
        post.photoset.push(element_text(photo, "photo-url"))
      end
      post
    end

    def quote(post, rexml_post)
      post = self.post(post, rexml_post)
      post.quote_text = element_text(rexml_post, "quote-text")
      post.quote_source = element_text(rexml_post, "quote-source")
      post
    end

    def link(post, rexml_post)
      post = self.post(post, rexml_post)
      post.link_text = element_text(rexml_post, "link-text")
      post.link_url = element_text(rexml_post, "link-url")
      post.link_description = element_text(rexml_post, "link-description")
      post
    end

    def chat(post, rexml_post)
      post = self.post(post, rexml_post)
      post.conversation_title = element_text(rexml_post, "conversation-title")
      post.conversation_text = element_text(rexml_post, "conversation-text")
      post
    end

    def audio(post, rexml_post)
      post = self.post(post, rexml_post)
      # NOTE(review): this stores a boolean (attribute equals "1"), although
      # the name suggests a play count - confirm the intent before changing.
      post.audio_plays = (rexml_post.attributes["audio-plays"] == "1")
      post.audio_caption = element_text(rexml_post, "audio-caption")
      post.audio_player = element_text(rexml_post, "audio-player")
      post
    end

    def video(post, rexml_post)
      post = self.post(post, rexml_post)
      post.video_caption = element_text(rexml_post, "video-caption")
      post.video_source = element_text(rexml_post, "video-source")
      post.video_player = element_text(rexml_post, "video-player")
      post
    end

    def answer(post, rexml_post)
      post = self.post(post, rexml_post)
      post.question = element_text(rexml_post, "question")
      post.answer = element_text(rexml_post, "answer")
      post
    end

    private

    # Returns the text of the child element +name+ of +node+, or "" when
    # the element is missing or has no text.
    def element_text(node, name)
      element = node.elements[name]
      (element && element.text) || ""
    end
  end
end # module