module JekyllImport
module Importers
class Tumblr < Importer
# Loads every library this importer relies on by handing the list of
# names to JekyllImport.require_with_fallback.
#
# Returns whatever JekyllImport.require_with_fallback returns.
def self.require_deps
  dependencies = [
    "rubygems",
    "fileutils",
    "open-uri",
    "nokogiri",
    "json",
    "uri",
    "time",
    "jekyll",
  ]
  JekyllImport.require_with_fallback(dependencies)
end
# Registers the importer's command-line switches.
#
# c - an object responding to `option(name, switch, description)`; each
#     switch below is registered on it, in order.
#
# Returns the value of the final `c.option` call.
def self.specify_options(c)
  switches = [
    ['url',            '--url URL',        'Tumblr URL'],
    ['format',         '--format FORMAT',  'Output format (default: "html")'],
    ['grab_images',    '--grab_images',    'Whether to grab images (default: false)'],
    ['add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'],
    ['rewrite_urls',   '--rewrite_urls',   'Whether to rewrite URLs (default: false)'],
  ]
  switches.map { |name, switch, description| c.option(name, switch, description) }.last
end
# Imports every post of a Tumblr blog through its JSON read API and
# writes the posts below `_posts/tumblr` (created if missing).
#
# options - Hash of importer settings, keyed by String:
#           'url'            - base URL of the blog (required; a missing
#                              key raises KeyError). Trailing slashes are
#                              ignored.
#           'format'         - "html" (default) or "md".
#           'grab_images'    - stored in @grab_images (default: false);
#                              presumably read by the photo-handling code,
#                              which is not part of this method.
#           'add_highlights' - forwarded to write_post (default: false).
#           'rewrite_urls'   - when true, all posts are collected first,
#                              passed through rewrite_urls_and_redirects
#                              and only then written (default: false).
#
# Returns the rewritten posts Array when 'rewrite_urls' is set, nil
# otherwise.
def self.process(options)
  url            = options.fetch('url')
  format         = options.fetch('format', "html")
  grab_images    = options.fetch('grab_images', false)
  add_highlights = options.fetch('add_highlights', false)
  rewrite_urls   = options.fetch('rewrite_urls', false)

  @grab_images = grab_images
  use_markdown = format == "md"

  FileUtils.mkdir_p "_posts/tumblr"

  # Drop trailing slashes so "http://blog/" does not become
  # "http://blog//api/read/json/".
  api_url = url.sub(%r{/+\z}, "") + "/api/read/json/"
  per_page = 50
  posts = []
  current_page = 0

  # Two passes are required so that we can rewrite URLs.
  # First pass builds up an array of each post as a hash.
  loop do
    feed_url = api_url + "?num=#{per_page}&start=#{current_page * per_page}"
    puts "Fetching #{feed_url}"
    blog = parse_feed(read_feed(feed_url))
    fetched = blog["posts"]
    puts "Page: #{current_page + 1} - Posts: #{fetched.size}"
    batch = fetched.map { |post| post_to_hash(post, format) }

    # If we're rewriting, save the posts for later. Otherwise, go ahead and
    # dump these to disk now.
    if rewrite_urls
      posts.concat(batch)
    else
      batch.each { |post| write_post(post, use_markdown, add_highlights) }
    end

    # A short page means the API has no further posts to offer.
    break if fetched.size < per_page

    current_page += 1
  end

  # Rewrite URLs, create redirects and write out posts if necessary.
  if rewrite_urls
    posts = rewrite_urls_and_redirects posts
    posts.each { |post| write_post(post, use_markdown, add_highlights) }
  end
end

# Downloads one page of the Tumblr read API.
#
# Goes through URI#read (provided by open-uri) rather than Kernel#open, so
# the string is always treated as a URL, never as a local path or a pipe
# command, and the connection is closed once the body has been read.
#
# feed_url - String URL of the API page.
#
# Returns the response body as a String.
def self.read_feed(feed_url)
  URI.parse(feed_url).read
end

# Strips the JSONP wrapper from an API response and parses the payload.
#
# The payload is taken to be everything from the first "{" to the last
# "}", so the result does not depend on the exact length of the wrapper
# or on trailing whitespace.
#
# body - String response body.
#
# Returns the parsed Hash.
# Raises JSON::ParserError if no JSON object can be located or parsed.
def self.parse_feed(body)
  first = body.index("{")
  last  = body.rindex("}")
  unless first && last
    raise JSON::ParserError, "no JSON object found in Tumblr API response"
  end

  JSON.parse(body[first..last])
end
private
# Writes a post out to disk
#
# post           - Hash with :name (file name inside `_posts/tumblr`),
#                  :header (object serialised with `to_yaml` as the front
#                  matter) and :content (post body; nothing is written
#                  when it is nil or false).
# use_markdown   - when truthy the body is run through html_to_markdown.
# add_highlights - when truthy, and only in markdown mode, the converted
#                  body is additionally run through add_syntax_highlights.
#
# Returns nil.
def self.write_post(post, use_markdown, add_highlights)
  body = post[:content]
  return unless body

  if use_markdown
    body = html_to_markdown(body)
    body = add_syntax_highlights(body) if add_highlights
  end

  target = "_posts/tumblr/#{post[:name]}"
  File.open(target, "w") do |file|
    # Front matter first, then the closing delimiter, then the body.
    front_matter = post[:header].to_yaml
    file.puts(front_matter + "---\n" + body)
  end
end
# Converts each type of Tumblr post to a hash with all required
# data for Jekyll.
def self.post_to_hash(post, format)
case post['type']
when "regular"
title = post["regular-title"]
content = post["regular-body"]
when "link"
title = post["link-text"] || post["link-url"]
content = "#{title}"
unless post["link-description"].nil?
content << "
" + post["link-description"]
end
when "photo"
title = post["photo-caption"]
content = if post["photo-link-url"].nil?
"#{content}"
else
fetch_photo post
end
when "audio"
if !post["id3-title"].nil?
title = post["id3-title"]
content = post["audio-player"] + "
" + post["audio-caption"]
else
title = post["audio-caption"]
content = post["audio-player"]
end
when "quote"
title = post["quote-text"]
content = "
#{post["quote-text"]}" unless post["quote-source"].nil? content << "—" + post["quote-source"] end when "conversation" title = post["conversation-title"] content = "" when "video" title = post["video-title"] content = post["video-player"] unless post["video-caption"].nil? content << "