# frozen_string_literal: true
module JekyllImport
module Importers
class Tumblr < Importer
# Loads every library this importer relies on by handing the list of
# names to JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = %w(rubygems fileutils open-uri nokogiri json uri time jekyll)
  JekyllImport.require_with_fallback(dependencies)
end
# Registers the command-line options understood by the Tumblr importer.
#
# c - command object; only its `option` method is called, once per option,
#     with the option key, the switch definition and the help text.
def self.specify_options(c)
  c.option("url", "--url URL", "Tumblr URL")
  c.option("format", "--format FORMAT", 'Output format (default: "html")')
  c.option("grab_images", "--grab_images", "Whether to grab images (default: false)")
  c.option("add_highlights", "--add_highlights", "Whether to add highlights (default: false)")
  c.option("rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)")
end
# Imports every post of a Tumblr blog into "_posts/tumblr".
#
# Posts are fetched page by page (50 per request) from the blog's
# "/api/read/json/" endpoint and converted with post_to_hash. Fetching stops
# after the first page that holds fewer posts than the page size.
#
# When URL rewriting is requested, all posts are collected first and written
# only after rewrite_urls_and_redirects has processed them; otherwise each
# page is written to disk as soon as it has been fetched.
#
# options - Hash of importer options (String keys):
#           "url"            - Tumblr blog URL (required; KeyError if missing).
#           "format"         - Output format; "md" selects Markdown
#                              (default: "html").
#           "grab_images"    - Stored in @grab_images (default: false).
#           "add_highlights" - Passed through to write_post (default: false).
#           "rewrite_urls"   - Whether to run the rewrite pass (default: false).
def self.process(options)
  url            = options.fetch("url")
  format         = options.fetch("format", "html")
  add_highlights = options.fetch("add_highlights", false)
  rewrite_urls   = options.fetch("rewrite_urls", false)
  @grab_images   = options.fetch("grab_images", false)

  FileUtils.mkdir_p "_posts/tumblr"

  # Drop a trailing slash so the API path is not joined with "//".
  url = url.chomp("/") + "/api/read/json/"
  per_page = 50
  use_markdown = format == "md"
  posts = []
  current_page = 0

  # Two passes are required so that we can rewrite URLs.
  # First pass builds up an array of each post as a hash.
  loop do
    feed_url = "#{url}?num=#{per_page}&start=#{current_page * per_page}"
    Jekyll.logger.info "Fetching #{feed_url}"

    # The block form closes the underlying stream once the body has been read.
    contents = URI.parse(feed_url).open(&:read)
    blog = extract_json(contents)
    Jekyll.logger.info "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"

    batch = blog["posts"].map { |post| post_to_hash(post, format) }

    # If we're rewriting, save the posts for later. Otherwise, go ahead and
    # dump these to disk now.
    if rewrite_urls
      posts.concat(batch)
    else
      batch.each { |post| write_post(post, use_markdown, add_highlights) }
    end

    # A short page means there is nothing left to fetch.
    break if blog["posts"].size < per_page

    current_page += 1
  end

  # Rewrite URLs, create redirects and write out posts if necessary.
  if rewrite_urls
    posts = rewrite_urls_and_redirects posts
    posts.each { |post| write_post(post, use_markdown, add_highlights) }
  end
end
class << self
# Extracts and parses the JSON object embedded in a Tumblr API response.
#
# The response wraps the object in extra (JSONP) characters, so everything
# before the first "{" and after the last "}" is discarded before parsing.
#
# contents - String body of the API response.
#
# Returns the parsed JSON value.
# Raises JSON::ParserError if no "{...}" span can be found or the span is
# not valid JSON.
def extract_json(contents)
  beginning = contents.index("{")
  ending = contents.rindex("}")

  # Fail with a descriptive error instead of a NoMethodError on nil when the
  # response holds no JSON object at all (e.g. an HTML error page).
  unless beginning && ending && beginning < ending
    raise JSON::ParserError, "no JSON object found in Tumblr response"
  end

  JSON.parse(contents[beginning..ending]) # Strip Tumblr's JSONP chars.
end
# Writes a post out to disk
#
# post           - Hash describing the post; reads :content, :name and
#                  :header, plus :slug when highlights are added.
# use_markdown   - When truthy, the content is passed through
#                  html_to_markdown before being written.
# add_highlights - When truthy (and use_markdown is set), a directory derived
#                  from the slug's URL path is created and the content is
#                  passed through add_syntax_highlights.
#
# Posts without content are skipped. The file is written to
# "_posts/tumblr/<name>" as the YAML header, a "---" line and the content.
def write_post(post, use_markdown, add_highlights)
  body = post[:content]
  return unless body

  body = html_to_markdown(body) if use_markdown

  if use_markdown && add_highlights
    # Turn the slug's URL path ("/a/b") into a relative directory ("a/b/").
    slug_path = URI.parse(post[:slug]).path
    redirect_dir = slug_path.sub(%r!\/!, "") + "/"
    FileUtils.mkdir_p(redirect_dir)
    body = add_syntax_highlights(body, redirect_dir)
  end

  destination = "_posts/tumblr/#{post[:name]}"
  File.open(destination, "w") do |file|
    file.puts(post[:header].to_yaml + "---\n" + body)
  end
end
# Converts each type of Tumblr post to a hash with all required
# data for Jekyll.
def post_to_hash(post, format)
case post["type"]
when "regular"
title, content = post.values_at("regular-title", "regular-body")
when "link"
title = post["link-text"] || post["link-url"]
content = "#{title}"
content << "
#{post["link-description"]}" unless post["link-description"].nil?
when "photo"
title = post["slug"].tr("-", " ")
if post["photos"].size > 1
content = +""
post["photos"].each do |post_photo|
photo = fetch_photo post_photo
content << "#{photo}
"
content << post_photo["caption"]
end
else
content = fetch_photo post
end
content << "
#{post["photo-caption"]}"
when "audio"
if !post["id3-title"].nil?
title, content = post.values_at("id3-title", "audio-player")
content << "
#{post["audio-caption"]}"
else
title, content = post.values_at("audio-caption", "audio-player")
end
when "quote"
title = post["quote-text"]
content = "
#{post["quote-text"]}" content << "—#{post["quote-source"]}" unless post["quote-source"].nil? when "conversation" title = post["conversation-title"] content = "" when "video" title, content = post.values_at("video-title", "video-player") unless post["video-caption"].nil? if content content << "