lib/jekyll-import/importers/tumblr.rb in jekyll-import-0.14.0 vs lib/jekyll-import/importers/tumblr.rb in jekyll-import-0.15.0
- old
+ new
@@ -1,5 +1,7 @@
+# frozen_string_literal: true
+
module JekyllImport
module Importers
class Tumblr < Importer
def self.require_deps
JekyllImport.require_with_fallback(%w(
@@ -13,15 +15,15 @@
jekyll
))
end
# Registers this importer's command-line options on c.
# c is only used through c.option(name, switch, description) here; it is
# presumably the command object handed over by the import CLI (confirm against caller).
# NOTE(review): every removed/added pair below reads identically in this view, so the
# change looks like whitespace realignment only; confirm against the real file.
def self.specify_options(c)
- c.option "url", "--url URL", "Tumblr URL"
- c.option "format", "--format FORMAT", 'Output format (default: "html")'
- c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
+ c.option "url", "--url URL", "Tumblr URL"
+ c.option "format", "--format FORMAT", 'Output format (default: "html")'
+ c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
- c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
+ c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
end
def self.process(options)
url = options.fetch("url")
format = options.fetch("format", "html")
@@ -32,24 +34,27 @@
@grab_images = grab_images
FileUtils.mkdir_p "_posts/tumblr"
url += "/api/read/json/"
per_page = 50
posts = []
+
# Two passes are required so that we can rewrite URLs.
# First pass builds up an array of each post as a hash.
begin
current_page = (current_page || -1) + 1
- feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
- puts "Fetching #{feed_url}"
- feed = open(feed_url)
+ feed_url = "#{url}?num=#{per_page}&start=#{current_page * per_page}"
+ Jekyll.logger.info "Fetching #{feed_url}"
+
+ feed = URI.parse(feed_url).open
contents = feed.readlines.join("\n")
- blog = extract_json(contents)
- puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
+ blog = extract_json(contents)
+ Jekyll.logger.info "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
+
batch = blog["posts"].map { |post| post_to_hash(post, format) }
- # If we're rewriting, save the posts for later. Otherwise, go ahead and
- # dump these to disk now
+ # If we're rewriting, save the posts for later. Otherwise, go ahead and dump these to
+ # disk now
if rewrite_urls
posts += batch
else
batch.each { |post| write_post(post, format == "md", add_highlights) }
end
@@ -60,112 +65,103 @@
posts = rewrite_urls_and_redirects posts
posts.each { |post| write_post(post, format == "md", add_highlights) }
end
end
- private
class << self
# Extracts the JSON object embedded in a Tumblr API response.
#
# Everything before the first "{" and after the last "}" is dropped before
# parsing (this strips Tumblr's JSONP wrapper characters).
#
# contents - String body of the API response.
#
# Returns the parsed JSON value.
# Raises JSON::ParserError if the text holds no "{...}" span or the span is
# not valid JSON.
def extract_json(contents)
  beginning = contents.index("{")
  ending    = contents.rindex("}")

  # Without this guard a brace-less body fails with NoMethodError on nil,
  # which says nothing about the real problem.
  if beginning.nil? || ending.nil? || ending < beginning
    raise JSON::ParserError, "no JSON object found in Tumblr response"
  end

  JSON.parse(contents[beginning..ending])
end
# Writes one converted post to _posts/tumblr/<post[:name]>.
#
# post           - Hash read here through the keys :content, :name, :header and :slug.
# use_markdown   - when truthy, the content is first passed through html_to_markdown.
# add_highlights - only consulted when use_markdown is truthy; the content is then also
#                  passed through add_syntax_highlights.
#
# Nothing is written when post[:content] is nil/false. The file body is the YAML dump of
# post[:header], a "---" line, and the content.
def write_post(post, use_markdown, add_highlights)
content = post[:content]
+ return unless content
- if content
- if use_markdown
- content = html_to_markdown content
- if add_highlights
- tumblr_url = URI.parse(post[:slug]).path
- redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
- FileUtils.mkdir_p redirect_dir
- content = add_syntax_highlights(content, redirect_dir)
- end
+ if use_markdown
+ content = html_to_markdown content
+ if add_highlights
# The directory is named after the path of the post's Tumblr URL with its first "/"
# removed and a trailing "/" appended; it is created before highlighting because
# add_syntax_highlights copies an index.html out of it.
+ tumblr_url = URI.parse(post[:slug]).path
+ redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
+ FileUtils.mkdir_p redirect_dir
+ content = add_syntax_highlights(content, redirect_dir)
end
+ end
- File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
- f.puts post[:header].to_yaml + "---\n" + content
- end
+ File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
+ f.puts post[:header].to_yaml + "---\n" + content
end
end
# Converts each type of Tumblr post to a hash with all required
# data for Jekyll.
def post_to_hash(post, format)
case post["type"]
when "regular"
- title = post["regular-title"]
- content = post["regular-body"]
+ title, content = post.values_at("regular-title", "regular-body")
when "link"
- title = post["link-text"] || post["link-url"]
+ title = post["link-text"] || post["link-url"]
content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
- unless post["link-description"].nil?
- content << "<br/>" + post["link-description"]
- end
+ content << "<br/>#{post["link-description"]}" unless post["link-description"].nil?
when "photo"
title = post["slug"].tr("-", " ")
if post["photos"].size > 1
- content = ""
+ content = +""
post["photos"].each do |post_photo|
photo = fetch_photo post_photo
- content << photo + "<br/>"
+ content << "#{photo}<br/>"
content << post_photo["caption"]
end
else
content = fetch_photo post
end
- content << "<br/>" + post["photo-caption"]
+ content << "<br/>#{post["photo-caption"]}"
when "audio"
if !post["id3-title"].nil?
- title = post["id3-title"]
- content = post["audio-player"] + "<br/>" + post["audio-caption"]
+ title, content = post.values_at("id3-title", "audio-player")
+ content << "<br/>#{post["audio-caption"]}"
else
- title = post["audio-caption"]
- content = post["audio-player"]
+ title, content = post.values_at("audio-caption", "audio-player")
end
when "quote"
- title = post["quote-text"]
+ title = post["quote-text"]
content = "<blockquote>#{post["quote-text"]}</blockquote>"
- unless post["quote-source"].nil?
- content << "—" + post["quote-source"]
- end
+ content << "—#{post["quote-source"]}" unless post["quote-source"].nil?
when "conversation"
- title = post["conversation-title"]
+ title = post["conversation-title"]
content = "<section><dialog>"
post["conversation"].each do |line|
content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
end
content << "</dialog></section>"
when "video"
- title = post["video-title"]
- content = post["video-player"]
+ title, content = post.values_at("video-title", "video-player")
unless post["video-caption"].nil?
if content
- content << "<br/>" + post["video-caption"]
+ content << "<br/>#{post["video-caption"]}"
else
content = post["video-caption"]
end
end
when "answer"
- title = post["question"]
- content = post["answer"]
+ title, content = post.values_at("question", "answer")
end
- date = Date.parse(post["date"]).to_s
+
+ date = Date.parse(post["date"]).to_s
title = Nokogiri::HTML(title).text
title = "no title" if title.empty?
- slug = if post["slug"] && post["slug"].strip != ""
- post["slug"]
- elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
- slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
- slug.length > 200 ? slug.slice(0..200) : slug
- else
- post["id"]
- end
+ slug = if post["slug"] && post["slug"].strip != ""
+ post["slug"]
+ elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
+ slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
+ slug.length > 200 ? slug.slice(0..200) : slug
+ else
+ post["id"]
+ end
{
:name => "#{date}-#{slug}.#{format}",
:header => {
"layout" => "post",
"title" => title,
@@ -178,12 +174,12 @@
:slug => post["url-with-slug"],
}
end
# Attempts to fetch the largest version of a photo available for a post.
- # If that file fails, it tries the next smaller size until all available
- # photo URLs are exhausted. If they all fail, the import is aborted.
+ # If that file fails, it tries the next smaller size until all available photo URLs are
+ # exhausted. If they all fail, the import is aborted.
def fetch_photo(post)
sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
sizes.sort! { |a, b| b <=> a }
_ext_key, ext_val = post.find do |k, v|
@@ -192,34 +188,34 @@
ext = "." + ext_val.split(".").last
sizes.each do |size|
url = post["photo-url"] || post["photo-url-#{size}"]
next if url.nil?
+
begin
- return "<img src=\"#{save_photo(url, ext)}\"/>"
+ return +"<img src=\"#{save_photo(url, ext)}\"/>"
rescue OpenURI::HTTPError
- puts "Failed to grab photo"
+ Jekyll.logger.warn "Failed to grab photo"
end
end
abort "Failed to fetch photo for post #{post["url"]}"
end
- # Create a Hash of old urls => new urls, for rewriting and
- # redirects, and replace urls in each post. Instantiate Jekyll
- # site/posts to get the correct permalink format.
+ # Create a Hash of old urls => new urls, for rewriting and redirects, and replace urls in
+ # each post. Instantiate Jekyll site/posts to get the correct permalink format.
def rewrite_urls_and_redirects(posts)
site = Jekyll::Site.new(Jekyll.configuration({}))
urls = Hash[posts.map do |post|
- # Create an initial empty file for the post so that
- # we can instantiate a post object.
- File.write("_posts/tumblr/#{post[:name]}", "")
+ # Create an initial empty file for the post so that we can instantiate a post object.
+ relative_path = "_posts/tumblr/#{post[:name]}"
+ File.write(relative_path, "")
tumblr_url = URI.parse(URI.encode(post[:slug])).path
jekyll_url = if Jekyll.const_defined? :Post
- Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+ Jekyll::Post.new(site, site.source, "", "tumblr/#{post[:name]}").url
else
- Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), :site => site, :collection => site.posts).url
+ Jekyll::Document.new(site.in_source_dir(relative_path), :site => site, :collection => site.posts).url
end
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
FileUtils.mkdir_p redirect_dir
File.open(redirect_dir + "index.html", "w") do |f|
f.puts "<html><head><link rel=\"canonical\" href=\"" \
@@ -238,66 +234,64 @@
# Reduces HTML to plain text while keeping table markup, as the markdown docs
# allow raw HTML tables.
#
# The opening and closing table/tr/th/td tags are swapped for "$$tag" / "||tag"
# placeholders, the document text is extracted with Nokogiri (single quotes are
# doubled first), and the placeholders are then turned back into tags.
#
# content - String of HTML. It is never modified; frozen strings are accepted.
#
# Returns a new String.
def html_to_markdown(content)
  preserve = %w(table tr th td)

  # Non-destructive gsub: the previous in-place gsub! altered the caller's
  # string and raised FrozenError on frozen input.
  shielded = preserve.reduce(content) do |text, tag|
    text.gsub(%r!<#{tag}!i, "$$#{tag}").gsub(%r!</#{tag}!i, "||#{tag}")
  end

  plain = Nokogiri::HTML(shielded.gsub("'", "''")).text

  preserve.reduce(plain) do |text, tag|
    text.gsub("$$#{tag}", "<#{tag}").gsub("||#{tag}", "</#{tag}")
  end
end
- # Adds pygments highlight tags to code blocks in posts that use
- # markdown format. This doesn't guess the language of the code
- # block, so you should modify this to suit your own content.
- # For example, my code block only contain Python and JavaScript,
- # so I can assume the block is JavaScript if it contains a
- # semi-colon.
+ # Adds pygments highlight tags to code blocks in posts that use markdown format.
+ # This doesn't guess the language of the code block, so you should modify this to suit your
+ # own content.
+ # For example, my code block only contain Python and JavaScript, so I can assume the block
+ # is JavaScript if it contains a semi-colon.
# content      - text that is split on "\n" and scanned line by line.
# redirect_dir - directory path ending in "/"; its index.html is copied one level up.
# Returns the processed lines joined with "\n".
# NOTE(review): the indent pattern shows a single space in this view, but this rendering
# appears to collapse runs of whitespace; confirm the real pattern before relying on it.
def add_syntax_highlights(content, redirect_dir)
- lines = content.split("\n")
- block = false
- indent = %r!^ !
- lang = nil
- start = nil
- lines.each_with_index do |line, i|
- if !block && line =~ indent
- block = true
- lang = "python"
- start = i
- elsif block
- lang = "javascript" if line =~ %r!;$!
- block = line =~ indent && i < lines.size - 1 # Also handle EOF
- unless block
- lines[start] = "{% highlight #{lang} %}"
- lines[i - 1] = "{% endhighlight %}"
+ lines = content.split("\n")
+ block = false
+ indent = %r!^ !
+ lang = nil
+ start = nil
+ lines.each_with_index do |line, i|
# An indented line outside a block opens one; the language starts out as python.
+ if !block && line =~ indent
+ block = true
+ lang = "python"
+ start = i
+ elsif block
# Any line ending in ";" switches the current block's language to javascript.
+ lang = "javascript" if line =~ %r!;$!
+ block = line =~ indent && i < lines.size - 1 # Also handle EOF
# When the block ends, the line that opened it and the line before the current one
# are overwritten with the highlight tags.
+ unless block
+ lines[start] = "{% highlight #{lang} %}"
+ lines[i - 1] = "{% endhighlight %}"
+ end
# Runs for every line handled by this branch, not once per block.
# NOTE(review): why the redirect page is copied to the parent directory here is not
# visible from this code; confirm intent.
+ FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
+ lines[i] = lines[i].sub(indent, "")
end
- FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
- lines[i] = lines[i].sub(indent, "")
end
+ lines.join("\n")
end
- lines.join("\n")
- end
# Returns the address to use for a photo, downloading the file first when
# image grabbing is enabled.
#
# url - String address of the photo.
# ext - String file extension including its leading dot (e.g. ".jpg").
#
# When @grab_images is falsy the remote url is returned untouched. Otherwise
# the photo is stored under tumblr_files/ (the download is skipped when a
# non-empty copy already exists) and "/tumblr_files/<file name>" is returned.
def save_photo(url, ext)
  return url unless @grab_images

  path = "tumblr_files/#{url.split("/").last}"
  # Literal suffix test: interpolating ext into a regexp let its "." match any
  # character, so a name such as "abcxjpg" was wrongly left without ".jpg".
  path += ext unless path.end_with?(ext)
  FileUtils.mkdir_p "tumblr_files"

  # Don't fetch if we've already cached this file
  unless File.size? path
    Jekyll.logger.info "Fetching photo #{url}"
    File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
  end

  "/#{path}"
end
end
end
end
end