lib/jekyll-import/importers/tumblr.rb in jekyll-import-0.12.0 vs lib/jekyll-import/importers/tumblr.rb in jekyll-import-0.13.0
- old
+ new
@@ -1,35 +1,35 @@
module JekyllImport
module Importers
class Tumblr < Importer
def self.require_deps
- JekyllImport.require_with_fallback(%w[
+ JekyllImport.require_with_fallback(%w(
rubygems
fileutils
open-uri
nokogiri
json
uri
time
jekyll
- ])
+ ))
end
def self.specify_options(c)
- c.option 'url', '--url URL', 'Tumblr URL'
- c.option 'format', '--format FORMAT', 'Output format (default: "html")'
- c.option 'grab_images', '--grab_images', 'Whether to grab images (default: false)'
- c.option 'add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'
- c.option 'rewrite_urls', '--rewrite_urls', 'Whether to rewrite URLs (default: false)'
+ c.option "url", "--url URL", "Tumblr URL"
+ c.option "format", "--format FORMAT", 'Output format (default: "html")'
+ c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
+ c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
+ c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
end
def self.process(options)
- url = options.fetch('url')
- format = options.fetch('format', "html")
- grab_images = options.fetch('grab_images', false)
- add_highlights = options.fetch('add_highlights', false)
- rewrite_urls = options.fetch('rewrite_urls', false)
+ url = options.fetch("url")
+ format = options.fetch("format", "html")
+ grab_images = options.fetch("grab_images", false)
+ add_highlights = options.fetch("add_highlights", false)
+ rewrite_urls = options.fetch("rewrite_urls", false)
@grab_images = grab_images
FileUtils.mkdir_p "_posts/tumblr"
url += "/api/read/json/"
per_page = 50
@@ -49,67 +49,66 @@
# If we're rewriting, save the posts for later. Otherwise, go ahead and
# dump these to disk now
if rewrite_urls
posts += batch
else
- batch.each {|post| write_post(post, format == "md", add_highlights)}
+ batch.each { |post| write_post(post, format == "md", add_highlights) }
end
-
end until blog["posts"].size < per_page
# Rewrite URLs, create redirects and write out out posts if necessary
if rewrite_urls
posts = rewrite_urls_and_redirects posts
- posts.each {|post| write_post(post, format == "md", add_highlights)}
+ posts.each { |post| write_post(post, format == "md", add_highlights) }
end
end
private
+ class << self
+ def extract_json(contents)
+ beginning = contents.index("{")
+ ending = contents.rindex("}") + 1
+ json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
+ JSON.parse(json)
+ end
- def self.extract_json(contents)
- beginning = contents.index("{")
- ending = contents.rindex("}")+1
- json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
- blog = JSON.parse(json)
- end
+ # Writes a post out to disk
+ def write_post(post, use_markdown, add_highlights)
+ content = post[:content]
- # Writes a post out to disk
- def self.write_post(post, use_markdown, add_highlights)
- content = post[:content]
+ if content
+ if use_markdown
+ content = html_to_markdown content
+ if add_highlights
+ tumblr_url = URI.parse(post[:slug]).path
+ redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
+ FileUtils.mkdir_p redirect_dir
+ content = add_syntax_highlights(content, redirect_dir)
+ end
+ end
- if content
- if use_markdown
- content = html_to_markdown content
- if add_highlights
- tumblr_url = URI.parse(post[:slug]).path
- redirect_dir = tumblr_url.sub(/\//, "") + "/"
- FileUtils.mkdir_p redirect_dir
- content = add_syntax_highlights(content, redirect_dir)
+ File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
+ f.puts post[:header].to_yaml + "---\n" + content
end
end
-
- File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
- f.puts post[:header].to_yaml + "---\n" + content
- end
end
- end
- # Converts each type of Tumblr post to a hash with all required
- # data for Jekyll.
- def self.post_to_hash(post, format)
- case post['type']
+ # Converts each type of Tumblr post to a hash with all required
+ # data for Jekyll.
+ def post_to_hash(post, format)
+ case post["type"]
when "regular"
title = post["regular-title"]
content = post["regular-body"]
when "link"
title = post["link-text"] || post["link-url"]
content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
unless post["link-description"].nil?
content << "<br/>" + post["link-description"]
end
when "photo"
- title = post["slug"].gsub("-"," ")
+ title = post["slug"].tr("-", " ")
if post["photos"].size > 1
content = ""
post["photos"].each do |post_photo|
photo = fetch_photo post_photo
content << photo + "<br/>"
@@ -135,11 +134,11 @@
end
when "conversation"
title = post["conversation-title"]
content = "<section><dialog>"
post["conversation"].each do |line|
- content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
+ content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
end
content << "</dialog></section>"
when "video"
title = post["video-title"]
content = post["video-player"]
@@ -151,150 +150,154 @@
end
end
when "answer"
title = post["question"]
content = post["answer"]
+ end
+ date = Date.parse(post["date"]).to_s
+ title = Nokogiri::HTML(title).text
+ title = "no title" if title.empty?
+ slug = if post["slug"] && post["slug"].strip != ""
+ post["slug"]
+ elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
+ slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
+ slug.length > 200 ? slug.slice(0..200) : slug
+ else
+ post["id"]
+ end
+ {
+ :name => "#{date}-#{slug}.#{format}",
+ :header => {
+ "layout" => "post",
+ "title" => title,
+ "date" => Time.parse(post["date"]).xmlschema,
+ "tags" => (post["tags"] || []),
+ "tumblr_url" => post["url-with-slug"],
+ },
+ :content => content,
+ :url => post["url"],
+ :slug => post["url-with-slug"],
+ }
end
- date = Date.parse(post['date']).to_s
- title = Nokogiri::HTML(title).text
- title = "no title" if title.empty?
- slug = if post["slug"] && post["slug"].strip != ""
- post["slug"]
- elsif title && title.downcase.gsub(/[^a-z0-9\-]/, '') != '' && title != 'no title'
- slug = title.downcase.strip.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
- slug.length > 200 ? slug.slice(0..200) : slug
- else
- slug = post['id']
- end
- {
- :name => "#{date}-#{slug}.#{format}",
- :header => {
- "layout" => "post",
- "title" => title,
- "date" => Time.parse(post['date']).xmlschema,
- "tags" => (post["tags"] or []),
- "tumblr_url" => post["url-with-slug"]
- },
- :content => content,
- :url => post["url"],
- :slug => post["url-with-slug"],
- }
- end
- # Attempts to fetch the largest version of a photo available for a post.
- # If that file fails, it tries the next smaller size until all available
- # photo URLs are exhausted. If they all fail, the import is aborted.
- def self.fetch_photo(post)
- sizes = post.keys.map {|k| k.gsub("photo-url-", "").to_i}
- sizes.sort! {|a,b| b <=> a}
+ # Attempts to fetch the largest version of a photo available for a post.
+ # If that file fails, it tries the next smaller size until all available
+ # photo URLs are exhausted. If they all fail, the import is aborted.
+ def fetch_photo(post)
+ sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
+ sizes.sort! { |a, b| b <=> a }
- ext_key, ext_val = post.find do |k,v|
- k =~ /^photo-url-/ && v.split("/").last =~ /\./
+ _ext_key, ext_val = post.find do |k, v|
+ k =~ %r!^photo-url-! && v.split("/").last =~ %r!\.!
+ end
+ ext = "." + ext_val.split(".").last
+
+ sizes.each do |size|
+ url = post["photo-url"] || post["photo-url-#{size}"]
+ next if url.nil?
+ begin
+ return "<img src=\"#{save_photo(url, ext)}\"/>"
+ rescue OpenURI::HTTPError
+ puts "Failed to grab photo"
+ end
+ end
+
+ abort "Failed to fetch photo for post #{post["url"]}"
end
- ext = "." + ext_val.split(".").last
- sizes.each do |size|
- url = post["photo-url"] || post["photo-url-#{size}"]
- next if url.nil?
- begin
- return "<img src=\"#{save_photo(url, ext)}\"/>"
- rescue OpenURI::HTTPError => err
- puts "Failed to grab photo"
+ # Create a Hash of old urls => new urls, for rewriting and
+ # redirects, and replace urls in each post. Instantiate Jekyll
+ # site/posts to get the correct permalink format.
+ def rewrite_urls_and_redirects(posts)
+ site = Jekyll::Site.new(Jekyll.configuration({}))
+ urls = Hash[posts.map do |post|
+ # Create an initial empty file for the post so that
+ # we can instantiate a post object.
+ File.write("_posts/tumblr/#{post[:name]}", "")
+ tumblr_url = URI.parse(URI.encode(post[:slug])).path
+ jekyll_url = if Jekyll.const_defined? :Post
+ Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+ else
+ Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), :site => site, :collection => site.posts).url
+ end
+ redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
+ FileUtils.mkdir_p redirect_dir
+ File.open(redirect_dir + "index.html", "w") do |f|
+ f.puts "<html><head><link rel=\"canonical\" href=\"" \
+ "#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " \
+ "url=#{jekyll_url}\"></head><body></body></html>"
+ end
+ [tumblr_url, jekyll_url]
+ end]
+ posts.map do |post|
+ urls.each do |tumblr_url, jekyll_url|
+ post[:content].gsub!(%r!#{tumblr_url}!i, jekyll_url)
+ end
+ post
end
end
- abort "Failed to fetch photo for post #{post['url']}"
- end
-
- # Create a Hash of old urls => new urls, for rewriting and
- # redirects, and replace urls in each post. Instantiate Jekyll
- # site/posts to get the correct permalink format.
- def self.rewrite_urls_and_redirects(posts)
- site = Jekyll::Site.new(Jekyll.configuration({}))
- urls = Hash[posts.map { |post|
- # Create an initial empty file for the post so that
- # we can instantiate a post object.
- File.write("_posts/tumblr/#{post[:name]}", "")
- tumblr_url = URI.parse(URI.encode(post[:slug])).path
- jekyll_url = if Jekyll.const_defined? :Post
- Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
- else
- Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), site: site, collection: site.posts).url
- end
- redirect_dir = tumblr_url.sub(/\//, "") + "/"
- FileUtils.mkdir_p redirect_dir
- File.open(redirect_dir + "index.html", "w") do |f|
- f.puts "<html><head><link rel=\"canonical\" href=\"" +
- "#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
- "url=#{jekyll_url}\"></head><body></body></html>"
+ # Convert preserving HTML tables as per the markdown docs.
+ def html_to_markdown(content)
+ preserve = %w(table tr th td)
+ preserve.each do |tag|
+ content.gsub!(%r!<#{tag}!i, "$$" + tag)
+ content.gsub!(%r!<\/#{tag}!i, "||" + tag)
end
- [tumblr_url, jekyll_url]
- }]
- posts.map { |post|
- urls.each do |tumblr_url, jekyll_url|
- post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
+ content = Nokogiri::HTML(content.gsub("'", "''")).text
+ preserve.each do |tag|
+ content.gsub!("$$" + tag, "<" + tag)
+ content.gsub!("||" + tag, "</" + tag)
end
- post
- }
- end
-
- # Convert preserving HTML tables as per the markdown docs.
- def self.html_to_markdown(content)
- preserve = ["table", "tr", "th", "td"]
- preserve.each do |tag|
- content.gsub!(/<#{tag}/i, "$$" + tag)
- content.gsub!(/<\/#{tag}/i, "||" + tag)
+ content
end
- content = Nokogiri::HTML(content.gsub("'", "''")).text
- preserve.each do |tag|
- content.gsub!("$$" + tag, "<" + tag)
- content.gsub!("||" + tag, "</" + tag)
- end
- content
- end
- # Adds pygments highlight tags to code blocks in posts that use
- # markdown format. This doesn't guess the language of the code
- # block, so you should modify this to suit your own content.
- # For example, my code block only contain Python and JavaScript,
- # so I can assume the block is JavaScript if it contains a
- # semi-colon.
- def self.add_syntax_highlights(content, redirect_dir)
+ # Adds pygments highlight tags to code blocks in posts that use
+ # markdown format. This doesn't guess the language of the code
+ # block, so you should modify this to suit your own content.
+ # For example, my code block only contain Python and JavaScript,
+ # so I can assume the block is JavaScript if it contains a
+ # semi-colon.
+ def add_syntax_highlights(content, redirect_dir)
lines = content.split("\n")
- block, indent, lang, start = false, /^ /, nil, nil
+ block = false
+ indent = %r!^ !
+ lang = nil
+ start = nil
lines.each_with_index do |line, i|
if !block && line =~ indent
block = true
lang = "python"
start = i
elsif block
- lang = "javascript" if line =~ /;$/
+ lang = "javascript" if line =~ %r!;$!
block = line =~ indent && i < lines.size - 1 # Also handle EOF
- if !block
+ unless block
lines[start] = "{% highlight #{lang} %}"
lines[i - 1] = "{% endhighlight %}"
end
FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
lines[i] = lines[i].sub(indent, "")
end
end
lines.join("\n")
end
- def self.save_photo(url, ext)
+ def save_photo(url, ext)
if @grab_images
- path = "tumblr_files/#{url.split('/').last}"
- path += ext unless path =~ /#{ext}$/
+ path = "tumblr_files/#{url.split("/").last}"
+ path += ext unless path =~ %r!#{ext}$!
FileUtils.mkdir_p "tumblr_files"
# Don't fetch if we've already cached this file
unless File.size? path
puts "Fetching photo #{url}"
File.open(path, "wb") { |f| f.write(open(url).read) }
end
url = "/" + path
end
url
+ end
end
end
end
end