#!/usr/bin/env ruby # frozen_string_literal: true # # The Ruby documentation for #sort_by describes what's called a Schwartzian transform: # # > A more efficient technique is to cache the sort keys (modification times in this case) # > before the sort. Perl users often call this approach a Schwartzian transform, after # > Randal Schwartz. We construct a temporary array, where each element is an array # > containing our sort key along with the filename. We sort this array, and then extract # > the filename from the result. # > This is exactly what sort_by does internally. # # The well-documented efficiency of sort_by is a good reason to use it. However, when a property # does not exist on an item being sorted, it can cause issues (no nil's allowed!) # In Jekyll::Filters#sort_input, we extract the property in each iteration of #sort, # which is quite inefficient! How inefficient? This benchmark will tell you just how, and how much # it can be improved by using the Schwartzian transform. Thanks, Randall! require 'benchmark/ips' require 'minitest' require File.expand_path('../lib/jekyll', __dir__) def site @site ||= Jekyll::Site.new( Jekyll.configuration('source' => File.expand_path('../docs', __dir__)) ).tap(&:reset).tap(&:read) end def site_docs site.collections['docs'].docs.dup end def sort_by_property_directly(docs, meta_key) docs.sort! do |apple, orange| apple_property = apple[meta_key] orange_property = orange[meta_key] if !apple_property.nil? && !orange_property.nil? apple_property <=> orange_property elsif !apple_property.nil? && orange_property.nil? -1 elsif apple_property.nil? && !orange_property.nil? 1 else apple <=> orange end end end def schwartzian_transform(docs, meta_key) docs.collect! do |d| [d[meta_key], d] end.sort! do |apple, orange| if !apple[0].nil? && !orange[0].nil? apple.first <=> orange.first elsif !apple[0].nil? && orange[0].nil? -1 elsif apple[0].nil? && !orange[0].nil? 1 else apple[-1] <=> orange[-1] end end.collect! { |d| d[-1] } end # Before we test efficiency, do they produce the same output? class Correctness include Minitest::Assertions define_method :mu_pp, &:pretty_inspect attr_accessor :assertions def initialize(docs, property) @assertions = 0 @docs = docs @property = property end def assert! assert sort_by_property_directly(@docs, @property).is_a?(Array), 'sort_by_property_directly must return an array' assert schwartzian_transform(@docs, @property).is_a?(Array), 'schwartzian_transform must return an array' assert_equal sort_by_property_directly(@docs, @property), schwartzian_transform(@docs, @property) puts "Yeah, ok, correctness all checks out for property #{@property.inspect}" end end Correctness.new(site_docs, 'redirect_from').assert! Correctness.new(site_docs, 'title').assert! def property(property, meta_key) Benchmark.ips do |x| x.config(time: 10, warmup: 5) x.report("sort_by_property_directly with #{property} property") do sort_by_property_directly(site_docs, meta_key) end x.report("schwartzian_transform with #{property} property") do schwartzian_transform(site_docs, meta_key) end x.compare! end end # First, test with a property only a handful of documents have. test_property('sparse', 'redirect_from') # Next, test with a property they all have. test_property('non-sparse', 'title')