RubygemsResearch

Sha256: 53b0ce8d8049c235d759e1106c1bc68887d6879efe9e52d3991bba0befa1a915

Contents?: true

Size: 1.75 KB

Versions: 73

Compression:

Stored size: 1.75 KB

# Modified from: https://github.com/severinh/nanoc-lunr-js-search/blob/master/lib/nanoc/filters/lunr_js_search.rb
require 'json'
require 'nokogiri'

# Nanoc filter (identifier +:search+) that records the page being filtered in
# a JSON search index stored in the site's output directory.
#
# The index is a JSON object keyed by the item representation's path; each
# value holds the +title+, +subtitle+ and +body+ text extracted from the
# page's <article> element. The filtered content itself is passed through
# unchanged.
class SearchFilter < Nanoc::Filter
  identifier :search

  # Adds (or replaces) the index entry for the current item representation.
  #
  # The index file is read, updated and rewritten in full on every call.
  #
  # @param content [String] rendered HTML of the page
  # @param params [Hash] accepted as part of the filter signature; not used here
  # @return [String] +content+, unmodified
  def run(content, params = {})
    doc = Nokogiri::HTML(content)
    url = assigns[:item_rep].path
    #first_image = doc.xpath('//img/@src').to_a[0]
    document = {
      # See this next time you need to fix this: https://gist.github.com/LeCoupa/8c305ec8c713aad07b14
      title: extract_first(doc, '//article/*/h1 | //article/*/h2 | //article/h1 | //article/h2'),
      subtitle: extract_first(doc, '//article/*/h3 | //article/h3'),
      body: extract_all(doc, '//article//*/text()'),
      #img: (first_image.nil?) ? '' : first_image.value(),
      #alt: extract_values(doc, '//article//img/@alt')
    }

    # Resolve the path once so the read and the write target the same file.
    index_path = search_file
    documents = File.exist?(index_path) ? JSON.parse(File.read(index_path)) : {}
    documents[url] = document
    File.write(index_path, JSON.pretty_generate(documents))

    content
  end

  # Path of the JSON index file this item contributes to.
  #
  # Items carrying a +:search_id+ attribute are written to
  # "search_<id>.json"; all other items share "search.json".
  #
  # @return [String]
  def search_file
    name = item[:search_id] ? "search_#{item[:search_id]}.json" : 'search.json'
    File.join(@site.config[:output_dir], name)
  end

  # First non-empty text matched by +path+, or nil when nothing matches.
  #
  # @param doc [Nokogiri::HTML::Document] parsed page
  # @param path [String] XPath expression
  # @return [String, nil]
  def extract_first(doc, path)
    extract_text(doc, path).first
  end

  # All non-empty texts matched by +path+, joined with single spaces.
  #
  # @param doc [Nokogiri::HTML::Document] parsed page
  # @param path [String] XPath expression
  # @return [String]
  def extract_all(doc, path)
    extract_text(doc, path).join(' ')
  end

  # Whitespace-normalized content of every node matched by +path+, with
  # nodes that normalize to an empty string dropped.
  #
  # @param doc [Nokogiri::HTML::Document] parsed page
  # @param path [String] XPath expression
  # @return [Array<String>]
  def extract_text(doc, path)
    doc.xpath(path)
       .map { |node| normalize_whitespace(node.content) }
       .reject(&:empty?)
  end

  # Whitespace-normalized values of every attribute node matched by +path+,
  # joined with single spaces.
  #
  # @param doc [Nokogiri::HTML::Document] parsed page
  # @param path [String] XPath expression selecting attribute nodes
  # @return [String]
  def extract_values(doc, path)
    doc.xpath(path).map { |attr| normalize_whitespace(attr.value) }.join(' ')
  end

  private

  # Replaces carriage returns and newlines with spaces, collapses runs of
  # spaces and trims both ends.
  #
  # The character set must be double-quoted: in a single-quoted literal
  # '\r' and '\n' are a backslash followed by a letter, which never matches
  # a real line break.
  #
  # @param text [String]
  # @return [String]
  def normalize_whitespace(text)
    text.tr("\r\n", ' ').squeeze(' ').strip
  end
end

Version data entries

73 entries across 73 versions & 1 rubygem

Version	Path
origen-0.33.3	templates/nanoc/lib/search_filter.rb
origen-0.33.2	templates/nanoc/lib/search_filter.rb
origen-0.33.1	templates/nanoc/lib/search_filter.rb
origen-0.33.0	templates/nanoc/lib/search_filter.rb
origen-0.32.1	templates/nanoc/lib/search_filter.rb
origen-0.32.0	templates/nanoc/lib/search_filter.rb
origen-0.31.0	templates/nanoc/lib/search_filter.rb
origen-0.30.0	templates/nanoc/lib/search_filter.rb
origen-0.29.0	templates/nanoc/lib/search_filter.rb
origen-0.28.2	templates/nanoc/lib/search_filter.rb
origen-0.28.1	templates/nanoc/lib/search_filter.rb
origen-0.28.0	templates/nanoc/lib/search_filter.rb
origen-0.27.0	templates/nanoc/lib/search_filter.rb
origen-0.26.0	templates/nanoc/lib/search_filter.rb
origen-0.25.1	templates/nanoc/lib/search_filter.rb
origen-0.25.0	templates/nanoc/lib/search_filter.rb
origen-0.24.0	templates/nanoc/lib/search_filter.rb
origen-0.23.0	templates/nanoc/lib/search_filter.rb
origen-0.22.0	templates/nanoc/lib/search_filter.rb
origen-0.21.0	templates/nanoc/lib/search_filter.rb