lib/bible_gateway.rb in bible_gateway-0.0.12 vs lib/bible_gateway.rb in bible_gateway-0.1.0

- old
+ new

@@ -4,11 +4,12 @@ require 'uri' class BibleGatewayError < StandardError; end class BibleGateway - GATEWAY_URL = "http://classic.biblegateway.com" + GATEWAY_URL = "http://biblegateway.com" + CLASSIC_GATEWAY_URL = "http://classic.biblegateway.com" VERSIONS = { :american_standard_version => "ASV", :amplified_bible => "AMP", :common_english_bible => "CEB", @@ -60,48 +61,81 @@ end def lookup(passage) response = Typhoeus.get(passage_url(passage), followlocation: true) doc = Nokogiri::HTML(response.body) - scrape_passage(doc) + scrape_passage(doc, @version) end + def old_lookup(passage) + response = Typhoeus.get(old_passage_url(passage), followlocation: true) + doc = Nokogiri::HTML(response.body) + old_way_scrape_passage(doc) + end + private def passage_url(passage) - URI.escape "#{GATEWAY_URL}/passage/?search=#{passage}&version=#{VERSIONS[version]}" + "#{GATEWAY_URL}/passage/?search=#{URI.encode_www_form_component(passage)}&version=#{URI.encode_www_form_component(VERSIONS[version])}" end - def scrape_passage(doc) + def old_passage_url(passage) + "#{CLASSIC_GATEWAY_URL}/passage/?search=#{URI.encode_www_form_component(passage)}&version=#{URI.encode_www_form_component(VERSIONS[version])}" + end + + def scrape_passage(doc, version) + container = doc.css('div.passage-text') + title = container.css("div.version-#{VERSIONS[version]}.result-text-style-normal.text-html h1 span")[0].content.strip if container.css("div.version-#{VERSIONS[version]}.result-text-style-normal.text-html h1")[0] != nil + segment = doc.at('div.passage-text') + + segment.search('sup.crossreference').remove # remove cross reference links + segment.search('sup.footnote').remove # remove footnote links + segment.search("div.crossrefs").remove # remove cross references + segment.search("div.footnotes").remove # remove footnotes + + text = "" + segment.search("span.text").each do |span| + text += span.inner_text + end + + segment.search("span.text").each do |span| + html_content = span.inner_html + span.swap html_content + end + + segment.search('sup.versenum').each do |sup| + html_content = sup.content + sup.swap "<sup>#{html_content}</sup>" + end + + content = segment.inner_html.gsub('<p></p>', '').gsub(/<!--.*?-->/, '').strip + {:title => title, :content => content, :text => text } + end + + def old_way_scrape_passage(doc) container = doc.css('div.container') title = container.css('div.passage-details h1')[0].content.strip segment = doc.at('div.passage-wrap') segment.search('sup.crossreference').remove # remove cross reference links segment.search('sup.footnote').remove # remove footnote links segment.search("div.crossrefs").remove # remove cross references segment.search("div.footnotes").remove # remove footnotes - #scripture_text should be text + # extract text only from scripture text = "" segment.search("span.text").each do |span| text += span.inner_text end - # text should be html so that text can be used above. segment.search("span.text").each do |span| html_content = span.inner_html span.swap html_content end - # text should be html so that text can be used above. segment.search('sup.versenum').each do |sup| html_content = sup.content sup.swap "<sup>#{html_content}</sup>" end content = segment.inner_html.gsub('<p></p>', '').gsub(/<!--.*?-->/, '').strip - - #scripture_text should be text - {:title => title, - :content => content, - :text => text } + {:title => title, :content => content, :text => text } end end