lib/bible_gateway.rb in bible_gateway-0.0.12 vs lib/bible_gateway.rb in bible_gateway-0.1.0
- old
+ new
@@ -4,11 +4,12 @@
require 'uri'
class BibleGatewayError < StandardError; end
class BibleGateway
- GATEWAY_URL = "http://classic.biblegateway.com"
+ GATEWAY_URL = "http://biblegateway.com" # base URL that passage_url builds on
+ CLASSIC_GATEWAY_URL = "http://classic.biblegateway.com" # base URL that old_passage_url builds on
VERSIONS = {
:american_standard_version => "ASV",
:amplified_bible => "AMP",
:common_english_bible => "CEB",
@@ -60,48 +61,81 @@
end
def lookup(passage) # fetches the page for `passage` and returns the hash built by scrape_passage
response = Typhoeus.get(passage_url(passage), followlocation: true) # GET, following redirects
doc = Nokogiri::HTML(response.body) # NOTE(review): body is parsed without checking the response status
- scrape_passage(doc)
+ scrape_passage(doc, @version) # NOTE(review): passes @version while passage_url reads `version` - presumably the same value; confirm
end
+ def old_lookup(passage) # same flow as lookup, but against CLASSIC_GATEWAY_URL with the previous scraper
+ response = Typhoeus.get(old_passage_url(passage), followlocation: true) # GET, following redirects
+ doc = Nokogiri::HTML(response.body) # NOTE(review): body is parsed without checking the response status
+ old_way_scrape_passage(doc) # returns a hash with :title, :content and :text
+ end
+
private
def passage_url(passage) # builds the GATEWAY_URL passage-search URL; version code is looked up in VERSIONS
- URI.escape "#{GATEWAY_URL}/passage/?search=#{passage}&version=#{VERSIONS[version]}"
+ "#{GATEWAY_URL}/passage/?search=#{URI.encode_www_form_component(passage)}&version=#{URI.encode_www_form_component(VERSIONS[version])}" # each query value is form-encoded on its own
end
- def scrape_passage(doc)
+ def old_passage_url(passage) # same URL shape as passage_url, but rooted at CLASSIC_GATEWAY_URL
+ "#{CLASSIC_GATEWAY_URL}/passage/?search=#{URI.encode_www_form_component(passage)}&version=#{URI.encode_www_form_component(VERSIONS[version])}" # each query value is form-encoded on its own
+ end
+
+ def scrape_passage(doc, version) # returns {:title, :content, :text} extracted from div.passage-text; `version` is a VERSIONS key
+ container = doc.css('div.passage-text') # only used for the title lookup on the next line
+ title = container.css("div.version-#{VERSIONS[version]}.result-text-style-normal.text-html h1 span")[0].content.strip if container.css("div.version-#{VERSIONS[version]}.result-text-style-normal.text-html h1")[0] != nil # title stays nil when no h1 matches; NOTE(review): guard tests the h1 but dereferences "h1 span"[0], which is nil if the h1 has no span
+ segment = doc.at('div.passage-text') # NOTE(review): `at` yields nil when nothing matches, so the .search calls below would raise NoMethodError - confirm intended
+
+ segment.search('sup.crossreference').remove # remove cross reference links
+ segment.search('sup.footnote').remove # remove footnote links
+ segment.search("div.crossrefs").remove # remove cross references
+ segment.search("div.footnotes").remove # remove footnotes
+
+ text = "" # plain-text accumulator; must be filled before the spans are unwrapped below
+ segment.search("span.text").each do |span|
+ text += span.inner_text
+ end
+
+ segment.search("span.text").each do |span| # unwrap each span.text, keeping only its inner HTML
+ html_content = span.inner_html
+ span.swap html_content
+ end
+
+ segment.search('sup.versenum').each do |sup| # rebuild each verse number as a plain <sup> holding only its content
+ html_content = sup.content
+ sup.swap "<sup>#{html_content}</sup>"
+ end
+
+ content = segment.inner_html.gsub('<p></p>', '').gsub(/<!--.*?-->/, '').strip # drops empty paragraphs and HTML comments; regex has no /m flag, so comments spanning several lines are left in
+ {:title => title, :content => content, :text => text }
+ end
+
+ def old_way_scrape_passage(doc) # returns {:title, :content, :text} using the div.container / div.passage-wrap selectors
container = doc.css('div.container') # only used for the title lookup on the next line
title = container.css('div.passage-details h1')[0].content.strip # NOTE(review): no nil guard - raises NoMethodError when no h1 matches
segment = doc.at('div.passage-wrap') # NOTE(review): `at` yields nil when nothing matches, so the .search calls below would raise NoMethodError - confirm intended
segment.search('sup.crossreference').remove # remove cross reference links
segment.search('sup.footnote').remove # remove footnote links
segment.search("div.crossrefs").remove # remove cross references
segment.search("div.footnotes").remove # remove footnotes
- #scripture_text should be text
+ # extract text only from scripture
text = "" # must be filled before the spans are unwrapped below
segment.search("span.text").each do |span|
text += span.inner_text
end
- # text should be html so that text can be used above.
segment.search("span.text").each do |span| # unwrap each span.text, keeping only its inner HTML
html_content = span.inner_html
span.swap html_content
end
- # text should be html so that text can be used above.
segment.search('sup.versenum').each do |sup| # rebuild each verse number as a plain <sup> holding only its content
html_content = sup.content
sup.swap "<sup>#{html_content}</sup>"
end
content = segment.inner_html.gsub('<p></p>', '').gsub(/<!--.*?-->/, '').strip # drops empty paragraphs and HTML comments; regex has no /m flag, so comments spanning several lines are left in
-
- #scripture_text should be text
- {:title => title,
- :content => content,
- :text => text }
+ {:title => title, :content => content, :text => text }
end
end