rdf.rb in gutenberg_rdf-0.1.0

- old
+ new

@@ -3,19 +3,19 @@
 module GutenbergRdf
   class Rdf
     attr_reader :xml
 
     def initialize(xml)
-      @xml = xml.at_xpath('rdf:RDF')
+      @xml = xml.root
     end
 
     def id
-      xml.at_xpath('pgterms:ebook').attribute('about').content.match(/\Aebooks\/(.+)\z/)[1]
+      xml.elements['pgterms:ebook'].attributes['about'].match(/\Aebooks\/(.+)\z/)[1]
     end
 
     def type
-      xml.at_xpath('pgterms:ebook/dcterms:type/rdf:Description/rdf:value').text
+      xml.elements['pgterms:ebook/dcterms:type/rdf:Description/rdf:value'].text
     end
 
     def title
       titles.first
     end
@@ -28,45 +28,45 @@
       @authors ||= extract_authors
     end
 
     def subjects
       entries = Array.new
-      xml.xpath('pgterms:ebook//dcterms:subject').each do |entry|
-        next unless entry.at_xpath('rdf:Description/dcam:memberOf').attribute('resource').text.match(/LCSH\z/)
-        entry.xpath('rdf:Description//rdf:value').each do |value|
+      xml.elements.each('pgterms:ebook/dcterms:subject') do |entry|
+        next unless entry.elements['rdf:Description/dcam:memberOf'].attributes['resource'].match(/LCSH\z/)
+        entry.elements.each('rdf:Description//rdf:value') do |value|
           entries << value.text
         end
       end
       entries
     end
 
     def published
-      xml.at_xpath('pgterms:ebook/dcterms:issued').text
+      xml.elements['pgterms:ebook/dcterms:issued'].text
     end
 
     def publisher
-      xml.at_xpath('pgterms:ebook/dcterms:publisher').text
+      xml.elements['pgterms:ebook/dcterms:publisher'].text
     end
 
     def language
-      xml.at_xpath('pgterms:ebook/dcterms:language').text
+      xml.elements['pgterms:ebook/dcterms:language'].text
     end
 
     def rights
-      xml.at_xpath('pgterms:ebook/dcterms:rights').text
+      xml.elements['pgterms:ebook/dcterms:rights'].text
     end
 
     def covers
       official_cover_images.concat(other_cover_images).sort.uniq
     end
 
     def ebooks
       files = Array.new
-      xml.xpath('//pgterms:file').each do |file|
-        uri = file.attribute('about').content
-        datatypes = separate_mimetype_and_encoding(file.at_xpath('dcterms:format/rdf:Description/rdf:value').text)
-        modified = DateTime.parse(file.at_xpath('dcterms:modified').text + '-07:00')
+      xml.elements.each('pgterms:file') do |file|
+        uri = file.attributes['about']
+        datatypes = separate_mimetype_and_encoding(file.elements['dcterms:format/rdf:Description/rdf:value'].text)
+        modified = DateTime.parse(file.elements['dcterms:modified'].text + '-07:00')
         files << {uri: uri, mime_type: datatypes[:mimetype], encoding: datatypes[:encoding], modified: modified}
       end
       files
     end
 
@@ -75,39 +75,45 @@
     def titles
       @titles ||= split_title_and_subtitle
     end
 
     def split_title_and_subtitle
-      # Note this gsub is replacing UTF-8 hyphens with normal ASCII ones
-      t = xml.at_xpath('pgterms:ebook/dcterms:title').text.gsub(/—/, '-')
+      # NOTE: this gsub is replacing UTF-8 hyphens with normal ASCII ones
+      t = xml.elements['pgterms:ebook/dcterms:title'].text.gsub(/—/, '-')
 
       title_array = t.split(/\n/)
       title_array = title_array.first.split(/:/) if title_array.count == 1
       title_array = title_array.first.split(/;/) if title_array.count == 1
 
       title_array.each(&:strip!)
     end
 
     def extract_authors
       entries = Array.new
-      xml.xpath('//pgterms:agent').each do |agent|
-        entries << Agent.new(agent)
+      xml.elements.each('pgterms:agent') do |agent|
+        entries << Agent.new(agent.root)
       end
       entries
     end
 
     def official_cover_images
       entries = Array.new
-      xml.xpath('//pgterms:file').each do |file|
-        url = file.attribute('about').content
-        entries << url if file.xpath('dcterms:format/rdf:Description//rdf:value').detect { |v| v.text.match(/image/) }
+      xml.elements.each('pgterms:file') do |file|
+        entries << file.attributes['about'] if file_is_image?(file)
       end
       entries
     end
 
+    def file_is_image?(node)
+      node.elements.each('dcterms:format/rdf:Description/rdf:value') do |value|
+        return true if value.text.match(/image/)
+      end
+      false
+    end
+
     def other_cover_images
       entries = Array.new
-      xml.xpath('pgterms:ebook//pgterms:marc901').each do |node|
+      xml.elements.each('pgterms:ebook/pgterms:marc901') do |node|
         cover = node.text
         cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
         entries << cover
       end
       entries