lib/gutenberg_rdf/rdf.rb in gutenberg_rdf-0.3.1 vs lib/gutenberg_rdf/rdf.rb in gutenberg_rdf-0.4.0

- old
+ new

@@ -46,25 +46,26 @@ def publisher xml.elements['pgterms:ebook/dcterms:publisher'].text end def language - xml.elements['pgterms:ebook/dcterms:language'].text + xml.elements['pgterms:ebook/dcterms:language/rdf:Description/rdf:value'].text end def rights xml.elements['pgterms:ebook/dcterms:rights'].text end def covers - official_cover_images.concat(other_cover_images).sort.uniq + official_cover_images.concat(other_cover_images).uniq end def ebooks files = Array.new - xml.elements.each('pgterms:file') do |file| - files << Media.new(file) + xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format| + file = format.elements['pgterms:file'] + files << Media.new(file) if file.elements['dcterms:format/rdf:Description/rdf:value'].text.match(/\Atext|\Aapplication/) end files end private @@ -83,41 +84,32 @@ title_array = title_array.first.split(/, or,/) if title_array.count == 1 title_array.map(&:strip) end - def roles - @roles ||= extract_roles - end - - def extract_roles - entries = Hash.new - xml.elements.each('pgterms:ebook/dcterms:creator') do |entry| - entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = 'aut' + def extract_authors + agents = Array.new + xml.elements.each('pgterms:ebook/dcterms:creator') do |contributor| + agent = Agent.new(contributor.elements['pgterms:agent']) + agent.role = 'aut' + agents << agent end - xml.elements.each('pgterms:ebook/marcrel:*') do |entry| - entries["#{entry.attributes['rdf:resource'].sub('2009/agents/', '')}"] = entry.name + xml.elements.each('pgterms:ebook/marcrel:*') do |contributor| + agent = Agent.new(contributor.elements['pgterms:agent']) + agent.role = contributor.name + agents << agent end - entries + agents end - def extract_authors - entries = Array.new - xml.elements.each('pgterms:agent') do |agent| - entry = Agent.new(agent) - entry.assign_role(roles) - entries << entry - end - entries - end - def official_cover_images entries = Array.new - xml.elements.each('pgterms:file') do |file| + xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format| + file = format.elements['pgterms:file'] entries << file.attributes['about'] if file_is_image?(file) end - entries + entries.sort end def file_is_image?(node) node.elements.each('dcterms:format/rdf:Description/rdf:value') do |value| return true if value.text.match(/image/) @@ -130,10 +122,10 @@ xml.elements.each('pgterms:ebook/pgterms:marc901') do |node| cover = node.text cover.sub!(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org') entries << cover end - entries + entries.sort end end end