lib/calais/response.rb in calais-0.0.3 vs lib/calais/response.rb in calais-0.0.5

- old
+ new

@@ -5,98 +5,74 @@ def initialize(raw, error=nil) @error = error @names = [] @relationships = [] - parse_rdf(raw) + parse_raw(raw) return if @error - - h_doc = Hpricot.XML(@rdf) - document_node = h_doc.root.search("//rdf:Description//c:document//..").remove.first - signature_node = h_doc.root.search("//rdf:Description//c:signature//..").remove.first - language_node = h_doc.root.search("//rdf:Description//c:lang//..").remove.first - h_doc = parse_names(h_doc) - h_doc = parse_relationships(h_doc) + + parse_names + parse_relationships end Name::TYPES.each_pair do |method_name, type| define_method method_name.to_sym do @names.map {|name| name if name.type == type }.compact end end private - def parse_rdf(raw) - @rdf = CGI::unescapeHTML Hpricot.XML(raw).at("/string").inner_html - @hpricot = Hpricot.XML(@rdf) - @error = Hpricot.XML(response).at("/Error/Exception").inner_html rescue @error + def parse_raw(raw) + @libxml = XML::Parser.string(XML::Parser.string(raw).parse.root.child.content).parse + @rdf = @libxml.to_s + @error = @libxml.find("/Error/Exception").first.content rescue @error end - def parse_names(doc) - name_elements = doc.root.search("//rdf:Description//c:name//..") - @names = name_elements.map do |ele| - name = ele.at("c:name").inner_html - type = ele.at("rdf:type").attributes["rdf:resource"].split("/").last - hash = ele.attributes["rdf:about"].split("/").last + def parse_names + @names = @libxml.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '/em/e/')]/..").map do |n| + name = n.find_first("c:name").content + type = n.find_first("rdf:type").properties.to_a.assoc("resource").last.split('/').last + hash = n.properties.to_a.assoc("about").last.split("/").last - detection_nodes = doc.root.search("//rdf:Description//c:subject//..").collect! do |ele| - ele unless ele.at("c:subject").attributes["rdf:resource"].match(hash).nil? - end.compact - - locations = detection_nodes.map do |ele| - start = ele.at("c:offset").inner_html.to_i - Range.new(start, start+ele.at("c:length").inner_html.to_i) + locations = @libxml.root.find("rdf:Description/c:subject[contains(@rdf:resource, '#{hash}')]/..").map do |n2| + start = n2.find_first("c:offset").content.to_i + Range.new(start, start+n2.find_first("c:length").content.to_i) end - detection_nodes.remove - Name.new( :name => name, :hash => hash, :type => type, :locations => locations ) end - name_elements.remove - - doc end - def parse_relationships(doc) - relationship_elements = doc.root.search("rdf:Description") - @relationships = relationship_elements.map do |ele| - next if ele.at("c:docId") - - hash = ele.attributes["rdf:about"].split("/").last - type = ele.at("rdf:type").attributes["rdf:resource"].split("/").last + def parse_relationships + @libxml.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '/em/r')]/..").each do |n| + hash = n.properties.to_a.assoc("about").last.split("/").last + type = n.find_first("rdf:type").properties.to_a.assoc("resource").last.split('/').last + metadata = {} - ele.children.each do |child| - next if child.comment? || child.name == "rdf:type" - - value = if child.attributes["rdf:resource"] - Name.find_in_names(child.attributes["rdf:resource"].split("/").last, @names) rescue nil - else - child.inner_html.strip - end - metadata[child.name.split(":").last] = value + + n.to_a.each do |n2| + next if n2.name == "type" or n2.comment? + resource = n2.properties.to_a.assoc("resource") + metadata[n2.name] = resource ? Name.find_in_names(resource.last.split("/").last, @names) : n2.content.strip end - locations = doc.root.search("//rdf:Description//c:docId//..").collect! do |ele| - ele unless ele.at("c:subject").attributes["rdf:resource"].match(hash).nil? - end.compact.map do |ele| - start = ele.at("c:offset").inner_html.to_i - Range.new(start, start+ele.at("c:length").inner_html.to_i) + locations = @libxml.root.find("rdf:Description/c:subject[contains(@rdf:resource, '#{hash}')]/..").map do |n2| + start = n2.find_first("c:offset").content.to_i + Range.new(start, start+n2.find_first("c:length").content.to_i) end - Relationship.new( + + @relationships << Relationship.new( :type => type, :hash => hash, :metadata => metadata, :locations => locations ) - end.compact - relationship_elements.remove - - doc + end end end end \ No newline at end of file