lib/calais/response.rb in calais-0.0.7 vs lib/calais/response.rb in calais-0.0.8
- old
+ new
@@ -45,18 +45,18 @@
end
class Instance
attr_accessor :prefix, :exact, :suffix, :offset, :length
- # Makes a new Instance object from an appropriate LibXML::XML::Node.
+ # Makes a new Instance object from an appropriate Nokogiri::XML::Node.
def self.from_node(node)
instance = self.new
- instance.prefix = node.find_first("c:prefix").content
- instance.exact = node.find_first("c:exact").content
- instance.suffix = node.find_first("c:suffix").content
- instance.offset = node.find_first("c:offset").content.to_i
- instance.length = node.find_first("c:length").content.to_i
+ instance.prefix = node.xpath("c:prefix[1]").first.content
+ instance.exact = node.xpath("c:exact[1]").first.content
+ instance.suffix = node.xpath("c:suffix[1]").first.content
+ instance.offset = node.xpath("c:offset[1]").first.content.to_i
+ instance.length = node.xpath("c:length[1]").first.content.to_i
instance
end
end
@@ -74,115 +74,115 @@
end
end
private
def extract_data
- doc = XML::Parser.string(@raw_response).parse
+ doc = Nokogiri::XML(@raw_response)
- if doc.root.find("/Error").first
- raise Calais::Error, doc.root.find("/Error/Exception").first.content
+ if doc.root.xpath("/Error[1]").first
+ raise Calais::Error, doc.root.xpath("/Error/Exception").first.content
end
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
@language = node['language']
@submission_date = DateTime.parse node['submissionDate']
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
@signature = attributes.delete('signature')
@submitter_code = attributes.delete('submitterCode')
- node.remove!
+ node.remove
end
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
@request_id = node['calaisRequestID']
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
@doc_title = attributes.delete('docTitle')
- @doc_date = Date.parse attributes.delete('docDate')
+ @doc_date = Date.parse(attributes.delete('docDate'))
- node.remove!
+ node.remove
end
- @categories = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
+ @categories = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
category = Category.new
- category.name = node.find_first("c:categoryName").content
- score = node.find_first("c:score")
+ category.name = node.xpath("c:categoryName[1]").first.content
+ score = node.xpath("c:score[1]").first
category.score = score.content.to_f unless score.nil?
- node.remove!
+ node.remove
category
end
- @relevances = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
- subject_hash = node.find_first("c:subject")[:resource].split('/')[-1]
- acc[subject_hash] = node.find_first("c:relevance").content.to_f
+ @relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
+ subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1]
+ acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f
- node.remove!
+ node.remove
acc
end
- @entities = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
+ @entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
extracted_hash = node['about'].split('/')[-1] rescue nil
entity = Entity.new
entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
entity.type = extract_type(node)
- entity.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
+ entity.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
entity.relevance = @relevances[extracted_hash]
entity.instances = extract_instances(doc, extracted_hash)
- node.remove!
+ node.remove
entity
end
- @relations = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
+ @relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
extracted_hash = node['about'].split('/')[-1] rescue nil
relation = Relation.new
relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
relation.type = extract_type(node)
- relation.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
+ relation.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
relation.instances = extract_instances(doc, extracted_hash)
- node.remove!
+ node.remove
relation
end
- @geographies = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
+ @geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
geography = Geography.new
geography.name = attributes.delete('name')
geography.calais_hash = attributes.delete('subject')
geography.attributes = attributes
- node.remove!
+ node.remove
geography
end
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove! }
- doc.root.find("./*").each { |node| node.remove! }
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove }
+ doc.root.xpath("./*").each { |node| node.remove }
return
end
def extract_instances(doc, hash)
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
- instance_node.find_first("c:subject")[:resource].split("/")[-1] == hash
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
+ instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash
end.map do |instance_node|
instance = Instance.from_node(instance_node)
- instance_node.remove!
+ instance_node.remove
instance
end
end
def extract_type(node)
- node.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
+ node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
rescue
nil
end
def extract_attributes(nodes)
\ No newline at end of file