lib/jkl/calais_client.rb in jakal-0.1.1 vs lib/jkl/calais_client.rb in jakal-0.1.2
- old
+ new
@@ -1,80 +1,28 @@
-require "json"
require "calais"
-require "rest_client"
-
module Jkl
module Extraction
class << self
-
- #using the calais gem
- def calais_response(key, pages)
+
+ def calais_response(key, text)
Calais.process_document(
- :content => pages,
+ :content => text,
:content_type => :text,
:license_id => key
)
end
-
+
# Groups the entities extracted from +text+ by entity type.
#
# +entities+ returns one single-pair hash per entity ({ type => [name] });
# these are folded into a single hash, concatenating the name arrays
# whenever the same type occurs more than once. Duplicate names are kept.
#
# key::  license key, passed through unchanged to +entities+
# text:: the text to extract entities from
#
# Returns a Hash mapping each entity type to an Array of names
# (an empty Hash when no entities are found).
def tags(key, text)
  entities(key, text).inject({}) do |grouped, entity|
    # The conflict block's first parameter is intentionally not named +key+:
    # reusing the method argument's name shadows it (and on Ruby 1.8
    # overwrites it). merge! returns the receiver, so it feeds the next
    # iteration of the fold directly.
    grouped.merge!(entity) { |_type, names, more_names| names + more_names }
  end
end
-
+
def entities(key,text)
calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
- end
-
- #not using calais gem, experimenting with json response
- def get_from_calais(key, content)
- post_args = {
- "licenseID" => key,
- "content" => content,
- "paramsXML" => paramsXML("application/json")
- }
- Jkl::post_to(URI.parse("http://api.opencalais.com/enlighten/rest/"), post_args)
- end
-
- def get_tag_from_json(response)
- result = JSON.parse response
- result.delete_if {|key, value| key == "doc" } # ditching the doc
- cleaned_result = []
- result.each do |key,tag|
- tag = Jkl::clean_unwanted_items_from_hash tag
- cleaned_result << tag
- yield tag if block_given?
- end
- cleaned_result
- end
-
- def clean_unwanted_items_from_hash h
- h.delete_if {|k, v| k == "relevance" }
- h.delete_if {|k, v| k == "instances" }
- h.delete_if {|k, v| v == "N/A"}
- h.delete_if {|k, v| v == []}
- h.delete_if {|k, v| v == ""}
- h.delete_if {|k, v| k == "_typeGroup"}
- h
- end
-
- private
-
- def paramsXML(format)
- <<-paramsXML;
- <c:params xmlns:c="http://s.opencalais.com/1/pred/"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
- <c:processingDirectives
- c:contentType="text/txt"
- c:outputFormat="#{format}">
- </c:processingDirectives>
- <c:userDirectives />
- <c:externalMetadata />
- </c:params>
- paramsXML
end
end
end
end