lib/jkl/calais_client.rb in jakal-0.1.0 vs lib/jkl/calais_client.rb in jakal-0.1.1
- old
+ new
@@ -1,72 +1,81 @@
require "json"
-require "rest_client"
require "calais"
-module Jkl
+require "rest_client"
- class << self
+module Jkl
+ module Extraction
+ class << self
- #using the calais gem
- def calais_response(key, pages)
- Calais.process_document(
- :content => pages,
- :content_type => :text,
- :license_id => key
- )
- end
+ #using the calais gem
+ def calais_response(key, pages)
+ Calais.process_document(
+ :content => pages,
+ :content_type => :text,
+ :license_id => key
+ )
+ end
- def get_from_calais(content)
- begin
- license_id = YAML::load_file('config/keys.yml')['calais']
- c_uri = URI.parse('http://api.opencalais.com/enlighten/rest/')
- post_args = { 'licenseID' => license_id, 'content' => content,
- 'paramsXML' => paramsXML('application/json') }
- post_to(c_uri, post_args)
- rescue Exception => e
- puts e
+ def tags(key, text)
+ nested_list = {}
+ entities(key,text).each do |a|
+ nested_list = nested_list.merge!(a){ |key,v1,v2| v1+v2 }
+ end
+ nested_list
end
- end
+
+ def entities(key,text)
+ calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
+ end
+
+ #not using calais gem, experimenting with json response
+ def get_from_calais(key, content)
+ post_args = {
+ "licenseID" => key,
+ "content" => content,
+ "paramsXML" => paramsXML("application/json")
+ }
+ Jkl::post_to(URI.parse("http://api.opencalais.com/enlighten/rest/"), post_args)
+ end
- def get_tag_from_json(response)
- result = JSON.parse response
- result.delete_if {|key, value| key == "doc" } # ditching the doc
- cleaned_result = []
- result.each do |key,tag|
- tag = Jkl::clean_unwanted_items_from_hash tag
- cleaned_result << tag
- yield tag if block_given?
+ def get_tag_from_json(response)
+ result = JSON.parse response
+ result.delete_if {|key, value| key == "doc" } # ditching the doc
+ cleaned_result = []
+ result.each do |key,tag|
+ tag = Jkl::clean_unwanted_items_from_hash tag
+ cleaned_result << tag
+ yield tag if block_given?
+ end
+ cleaned_result
end
- cleaned_result
- end
-
- #jkl doesn't work with these aspects of the calais response, also removing blanks
- def clean_unwanted_items_from_hash h
- h.delete_if {|k, v| k == "relevance" }
- h.delete_if {|k, v| k == "instances" }
- h.delete_if {|k, v| v == "N/A"}
- h.delete_if {|k, v| v == []}
- h.delete_if {|k, v| v == ""}
- h.delete_if {|k, v| k == "_typeGroup"}
- h
- end
+ def clean_unwanted_items_from_hash h
+ h.delete_if {|k, v| k == "relevance" }
+ h.delete_if {|k, v| k == "instances" }
+ h.delete_if {|k, v| v == "N/A"}
+ h.delete_if {|k, v| v == []}
+ h.delete_if {|k, v| v == ""}
+ h.delete_if {|k, v| k == "_typeGroup"}
+ h
+ end
- private
+ private
- def paramsXML(format)
- <<-paramsXML;
- <c:params xmlns:c="http://s.opencalais.com/1/pred/"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
- <c:processingDirectives
- c:contentType="text/txt"
- c:outputFormat="#{format}">
- </c:processingDirectives>
- <c:userDirectives />
- <c:externalMetadata />
- </c:params>
- paramsXML
- end
+ def paramsXML(format)
+ <<-paramsXML;
+ <c:params xmlns:c="http://s.opencalais.com/1/pred/"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ <c:processingDirectives
+ c:contentType="text/txt"
+ c:outputFormat="#{format}">
+ </c:processingDirectives>
+ <c:userDirectives />
+ <c:externalMetadata />
+ </c:params>
+ paramsXML
+ end
+ end
end
-
end