Sha256: 505ef0d59516d3eb55a3d569f2cba8e302b8f022f68333b0de407564d02b79ef
Contents?: true
Size: 1.39 KB
Versions: 1
Compression:
Stored size: 1.39 KB
Contents
namespace :transcribable do
  # Pulls the document list of one DocumentCloud project and makes sure a
  # transcribable record exists for every publicly readable document in it.
  #
  # Reads config/documentcloud.yml, using the keys:
  #   email, password - credentials sent as HTTP basic auth to projects.json
  #   project         - the project to harvest; either the bare numeric id
  #                     (19735) or an id-prefixed slug ("19735-my-project")
  #
  # Raises if the configured project is not among the account's projects.
  desc "Harvest documents to transcribe from DocumentCloud"
  task :harvest => :environment do
    require 'rest-client'

    klass = Kernel.const_get(Transcribable.table.classify)
    dc = YAML.load(File.read(Rails.root.join('config', 'documentcloud.yml')))

    projects = JSON.parse(
      RestClient.get("https://#{CGI::escape(dc['email'])}:#{CGI::escape(dc['password'])}@www.documentcloud.org/api/projects.json")
    )['projects']

    # YAML parses a bare `project: 19735` as an Integer, which has no #scan,
    # while `project: 19735-slug` arrives as a String. Going through to_s
    # accepts both; \A anchors at the start of the string, not of any line.
    project_id = dc['project'].to_s[/\A\d+/].to_i
    dc_project = projects.find { |project| project['id'] == project_id }

    if dc_project.nil?
      raise "DocumentCloud project #{dc['project'].inspect} was not found for #{dc['email']}"
    end

    dc_project['document_ids'].each do |doc_id|
      begin
        dc_doc = JSON.parse(RestClient.get("https://www.documentcloud.org/api/documents/#{doc_id}.json"))['document']
      rescue RestClient::ResourceNotFound
        # The unauthenticated documents endpoint 404s for non-public
        # documents; skip them.
        next
      end

      obj = klass.find_or_initialize_by(url: "https://www.documentcloud.org/documents/#{dc_doc['id']}")
      # Don't plow over already-verified docs when the task is re-run.
      obj.verified = false if obj.new_record?

      if obj.save
        puts "== added #{obj.url}"
      else
        # Report the failure instead of claiming the record was added.
        warn "== failed to save #{obj.url}: #{obj.errors.full_messages.join(', ')}"
      end
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
transcribable-0.0.6 | lib/tasks/harvester.rake |