lib/ecfs.rb in ecfs-0.5.1 vs lib/ecfs.rb in ecfs-0.6.0
- old
+ new
@@ -1,15 +1,211 @@
-require "ecfs/version"
-require "ecfs/query"
-require "ecfs/filings_query"
-require "ecfs/filing"
-require "ecfs/proceedings_query"
-require "ecfs/proceeding"
-require "ecfs/document"
-require "ecfs/error"
-require "ecfs/too_many_filings_error"
-require "ecfs/bulk_filings_query"
-require "ecfs/daily_releases_query"
-require "ecfs/solr_scrape_query"
+require 'ecfs/version'
+require 'nokogiri'
+require 'open-uri'
+require 'pry'
+require 'net/http'
+require 'uri'
+require 'unirest'
+require 'zip'
+require 'open_uri_redirections'
+require 'fileutils'
module ECFS
-end
\ No newline at end of file
module Util
  # Prefixes that mark a footnote as an "Id." citation: every signal below
  # followed by " Id.", plus a bare "Id.". Used with String#start_with?, so
  # matching is case-sensitive.
  SIGNALS = ([
    'E.g.', 'Accord', 'See', 'See also', 'Cf.',
    'Compare', 'Contra', 'But see', 'But cf.',
    'See generally'
  ].map { |s| "#{s} Id." } << 'Id.').freeze

  # Downloads the zipped document at +url+, unzips it and extracts the
  # footnotes found in word/footnotes.xml.
  #
  # @param url [String] http(s) location of the zipped document
  # @param id_tree [Boolean] when true, every footnote also gets +:id+
  #   (does its text start with one of SIGNALS?) and +:ids+ (the "Id."
  #   footnotes attached to it)
  # @return [Array<Hash>] one hash per footnote with +:index+ (the node's
  #   "id" attribute minus one) and +:text+ (stripped node text)
  # @raise [Errno::ENOENT] if word/footnotes.xml is not present after
  #   unzipping (including when the unzip command itself fails)
  def self.get_footnotes(url: nil, id_tree: false)
    # hacky 'temp' directory, relative to the current working directory
    rando = (rand * 1000000000000000000).to_i
    workdir = "tmp/#{rando}"
    path = "#{workdir}/document.doc.zip"
    FileUtils.mkdir_p workdir

    begin
      # URI#open (from open-uri) only ever performs a network fetch, unlike
      # Kernel#open, which would also run "|command" strings.
      File.open(path, 'wb') do |file|
        URI.parse(url).open(allow_redirections: :all) { |remote| file.write(remote.read) }
      end

      # Unzip only after the archive has been flushed and closed. Arguments
      # are passed as a list so no shell is involved; -q keeps stdout quiet
      # and -o avoids interactive overwrite prompts.
      system('unzip', '-q', '-o', path, '-d', workdir)

      xml = File.read("#{workdir}/word/footnotes.xml")
    ensure
      # Clean up even when the download, unzip or read fails.
      FileUtils.rm_rf(workdir)
    end

    doc = Nokogiri::XML(xml)

    # NOTE(review): the first three children of the root node are skipped;
    # presumably these are separator entries rather than real footnotes --
    # confirm against a sample footnotes.xml.
    footnotes = doc.children[0].children[3..-1]

    my_footnotes = (footnotes ? footnotes.to_ary : []).map do |fn|
      {
        index: fn.attributes['id'].value.to_i - 1,
        text: fn.text.strip
      }
    end

    # compute the tree of id. citations
    my_footnotes = build_id_tree(my_footnotes) if id_tree

    my_footnotes
  end

  # if a footnote is an id and has ids, we send its ids to its parent
  # these footnotes are reflected as parents, but are actually siblings
  # so we call these ptsbs (parents that should be siblings). <3 software.
  #
  # Repeats until no "Id." footnote that has a parent still owns ids. An
  # "Id." footnote with no preceding footnote keeps its ids. Kept public
  # because a bare `private` never applied to `def self.` methods, so
  # existing callers may rely on it.
  #
  # @param footnotes [Array<Hash>] hashes carrying +:index+, +:id+, +:ids+
  # @return [Array<Hash>] the same array, mutated in place
  def self.send_ids_to_parent(footnotes)
    by_index = index_footnotes(footnotes)

    loop do
      ptsbs_array = footnotes.select do |fn|
        fn[:id] && !fn[:ids].empty? && by_index.key?(fn[:index] - 1)
      end
      break if ptsbs_array.empty?

      ptsbs_array.each do |ptsbs|
        by_index[ptsbs[:index] - 1][:ids].concat(ptsbs[:ids])
        ptsbs[:ids] = []
      end
    end

    footnotes
  end

  # Flags every footnote whose text starts with one of SIGNALS, attaches it
  # to the footnote whose index is one lower, then flattens chains of
  # consecutive "Id." footnotes onto the first footnote of the chain.
  def self.build_id_tree(footnotes)
    footnotes.each do |fn|
      fn[:ids] = []
      fn[:id] = fn[:text].start_with?(*SIGNALS)
    end

    by_index = index_footnotes(footnotes)
    footnotes.each do |fn|
      next unless fn[:id]

      parent = by_index[fn[:index] - 1]
      # An "Id." with nothing before it has no parent to attach to.
      parent[:ids] << fn if parent
    end

    send_ids_to_parent(footnotes)
  end

  # Maps :index => footnote; on duplicate indexes the first footnote wins.
  def self.index_footnotes(footnotes)
    footnotes.each_with_object({}) { |fn, lookup| lookup[fn[:index]] ||= fn }
  end

  private_class_method :build_id_tree, :index_footnotes
end
+
module EDOCS
  # Endpoint the search form is posted to.
  # NOTE(review): the original code also built an unused URI with
  # "mode=advanced"; this "mode=advance" spelling is the one that was
  # actually requested and is kept as-is -- confirm which is intended.
  SEARCH_URL = 'https://apps.fcc.gov/edocs_public/Query.do?mode=advance&rpt=cond'

  # Prefix put in front of every relative link found in a result.
  BASE_URL = 'https://apps.fcc.gov/edocs_public/'

  # Posts an advanced search to EDOCS and scrapes the result tables.
  # Arguments left nil are not sent.
  #
  # @return [Array<Hash>] one hash per result with +:title+, +:released+,
  #   +:description+ (the latter two omitted when they cannot be parsed)
  #   and +:word+, +:pdf+, +:txt+ arrays of absolute document URLs.
  #   Empty when the page has no results table.
  def self.search(docket: nil, da: nil, fcc: nil, report: nil, file: nil, fcc_rcd_vol: nil, fcc_rcd_page: nil)
    params = query_params(
      docket: docket, da: da, fcc: fcc, report: report, file: file,
      fcc_rcd_vol: fcc_rcd_vol, fcc_rcd_page: fcc_rcd_page
    )

    response = Unirest.post SEARCH_URL, parameters: params
    doc = Nokogiri::HTML(response.raw_body)

    tables = doc.css('table.tableWithOutBorder').children.css('table.tableWithOutBorder')
    results_table = tables[2]
    # Without this guard a page lacking the third nested table raised
    # NoMethodError on nil.
    return [] unless results_table

    results_table.css('table.tableWithBorder').map { |result| parse_result(result) }
  end

  # Translates the keyword arguments into the form's field names and drops
  # the ones that were not given.
  def self.query_params(docket:, da:, fcc:, report:, file:, fcc_rcd_vol:, fcc_rcd_page:)
    {
      'fccNo' => fcc,
      'daNo' => da,
      'fileNo' => file,
      'docket' => docket,
      'reportNo' => report,
      'fccRecordVol' => fcc_rcd_vol,
      'fccRecordPage' => fcc_rcd_page
    }.reject { |_k, v| v.nil? }
  end

  # Turns one result table (a Nokogiri node) into a hash.
  def self.parse_result(result)
    # The first anchor of every result is skipped, as in the original code.
    links = result.search('a').drop(1).map { |link| "#{BASE_URL}#{link['href']}" }
    rows = result.search('tr')

    {
      title: rows[0].text.strip,
      released: rows[1].text.strip.split(': ')[1],
      description: rows[2].text.strip.split('Description: ')[1]
    }.merge(classify_links(links)).reject { |_k, v| v.nil? }
  end

  # Groups document URLs by file extension.
  def self.classify_links(links)
    {
      word: links.select { |link| link.end_with?('.doc', '.docx') },
      pdf: links.select { |link| link.end_with?('.pdf') },
      txt: links.select { |link| link.end_with?('.txt') }
    }
  end

  private_class_method :query_params, :parse_result, :classify_links
end
+
module Proceedings
  # Fetches the ECFS page for one proceeding and scrapes its label/value
  # pairs.
  #
  # @param docket [String] proceeding name placed in the "name" query field
  # @return [Hash{Symbol=>String}] keys are the labels with spaces and
  #   colons removed and lower-cased, values are the stripped span texts.
  #   Empty when the page has no table.dataTable element.
  def self.search(docket: nil)
    response = Unirest.get proceeding_url(docket)
    doc = Nokogiri::HTML(response.raw_body)

    table = doc.search('table.dataTable').first
    # Without this guard a page lacking the table raised NoMethodError on nil.
    return {} unless table

    table.search('div.wwgrp').each_with_object({}) do |row, proceeding|
      spans = row.search('span')
      label = spans[0]
      value = spans[1]
      # Rows without both a label span and a value span carry no data.
      next unless label && value

      proceeding[normalize_key(label.text)] = value.text.strip
    end
  end

  # Builds the request URL; the docket is form-encoded so characters such
  # as "&" or spaces cannot corrupt the query string.
  def self.proceeding_url(docket)
    "http://apps.fcc.gov/ecfs/proceeding/view?name=#{URI.encode_www_form_component(docket)}"
  end

  # "Date Created:" => :datecreated
  def self.normalize_key(label)
    label.strip.delete(' :').downcase.to_sym
  end

  private_class_method :proceeding_url, :normalize_key
end
+
module Filings
  # Column order of the search results table; the n-th <td> of a row is
  # stored under the n-th name.
  ATTRS = [
    :docket, :filer, :lawfirm, :received,
    :posted, :exparte, :type, :pages
  ].freeze

  # Collects the document links of one filing page.
  #
  # @param url [String] http(s) address of the filing page
  # @return [Array<String>] one "document/view?id=..." URL for every anchor
  #   under the element whose id is "documents.link"
  def self.get_document_links(url: nil)
    doc = fetch_html(url)
    xpath = "//*[@id=\"documents.link\"]"

    doc.xpath(xpath).search('a').map do |link|
      "http://apps.fcc.gov/ecfs/document/view?id=#{extract_id(link['href'])}"
    end
  end

  # Runs a filing search for a proceeding and scrapes the results table.
  #
  # @param docket [String] proceeding to search
  # @param size [Integer] value of the "size" query field
  # @param start [Integer] value of the "start" query field
  # @param order [String] value of the "dir" query field
  # @return [Array<Hash>] one hash per table row (the first row is skipped),
  #   keyed by ATTRS plus +:url+; +:received+ and +:posted+ are
  #   DateTime#to_s strings and +:pages+ is an Integer. Empty when the
  #   results table is not found.
  def self.search(docket: nil, size: 1000, start: 0, order: 'asc')
    doc = fetch_html(search_url(docket: docket, size: size, start: start, order: order))
    xpath = "//*[@id='yui-main']/div/div[4]"
    table = doc.xpath(xpath).children[1]
    # Without this guard a page lacking the table raised NoMethodError on nil.
    return [] unless table

    table.search('tr').drop(1).map { |row| parse_row(row) }
  end

  # Fetches +url+ and parses it as HTML. URI#open (from open-uri) only
  # performs a network fetch, unlike Kernel#open, which would also run
  # "|command" strings; the block form closes the response when done.
  def self.fetch_html(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Builds the search request URL; the docket is form-encoded so it cannot
  # corrupt the query string.
  def self.search_url(docket:, size:, start:, order:)
    proceeding = URI.encode_www_form_component(docket)
    "http://apps.fcc.gov/ecfs/comment_search_solr/doSearch?proceeding=#{proceeding}&dir=#{order}&start=#{start}&size=#{size}"
  end

  # Converts one <tr> node into a filing hash.
  def self.parse_row(row)
    row_hash = {}

    row.search('td').each_with_index do |col, i|
      attribute = ATTRS[i]
      row_hash[attribute] = col.text.strip

      # get the url
      next unless attribute == :filer

      anchor = col.search('a').first
      row_hash[:url] = "http://apps.fcc.gov/ecfs/comment/view?id=#{extract_id(anchor['href'])}" if anchor
    end

    cast_fields(row_hash)
  end

  # cast dates and int
  def self.cast_fields(row_hash)
    row_hash[:received] = DateTime.parse(row_hash[:received]).to_s
    row_hash[:posted] = DateTime.parse(row_hash[:posted]).to_s
    row_hash[:pages] = row_hash[:pages].to_i
    row_hash
  end

  # "view?id=123" => "123"; nil when the href has no "?id=" part.
  def self.extract_id(href)
    href.to_s.split('?id=')[1]
  end

  private_class_method :fetch_html, :search_url, :parse_row, :cast_fields, :extract_id
end
+end