require 'nokogiri'
require 'date'
require 'active_support/core_ext/hash/conversions'
# Object#try is used throughout this file; require it explicitly rather than
# relying on another ActiveSupport file to pull it in.
require 'active_support/core_ext/object/try'

# Read-only accessor over a RIXML research document.
#
# The document is converted once into nested Hashes/Arrays with
# Hash.from_xml; every public reader then looks values up by key path under
# 'Research' => 'Product'. A child that occurs once is a Hash, a child that
# repeats is an Array of Hashes, and an absent child is nil -- the readers
# below normalise these three shapes with RIXML.wrap.
class RIXML
  # @param document [#to_s] anything whose #to_s yields the XML text
  #   (RIXML.parse passes a Nokogiri root node).
  def initialize(document)
    @attrs = Hash.from_xml(document.to_s)
  end

  # @return [String, nil] the 'productID' value of the Product node.
  def product_id
    deep_value('Research', 'Product', 'productID')
  end

  # @return [Symbol] the down-cased 'statusType' as a Symbol, or :published
  #   when the value is absent.
  def status
    status_type = deep_value('Research', 'Product', 'StatusInfo', 'statusType')
    status_type.try(:downcase).try(:to_sym) || :published
  end

  # Parses 'statusDateTime'; when it is absent the current time is used.
  #
  # The default DateTime.strptime format ('%FT%T%z') is tried first, exactly
  # as before. Values it rejects (for example fractional seconds or a missing
  # UTC offset) are retried with DateTime.iso8601 instead of failing outright.
  #
  # @return [DateTime]
  # @raise [ArgumentError] when the value matches neither format.
  def publication_date
    time_str = deep_value('Research', 'Product', 'StatusInfo', 'statusDateTime') ||
               DateTime.now.to_s

    begin
      DateTime.strptime(time_str)
    rescue ArgumentError
      DateTime.iso8601(time_str)
    end
  end

  # Lists the people in the PersonGroup of the source organization. When
  # several organizations are present, the one whose 'primaryIndicator' is
  # 'Yes' is used, falling back to the first one.
  #
  # Group members without a 'Person' entry are skipped.
  #
  # @return [Array<Hash>] hashes with the keys :name, :first_name,
  #   :middle_name, :last_name, :job_title and :email.
  def authors
    org = deep_value('Research', 'Product', 'Source', 'Organization') || {}
    if org.is_a?(Array)
      org = org.find { |candidate| candidate['primaryIndicator'] == 'Yes' } || org.first
    end

    members = RIXML.wrap(RIXML.deep_value(org, 'PersonGroup', 'PersonGroupMember'))
    people = members.map { |member| member.try(:[], 'Person') }.compact

    people.map do |person|
      {
        name: person['DisplayName'],
        first_name: person['GivenName'],
        middle_name: person['MiddleName'],
        last_name: person['FamilyName'],
        job_title: person['JobTitle'],
        email: contact_email(person)
      }
    end
  end

  # Title, abstract and resource details of the product's Content node.
  # A missing Content node yields nil values and zero pages rather than an
  # error. When several 'Resource' entries are present the first one is used.
  #
  # @return [Hash] with the keys :title, :abstract, :file_name and :pages
  #   (:pages is the resource 'Length' converted with #to_i, so 0 if absent).
  def report_info
    content = deep_value('Research', 'Product', 'Content') || {}
    resource = RIXML.wrap(content['Resource']).first

    {
      title: content['Title'],
      abstract: content['Abstract'],
      file_name: resource.try(:[], 'Name'),
      pages: resource.try(:[], 'Length').to_i
    }
  end

  # Companies and sectors referenced by the product's Context node.
  # A missing Context node yields two empty lists.
  #
  # @return [Hash] { companies: Array<Hash>, sectors: Array<Hash> }
  def context
    context_attrs = deep_value('Research', 'Product', 'Context') || {}

    {
      companies: RIXML.extract_companies_from_context(context_attrs),
      sectors: RIXML.extract_sectors_from_context(context_attrs)
    }
  end

  # Builds a RIXML from XML text.
  #
  # @param data [String] XML source.
  # @return [RIXML]
  def self.parse(data)
    RIXML.new(Nokogiri::XML(data).root)
  end

  # Builds a RIXML from the contents of a file.
  #
  # @param filename [String] path of the XML file.
  # @return [RIXML]
  def self.parse_from_file(filename)
    parse(read_file(filename))
  end

  # --- Class-level helpers -------------------------------------------------
  # These were originally written below a bare `private`, which does not
  # apply to `def self.` methods, so they have always been callable from
  # outside. They stay public because the instance methods above call them
  # with an explicit `RIXML.` receiver.

  # Follows +keys+ through nested containers, giving nil as soon as a level
  # is nil or does not respond to #[].
  #
  # @param attrs [Hash, nil] starting container.
  # @param keys [Array<String>] key path.
  # @return [Object, nil]
  def self.deep_value(attrs, *keys)
    keys.reduce(attrs) { |value, key| value.try(:[], key) }
  end

  # Normalises an "absent / single / repeated" value to an Array:
  # nil becomes [], an Array is returned as is, anything else is wrapped.
  # (Kernel#Array is unsuitable: it would turn a Hash into key/value pairs.)
  #
  # @param value [Object, nil]
  # @return [Array]
  def self.wrap(value)
    return [] if value.nil?

    value.is_a?(Array) ? value : [value]
  end

  # @param filename [String] path of the file to read.
  # @return [String] the whole file content.
  def self.read_file(filename)
    File.read(filename)
  end

  # Collects one { isin: ... } entry per issuer whose 'issuerType' is
  # 'Corporate' and that carries a SecurityID with 'idType' == 'ISIN'.
  # Issuers with no securities, or with no ISIN, contribute nothing. When an
  # issuer has several securities, the first ISIN found is used.
  #
  # @param context [Hash, nil] the Context node.
  # @return [Array<Hash>]
  def self.extract_companies_from_context(context)
    issuers = wrap(deep_value(context, 'IssuerDetails', 'Issuer'))
    corporates = issuers.select { |issuer| issuer['issuerType'] == 'Corporate' }

    corporates.each_with_object([]) do |company, companies|
      securities = wrap(deep_value(company, 'SecurityDetails', 'Security'))
      security_ids = securities.flat_map { |security| wrap(security.try(:[], 'SecurityID')) }
      isin = security_ids.find { |id| id.try(:[], 'idType') == 'ISIN' }
      companies << { isin: isin['idValue'] } if isin
    end
  end

  # Collects the SectorIndustry classifications whose 'classificationType'
  # is 'GICS'.
  #
  # @param context [Hash, nil] the Context node.
  # @return [Array<Hash>] hashes with :code (the 'code' value via #to_i) and
  #   :focus (true when 'focusLevel' is "yes", case-insensitively).
  def self.extract_sectors_from_context(context)
    classifications = wrap(deep_value(context, 'ProductClassifications', 'SectorIndustry'))
    gics = classifications.select { |sector| sector['classificationType'] == 'GICS' }

    gics.map do |sector|
      {
        code: sector['code'].to_i,
        focus: sector['focusLevel'].try(:downcase) == 'yes'
      }
    end
  end

  private

  # Looks up a key path starting from the parsed document.
  def deep_value(*keys)
    RIXML.deep_value(@attrs, *keys)
  end

  # Down-cased 'Email' of the first ContactInfo entry that has one; handles
  # a single ContactInfo, several of them, or none (nil).
  def contact_email(person)
    emails = RIXML.wrap(person['ContactInfo']).map { |info| info.try(:[], 'Email') }
    emails.compact.first.try(:downcase)
  end
end