# frozen_string_literal: true

require 'nokogiri'
require 'date'
require 'active_support/core_ext/hash/conversions'

# Read-only accessor over an XML document whose root element is +Research+.
#
# The markup is converted to nested Hashes with +Hash.from_xml+ and every
# reader looks values up under the +Research+ / +Product+ path. Readers are
# tolerant of missing optional sections: they return +nil+, an empty Array or
# a Hash of +nil+ values instead of raising.
class RIXML
  # Small helpers for walking the Hash tree produced by +Hash.from_xml+, where
  # a repeated element becomes an Array, a single element a Hash and a missing
  # element +nil+.
  module XmlNode
    module_function

    # @param value [Object, nil]
    # @return [Array] +[]+ for nil, +value+ itself when it is already an
    #   Array, otherwise a one-element Array holding +value+
    def list(value)
      return [] if value.nil?

      value.is_a?(Array) ? value : [value]
    end

    # @param node [Object, nil]
    # @param key [String]
    # @return [Object, nil] +node[key]+ when +node+ is a Hash, otherwise nil
    def child(node, key)
      node.is_a?(Hash) ? node[key] : nil
    end
  end
  private_constant :XmlNode

  # @param document [#to_s] object whose +to_s+ yields the XML markup
  def initialize(document)
    @attrs = Hash.from_xml(document.to_s)
  end

  # @return [String, nil] the +productID+ value of the product
  def product_id
    product_node('productID')
  end

  # @return [Symbol] the +statusType+ value lower-cased and symbolized, or
  #   +:published+ when the document carries none
  def status
    product_node('StatusInfo', 'statusType')&.downcase&.to_sym || :published
  end

  # @return [DateTime] +statusDateTime+ parsed with +DateTime.strptime+'s
  #   default format; the current time when the value is absent
  # @raise [ArgumentError] when the stored value does not match that format
  def publication_date
    raw = product_node('StatusInfo', 'statusDateTime') || DateTime.now.to_s
    DateTime.strptime(raw)
  end

  # Lists the people of the source organization. When several organizations
  # are present, the one flagged +primaryIndicator == 'Yes'+ is used, falling
  # back to the first one.
  #
  # @return [Array<Hash>] one Hash per person with the keys +:name+,
  #   +:first_name+, +:middle_name+, +:last_name+, +:job_title+ and +:email+;
  #   empty when no organization or no members are present. Members without a
  #   +Person+ element are skipped.
  def authors
    org = product_node('Source', 'Organization')
    if org.is_a?(Array)
      org = org.find { |candidate| candidate['primaryIndicator'] == 'Yes' } || org.first
    end
    # An absent organization (or an empty list of them) simply has no authors.
    return [] unless org.is_a?(Hash)

    members = XmlNode.list(org.dig('PersonGroup', 'PersonGroupMember'))
    people = members.map { |member| XmlNode.child(member, 'Person') }
    people.select { |person| person.is_a?(Hash) }
          .map { |person| author_attributes(person) }
  end

  # @return [Hash] +:title+, +:abstract+, +:file_name+ and +:pages+ taken from
  #   the +Content+ element; +:pages+ is +0+ when no length is given and the
  #   other values are +nil+ when their element is missing
  def report_info
    content = product_node('Content')
    content = {} unless content.is_a?(Hash)
    resource = content['Resource']

    {
      title: content['Title'],
      abstract: content['Abstract'],
      file_name: resource&.dig('Name'),
      pages: resource&.dig('Length').to_i
    }
  end

  # @return [Hash] +:companies+ and +:sectors+ extracted from the +Context+
  #   element; both are empty Arrays when the element is missing
  def context
    node = product_node('Context')

    {
      companies: RIXML.extract_companies_from_context(node),
      sectors: RIXML.extract_sectors_from_context(node)
    }
  end

  # Builds an instance from raw XML markup.
  #
  # @param data [String, IO] anything accepted by +Nokogiri::XML+
  # @return [RIXML]
  def self.parse(data)
    RIXML.new(Nokogiri::XML(data).root)
  end

  # Builds an instance from the XML file at +filename+.
  #
  # @param filename [String] path of the file to read
  # @return [RIXML]
  def self.parse_from_file(filename)
    parse(read_file(filename))
  end

  # NOTE: the class-level helpers below were originally written after a bare
  # `private`, which has no effect on `def self.` methods. They have therefore
  # always been publicly callable and are deliberately left public so existing
  # callers keep working.

  # @param filename [String] path of the file to read
  # @return [String] the complete file content
  def self.read_file(filename)
    File.read(filename)
  end

  # Collects the ISIN of every issuer whose +issuerType+ is +'Corporate'+.
  # At most one ISIN (the first one found) is reported per issuer; issuers
  # without an ISIN value are left out.
  #
  # @param context [Hash, nil] the parsed +Context+ element
  # @return [Array<Hash>] Hashes of the form +{ isin: String }+
  def self.extract_companies_from_context(context)
    issuers = XmlNode.list(XmlNode.child(XmlNode.child(context, 'IssuerDetails'), 'Issuer'))
    corporates = issuers.select { |issuer| issuer.is_a?(Hash) && issuer['issuerType'] == 'Corporate' }

    corporates.each_with_object([]) do |company, companies|
      isin = isin_for(company)
      companies << { isin: isin } unless isin.nil?
    end
  end

  # Collects every +SectorIndustry+ entry whose +classificationType+ is
  # +'GICS'+.
  #
  # @param context [Hash, nil] the parsed +Context+ element
  # @return [Array<Hash>] Hashes with +:code+ (Integer, via +to_i+) and
  #   +:focus+ (true when +focusLevel+ equals "yes", ignoring case)
  def self.extract_sectors_from_context(context)
    classifications = XmlNode.child(XmlNode.child(context, 'ProductClassifications'), 'SectorIndustry')
    gics = XmlNode.list(classifications).select do |sector|
      sector.is_a?(Hash) && sector['classificationType'] == 'GICS'
    end

    gics.map do |sector|
      { code: sector['code'].to_i, focus: sector['focusLevel']&.downcase == 'yes' }
    end
  end

  # Finds the value of the first +SecurityID+ with +idType == 'ISIN'+ among
  # the issuer's securities. Both +Security+ and +SecurityID+ may be a single
  # element, a list, or missing.
  #
  # @param company [Hash] a parsed +Issuer+ element
  # @return [String, nil] the +idValue+ of that entry, nil when there is none
  def self.isin_for(company)
    securities = XmlNode.list(XmlNode.child(XmlNode.child(company, 'SecurityDetails'), 'Security'))
    ids = securities.flat_map { |security| XmlNode.list(XmlNode.child(security, 'SecurityID')) }
    match = ids.find { |id| id.is_a?(Hash) && id['idType'] == 'ISIN' }
    match && match['idValue']
  end
  private_class_method :isin_for

  private

  # Looks up a value below +Research+ / +Product+.
  #
  # @param path [Array<String>] keys to follow beneath the product element
  # @return [Object, nil] the value found, nil when any step is missing
  def product_node(*path)
    @attrs.dig('Research', 'Product', *path)
  end

  # Maps one parsed +Person+ element onto the author Hash returned by
  # {#authors}; the e-mail address is lower-cased when present.
  #
  # @param person [Hash] a parsed +Person+ element
  # @return [Hash]
  def author_attributes(person)
    {
      name: person['DisplayName'],
      first_name: person['GivenName'],
      middle_name: person['MiddleName'],
      last_name: person['FamilyName'],
      job_title: person['JobTitle'],
      email: person['ContactInfo']&.dig('Email')&.downcase
    }
  end
end