lib/aranha/parsers/html/node/default.rb in aranha-parsers-0.9.0 vs lib/aranha/parsers/html/node/default.rb in aranha-parsers-0.10.0

- old
+ new

@@ -1,74 +1,17 @@
 # frozen_string_literal: true

 require 'aranha/parsers/html/node/base'
+require 'eac_ruby_utils/core_ext'

 module Aranha
   module Parsers
     module Html
       module Node
         class Default < ::Aranha::Parsers::Html::Node::Base
-          def string_value(node, xpath)
-            if node.at_xpath(xpath)
-              sanitize_string(node.at_xpath(xpath).text)
-            else
-              ''
-            end
-          end
+          require_sub __FILE__, include_modules: true

-          def string_recursive_value(node, xpath, required = true)
-            root = node.at_xpath(xpath)
-            if root.blank?
-              return nil unless required
-
-              raise "No node found (Xpath: #{xpath})"
-            end
-            result = string_recursive(root)
-            return result if result.present?
-            return nil unless required
-
-            raise "String blank (Xpath: #{xpath})"
-          end
-
-          def string_recursive_optional_value(node, xpath)
-            string_recursive_value(node, xpath, false)
-          end
-
-          def quoted_value(node, xpath)
-            s = string_value(node, xpath)
-            return '' unless s
-
-            m = /\"([^\"]+)\"/.match(s)
-            return m[1] if m
-
-            ''
-          end
-
-          def integer_value(node, xpath)
-            r = string_value(node, xpath)
-            return nil if r.blank?
-
-            m = /\d+/.match(r)
-            raise "Integer not found in \"#{r}\"" unless m
-
-            m[0].to_i
-          end
-
-          def integer_optional_value(node, xpath)
-            r = string_value(node, xpath)
-            m = /\d+/.match(r)
-            m ? m[0].to_i : nil
-          end
-
-          def float_value(node, xpath)
-            parse_float(node, xpath, true)
-          end
-
-          def float_optional_value(node, xpath)
-            parse_float(node, xpath, false)
-          end
-
           def array_value(node, xpath)
             r = node.xpath(xpath).map { |n| n.text.strip }
             r.join('|')
           end

@@ -81,44 +24,9 @@ end
           def duration_value(node, xpath)
             m = /(\d+) m/.match(join_value(node, xpath))
             m ? m[1].to_i : nil
-          end
-
-          def regxep(node, xpath, pattern)
-            s = string_value(node, xpath)
-            m = pattern.match(s)
-            return m if m
-
-            raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
-          end
-
-          private
-
-          def parse_float(node, xpath, required)
-            s = string_value(node, xpath)
-            m = /\d+(?:[\.\,](\d+))?/.match(s)
-            if m
-              m[0].delete('.').tr(',', '.').to_f
-            elsif required
-              raise "Float value not found in \"#{s}\""
-            end
-          end
-
-          def sanitize_string(obj)
-            obj.to_s.tr("\u00A0", ' ').strip
-          end
-
-          def string_recursive(node)
-            return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)
-
-            s = ''
-            node.children.each do |child|
-              child_s = string_recursive(child)
-              s += ' ' + child_s if child_s.present?
-            end
-            sanitize_string(s)
           end
         end
       end
     end
   end
 end