lib/scrap_kit/recipe.rb in scrap_kit-0.1.7 vs lib/scrap_kit/recipe.rb in scrap_kit-0.1.8

- old
+ new

@@ -2,54 +2,88 @@ require "webdrivers/chromedriver" require "watir" module ScrapKit class Recipe + class << self + def load(source) + input = if source.is_a?(Hash) + source + elsif source.is_a?(IO) + JSON.parse(source.read) + else + JSON.parse(File.read(source)) + end + + new(input.deep_symbolize_keys) + end + end + def initialize(url: nil, steps: [], attributes: {}) @url = url @steps = steps @attributes = attributes end def run output = {} - browser = Watir::Browser.new(:chrome, headless: true) - browser.goto @url + @browser = Watir::Browser.new(:chrome, headless: true) + @browser.goto @url @steps.each do |step| - run_step(browser, step) + run_step(step) end @attributes.each do |attribute_name, selector| - output[attribute_name] = extract_attribute(browser, selector) + output[attribute_name] = extract_attribute(@browser, selector) end - browser.close - browser = nil + @browser.close + @browser = nil output end - def run_step(browser, step) + def run_step(step) + return goto(step[:goto]) if step[:goto] + return click(step[:click]) if step[:click] + return fill_form(step[:fill_form]) if step[:fill_form] + + nil end + def find_element_by_name_or_selector(browser_or_element, name_or_selector) + element = browser_or_element.element(name: name_or_selector.to_s) + return element if element.exists? + + element = browser_or_element.element(css: name_or_selector.to_s) + return element if element.exists? + + nil + end + def elements_from_selector(browser_or_element, selector) if selector.is_a?(String) browser_or_element.elements(css: selector) + elsif selector.is_a?(Hash) + browser_or_element.elements(selector) elsif selector.is_a?(Array) *remainder, condition = selector + condition_key, condition_value = condition.first elements = browser_or_element - remainder.each do |item| - elements = elements.elements(css: item) + if remainder.empty? + elements = elements.elements(css: condition_key.to_s) + else + remainder.each do |item| + elements = elements.elements(css: item) + end end elements.filter do |element| - condition_key = condition.keys.first.to_s - condition_value = condition.values.first - found_element = element.element(css: condition_key) + found_element = element.element(css: condition_key.to_s) extracted_value = extract_value_from_element(found_element) extracted_value.match(condition_value) || extracted_value == condition_value end end end @@ -62,39 +96,101 @@ end element&.text_content end - def extract_attribute(browser_or_element, selector_or_hash) - if selector_or_hash.is_a?(String) - extract_value_from_element(browser_or_element.element(css: selector_or_hash)) - elsif selector_or_hash.is_a?(Hash) - selector = selector_or_hash[:selector] - selector_for_children_attributes = selector_or_hash[:children_attributes] + def extract_attribute(browser_or_element, selector_or_object) + if selector_or_object.is_a?(String) + extract_value_from_element(browser_or_element.element(css: selector_or_object)) + elsif selector_or_object.is_a?(Array) + found_elements = elements_from_selector(browser_or_element, selector_or_object) - elements_from_selector(browser_or_element, selector).map do |element| - output = {} + if found_elements.size === 1 + extract_value_from_element(found_elements.first) + else + found_elements.map do |element| + extract_value_from_element(element) + end + end + elsif selector_or_object.is_a?(Hash) + if selector_or_object[:selector] && selector_or_object[:children_attributes] + selector = selector_or_object[:selector] + selector_for_children_attributes = selector_or_object[:children_attributes] - selector_for_children_attributes.each do |child_attribute_name, child_selector| - output[child_attribute_name] = extract_attribute(element, child_selector) + elements_from_selector(browser_or_element, selector).map do |element| + output = {} + + selector_for_children_attributes.each do |child_attribute_name, child_selector| + output[child_attribute_name] = extract_attribute(element, child_selector) + end + + output end + else + found_elements = elements_from_selector(browser_or_element, selector_or_object) - output + if found_elements.size === 1 + extract_value_from_element(found_elements.first) + else + found_elements.map do |element| + extract_value_from_element(element) + end + end end end end - class << self - def load(source) - input = if source.is_a?(Hash) - source - elsif source.is_a?(IO) - JSON.parse(source.read) - else - JSON.parse(File.read(source)) + private + + def goto(link_or_selector) + if link_or_selector.is_a?(String) + @browser.goto(link_or_selector) + elsif link_or_selector.is_a?(Array) || link_or_selector.is_a?(Hash) + if found_element = elements_from_selector(@browser, link_or_selector).first + found_element.click end + end - new(input.deep_symbolize_keys) + sleep 0.5 + @browser.wait_until do + @browser.ready_state == "complete" + end + rescue + nil + end + + def click(selector) + if selector.is_a?(Array) || selector.is_a?(Hash) + if found_element = elements_from_selector(@browser, selector).first + found_element.click + end + end + + sleep 1 + @browser.wait_until do + @browser.ready_state == "complete" + end + + rescue + nil + end + + def fill_form(form_data) + form_data.each do |name, value| + if element = find_element_by_name_or_selector(@browser.body, name.to_s) + element = element.to_subtype + + if element.respond_to?(:set) + element.set(value) + elsif element.respond_to?(:select) + element.select(value) + end + end + end + + sleep 0.25 + @browser.wait_until do + @browser.ready_state == "complete" end end end end