lib/scrap_kit/recipe.rb in scrap_kit-0.1.7 vs lib/scrap_kit/recipe.rb in scrap_kit-0.1.8
- old
+ new
@@ -2,54 +2,88 @@
require "webdrivers/chromedriver"
require "watir"
module ScrapKit
class Recipe
class << self
  # Builds a Recipe from a definition given as a Hash, a readable stream,
  # or a path to a JSON file.
  #
  # @param source [Hash, #read, String] a recipe Hash, any object that
  #   responds to +read+ and yields JSON text (IO, File, StringIO, ...),
  #   or the path of a JSON file
  # @return [Recipe] a recipe initialised from the symbol-keyed definition
  # @raise [JSON::ParserError] when the stream or file does not hold valid JSON
  def load(source)
    input = if source.is_a?(Hash)
      source
    elsif source.respond_to?(:read)
      # Duck-typed so in-memory streams such as StringIO work, not only IO.
      JSON.parse(source.read)
    else
      JSON.parse(File.read(source))
    end

    # `initialize` takes keyword arguments only. Ruby 3 no longer converts a
    # positional Hash into keywords, so the Hash must be splatted explicitly.
    new(**deep_symbolize(input))
  end

  private

  # Recursively converts Hash keys to symbols (descending into Arrays too)
  # so the definition can be used without any core extension being loaded.
  def deep_symbolize(value)
    case value
    when Hash
      value.each_with_object({}) do |(key, nested), result|
        result[key.respond_to?(:to_sym) ? key.to_sym : key] = deep_symbolize(nested)
      end
    when Array
      value.map { |item| deep_symbolize(item) }
    else
      value
    end
  end
end
+
# Stores the recipe definition on the instance.
#
# @param url [String, nil] page handed to the browser's +goto+ when the recipe runs
# @param steps [Array<Hash>] steps passed one by one to +run_step+
# @param attributes [Hash] attribute name => selector pairs extracted by +run+
def initialize(url: nil, steps: [], attributes: {})
  @url, @steps = url, steps
  @attributes = attributes
end
# Executes the recipe: starts a headless Chrome browser, navigates to @url,
# runs every step in order, extracts each configured attribute and returns
# the results as a Hash keyed by attribute name.
# Diff view: "-" lines are the 0.1.7 code (browser kept in a local and passed
# around), "+" lines are the 0.1.8 code (browser kept in @browser).
def run
output = {}
- browser = Watir::Browser.new(:chrome, headless: true)
- browser.goto @url
+ @browser = Watir::Browser.new(:chrome, headless: true)
+ @browser.goto @url
@steps.each do |step|
- run_step(browser, step)
+ run_step(step)
end
@attributes.each do |attribute_name, selector|
- output[attribute_name] = extract_attribute(browser, selector)
+ output[attribute_name] = extract_attribute(@browser, selector)
end
# NOTE(review): close is not inside an ensure, so an exception raised by a
# step or by an extraction above leaves the browser open.
- browser.close
- browser = nil
+ @browser.close
+ @browser = nil
output
end
# Dispatches a single step Hash to its handler. The keys are checked in the
# order :goto, :click, :fill_form and only the first truthy one is executed;
# a step with none of these keys is ignored and nil is returned.
# In 0.1.8 the browser is no longer a parameter; the handlers read @browser.
- def run_step(browser, step)
+ def run_step(step)
+ return goto(step[:goto]) if step[:goto]
+ return click(step[:click]) if step[:click]
+ return fill_form(step[:fill_form]) if step[:fill_form]
+
+ nil
end
# Locates one element below browser_or_element, first by its `name`
# attribute and, if nothing with that name exists, by treating the same
# string as a CSS selector. Returns the element, or nil when neither lookup
# finds an existing element.
+ def find_element_by_name_or_selector(browser_or_element, name_or_selector)
# First attempt: match on the name attribute.
+ element = browser_or_element.element(name: name_or_selector.to_s)
+ return element if element.exists?
+
# Fallback: interpret the value as a CSS selector.
+ element = browser_or_element.element(css: name_or_selector.to_s)
+ return element if element.exists?
+
+ nil
+ end
+
# Resolves a selector into a collection of elements below browser_or_element.
#
# Accepted selector shapes:
# * String - used as a CSS selector.
# * Hash   - (0.1.8) passed unchanged to `elements` as the locator.
# * Array  - the last item is a condition Hash; the preceding items are CSS
#            selectors applied one after another. Only elements whose child
#            matching the condition key (as CSS) has a value matching the
#            condition value are kept.
# Any other selector type falls through every branch and yields nil.
def elements_from_selector(browser_or_element, selector)
if selector.is_a?(String)
browser_or_element.elements(css: selector)
+ elsif selector.is_a?(Hash)
+ browser_or_element.elements(selector)
elsif selector.is_a?(Array)
*remainder, condition = selector
# Only the first key/value pair of the condition Hash is used.
+ condition_key, condition_value = condition.first
elements = browser_or_element
- remainder.each do |item|
- elements = elements.elements(css: item)
# 0.1.8: with no leading selectors, the condition key itself selects the
# candidate elements.
+ if remainder.empty?
+ elements = elements.elements(css: condition_key.to_s)
+ else
# NOTE(review): from the second iteration on, `elements` is the collection
# returned by the previous lookup - confirm it responds to `elements`.
+ remainder.each do |item|
+ elements = elements.elements(css: item)
+ end
end
elements.filter do |element|
- condition_key = condition.keys.first.to_s
- condition_value = condition.values.first
- found_element = element.element(css: condition_key)
+ found_element = element.element(css: condition_key.to_s)
extracted_value = extract_value_from_element(found_element)
# NOTE(review): `match` is called before the equality check, so a nil
# extracted value would raise here - confirm extract_value_from_element
# never returns nil for these elements.
extracted_value.match(condition_value) || extracted_value == condition_value
end
end
end
@@ -62,39 +96,101 @@
end
element&.text_content
end
# Extracts the value(s) described by a selector definition.
#
# * String - value of the first element matching the CSS selector.
# * Array  - (0.1.8) resolved through elements_from_selector; a single match
#            yields one value, any other count yields an Array of values.
# * Hash with :selector and :children_attributes - one Hash per matched
#            element, built by extracting every child attribute recursively
#            relative to that element.
# * any other Hash - (0.1.8) treated like the Array case.
# Other argument types fall through every branch and yield nil.
# Diff view: "-" lines are the 0.1.7 code, "+" lines the 0.1.8 code.
- def extract_attribute(browser_or_element, selector_or_hash)
- if selector_or_hash.is_a?(String)
- extract_value_from_element(browser_or_element.element(css: selector_or_hash))
- elsif selector_or_hash.is_a?(Hash)
- selector = selector_or_hash[:selector]
- selector_for_children_attributes = selector_or_hash[:children_attributes]
+ def extract_attribute(browser_or_element, selector_or_object)
+ if selector_or_object.is_a?(String)
+ extract_value_from_element(browser_or_element.element(css: selector_or_object))
+ elsif selector_or_object.is_a?(Array)
+ found_elements = elements_from_selector(browser_or_element, selector_or_object)
- elements_from_selector(browser_or_element, selector).map do |element|
- output = {}
# `===` on an Integer is plain equality here: exactly one match is unwrapped.
+ if found_elements.size === 1
+ extract_value_from_element(found_elements.first)
+ else
+ found_elements.map do |element|
+ extract_value_from_element(element)
+ end
+ end
+ elsif selector_or_object.is_a?(Hash)
# Nested definition: both keys must be present, otherwise the Hash is
# handed to elements_from_selector as a plain selector (else branch below).
+ if selector_or_object[:selector] && selector_or_object[:children_attributes]
+ selector = selector_or_object[:selector]
+ selector_for_children_attributes = selector_or_object[:children_attributes]
- selector_for_children_attributes.each do |child_attribute_name, child_selector|
- output[child_attribute_name] = extract_attribute(element, child_selector)
+ elements_from_selector(browser_or_element, selector).map do |element|
+ output = {}
+
# Child selectors are resolved relative to the matched element.
+ selector_for_children_attributes.each do |child_attribute_name, child_selector|
+ output[child_attribute_name] = extract_attribute(element, child_selector)
+ end
+
+ output
end
+ else
+ found_elements = elements_from_selector(browser_or_element, selector_or_object)
- output
+ if found_elements.size === 1
+ extract_value_from_element(found_elements.first)
+ else
+ found_elements.map do |element|
+ extract_value_from_element(element)
+ end
+ end
end
end
end
- class << self
- def load(source)
- input = if source.is_a?(Hash)
- source
- elsif source.is_a?(IO)
- JSON.parse(source.read)
- else
- JSON.parse(File.read(source))
+ private
+
# Step handler for :goto. A String is passed to the browser's `goto`; an
# Array or Hash is resolved with elements_from_selector and the first match,
# if any, is clicked. Afterwards it pauses for 0.5s and waits until the
# browser reports ready_state "complete".
# Any StandardError raised along the way is swallowed and nil is returned.
+ def goto(link_or_selector)
+ if link_or_selector.is_a?(String)
+ @browser.goto(link_or_selector)
+ elsif link_or_selector.is_a?(Array) || link_or_selector.is_a?(Hash)
+ if found_element = elements_from_selector(@browser, link_or_selector).first
+ found_element.click
end
+ end
- new(input.deep_symbolize_keys)
# Fixed pause before polling the ready state.
# NOTE(review): presumably gives the navigation time to start - confirm.
+ sleep 0.5
+ @browser.wait_until do
+ @browser.ready_state == "complete"
+ end
# Bare rescue: best-effort step, failures are not reported to the caller.
+ rescue
+ nil
+ end
+
# Step handler for :click. Clicks the first element matching +selector+,
# pauses for one second and then waits until the browser reports
# ready_state "complete".
#
# @param selector [String, Hash, Array] any selector shape understood by
#   elements_from_selector; other values click nothing
# @return [Object, nil] the result of the wait, or nil when any
#   StandardError was raised (the step is best-effort)
def click(selector)
  # String (CSS) selectors used to be skipped silently even though
  # elements_from_selector supports them; every supported shape is forwarded.
  if selector.is_a?(String) || selector.is_a?(Array) || selector.is_a?(Hash)
    elements_from_selector(@browser, selector).first&.click
  end

  # Fixed pause before polling the ready state.
  sleep 1
  @browser.wait_until { @browser.ready_state == "complete" }
rescue StandardError
  # Deliberately best-effort: a failing click must not abort the recipe.
  nil
end
+
# Step handler for :fill_form. For every name/value pair it looks the field
# up inside the page body (by name attribute, then by CSS selector) and
# assigns the value; fields that cannot be found are skipped. Afterwards it
# pauses for 0.25s and waits until the browser reports ready_state "complete".
# Unlike goto and click, there is no rescue here, so errors propagate.
+ def fill_form(form_data)
+ form_data.each do |name, value|
+ if element = find_element_by_name_or_selector(@browser.body, name.to_s)
# Convert to the element's specific subtype so the set/select checks below
# see the methods of the concrete element class.
+ element = element.to_subtype
+
# Elements exposing `set` take precedence; `select` is the fallback.
# Elements responding to neither are left untouched.
+ if element.respond_to?(:set)
+ element.set(value)
+ elsif element.respond_to?(:select)
+ element.select(value)
+ end
+ end
+ end
+
+ sleep 0.25
+ @browser.wait_until do
+ @browser.ready_state == "complete"
end
end
end
end