lib/sqed/extractor.rb in sqed-0.3.2 vs lib/sqed/extractor.rb in sqed-0.4.0

- old
+ new

@@ -1,71 +1,69 @@ require 'rmagick' -# An Extractor takes Boundries object and a metadata_map and returns a Sqed::Result -# -class Sqed::Extractor +class Sqed + # An Extractor takes Boundaries object and a metadata_map and returns a Sqed::Result + # + class Extractor + class Error < StandardError; end; - # a Sqed::Boundaries instance + # a Sqed::Boundaries instance attr_accessor :boundaries + # @return [Hash] like `{0 => :annotated_specimen, 1 => :identifier, 2 => :image_registration }` # a metadata_map hash from EXTRACTION_PATTERNS like: - # {0 => :annotated_specimen, 1 => :identifier, 2 =>:image_registration } attr_accessor :metadata_map - # a Magick::Image file + # @return [Magick::Image file] attr_accessor :image - def initialize(target_boundaries: nil, target_metadata_map: nil, target_image: nil) - raise Error, 'target_boundaries not provided or provided boundary is not a Sqed::Boundaries' if target_boundaries.nil? || !target_boundaries.class == Sqed::Boundaries - raise Error, 'target_metadata_map not provided or target_metadata_map not a Hash' if target_metadata_map.nil? || !target_metadata_map.class == Hash - raise Error, 'target_image not provided' if target_image.nil? || !target_image.class.name == 'Magick::Image' + def initialize(**opts) + @metadata_map = opts[:metadata_map] + @boundaries = opts[:boundaries] + @image = opts[:image] - @metadata_map = target_metadata_map - @boundaries = target_boundaries - @image = target_image + raise Error, 'boundaries not provided or provided boundary is not a Sqed::Boundaries' if boundaries.nil? || !boundaries.class == Sqed::Boundaries + raise Error, 'metadata_map not provided or metadata_map not a Hash' if metadata_map.nil? || !metadata_map.class == Hash + raise Error, 'image not provided' if image.nil? || !image.class.name == 'Magick::Image' end def result - r = Sqed::Result.new() + r = Sqed::Result.new r.sections = metadata_map.values.sort - + # assign the images to the result boundaries.each do |section_index, coords| section_type = metadata_map[section_index] - - # TODO: raise this higher up the chain - raise Error, "invalid section_type [#{section_type}]" if !SqedConfig::LAYOUT_SECTION_TYPES.include?(section_type) r.send("#{section_type}_image=", extract_image(coords)) r.boundary_coordinates[section_type] = coords - end + end # assign the metadata to the result metadata_map.each do |section_index, section_type| # only extract data if a parser exists if parsers = SqedConfig::SECTION_PARSERS[section_type] - section_image = r.send("#{section_type}_image") - updated = r.send(section_type) parsers.each do |p| - parsed_result = p.new(section_image).text(section_type: section_type) - updated.merge!(p::TYPE => parsed_result) if parsed_result + parsed_result = p.new(section_image).get_text(section_type: section_type) + updated[p::TYPE] = parsed_result if parsed_result && parsed_result.length > 0 end - r.send("#{section_type}=", updated) + r.send("#{section_type}=", updated) end end r end # crop takes x, y, width, height def extract_image(coords) - i = @image.crop(*coords, true) + @image.crop(*coords, true) end + end end