module Tolq::Parsers::XLSX
  # Error type reserved for XLSX parsing failures. Nothing in this file raises
  # it yet.
  class ParseError < StandardError; end

  # Converts the first worksheet of an XLSX document into a hash keyed by
  # Excel-style cell references ("A1", "B3", ...), either as a plain hash or
  # wrapped in a Tolq api request.
  class Parser
    # Helper method to convert an XLSX into a plain hash keyed by cell
    # reference. Note that the format is different from the Tolq api: values
    # are the raw cell values rather than { 'text' => value } nodes.
    #
    # @param xlsx_io [IO, String] String or IO of xlsx
    # @param parse_opts [Hash] Accepted for interface compatibility; currently
    #   ignored
    # @return [Hash] Hash representation of xlsx, i.e. { 'A1' => 'Hello World' }
    def self.to_hash(xlsx_io, **parse_opts)
      extract_request_data_from_xlsx(xlsx_io, text_node: false)
    end

    # Creates a new xlsx parser
    #
    # @param quality [Symbol] the Tolq quality to order in
    # @param source_language_code [String] The two letter source language code
    # @param target_language_code [String] The two letter target language code
    # @return [Tolq::Parsers::XLSX::Parser]
    def initialize(quality:, source_language_code:, target_language_code:)
      @quality = quality
      @source_language_code = source_language_code
      @target_language_code = target_language_code
    end

    # Parses an xlsx
    #
    # @param xlsx_io [IO, String] A string or IO object to parse
    # @param exclude [Array<String>] List of ranges to exclude in excel
    #   notation, i.e. ["A1:A3"]. A single cell such as "B2" is also accepted.
    # @param parse_opts [Hash] Accepted for interface compatibility; currently
    #   ignored
    # @return [Hash, nil] A hash suitable to be converted to json for a Tolq
    #   api request, or nil when no cell produced any request data
    def parse(xlsx_io, exclude: [], **parse_opts)
      exclude_ranges = to_numeric_exclude_ranges(exclude)
      request_data = self.class.extract_request_data_from_xlsx(xlsx_io, exclude_ranges: exclude_ranges)
      return nil if request_data.empty?

      {
        "request" => request_data,
        "source_language_code" => @source_language_code,
        "target_language_code" => @target_language_code,
        "quality" => @quality.to_s
      }
    end

    # NOTE: the two class methods below are public. A bare `private` has no
    # effect on `def self.` methods, and #parse calls
    # extract_request_data_from_xlsx through `self.class`, which would fail if
    # it were made private.

    # Reads the first worksheet of the workbook and collects every cell that
    # has a value and is not covered by an exclude range.
    #
    # @param xlsx_io [IO, String] String or IO of xlsx
    # @param text_node [Boolean] when true each value is wrapped as
    #   { 'text' => value }; when false the raw cell value is stored
    # @param exclude_ranges [Array<Array>] numeric ranges as produced by
    #   #to_numeric_exclude_ranges
    # @return [Hash] cell reference => value (or text node)
    def self.extract_request_data_from_xlsx(xlsx_io, text_node: true, exclude_ranges: [])
      workbook = RubyXL::Parser.parse_buffer(xlsx_io)

      # TODO: Validate single sheet
      hash = {}
      row_idx = 0
      workbook.worksheets.first.each do |row|
        # Rows and cells may be nil, so both are guarded. The row counter must
        # still advance for a nil row to keep the references aligned.
        (row ? row.cells : []).each_with_index do |cell, cell_idx|
          next unless cell && cell.value
          next if excluded?(cell_idx, row_idx, exclude_ranges)

          # Indices are zero-based; Excel row numbers start at 1.
          key = "#{ColumnHelper.column_to_char(cell_idx)}#{row_idx + 1}"
          hash[key] = text_node ? { 'text' => cell.value } : cell.value
        end
        row_idx += 1
      end
      hash
    end

    # Tells whether a zero-based cell position falls inside any exclude range.
    #
    # Each range is a [from, to] pair of coordinates whose first element is
    # compared against the row index and whose last element is compared
    # against the column index. A range holding only one coordinate (from a
    # single-cell exclude such as "A1") is treated as covering just that cell.
    #
    # @param colidx [Integer] zero-based column index
    # @param rowidx [Integer] zero-based row index
    # @param exclude_ranges [Array<Array>] numeric exclude ranges
    # @return [Boolean] true when the cell is excluded
    def self.excluded?(colidx, rowidx, exclude_ranges)
      exclude_ranges.any? do |range|
        from, to = range
        to ||= from # single-cell range: start and end coincide

        colidx >= from.last && colidx <= to.last &&
          rowidx >= from.first && rowidx <= to.first
      end
    end

    private

    # Converts excel-notation ranges into numeric coordinates.
    #
    # @param exclude_arr [Array<String>] ranges such as "A1:A3"
    # @return [Array<Array>] one entry per range, each holding the result of
    #   ColumnHelper.from_char_notation for every ":"-separated part
    def to_numeric_exclude_ranges(exclude_arr)
      exclude_arr.map do |exrange|
        exrange
          .split(":")
          .map { |k| ColumnHelper.from_char_notation(k) }
      end
    end
  end
end