Sha256: eb96e54998ad1b8edce17bea80c512d8301663f40e2a5fad57efe39360be0c32
Contents?: true
Size: 1.57 KB
Versions: 1
Compression:
Stored size: 1.57 KB
Contents
# encoding: utf-8

module Okei
  # Converts a source string to a standardized line of text that contains only:
  #
  # * cyrillic letters in upper case (including Ё)
  # * digits (with a dot kept as a decimal separator between two digits)
  # * ^ - a symbol of word break (inserted by the converter itself)
  # * slashes
  # * spaces
  # * %
  #
  # Everything else is replaced by spaces; runs of spaces are squeezed and
  # the result is trimmed.
  #
  #     Line.new "1000шт в условн.эквивал."
  #     # => "1000ШТ В УСЛОВН ЭКВИВАЛ"
  #
  #     Line.new "км2"
  #     # => "КМ ^ 2"
  #
  class Line < String
    # Upper-case latin letters and the cyrillic letters they are replaced with.
    LATIN_TO_CYRILLIC = {
      "A" => "А", "B" => "В", "C" => "С", "D" => "Д", "E" => "Е", "G" => "Д",
      "H" => "Н", "K" => "К", "M" => "М", "N" => "П", "O" => "О", "P" => "Р",
      "Q" => "Д", "R" => "Г", "T" => "Т", "U" => "И", "X" => "Х", "Y" => "У"
    }.freeze

    # Matches any single latin letter that has a cyrillic replacement.
    LATIN_LETTERS = Regexp.union(LATIN_TO_CYRILLIC.keys)

    private_constant :LATIN_TO_CYRILLIC, :LATIN_LETTERS

    # @param str [#to_s] the raw text (nil is treated as an empty string)
    def initialize(str = "")
      super(upcase_source(str.to_s))
      # The order matters: every step relies on the output of the previous one.
      to_cyrillic
      remove_dots_and_commas
      clear_text
      add_breaks
    end

    private

    # Upcases the raw text. ActiveSupport's multibyte proxy is used when it is
    # loaded; otherwise Ruby's own String#upcase is used, which handles
    # cyrillic letters on Ruby 2.4 and newer.
    def upcase_source(text)
      text.respond_to?(:mb_chars) ? text.mb_chars.upcase.to_s : text.upcase
    end

    # Converts latin letters to corresponding cyrillic ones in a single pass.
    # Latin letters without a replacement are left for #clear_text to remove.
    def to_cyrillic
      gsub!(LATIN_LETTERS, LATIN_TO_CYRILLIC)
    end

    # Removes dots and commas except for a separator between two digits,
    # which is normalized to a single dot ("1,5" and "1..5" both give "1.5").
    def remove_dots_and_commas
      gsub!(/\.+/, ",")             # any run of dots becomes one comma
      gsub!(/(\d),(\d)/, '\1.\2')   # a comma between digits is a decimal dot
      gsub!(/,/, " ")               # every remaining comma is a word break
    end

    # Removes unwanted symbols.
    def clear_text
      # "%-<suffix>" (a percent sign with a hyphenated ending) becomes "%".
      gsub!(/\%-[^\s]+/, "%")
      # Only cyrillic capitals, digits, dots, whitespace, "/" and "%" survive.
      # Ё is listed explicitly because it lies outside the А-Я range.
      gsub!(/[^А-ЯЁ\d\.\s\/\%]/, " ")
      # The line must start and end with a letter or a digit.
      gsub!(/\A[^А-ЯЁ\d]+|[^А-ЯЁ\d]+\z/, " ")
      gsub!(/\s{2,}/, " ")
      strip!
    end

    # Adds ^ to mark word breaks: after a leading metric-like prefix and
    # between a letter and a following standalone 2 or 3.
    def add_breaks
      gsub!(
        /\A(НАНО|МИКРО|МИЛЛИ|САНТИ|ПОЛ|ДЕКА|ДЕЦИ|ГЕКТО|КИЛО|МЕГА|ТЕРА)/,
        '\1 ^ '
      )
      gsub!(/([А-ЯЁ])[\s\.^]*(2|3)(?!\d)/, '\1 ^ \2')
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
okei-0.0.2 | app/models/okei/line.rb |