lib/MESH/entry.rb in mesh-medical-subject-headings-2.3.0 vs lib/MESH/entry.rb in mesh-medical-subject-headings-3.0.0

- old
+ new

@@ -1,13 +1,23 @@ module MESH class Entry - attr_accessor :heading, :term, :semantic_types, :semantic_relationship, :lexical_type + include Comparable + attr_accessor :heading, :term, :semantic_types, :semantic_relationship, :lexical_type, :regex, :case_sensitive, + :downcased, :locales, :loose_match_term - def initialize(heading, entry_text) + @@wordy_characters = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a + + def <=> other + self.term <=> other.term + end + + def initialize(heading, entry_text, locale) @heading = heading + @locales = Set.new + @locales << locale @semantic_types = [] parts = entry_text.split('|') if entry_text.include? '|' key = parts.pop parts.each_with_index do |part, i| @@ -27,9 +37,40 @@ end end else @term = entry_text end + if /^[A-Z0-9]+$/ =~ @term + @regex = /(^|\W)#{Regexp.quote(@term)}(\W|$)/ + @case_sensitive = true + else + @regex = /(^|\W)#{Regexp.quote(@term)}(\W|$)/i + @case_sensitive = false + end + + @downcased = @term.downcase + @loose_match_term = Entry.loose_match(@term) + + end + + def self.loose_match(term) + term.gsub(/\W+/, ' ').upcase + end + + def match_in_text(text, downcased) + matches = [] + return matches if text.nil? || text.empty? + + loose_match = @case_sensitive ? (text.include? @term) : (downcased.include? @downcased) + if loose_match + text.to_enum(:scan, @regex).map do |m,| + match = Regexp.last_match + matches << {heading: @heading, matched: self, index: match.offset(0)} + end + end + + matches + end end end