lib/MESH/entry.rb in mesh-medical-subject-headings-2.3.0 vs lib/MESH/entry.rb in mesh-medical-subject-headings-3.0.0
- old
+ new
@@ -1,13 +1,23 @@
module MESH
class Entry
- attr_accessor :heading, :term, :semantic_types, :semantic_relationship, :lexical_type
+ include Comparable # supplies <, <=, ==, >, >= and between? on top of the <=> defined in this class
+ attr_accessor :heading, :term, :semantic_types, :semantic_relationship, :lexical_type, :regex, :case_sensitive, # public readers and writers; regex, case_sensitive, downcased, locales and loose_match_term are the additions over the previous attribute list
+ :downcased, :locales, :loose_match_term # each of the added attributes is assigned in initialize
- def initialize(heading, entry_text)
+ @@wordy_characters = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a # 62 one-character strings: lowercase, uppercase, then digits; NOTE(review): not referenced in the visible part of this diff - confirm it is used elsewhere, and note that a class variable is shared with any subclasses
+
+ def <=> other # Comparable hook: orders entries by their term
+ self.term <=> other.term # returns whatever term's own <=> returns; raises NoMethodError when other has no #term - NOTE(review): the usual <=> contract is to return nil for incomparable objects
+ end
+
+ def initialize(heading, entry_text, locale)
@heading = heading
+ @locales = Set.new
+ @locales << locale
@semantic_types = []
parts = entry_text.split('|')
if entry_text.include? '|'
key = parts.pop
parts.each_with_index do |part, i|
@@ -27,9 +37,40 @@
end
end
else
@term = entry_text
end
+ if /^[A-Z0-9]+$/ =~ @term
+ @regex = /(^|\W)#{Regexp.quote(@term)}(\W|$)/
+ @case_sensitive = true
+ else
+ @regex = /(^|\W)#{Regexp.quote(@term)}(\W|$)/i
+ @case_sensitive = false
+ end
+
+ @downcased = @term.downcase
+ @loose_match_term = Entry.loose_match(@term)
+
+ end
+
+ def self.loose_match(term) # Normalises term for loose comparison and returns a new String; term itself is not modified
+ term.gsub(/\W+/, ' ').upcase # every run of non-word characters becomes one space (a leading or trailing run is kept as a single space, not stripped), then the result is upcased; Ruby's \W is anything outside [a-zA-Z0-9_], so non-ASCII letters are replaced too
+ end
+
+ def match_in_text(text, downcased) # Finds occurrences of this entry's term in text; returns an Array of {heading:, matched:, index:} hashes, empty when text is nil or empty or nothing matches
+ matches = [] # accumulator and eventual return value
+ return matches if text.nil? || text.empty? # nothing to scan
+
+ loose_match = @case_sensitive ? (text.include? @term) : (downcased.include? @downcased) # cheap substring pre-check before running the regex; downcased is only consulted for case-insensitive entries - presumably the caller passes text.downcase (TODO confirm)
+ if loose_match
+ text.to_enum(:scan, @regex).map do |m,| # iterating scan through an enumerator keeps Regexp.last_match set for each hit; NOTE(review): map is used only for its side effect and m is unused
+ match = Regexp.last_match # MatchData for the current hit
+ matches << {heading: @heading, matched: self, index: match.offset(0)} # index is the [start, end] character offsets of the whole match, including any boundary character consumed by the (^|\W) and (\W|$) groups that initialize puts in @regex
+ end
+ end # NOTE(review): because those boundary characters are consumed, two occurrences separated by a single non-newline non-word character (e.g. "AB AB") produce only the first hit
+
+ matches
+
end
end
end