app/controllers/search_methods/sfx4.rb in umlaut-3.1.0 vs app/controllers/search_methods/sfx4.rb in umlaut-3.1.1
- old
+ new
@@ -38,22 +38,26 @@
# Returns the connection object exposed by the class that az_title_klass
# provides. Presumably this is the database connection used for the SFX4
# title queries -- confirm against az_title_klass's definition.
def sfx4_db_connection
  title_klass = az_title_klass
  title_klass.connection
end
# Needs to return ContextObjects
- def find_by_title
+ def find_by_title
connection = sfx4_db_connection
query_match_clause = case search_type_param
when "contains"
terms = title_query_param.split(" ")
#SFX4 seems to ignore 'the' or 'a' on the front, so we will too.
if (["the", "a"].include? terms[0])
terms = terms.slice(1..-1)
end
# Then make each term required, but stemmed. Seems to match SFX4,
# and more importantly give us decent results.
+ #
+ # For reasons we can't entirely tell, the wildcard "*" on terms of two
+ # characters or fewer causes false negatives. Otherwise we use it to be
+ # consistent with SFX. This reverse-engineering is full of pitfalls.
query = terms.collect do |term|
- "+" + connection.quote_string(term) + "*"
+ "+" + connection.quote_string(term) + (term.length > 2 ? "*" : "")
end.join(" ")
"MATCH (TS.TITLE_SEARCH) AGAINST ('#{query}' IN BOOLEAN MODE)"
when "begins"
# For 'begins', searching against TITLE itself rather than TITLE_SEARCH gives us
# results more like SFX4 native, without so many 'also known as' titles confusing