lib/sportdb/utils.rb in sportdb-1.6.5 vs lib/sportdb/utils.rb in sportdb-1.6.6

- old
+ new

@@ -18,13 +18,13 @@ # NB: check after is_round? (round may contain group reference!) line =~ SportDb.lang.regex_group end def is_knockout_round?( line ) - + ## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg) - + if line =~ SportDb.lang.regex_leg1 logger.debug " two leg knockout; skip knockout flag on first leg" false elsif line =~ SportDb.lang.regex_knockout_round logger.debug " setting knockout flag to true" @@ -71,60 +71,80 @@ line.sub!( regex, '[GROUP|TITLE+POS]' ) return [title,pos] end - - def find_round_pos!( line ) + + def cut_off_end_of_line_comment!( line ) + # cut off (that is, remove) optional end of line comment starting w/ # - ## todo: let title2 go first to cut off // - ## todo: cut of end of line comments w/ # ??? - - ## fix/todo: - ## if no round found assume last_pos+1 ??? why? why not? + line = line.sub( /#.*$/, '' ) + line + end - # extract optional round pos from line - # e.g. (1) - must start line - regex = /^[ \t]*\((\d{1,3})\)[ \t]+/ + + def find_round_title2!( line ) + # assume everything after // is title2 - strip off leading n trailing whitespaces + regex = /\/{2,}\s*(.+)\s*$/ if line =~ regex - logger.debug " pos: >#{$1}<" + logger.debug " title2: >#{$1}<" - line.sub!( regex, '[ROUND|POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+ - return $1.to_i + line.sub!( regex, '[ROUND|TITLE2]' ) + return $1 + else + return nil # no round title2 found (title2 is optional) end + end - # continue; try some other options - # NB: do not search string after free standing / or // - # cut-off optional trailing part w/ starting w/ / or // - # - # e.g. Viertelfinale // Di+Mi 10.+11. April 2012 becomes just - # Viertelfinale - - cutoff_regex = /^(.+?)[ \t]\/{1,3}[ \t]/ - - if line =~ cutoff_regex - line = $1.to_s # cut off the rest if regex matches - end + def find_round_title!( line ) + # assume everything left is the round title + # extract all other items first (round title2, round pos, group title n pos, etc.) - ## fix/todo: use cutoff_line for search - ## and use line.sub! to change original string - # e.g. Jornada 3 // 1,2 y 3 febrero - # only replaces match in local string w/ [ROUND|POS] + buf = line.dup + logger.debug " find_round_title! line-before: >>#{buf}<<" - regex = /\b(\d+)\b/ - - if line =~ regex - value = $1.to_i - logger.debug " pos: >#{value}<" - - line.sub!( regex, '[ROUND|POS]' ) + buf.gsub!( /\[.+?\]/, '' ) # e.g. remove [ROUND|POS], [ROUND|TITLE2], [GROUP|TITLE+POS] etc. + buf.sub!( /\s+[\/\-]{1,}\s+$/, '' ) # remove optional trailing / or / chars (left over from group) + buf.strip! # remove leading and trailing whitespace - return value + logger.debug " find_round_title! line-after: >>#{buf}<<" + + ### bingo - assume what's left is the round title + + logger.debug " title: >>#{buf}<<" + line.sub!( buf, '[ROUND|TITLE]' ) + + buf + end + + + def find_round_pos!( line ) + ## fix/todo: + ## if no round found assume last_pos+1 ??? why? why not? + + # extract optional round pos from line + # e.g. (1) - must start line + regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/ + + ## find free standing number + regex_num = /\b(\d{1,3})\b/ + + if line =~ regex_pos + logger.debug " pos: >#{$1}<" + + line.sub!( regex_pos, '[ROUND|POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+ + return $1.to_i + elsif line =~ regex_num + ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.) + ## NB: do NOT remove pos from string (will get removed by round title) + logger.debug " pos: >#{$1}<" + return $1.to_i else + ## fix: add logger.warn no round pos found in line return nil end - end + end # method find_round_pos! def find_date!( line ) # extract date from line # and return it # NB: side effect - removes date from line string