lib/sportdb/utils.rb in sportdb-1.6.5 vs lib/sportdb/utils.rb in sportdb-1.6.6
- old
+ new
@@ -18,13 +18,13 @@
# NB: check after is_round? (round may contain group reference!)
line =~ SportDb.lang.regex_group
end
def is_knockout_round?( line )
-
+
## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg)
-
+
if line =~ SportDb.lang.regex_leg1
logger.debug " two leg knockout; skip knockout flag on first leg"
false
elsif line =~ SportDb.lang.regex_knockout_round
logger.debug " setting knockout flag to true"
@@ -71,60 +71,80 @@
line.sub!( regex, '[GROUP|TITLE+POS]' )
return [title,pos]
end
-
- def find_round_pos!( line )
+
+ def cut_off_end_of_line_comment!( line )
+ # cut off (that is, remove) optional end of line comment starting w/ #
- ## todo: let title2 go first to cut off //
- ## todo: cut of end of line comments w/ # ???
-
- ## fix/todo:
- ## if no round found assume last_pos+1 ??? why? why not?
+ line = line.sub( /#.*$/, '' )
+ line
+ end
- # extract optional round pos from line
- # e.g. (1) - must start line
- regex = /^[ \t]*\((\d{1,3})\)[ \t]+/
+
+ def find_round_title2!( line )
+ # assume everything after // is title2 - strip off leading and trailing whitespace
+ regex = /\/{2,}\s*(.+)\s*$/
if line =~ regex
- logger.debug " pos: >#{$1}<"
+ logger.debug " title2: >#{$1}<"
- line.sub!( regex, '[ROUND|POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
- return $1.to_i
+ line.sub!( regex, '[ROUND|TITLE2]' )
+ return $1
+ else
+ return nil # no round title2 found (title2 is optional)
end
+ end
- # continue; try some other options
- # NB: do not search string after free standing / or //
- # cut-off optional trailing part w/ starting w/ / or //
- #
- # e.g. Viertelfinale // Di+Mi 10.+11. April 2012 becomes just
- # Viertelfinale
-
- cutoff_regex = /^(.+?)[ \t]\/{1,3}[ \t]/
-
- if line =~ cutoff_regex
- line = $1.to_s # cut off the rest if regex matches
- end
+ def find_round_title!( line )
+ # assume everything left is the round title
+ # extract all other items first (round title2, round pos, group title n pos, etc.)
- ## fix/todo: use cutoff_line for search
- ## and use line.sub! to change original string
- # e.g. Jornada 3 // 1,2 y 3 febrero
- # only replaces match in local string w/ [ROUND|POS]
+ buf = line.dup
+ logger.debug " find_round_title! line-before: >>#{buf}<<"
- regex = /\b(\d+)\b/
-
- if line =~ regex
- value = $1.to_i
- logger.debug " pos: >#{value}<"
-
- line.sub!( regex, '[ROUND|POS]' )
+ buf.gsub!( /\[.+?\]/, '' ) # e.g. remove [ROUND|POS], [ROUND|TITLE2], [GROUP|TITLE+POS] etc.
+ buf.sub!( /\s+[\/\-]{1,}\s+$/, '' ) # remove optional trailing / or - chars (left over from group)
+ buf.strip! # remove leading and trailing whitespace
- return value
+ logger.debug " find_round_title! line-after: >>#{buf}<<"
+
+ ### bingo - assume what's left is the round title
+
+ logger.debug " title: >>#{buf}<<"
+ line.sub!( buf, '[ROUND|TITLE]' )
+
+ buf
+ end
+
+
+ def find_round_pos!( line )
+ ## fix/todo:
+ ## if no round found assume last_pos+1 ??? why? why not?
+
+ # extract optional round pos from line
+ # e.g. (1) - must start line
+ regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
+
+ ## find free standing number
+ regex_num = /\b(\d{1,3})\b/
+
+ if line =~ regex_pos
+ logger.debug " pos: >#{$1}<"
+
+ line.sub!( regex_pos, '[ROUND|POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
+ return $1.to_i
+ elsif line =~ regex_num
+ ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
+ ## NB: do NOT remove pos from string (will get removed by round title)
+ logger.debug " pos: >#{$1}<"
+ return $1.to_i
else
+ ## fix: add logger.warn no round pos found in line
return nil
end
- end
+ end # method find_round_pos!
def find_date!( line )
# extract date from line
# and return it
# NB: side effect - removes date from line string