utils.rb in sportdb-1.6.6

- old
+ new

@@ -18,13 +18,13 @@
     # NB: check after is_round? (round may contain group reference!)
     line =~ SportDb.lang.regex_group
   end
 
   def is_knockout_round?( line )
-    
+
     ## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg)
-    
+
     if line =~ SportDb.lang.regex_leg1
       logger.debug "  two leg knockout; skip knockout flag on first leg"
       false
     elsif line =~ SportDb.lang.regex_knockout_round
       logger.debug "   setting knockout flag to true"
@@ -71,60 +71,80 @@
       
     line.sub!( regex, '[GROUP|TITLE+POS]' )
 
     return [title,pos]
   end
-  
-  def find_round_pos!( line )
+
+  def cut_off_end_of_line_comment!( line )
+    # Cut off (that is, remove) an optional end-of-line comment - everything
+    #  from the first # up to the end of the line - IN PLACE.
+    #
+    # line    - string to clean up (gets modified)
+    # returns - the very same (modified) line string
+    #
+    # NB: line = line.sub(...) will NOT work - it only rebinds the local variable
+    #  and leaves the caller's string untouched; thus, use line.sub!
+    # NB: sub! returns nil if nothing got replaced; thus, return line explicitly
     
-    ## todo: let title2 go first to cut off //
-    ## todo: cut of end of line comments w/ # ???
-    
-    ## fix/todo:
-    ##  if no round found assume last_pos+1 ??? why? why not?
+    line.sub!( /#.*$/, '' )
+    line
+  end
 
-    # extract optional round pos from line
-    # e.g.  (1)   - must start line 
-    regex = /^[ \t]*\((\d{1,3})\)[ \t]+/
+
+  def find_round_title2!( line )
+    # Extract the optional secondary round title (title2) from line.
+    #  assume everything after // is title2 - strip off leading n trailing whitespaces
+    #
+    # e.g.  Viertelfinale   //   Di+Mi 10.+11. April 2012   =>  Di+Mi 10.+11. April 2012
+    #
+    # returns - title2 string or nil if no // found (title2 is optional)
+    # NB: side effect - replaces // plus title2 in line w/ [ROUND|TITLE2]
+    #
+    # NB: use non-greedy (.+?) - a greedy (.+) swallows the trailing whitespace
+    #  into the capture before \s*$ gets a chance to match it
+    regex = /\/{2,}\s*(.+?)\s*$/
     if line =~ regex
-      logger.debug "   pos: >#{$1}<"
+      logger.debug "   title2: >#{$1}<"
       
-      line.sub!( regex, '[ROUND|POS] ' )  ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
-      return $1.to_i
+      line.sub!( regex, '[ROUND|TITLE2]' )
+      return $1
+    else
+      return nil    # no round title2 found (title2 is optional)
     end
+  end
 
-    # continue; try some other options
 
-    # NB: do not search string after free standing / or //
-    #  cut-off optional trailing part w/ starting w/  / or //
-    #
-    # e.g.  Viertelfinale   //   Di+Mi 10.+11. April 2012  becomes just
-    #       Viertelfinale
-    
-    cutoff_regex = /^(.+?)[ \t]\/{1,3}[ \t]/
-    
-    if line =~ cutoff_regex
-      line = $1.to_s    # cut off the rest if regex matches
-    end
+  def find_round_title!( line )
+    # assume everything left is the round title
+    #  extract all other items first (round title2, round pos, group title n pos, etc.)
+    #
+    # returns - round title string (empty string if nothing is left over)
+    # NB: side effect - replaces the title in line w/ [ROUND|TITLE]
 
-    ## fix/todo: use cutoff_line for search
-    ## and use line.sub! to change original string
-    # e.g.  Jornada 3  // 1,2 y 3 febrero
-    #   only replaces match in local string w/ [ROUND|POS]
+    buf = line.dup    # work on a copy; line itself only gets changed by the final sub!
+    logger.debug "  find_round_title! line-before: >>#{buf}<<"
 
-    regex = /\b(\d+)\b/
-    
-    if line =~ regex
-      value = $1.to_i
-      logger.debug "   pos: >#{value}<"
-      
-      line.sub!( regex, '[ROUND|POS]' )
+    buf.gsub!( /\[.+?\]/, '' )   # e.g. remove [ROUND|POS], [ROUND|TITLE2], [GROUP|TITLE+POS] etc.
+    buf.sub!( /\s+[\/\-]{1,}\s+$/, '' )    # remove optional trailing / or - chars (left over from group)
+    buf.strip!    # remove leading and trailing whitespace
 
-      return value
+    logger.debug "  find_round_title! line-after: >>#{buf}<<"
+
+    ### bingo - assume what's left is the round title
+
+    logger.debug "   title: >>#{buf}<<"
+    ## NB: buf is a string (not a regex) and, thus, gets matched literally
+    ## NB: skip if nothing is left - sub! w/ an empty pattern matches at the very
+    ##  start and would insert a bogus [ROUND|TITLE] in front of the line
+    line.sub!( buf, '[ROUND|TITLE]' )  unless buf.empty?
+
+    buf
+  end
+
+
+  def find_round_pos!( line )
+    # Find the round pos (number) in line.
+    #
+    # returns - pos as an integer or nil if no number found
+    # NB: side effect - only a leading (n) gets replaced in line w/ [ROUND|POS];
+    #  a free standing number stays in line as is
+    #
     ## fix/todo:
     ##  if no round found assume last_pos+1 ??? why? why not?
+
+    # extract optional round pos from line
+    # e.g.  (1)   - must start line 
+    regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
+
+    ## find free standing number
+    ##  NB: one to three digits only
+    regex_num = /\b(\d{1,3})\b/
+
+    ## NB: explicit (n) at the start of the line gets checked first
+    if line =~ regex_pos
+      logger.debug "   pos: >#{$1}<"
+      
+      line.sub!( regex_pos, '[ROUND|POS] ' )  ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
+      return $1.to_i
+    elsif line =~ regex_num
+      ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
+      ## NB: do NOT remove pos from string (will get removed by round title)
+      logger.debug "   pos: >#{$1}<"
+      return $1.to_i
     else
+      ## fix: add logger.warn no round pos found in line
       return nil
     end
-  end
+  end # method find_round_pos!
   
   def find_date!( line )
     # extract date from line
     # and return it
     # NB: side effect - removes date from line string