utils.rb in sportdb-0.9.2

- old
+ new
@@ -1,31 +1,243 @@
 
-### some utils
+### some utils moved to worldbdb/utils for reuse
 
-class Time
+
+module SportDB::FixtureHelpers
+
+  def is_round?( line )
+    line =~ /Spieltag|Runde|Achtelfinale|Viertelfinale|Halbfinale|Finale/
+  end
   
-  def self.cet( str )   # central european time (cet) + central european summer time (cest)  
-    ActiveSupport::TimeZone['Vienna'].parse( str )
+  def is_group?( line )
+    # NB: check after is_round? (round may contain group reference!)
+    line =~ /Gruppe|Group/
   end
+  
+  def is_knockout_round?( line )
+    if line =~ /Achtelfinale|Viertelfinale|Halbfinale|Spiel um Platz 3|Finale|K\.O\.|Knockout/
+      puts "   setting knockout flag to true"
+      true
+    else
+      false
+    end
+  end
+  
+  def find_group_title_and_pos!( line )
+    ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C
+    ## nb:  (?:)  = is for non-capturing group(ing)
+    regex = /(?:Group|Gruppe)\s+((?:\d{1}|[A-Z]{1}))\b/
+    
+    match = regex.match( line )
+    
+    return [nil,nil] if match.nil?
 
-  def self.eet( str )  # eastern european time (eet)  + 2 hours
-    ActiveSupport::TimeZone['Bucharest'].parse( str )
+    pos = case match[1]      
+          when 'A' then 1
+          when 'B' then 2
+          when 'C' then 3
+          when 'D' then 4
+          when 'E' then 5
+          when 'F' then 6
+          when 'G' then 7
+          when 'H' then 8
+          when 'I' then 9
+          when 'J' then 10
+          else  match[1].to_i
+          end
+
+    title = match[0]
+
+    puts "   title: >#{title}<"
+    puts "   pos: >#{pos}<"
+      
+    line.sub!( regex, '[GROUP|TITLE+POS]' )
+
+    return [title,pos]
   end
   
-  def self.cst( str )  # central standard time (cst) - 6 hours 
-    ActiveSupport::TimeZone['Mexico City'].parse( str )
+  def find_round_pos!( line )
+    ## fix/todo:
+    ##  if no round found assume last_pos+1 ??? why? why not?
+
+    regex = /\b(\d+)\b/
+    
+    if line =~ regex
+      value = $1.to_i
+      puts "   pos: >#{value}<"
+      
+      line.sub!( regex, '[ROUND|POS]' )
+
+      return value
+    else
+      return nil
+    end    
   end
   
-end # class Time
+  def find_date!( line )
+    # extract date from line
+    # and return it
+    # NB: side effect - removes date from line string
+    
+    # e.g. 2012-09-14 20:30   => YYYY-MM-DD HH:MM
+    regex_db = /\b(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})\b/
 
+    # e.g. 14.09. 20:30  => DD.MM. HH:MM
+    regex_de = /\b(\d{2})\.(\d{2})\.\s+(\d{2}):(\d{2})\b/
 
-class File
-  def self.read_utf8( path )
-    open( path, 'r:bom|utf-8' ) do |file|
-      file.read
+    if line =~ regex_db
+      value = "#{$1}-#{$2}-#{$3} #{$4}:#{$5}"
+      puts "   date: >#{value}<"
+
+      ## todo: lets you configure year
+      ##  and time zone (e.g. cet, eet, utc, etc.)
+      
+      line.sub!( regex_db, '[DATE.DB]' )
+
+      return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
+    elsif line =~ regex_de
+      value = "2012-#{$2}-#{$1} #{$3}:#{$4}"
+      puts "   date: >#{value}<"
+
+      ## todo: lets you configure year
+      ##  and time zone (e.g. cet, eet, utc, etc.)
+      
+      line.sub!( regex_de, '[DATE.DE]' )
+
+      return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
+    else
+      return nil
     end
   end
-end # class File
 
 
+  def find_game_pos!( line )
+    # extract optional game pos from line
+    # and return it
+    # NB: side effect - removes pos from line string
 
+    # e.g.  (1)   - must start line 
+    regex = /^[ \t]*\((\d{1,3})\)[ \t]+/
+    if line =~ regex
+      puts "   pos: >#{$1}<"
+      
+      line.sub!( regex, '[POS] ' )
+      return $1.to_i
+    else
+      return nil
+    end
+
+  end
+
+  def find_scores!( line )
+    # extract score from line
+    # and return it
+    # NB: side effect - removes date from line string
+    
+    # e.g. 1:2 or 0:2 or 3:3
+    regex = /\b(\d):(\d)\b/
+    
+    # e.g. 1:2nV  => overtime
+    regex_ot = /\b(\d):(\d)[ \t]?[nN][vV]\b/
+    
+    # e.g. 5:4iE  => penalty
+    regex_p = /\b(\d):(\d)[ \t]?[iI][eE]\b/
+    
+    scores = []
+    
+    if line =~ regex
+      puts "   score: >#{$1}-#{$2}<"
+      
+      line.sub!( regex, '[SCORE]' )
+
+      scores << $1.to_i
+      scores << $2.to_i
+      
+      if line =~ regex_ot
+        puts "   score.ot: >#{$1}-#{$2}<"
+      
+        line.sub!( regex_ot, '[SCORE.OT]' )
+
+        scores << $1.to_i
+        scores << $2.to_i
+      
+        if line =~ regex_p
+          puts "   score.p: >#{$1}-#{$2}<"
+      
+          line.sub!( regex_p, '[SCORE.P]' )
+
+          scores << $1.to_i
+          scores << $2.to_i
+        end
+      end
+    end
+    scores
+  end # methdod find_scores!
+  
+
+  def find_team_worker!( line, index )
+    regex = /@@oo([^@]+?)oo@@/     # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
+    
+    if line =~ regex
+      value = "#{$1}"
+      puts "   team#{index}: >#{value}<"
+      
+      line.sub!( regex, "[TEAM#{index}]" )
+
+      return $1
+    else
+      return nil
+    end
+  end
+  
+  def find_teams!( line )
+    counter = 1
+    teams = []
+    
+    team = find_team_worker!( line, counter )
+    while team.present?
+      teams << team
+      counter += 1
+      team = find_team_worker!( line, counter )
+    end
+    
+    teams
+  end
+
+  def find_team1!( line )
+    find_team_worker!( line, 1 )
+  end
+  
+  def find_team2!( line )
+    find_team_worker!( line, 2 )
+  end
+
+
+  def match_team_worker!( line, key, values )
+    values.each do |value|
+      ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
+      ## (thus add it, allows match for Benfica Lis.  for example - note . at the end)
+
+      ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
+      regex = /\b#{value}(\b| |\t|$)/   # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker) 
+      if line =~ regex
+        puts "     match for team >#{key}< >#{value}<"
+        # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
+        line.sub!( regex, "@@oo#{key}oo@@ " )    # NB: add one space char at end
+        return true    # break out after first match (do NOT continue)
+      end
+    end
+    return false
+  end
+ 
+  ## todo/fix: pass in known_teams as a parameter? why? why not?
+  def match_teams!( line )
+    @known_teams.each do |rec|
+      key    = rec[0]
+      values = rec[1]
+      match_team_worker!( line, key, values )
+    end # each known_teams    
+  end # method translate_teams!
+  
+
+end # module SportDB::FixtureHelpers