lib/sportdb/utils.rb in sportdb-0.9.1 vs lib/sportdb/utils.rb in sportdb-0.9.2
- old
+ new
@@ -1,31 +1,243 @@
-### some utils
+### some utils moved to worlddb/utils for reuse
-class Time
+
+module SportDB::FixtureHelpers
+
+ def is_round?( line )  # truthy (match offset) if line contains one of the German round keywords below, else nil
+ line =~ /Spieltag|Runde|Achtelfinale|Viertelfinale|Halbfinale|Finale/
+ end
- def self.cet( str ) # central european time (cet) + central european summer time (cest)
- ActiveSupport::TimeZone['Vienna'].parse( str )
+ def is_group?( line )  # truthy (match offset) if line contains "Gruppe" or "Group", else nil
+ # NB: check only after is_round? (a round line may itself contain a group reference!)
+ line =~ /Gruppe|Group/
 end
+
+ def is_knockout_round?( line )  # true if line contains a knockout-stage keyword, else false; only logs, sets no state itself
+ if line =~ /Achtelfinale|Viertelfinale|Halbfinale|Spiel um Platz 3|Finale|K\.O\.|Knockout/
+ puts " setting knockout flag to true"
+ true
+ else
+ false
+ end
+ end
+
+ def find_group_title_and_pos!( line )  # returns [title, pos] for the first group reference in line, or [nil, nil]; mutates line
+ ## group pos - for now only a single digit (e.g. 1,2,3) or a single uppercase letter (e.g. A,B,C) is supported
+ ## nb: (?:...) is a non-capturing group
+ regex = /(?:Group|Gruppe)\s+((?:\d{1}|[A-Z]{1}))\b/
+
+ match = regex.match( line )
+
+ return [nil,nil] if match.nil?
- def self.eet( str ) # eastern european time (eet) + 2 hours
- ActiveSupport::TimeZone['Bucharest'].parse( str )
+ pos = case match[1]
+ when 'A' then 1
+ when 'B' then 2
+ when 'C' then 3
+ when 'D' then 4
+ when 'E' then 5
+ when 'F' then 6
+ when 'G' then 7
+ when 'H' then 8
+ when 'I' then 9
+ when 'J' then 10
+ else match[1].to_i  # a digit maps to its integer value; NOTE: letters K-Z are not listed above and become 0 via to_i
+ end
+
+ title = match[0]  # the whole matched text (keyword plus digit/letter)
+
+ puts " title: >#{title}<"
+ puts " pos: >#{pos}<"
+
+ line.sub!( regex, '[GROUP|TITLE+POS]' )  # side effect: the group reference in the caller's string is replaced by this marker
+
+ return [title,pos]
 end
- def self.cst( str ) # central standard time (cst) - 6 hours
- ActiveSupport::TimeZone['Mexico City'].parse( str )
+ def find_round_pos!( line )  # returns the first standalone integer in line (nil if none) and replaces it in line with [ROUND|POS]
+ ## fix/todo:
+ ## if no round number is found, assume last_pos+1 ??? why? why not?
+
+ regex = /\b(\d+)\b/
+
+ if line =~ regex
+ value = $1.to_i
+ puts " pos: >#{value}<"
+
+ line.sub!( regex, '[ROUND|POS]' )
+
+ return value
+ else
+ return nil
+ end
 end
-end # class Time
+ def find_date!( line )
+ # extract the first date/time from line
+ # and return it as a DateTime (nil if neither supported format matches)
+ # NB: side effect - replaces the date in the line string with a [DATE.DB] or [DATE.DE] marker
+
+ # e.g. 2012-09-14 20:30 => YYYY-MM-DD HH:MM
+ regex_db = /\b(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})\b/
+ # e.g. 14.09. 20:30 => DD.MM. HH:MM (this format carries no year)
+ regex_de = /\b(\d{2})\.(\d{2})\.\s+(\d{2}):(\d{2})\b/
-class File
- def self.read_utf8( path )
- open( path, 'r:bom|utf-8' ) do |file|
- file.read
+ if line =~ regex_db
+ value = "#{$1}-#{$2}-#{$3} #{$4}:#{$5}"
+ puts " date: >#{value}<"
+
+ ## todo: let the caller configure the year
+ ## and the time zone (e.g. cet, eet, utc, etc.)
+
+ line.sub!( regex_db, '[DATE.DB]' )
+
+ return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
+ elsif line =~ regex_de
+ value = "2012-#{$2}-#{$1} #{$3}:#{$4}"  # NOTE: the year is hard-coded to 2012 here (see todo below)
+ puts " date: >#{value}<"
+
+ ## todo: let the caller configure the year
+ ## and the time zone (e.g. cet, eet, utc, etc.)
+
+ line.sub!( regex_de, '[DATE.DE]' )
+
+ return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
+ else
+ return nil
 end
 end
-end # class File
+ def find_game_pos!( line )
+ # extract the optional game pos from line
+ # and return it as an integer (nil if there is none)
+ # NB: side effect - replaces the pos in the line string with a "[POS] " marker
+ # e.g. (1) - must start a line (leading spaces/tabs allowed); 1-3 digits in parens, followed by at least one space/tab
+ regex = /^[ \t]*\((\d{1,3})\)[ \t]+/
+ if line =~ regex
+ puts " pos: >#{$1}<"
+
+ line.sub!( regex, '[POS] ' )
+ return $1.to_i
+ else
+ return nil
+ end
+
+ end
+
+ def find_scores!( line )
+ # extract score(s) from line
+ # and return them as a flat array of integers: [] or [s1,s2] or [s1,s2,ot1,ot2] or [s1,s2,ot1,ot2,p1,p2]
+ # NB: side effect - replaces each score found in the line string with a marker
+
+ # e.g. 1:2 or 0:2 or 3:3 (single digits only)
+ regex = /\b(\d):(\d)\b/
+
+ # e.g. 1:2nV => overtime
+ regex_ot = /\b(\d):(\d)[ \t]?[nN][vV]\b/
+
+ # e.g. 5:4iE => penalty
+ regex_p = /\b(\d):(\d)[ \t]?[iI][eE]\b/
+
+ scores = []
+
+ if line =~ regex
+ puts " score: >#{$1}-#{$2}<"
+
+ line.sub!( regex, '[SCORE]' )
+
+ scores << $1.to_i
+ scores << $2.to_i
+
+ if line =~ regex_ot  # overtime is only looked for when a plain score was found first
+ puts " score.ot: >#{$1}-#{$2}<"
+
+ line.sub!( regex_ot, '[SCORE.OT]' )
+
+ scores << $1.to_i
+ scores << $2.to_i
+
+ if line =~ regex_p  # penalty is only looked for when an overtime score was found first
+ puts " score.p: >#{$1}-#{$2}<"
+
+ line.sub!( regex_p, '[SCORE.P]' )
+
+ scores << $1.to_i
+ scores << $2.to_i
+ end
+ end
+ end
+ scores
+ end # method find_scores!
+
+
+ def find_team_worker!( line, index )  # returns the text inside the first @@oo...oo@@ tag in line (nil if none); the tag becomes [TEAM<index>]
+ regex = /@@oo([^@]+?)oo@@/ # e.g. everything between @@oo and oo@@ (non-greedy +? over any chars except @, that is [^@])
+
+ if line =~ regex
+ value = "#{$1}"
+ puts " team#{index}: >#{value}<"
+
+ line.sub!( regex, "[TEAM#{index}]" )
+
+ return $1
+ else
+ return nil
+ end
+ end
+
+ def find_teams!( line )  # collects the contents of every @@oo...oo@@ tag in line, in order; tags become [TEAM1], [TEAM2], ...
+ counter = 1
+ teams = []
+
+ team = find_team_worker!( line, counter )
+ while team.present?  # NOTE(review): present? is not core Ruby - presumably from ActiveSupport; confirm it is loaded
+ teams << team
+ counter += 1
+ team = find_team_worker!( line, counter )
+ end
+
+ teams
+ end
+
+ def find_team1!( line )  # extracts the first remaining @@oo...oo@@ tag from line and marks it [TEAM1]; nil if none
+ find_team_worker!( line, 1 )
+ end
+
+ def find_team2!( line )  # extracts the first remaining @@oo...oo@@ tag from line and marks it [TEAM2]; nil if none
+ find_team_worker!( line, 2 )
+ end
+
+
+ def match_team_worker!( line, key, values )  # replaces the first entry of values found in line with "@@oo<key>oo@@ "; returns true on a match, else false
+ values.each do |value|
+ ## nb: \b only matches between a word char and a non-word char, so it does NOT match between "." and a following space or newline
+ ## (thus add explicit alternatives; allows a match for Benfica Lis. for example - note the . at the end)
+ ## NOTE(review): value is interpolated into the regex unescaped, so metachars in it (e.g. that ".") act as regex syntax - confirm intended or use Regexp.escape
+ ## todo: check that the added $ alternative, e.g. (\b| |\t|$), works - test w/ Benfica Lis.$
+ regex = /\b#{value}(\b| |\t|$)/ # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
+ if line =~ regex
+ puts " match for team >#{key}< >#{value}<"
+ # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
+ line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end
+ return true # break out after first match (do NOT continue)
+ end
+ end
+ return false
+ end
+
+ ## todo/fix: pass in known_teams as a parameter? why? why not?
+ def match_teams!( line )  # runs match_team_worker! on line for every entry of @known_teams, tagging matches in place
+ @known_teams.each do |rec|
+ key = rec[0]
+ values = rec[1]  # candidate names for key; iterated with each by match_team_worker!
+ match_team_worker!( line, key, values )
+ end # each known_teams
+ end # method match_teams!
+
+
+end # module SportDB::FixtureHelpers