lib/sportdb/utils.rb in sportdb-0.9.1 vs lib/sportdb/utils.rb in sportdb-0.9.2

- old
+ new

@@ -1,31 +1,243 @@ -### some utils +### some utils moved to worldbdb/utils for reuse -class Time + +module SportDB::FixtureHelpers + + def is_round?( line ) + line =~ /Spieltag|Runde|Achtelfinale|Viertelfinale|Halbfinale|Finale/ + end - def self.cet( str ) # central european time (cet) + central european summer time (cest) - ActiveSupport::TimeZone['Vienna'].parse( str ) + def is_group?( line ) + # NB: check after is_round? (round may contain group reference!) + line =~ /Gruppe|Group/ end + + def is_knockout_round?( line ) + if line =~ /Achtelfinale|Viertelfinale|Halbfinale|Spiel um Platz 3|Finale|K\.O\.|Knockout/ + puts " setting knockout flag to true" + true + else + false + end + end + + def find_group_title_and_pos!( line ) + ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C + ## nb: (?:) = is for non-capturing group(ing) + regex = /(?:Group|Gruppe)\s+((?:\d{1}|[A-Z]{1}))\b/ + + match = regex.match( line ) + + return [nil,nil] if match.nil? - def self.eet( str ) # eastern european time (eet) + 2 hours - ActiveSupport::TimeZone['Bucharest'].parse( str ) + pos = case match[1] + when 'A' then 1 + when 'B' then 2 + when 'C' then 3 + when 'D' then 4 + when 'E' then 5 + when 'F' then 6 + when 'G' then 7 + when 'H' then 8 + when 'I' then 9 + when 'J' then 10 + else match[1].to_i + end + + title = match[0] + + puts " title: >#{title}<" + puts " pos: >#{pos}<" + + line.sub!( regex, '[GROUP|TITLE+POS]' ) + + return [title,pos] end - def self.cst( str ) # central standard time (cst) - 6 hours - ActiveSupport::TimeZone['Mexico City'].parse( str ) + def find_round_pos!( line ) + ## fix/todo: + ## if no round found assume last_pos+1 ??? why? why not? + + regex = /\b(\d+)\b/ + + if line =~ regex + value = $1.to_i + puts " pos: >#{value}<" + + line.sub!( regex, '[ROUND|POS]' ) + + return value + else + return nil + end end -end # class Time + def find_date!( line ) + # extract date from line + # and return it + # NB: side effect - removes date from line string + + # e.g. 2012-09-14 20:30 => YYYY-MM-DD HH:MM + regex_db = /\b(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})\b/ + # e.g. 14.09. 20:30 => DD.MM. HH:MM + regex_de = /\b(\d{2})\.(\d{2})\.\s+(\d{2}):(\d{2})\b/ -class File - def self.read_utf8( path ) - open( path, 'r:bom|utf-8' ) do |file| - file.read + if line =~ regex_db + value = "#{$1}-#{$2}-#{$3} #{$4}:#{$5}" + puts " date: >#{value}<" + + ## todo: lets you configure year + ## and time zone (e.g. cet, eet, utc, etc.) + + line.sub!( regex_db, '[DATE.DB]' ) + + return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + elsif line =~ regex_de + value = "2012-#{$2}-#{$1} #{$3}:#{$4}" + puts " date: >#{value}<" + + ## todo: lets you configure year + ## and time zone (e.g. cet, eet, utc, etc.) + + line.sub!( regex_de, '[DATE.DE]' ) + + return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + else + return nil end end -end # class File + def find_game_pos!( line ) + # extract optional game pos from line + # and return it + # NB: side effect - removes pos from line string + # e.g. (1) - must start line + regex = /^[ \t]*\((\d{1,3})\)[ \t]+/ + if line =~ regex + puts " pos: >#{$1}<" + + line.sub!( regex, '[POS] ' ) + return $1.to_i + else + return nil + end + + end + + def find_scores!( line ) + # extract score from line + # and return it + # NB: side effect - removes date from line string + + # e.g. 1:2 or 0:2 or 3:3 + regex = /\b(\d):(\d)\b/ + + # e.g. 1:2nV => overtime + regex_ot = /\b(\d):(\d)[ \t]?[nN][vV]\b/ + + # e.g. 5:4iE => penalty + regex_p = /\b(\d):(\d)[ \t]?[iI][eE]\b/ + + scores = [] + + if line =~ regex + puts " score: >#{$1}-#{$2}<" + + line.sub!( regex, '[SCORE]' ) + + scores << $1.to_i + scores << $2.to_i + + if line =~ regex_ot + puts " score.ot: >#{$1}-#{$2}<" + + line.sub!( regex_ot, '[SCORE.OT]' ) + + scores << $1.to_i + scores << $2.to_i + + if line =~ regex_p + puts " score.p: >#{$1}-#{$2}<" + + line.sub!( regex_p, '[SCORE.P]' ) + + scores << $1.to_i + scores << $2.to_i + end + end + end + scores + end # methdod find_scores! + + + def find_team_worker!( line, index ) + regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@]) + + if line =~ regex + value = "#{$1}" + puts " team#{index}: >#{value}<" + + line.sub!( regex, "[TEAM#{index}]" ) + + return $1 + else + return nil + end + end + + def find_teams!( line ) + counter = 1 + teams = [] + + team = find_team_worker!( line, counter ) + while team.present? + teams << team + counter += 1 + team = find_team_worker!( line, counter ) + end + + teams + end + + def find_team1!( line ) + find_team_worker!( line, 1 ) + end + + def find_team2!( line ) + find_team_worker!( line, 2 ) + end + + + def match_team_worker!( line, key, values ) + values.each do |value| + ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9) + ## (thus add it, allows match for Benfica Lis. for example - note . at the end) + + ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$ + regex = /\b#{value}(\b| |\t|$)/ # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker) + if line =~ regex + puts " match for team >#{key}< >#{value}<" + # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc. + line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end + return true # break out after first match (do NOT continue) + end + end + return false + end + + ## todo/fix: pass in known_teams as a parameter? why? why not? + def match_teams!( line ) + @known_teams.each do |rec| + key = rec[0] + values = rec[1] + match_team_worker!( line, key, values ) + end # each known_teams + end # method translate_teams! + + +end # module SportDB::FixtureHelpers