lib/sportdb/utils.rb in sportdb-0.9.7 vs lib/sportdb/utils.rb in sportdb-1.0.0

- old
+ new

@@ -1,49 +1,63 @@ +# encoding: utf-8 ### some utils moved to worldbdb/utils for reuse module SportDB::FixtureHelpers def is_round?( line ) - line =~ /Spieltag|Runde|Achtelfinale|Viertelfinale|Halbfinale|Finale/ + line =~ SportDB.lang.regex_round end - + def is_group?( line ) # NB: check after is_round? (round may contain group reference!) - line =~ /Gruppe|Group/ + line =~ SportDB.lang.regex_group end - + def is_knockout_round?( line ) - if line =~ /Achtelfinale|Viertelfinale|Halbfinale|Spiel um Platz 3|Finale|K\.O\.|Knockout/ + + ## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg) + + if line =~ SportDB.lang.regex_leg1 + puts " two leg knockout; skip knockout flag on first leg" + false + elsif line =~ SportDB.lang.regex_knockout_round puts " setting knockout flag to true" true + elsif line =~ /K\.O\.|Knockout/ + ## NB: add two language independent markers, that is, K.O. and Knockout + puts " setting knockout flag to true (lang independent marker)" + true else false end end def find_group_title_and_pos!( line ) - ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C + ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX ## nb: (?:) = is for non-capturing group(ing) - regex = /(?:Group|Gruppe)\s+((?:\d{1}|[A-Z]{1}))\b/ + regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/ match = regex.match( line ) return [nil,nil] if match.nil? - pos = case match[1] + pos = case match[1] when 'A' then 1 when 'B' then 2 when 'C' then 3 when 'D' then 4 when 'E' then 5 when 'F' then 6 when 'G' then 7 when 'H' then 8 when 'I' then 9 when 'J' then 10 + when 'K' then 11 + when 'L' then 12 + when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ?? else match[1].to_i end title = match[0] @@ -54,13 +68,46 @@ return [title,pos] end def find_round_pos!( line ) + + ## todo: let title2 go first to cut off // + ## todo: cut of end of line comments w/ # ??? + ## fix/todo: ## if no round found assume last_pos+1 ??? why? why not? + # extract optional round pos from line + # e.g. (1) - must start line + regex = /^[ \t]*\((\d{1,3})\)[ \t]+/ + if line =~ regex + puts " pos: >#{$1}<" + + line.sub!( regex, '[ROUND|POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+ + return $1.to_i + end + + # continue; try some other options + + # NB: do not search string after free standing / or // + # cut-off optional trailing part w/ starting w/ / or // + # + # e.g. Viertelfinale // Di+Mi 10.+11. April 2012 becomes just + # Viertelfinale + + cutoff_regex = /^(.+?)[ \t]\/{1,3}[ \t]/ + + if line =~ cutoff_regex + line = $1.to_s # cut off the rest if regex matches + end + + ## fix/todo: use cutoff_line for search + ## and use line.sub! to change original string + # e.g. Jornada 3 // 1,2 y 3 febrero + # only replaces match in local string w/ [ROUND|POS] + regex = /\b(\d+)\b/ if line =~ regex value = $1.to_i puts " pos: >#{value}<" @@ -68,23 +115,29 @@ line.sub!( regex, '[ROUND|POS]' ) return value else return nil - end + end end def find_date!( line ) # extract date from line # and return it # NB: side effect - removes date from line string # e.g. 2012-09-14 20:30 => YYYY-MM-DD HH:MM regex_db = /\b(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})\b/ + + # e.g. 2012-09-14 w/ implied hours (set to 12:00) + regex_db2 = /\b(\d{4})-(\d{2})-(\d{2})\b/ # e.g. 14.09. 20:30 => DD.MM. HH:MM regex_de = /\b(\d{2})\.(\d{2})\.\s+(\d{2}):(\d{2})\b/ + + # e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM + regex_de2 = /\b(\d{2})\.(\d{2})\.(\d{4})\s+(\d{2}):(\d{2})\b/ if line =~ regex_db value = "#{$1}-#{$2}-#{$3} #{$4}:#{$5}" puts " date: >#{value}<" @@ -92,10 +145,27 @@ ## and time zone (e.g. cet, eet, utc, etc.) line.sub!( regex_db, '[DATE.DB]' ) return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + elsif line =~ regex_db2 + value = "#{$1}-#{$2}-#{$3} 12:00" + puts " date: >#{value}<" + + line.sub!( regex_db2, '[DATE.DB2]' ) + + return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + elsif line =~ regex_de2 + value = "#{$3}-#{$2}-#{$1} #{$4}:#{$5}" + puts " date: >#{value}<" + + ## todo: lets you configure year + ## and time zone (e.g. cet, eet, utc, etc.) + + line.sub!( regex_de2, '[DATE.DE2]' ) + + return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) elsif line =~ regex_de value = "2012-#{$2}-#{$1} #{$3}:#{$4}" puts " date: >#{value}<" ## todo: lets you configure year @@ -127,21 +197,28 @@ end end def find_scores!( line ) + + ### fix: depending on language allow 1:1 or 1-1 + ## do NOT allow mix and match + ## e.g. default to en is 1-1 + ## de is 1:1 etc. + + # extract score from line # and return it # NB: side effect - removes date from line string - # e.g. 1:2 or 0:2 or 3:3 - regex = /\b(\d):(\d)\b/ + # e.g. 1:2 or 0:2 or 3:3 // 1-1 or 0-2 or 3-3 + regex = /\b(\d)[:\-](\d)\b/ # e.g. 1:2nV => overtime - regex_ot = /\b(\d):(\d)[ \t]?[nN][vV]\b/ + regex_ot = /\b(\d)[:\-](\d)[ \t]?[nN][vV]\b/ # e.g. 5:4iE => penalty - regex_p = /\b(\d):(\d)[ \t]?[iI][eE]\b/ + regex_p = /\b(\d)[:\-](\d)[ \t]?[iI][eE]\b/ scores = [] if line =~ regex puts " score: >#{$1}-#{$2}<"