lib/sportdb/utils.rb in sportdb-1.6.12 vs lib/sportdb/utils.rb in sportdb-1.6.13

- old
+ new

@@ -1,52 +1,10 @@ # encoding: utf-8 -### some utils moved to worldbdb/utils for reuse +### note: some utils moved to worldbdb/utils for reuse -### fix: move to textutils?? - - -def build_match_table_for( recs ) - ## build known tracks table w/ synonyms e.g. - # - # [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]], - # [ 'augsburg', [ 'FC Augsburg', 'Augi2', 'Augi3' ]], - # [ 'stuttgart', [ 'VfB Stuttgart' ]] ] - - known_titles = [] - - recs.each_with_index do |rec,index| - - titles = [] - titles << rec.title - titles += rec.synonyms.split('|') if rec.synonyms.present? - - ## NB: sort here by length (largest goes first - best match) - # exclude code and key (key should always go last) - titles = titles.sort { |left,right| right.length <=> left.length } - - ## escape for regex plus allow subs for special chars/accents - titles = titles.map { |title| TextUtils.title_esc_regex( title ) } - - ## NB: only include code field - if defined - titles << rec.code if rec.respond_to?(:code) && rec.code.present? - - known_titles << [ rec.key, titles ] - - ### fix: - ## plain logger - - LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<" - end - - known_titles -end - - - - module SportDb::FixtureHelpers def is_postponed?( line ) # check if line include postponed marker e.g. => line =~ /=>/ @@ -114,15 +72,20 @@ line.sub!( regex, '[GROUP|TITLE+POS]' ) return [title,pos] end + def cut_off_end_of_line_comment!( line ) # cut off (that is, remove) optional end of line comment starting w/ # - line = line.sub( /#.*$/, '' ) - line + line.sub!( /#.*$/ ) do |_| + logger.debug " cutting off end of line comment - >>#{$&}<<" + '' + end + + # NB: line = line.sub will NOT work - thus, lets use line.sub! end def find_round_title2!( line ) # assume everything after // is title2 - strip off leading n trailing whitespaces @@ -202,36 +165,42 @@ # e.g. 14.09. 20:30 => DD.MM. HH:MM # nb: allow 2.3.2012 e.g. 
no leading zero required # nb: allow hour as 20.30 or 3.30 instead of 03.30 regex_de = /\b(\d{1,2})\.(\d{1,2})\.\s+(\d{1,2})[:.](\d{2})\b/ - + # e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM # nb: allow 2.3.2012 e.g. no leading zero required # nb: allow hour as 20.30 regex_de2 = /\b(\d{1,2})\.(\d{1,2})\.(\d{4})\s+(\d{1,2})[:.](\d{2})\b/ + month_abbrev_en = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec" + + # e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM + regex_en = /\b(\d{1,2})\s(#{month_abbrev_en})\s(\d{4})\s+(\d{1,2}):(\d{2})\b/ + + if line =~ regex_db - value = "#{$1}-#{$2}-#{$3} #{$4}:#{$5}" + value = '%d-%02d-%02d %02d:%02d' % [$1, $2, $3, $4, $5] logger.debug " date: >#{value}<" ## todo: lets you configure year ## and time zone (e.g. cet, eet, utc, etc.) line.sub!( regex_db, '[DATE.DB]' ) return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) elsif line =~ regex_db2 - value = "#{$1}-#{$2}-#{$3} 12:00" + value = '%d-%02d-%02d 12:00' % [$1, $2, $3] logger.debug " date: >#{value}<" - + line.sub!( regex_db2, '[DATE.DB2]' ) return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) elsif line =~ regex_de2 - value = "#{$3}-#{$2}-#{$1} #{$4}:#{$5}" + value = '%d-%02d-%02d %02d:%02d' % [$3, $2, $1, $4, $5] logger.debug " date: >#{value}<" ## todo: lets you configure year ## and time zone (e.g. cet, eet, utc, etc.) @@ -241,42 +210,136 @@ elsif line =~ regex_de #### fix/todo: # get year from event start date!!!! # do NOT hard code!!!! - - value = "2012-#{$2}-#{$1} #{$3}:#{$4}" + + value = '2012-%02d-%02d %02d:%02d' % [$2, $1, $3, $4] logger.debug " date: >#{value}<" ## todo: lets you configure year ## and time zone (e.g. cet, eet, utc, etc.) line.sub!( regex_de, '[DATE.DE]' ) return DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + elsif line =~ regex_en + value = '%d-%s-%02d %02d:%02d' % [$3, $2, $1, $4, $5] + logger.debug " date: >#{value}<" + + line.sub!( regex_en, '[DATE.EN]' ) + + return DateTime.strptime( value, '%Y-%b-%d %H:%M' ) ## %b - abbreviated month name (e.g. 
Jan, Feb, etc.) else return nil end end - def find_game_pos!( line ) + def find_record_comment!( line ) + # assume everything left after the last record marker, that is, ] is a record comment + + regex = /]([^\]]+?)$/ # NB: use non-greedy +? + + if line =~ regex + value = $1.strip + return nil if value.blank? # skip whitespaces only + + logger.debug " comment: >#{value}<" + + line.sub!( value, '[REC.COMMENT] ' ) + return value + else + return nil + end + end + + + def find_record_timeline!( line ) + + # +1 lap or +n laps + regex_laps = /\s+\+\d{1,2}\s(lap|laps)\b/ + + # 2:17:15.123 + regex_time = /\b\d{1,2}:\d{2}:\d{2}\.\d{1,3}\b/ + + # +40.1 secs + regex_secs = /\s+\+\d{1,3}\.\d{1,3}\s(secs)\b/ # NB: before \+ - boundary (\b) will not work + + # NB: $& contains the complete matched text + + if line =~ regex_laps + value = $&.strip + logger.debug " timeline.laps: >#{value}<" + + line.sub!( value, '[REC.TIMELINE.LAPS] ' ) # NB: add trailing space + return value + elsif line =~ regex_time + value = $&.strip + logger.debug " timeline.time: >#{value}<" + + line.sub!( value, '[REC.TIMELINE.TIME] ' ) # NB: add trailing space + return value + elsif line =~ regex_secs + value = $&.strip + logger.debug " timeline.secs: >#{value}<" + + line.sub!( value, '[REC.TIMELINE.SECS] ' ) # NB: add trailing space + return value + else + return nil + end + end + + def find_record_laps!( line ) + # e.g. first free-standing number w/ one or two digits e.g. 7 or 28 etc. + regex = /\b(\d{1,2})\b/ + if line =~ regex + logger.debug " laps: >#{$1}<" + + line.sub!( regex, '[REC.LAPS] ' ) # NB: add trailing space + return $1.to_i + else + return nil + end + end + + def find_record_leading_state!( line ) + # e.g.
1|2|3|etc or Ret - must start line + regex = /^[ \t]*(\d{1,3}|Ret)[ \t]+/ + if line =~ regex + value = $1.dup + logger.debug " state: >#{value}<" + + line.sub!( regex, '[REC.STATE] ' ) # NB: add trailing space + return value + else + return nil + end + end + + + def find_leading_pos!( line ) # extract optional game pos from line # and return it # NB: side effect - removes pos from line string # e.g. (1) - must start line regex = /^[ \t]*\((\d{1,3})\)[ \t]+/ if line =~ regex logger.debug " pos: >#{$1}<" - - line.sub!( regex, '[POS] ' ) + + line.sub!( regex, '[POS] ' ) # NB: add trailing space return $1.to_i else return nil end + end + def find_game_pos!( line ) + ## fix: add deprecation warning - remove - use find_leading_pos! + find_leading_pos!( line ) end def find_scores!( line ) ### fix: depending on language allow 1:1 or 1-1 @@ -328,102 +391,65 @@ end scores end # method find_scores! - ## todo/fix: - # find a better name find_xxx_by_title ?? find_xxx_w_match_table? or similar - # move to its own file/module for easier maintenance - # include build_match_table_for - # - lets us change internals e.g. lets improve matcher using a reverse index, for example - def find_xxx_worker!( name, line ) - regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@]) - - upcase_name = name.upcase - downcase_name = name.downcase - - if line =~ regex - value = "#{$1}" - logger.debug " #{downcase_name}: >#{value}<" - - line.sub!( regex, "[#{upcase_name}]" ) - - return $1 - else - return nil - end + def find_teams!( line ) # NB: returns an array - note: plural! (teamsss) + TextUtils.find_keys_for!( 'team', line ) end - - - def match_xxx_worker!( name, line, key, values ) - - downcase_name = name.downcase - - values.each do |value| - ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9) - ## (thus add it, allows match for Benfica Lis. for example - note .
at the end) - - ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$ - regex = /\b#{value}(\b| |\t|$)/ # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker) - if line =~ regex - logger.debug " match for #{downcase_name} >#{key}< >#{value}<" - # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc. - line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end - return true # break out after first match (do NOT continue) - end - end - return false + + def find_team!( line ) # NB: returns key (string or nil) + TextUtils.find_key_for!( 'team', line ) end - - - def find_teams!( line ) - counter = 1 - teams = [] - - team = find_xxx_worker!( "team#{counter}", line ) - while team.present? - teams << team - counter += 1 - team = find_xxx_worker!( "team#{counter}", line ) - end - - teams - end - ## todo: check if find_team1 gets used? if not remove it!! use find_teams! def find_team1!( line ) - find_xxx_worker!( 'team1', line ) + TextUtils.find_key_for!( 'team1', line ) end - + def find_team2!( line ) - find_xxx_worker!( 'team2', line ) + TextUtils.find_key_for!( 'team2', line ) end - ## todo/fix: pass in known_teams as a parameter? why? why not? - def match_teams!( line ) - @known_teams.each do |rec| - key = rec[0] - values = rec[1] - match_xxx_worker!( 'team', line, key, values ) - end # each known_teams - end # method match_teams! + def map_teams!( line ) + TextUtils.map_titles_for!( 'team', line, @known_teams ) + end + + def map_team!( line ) # alias map_teams! + map_teams!( line ) + end - def find_track!( line ) - find_xxx_worker!( 'track', line ) + TextUtils.find_key_for!( 'track', line ) end ## todo/fix: pass in known_tracks as a parameter? why? why not? 
- def match_track!( line ) - @known_tracks.each do |rec| - key = rec[0] - values = rec[1] - match_xxx_worker!( 'track', line, key, values ) - end # each known_tracks + def map_track!( line ) + TextUtils.map_titles_for!( 'track', line, @known_tracks ) + end + + def find_person!( line ) + TextUtils.find_key_for!( 'person', line ) + end + + def map_person!( line ) + TextUtils.map_titles_for!( 'person', line, @known_persons) + end + + + + ## deprecated methods - use map_ + def match_teams!( line ) ## fix: rename to map_teams!! - remove match_teams! + ## todo: issue deprecation warning + map_teams!( line ) + end # method match_teams! + + def match_track!( line ) ## fix: rename to map_track!!! + ## todo: issue deprecation warning + map_track!( line ) end # method match_track! end # module SportDb::FixtureHelpers