lib/sportdb/utils.rb in sportdb-1.6.12 vs lib/sportdb/utils.rb in sportdb-1.6.13
- old
+ new
@@ -1,52 +1,10 @@
# encoding: utf-8
-### some utils moved to worldbdb/utils for reuse
+### note: some utils moved to worldbdb/utils for reuse
-### fix: move to textutils??
-
-
# Old-side (-) definition; every line below is removed in the new revision.
# Builds a lookup table from recs: one [ rec.key, titles ] pair per record. titles holds rec.title
# plus any '|'-separated rec.synonyms, sorted longest first and passed through
# TextUtils.title_esc_regex; rec.code is appended last (unescaped) when the record responds to
# :code and the code is present. Each entry is logged at debug level and the table is returned.
-def build_match_table_for( recs )
- ## build known tracks table w/ synonyms e.g.
- #
- # [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]],
- # [ 'augsburg', [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
- # [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
-
- known_titles = []
-
- recs.each_with_index do |rec,index|
-
- titles = []
- titles << rec.title
- titles += rec.synonyms.split('|') if rec.synonyms.present?
-
- ## NB: sort here by length (largest goes first - best match)
- # exclude code and key (key should always go last)
- titles = titles.sort { |left,right| right.length <=> left.length }
-
- ## escape for regex plus allow subs for special chars/accents
- titles = titles.map { |title| TextUtils.title_esc_regex( title ) }
-
- ## NB: only include code field - if defined
- titles << rec.code if rec.respond_to?(:code) && rec.code.present?
-
- known_titles << [ rec.key, titles ]
-
- ### fix:
- ## plain logger
-
- LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
- end
-
- known_titles
-end
-
-
-
-
module SportDb::FixtureHelpers
def is_postponed?( line )
# check if line include postponed marker e.g. =>
line =~ /=>/
@@ -114,15 +72,20 @@
line.sub!( regex, '[GROUP|TITLE+POS]' )
return [title,pos]
end
+
# Cuts an optional end-of-line comment (from the first # up to the end of the line) out of line.
# Old side (-): only rebinds the local variable to a stripped copy and returns that copy,
# so the caller's string is left untouched.
# New side (+): edits line in place with sub!, logging the removed text; the method's value is
# whatever sub! returns, which is nil when the line holds no # at all.
def cut_off_end_of_line_comment!( line )
# cut off (that is, remove) optional end of line comment starting w/ #
- line = line.sub( /#.*$/, '' )
- line
+ line.sub!( /#.*$/ ) do |_|
+ logger.debug " cutting off end of line comment - >>#{$&}<<"
+ ''
+ end
+
+ # NB: line = line.sub will NOT work - thus, lets use line.sub!
end
def find_round_title2!( line )
# assume everything after // is title2 - strip off leading n trailing whitespaces
@@ -202,36 +165,42 @@
# e.g. 14.09. 20:30 => DD.MM. HH:MM
# nb: allow 2.3.2012 e.g. no leading zero required
# nb: allow hour as 20.30 or 3.30 instead of 03.30
regex_de = /\b(\d{1,2})\.(\d{1,2})\.\s+(\d{1,2})[:.](\d{2})\b/
-
+
# e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM
# nb: allow 2.3.2012 e.g. no leading zero required
# nb: allow hour as 20.30
regex_de2 = /\b(\d{1,2})\.(\d{1,2})\.(\d{4})\s+(\d{1,2})[:.](\d{2})\b/
+ month_abbrev_en = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"
+
+ # e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM
+ regex_en = /\b(\d{1,2})\s(#{month_abbrev_en})\s(\d{4})\s+(\d{1,2}):(\d{2})\b/
+
+
if line =~ regex_db
- value = "#{$1}-#{$2}-#{$3} #{$4}:#{$5}"
+ value = '%d-%02d-%02d %02d:%02d' % [$1, $2, $3, $4, $5]
logger.debug " date: >#{value}<"
## todo: lets you configure year
## and time zone (e.g. cet, eet, utc, etc.)
line.sub!( regex_db, '[DATE.DB]' )
return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
elsif line =~ regex_db2
- value = "#{$1}-#{$2}-#{$3} 12:00"
+ value = '%d-%02d-%02d 12:00' % [$1, $2, $3]
logger.debug " date: >#{value}<"
-
+
line.sub!( regex_db2, '[DATE.DB2]' )
return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
elsif line =~ regex_de2
- value = "#{$3}-#{$2}-#{$1} #{$4}:#{$5}"
+ value = '%d-%02d-%02d %02d:%02d' % [$3, $2, $1, $4, $5]
logger.debug " date: >#{value}<"
## todo: lets you configure year
## and time zone (e.g. cet, eet, utc, etc.)
@@ -241,42 +210,136 @@
elsif line =~ regex_de
#### fix/todo:
# get year from event start date!!!!
# do NOT hard code!!!!
-
- value = "2012-#{$2}-#{$1} #{$3}:#{$4}"
+
+ value = '2012-%02d-%02d %02d:%02d' % [$2, $1, $3, $4]
logger.debug " date: >#{value}<"
## todo: lets you configure year
## and time zone (e.g. cet, eet, utc, etc.)
line.sub!( regex_de, '[DATE.DE]' )
return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
+ elsif line =~ regex_en
+ value = '%d-%s-%02d %02d:%02d' % [$3, $2, $1, $4, $5]
+ logger.debug " date: >#{value}<"
+
+ line.sub!( regex_en, '[DATE.EN]' )
+
+ return DateTime.strptime( value, '%Y-%b-%d %H:%M' ) ## %b - abbreviated month name (e.g. Jan,Feb, etc.)
else
return nil
end
end
- def find_game_pos!( line )
# Extracts a trailing free-text record comment from line.
#
# Everything left after the last record marker (that is, after the last "]") up to the
# end of the line is treated as the comment. On a hit the comment text is replaced in
# place by "[REC.COMMENT] " (surrounding whitespace is kept) and the stripped comment is
# returned. Returns nil when no "]" is followed by text or when only whitespace follows.
def find_record_comment!( line )
  regex = /\]([^\]]+?)$/   # NB: use non-greedy +?

  match = regex.match( line )
  return nil if match.nil?

  tail  = match[1]
  value = tail.strip
  return nil if value.blank?   # skip whitespaces only

  logger.debug " comment: >#{value}<"

  # Replace exactly the span the regex matched. A plain line.sub!( value, ... ) swaps the
  # FIRST occurrence of the text, which is the wrong one whenever the comment text also
  # shows up earlier in the line.
  start = match.begin(1) + tail.index( value )
  line[ start, value.length ] = '[REC.COMMENT] '

  value
end
+
+
# Extracts a race-result timeline token from line.
#
# Three forms are tried in this order; the first one that matches wins:
#   laps - "+1 lap" or "+n laps"       => replaced by "[REC.TIMELINE.LAPS] "
#   time - "2:17:15.123"               => replaced by "[REC.TIMELINE.TIME] "
#   secs - "+40.1 secs"                => replaced by "[REC.TIMELINE.SECS] "
#
# The matched text (stripped) is replaced in place by its marker (note the trailing
# space) and returned; returns nil when none of the forms is found.
def find_record_timeline!( line )
  candidates = [
    [ 'laps', '[REC.TIMELINE.LAPS] ', /\s+\+\d{1,2}\s(lap|laps)\b/ ],
    [ 'time', '[REC.TIMELINE.TIME] ', /\b\d{1,2}:\d{2}:\d{2}\.\d{1,3}\b/ ],
    # NB: before \+ a word boundary (\b) will not work - thus, leading \s+
    [ 'secs', '[REC.TIMELINE.SECS] ', /\s+\+\d{1,3}\.\d{1,3}\s(secs)\b/ ]
  ]

  candidates.each do |kind, marker, regex|
    match = regex.match( line )
    next if match.nil?

    matched = match[0]          # complete matched text (incl. leading whitespace)
    value   = matched.strip
    logger.debug " timeline.#{kind}: >#{value}<"

    # Replace the span that actually matched; line.sub!( value, ... ) would hit the
    # first textual occurrence of value, which may sit earlier in the line.
    start = match.begin(0) + matched.index( value )
    line[ start, value.length ] = marker
    return value
  end

  nil
end
+
# Picks the lap count out of line: the first free-standing number with one or two
# digits (e.g. 7 or 28). The number is replaced in place by "[REC.LAPS] " (note the
# trailing space) and returned as an integer; returns nil when there is no such number.
def find_record_laps!( line )
  laps_pattern = /\b(\d{1,2})\b/

  hit = laps_pattern.match( line )
  return nil unless hit

  digits = hit[1]
  logger.debug " laps: >#{digits}<"

  line.sub!( laps_pattern, '[REC.LAPS] ' )
  digits.to_i
end
+
# Picks the leading state column off line: a number with up to three digits (1, 2, 3, ...)
# or the literal "Ret". The token must open the line (leading blanks/tabs allowed) and be
# followed by at least one blank or tab. Token plus surrounding blanks are replaced in
# place by "[REC.STATE] " (note the trailing space); the token is returned as a string,
# or nil when the line does not start that way.
def find_record_leading_state!( line )
  state_pattern = /^[ \t]*(\d{1,3}|Ret)[ \t]+/

  hit = state_pattern.match( line )
  return nil unless hit

  state = hit[1]
  logger.debug " state: >#{state}<"

  line.sub!( state_pattern, '[REC.STATE] ' )
  state
end
+
+
# Reads an optional leading position such as "(1)" off the start of line.
# On a match the "(n)" token, together with the blanks/tabs around it, is replaced in place
# by the marker "[POS] " and n is returned as an integer; returns nil when the line does not
# start with "(n)" followed by at least one blank or tab.
# The new side (+) carries this body under the name find_leading_pos!.
+ def find_leading_pos!( line )
# extract optional game pos from line
# and return it
# NB: side effect - removes pos from line string
# e.g. (1) - must start line
regex = /^[ \t]*\((\d{1,3})\)[ \t]+/
if line =~ regex
logger.debug " pos: >#{$1}<"
-
- line.sub!( regex, '[POS] ' )
+
+ line.sub!( regex, '[POS] ' ) # NB: add trailing space
return $1.to_i
else
return nil
end
+ end
# Kept on the new side as a wrapper under the old name: forwards line to find_leading_pos!
# and returns its result.
+ def find_game_pos!( line )
+ ## fix: add deprecation warning - remove - use find_leading_pos!
+ find_leading_pos!( line )
end
def find_scores!( line )
### fix: depending on language allow 1:1 or 1-1
@@ -328,102 +391,65 @@
end
scores
end # methdod find_scores!
- ## todo/fix:
- # find a better name find_xxx_by_title ?? find_xxx_w_match_table? or similiar
- # move to its own file/module for easier maintance
- # include build_match_table_for
- # - lets us change internals e.g. lets improve matcher using a reverse index, for example
- def find_xxx_worker!( name, line )
- regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
-
- upcase_name = name.upcase
- downcase_name = name.downcase
-
- if line =~ regex
- value = "#{$1}"
- logger.debug " #{downcase_name}: >#{value}<"
-
- line.sub!( regex, "[#{upcase_name}]" )
-
- return $1
- else
- return nil
- end
# New-side replacement for the hand-rolled team lookup: delegates to
# TextUtils.find_keys_for! with the name 'team' and returns its result.
# NOTE(review): TextUtils is defined outside this file - the array return noted below is
# taken from the inline comment, confirm against TextUtils.
+ def find_teams!( line ) # NB: returns an array - note: plural! (teamsss)
+ TextUtils.find_keys_for!( 'team', line )
end
-
-
- def match_xxx_worker!( name, line, key, values )
-
- downcase_name = name.downcase
-
- values.each do |value|
- ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
- ## (thus add it, allows match for Benfica Lis. for example - note . at the end)
-
- ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
- regex = /\b#{value}(\b| |\t|$)/ # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker)
- if line =~ regex
- logger.debug " match for #{downcase_name} >#{key}< >#{value}<"
- # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
- line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end
- return true # break out after first match (do NOT continue)
- end
- end
- return false
+
# Singular counterpart of find_teams!: delegates to TextUtils.find_key_for! with the name
# 'team' and returns its result.
# NOTE(review): "key (string or nil)" is taken from the inline comment - confirm against TextUtils.
+ def find_team!( line ) # NB: returns key (string or nil)
+ TextUtils.find_key_for!( 'team', line )
end
-
-
# Old-side (-) definition; every line below is removed in the new revision.
# Collects team keys by calling find_xxx_worker! with 'team1', 'team2', ... in turn until a
# call yields nothing present, then returns the collected array (empty when nothing was found).
- def find_teams!( line )
- counter = 1
- teams = []
-
- team = find_xxx_worker!( "team#{counter}", line )
- while team.present?
- teams << team
- counter += 1
- team = find_xxx_worker!( "team#{counter}", line )
- end
-
- teams
- end
-
## todo: check if find_team1 gets used? if not remove it!! use find_teams!
# Looks up the 'team1' entry in line. The old side (-) goes through find_xxx_worker!;
# the new side (+) delegates to TextUtils.find_key_for! instead.
def find_team1!( line )
- find_xxx_worker!( 'team1', line )
+ TextUtils.find_key_for!( 'team1', line )
end
-
+
# Looks up the 'team2' entry in line. The old side (-) goes through find_xxx_worker!;
# the new side (+) delegates to TextUtils.find_key_for! instead.
def find_team2!( line )
- find_xxx_worker!( 'team2', line )
+ TextUtils.find_key_for!( 'team2', line )
end
-
## todo/fix: pass in known_teams as a parameter? why? why not?
# Old-side (-) definition; every line below is removed in the new revision.
# Walks @known_teams, treating each entry as a [ key, values ] pair, and runs
# match_xxx_worker!( 'team', line, key, values ) for every entry.
- def match_teams!( line )
- @known_teams.each do |rec|
- key = rec[0]
- values = rec[1]
- match_xxx_worker!( 'team', line, key, values )
- end # each known_teams
- end # method match_teams!
# Delegates to TextUtils.map_titles_for! with the name 'team', passing line and @known_teams,
# and returns its result.
# NOTE(review): presumably tags known team titles inside line in place, as the removed
# match_xxx_worker! did - confirm against TextUtils.
+ def map_teams!( line )
+ TextUtils.map_titles_for!( 'team', line, @known_teams )
+ end
+
# Singular spelling of map_teams!: forwards line unchanged and returns its result.
+ def map_team!( line ) # alias map_teams!
+ map_teams!( line )
+ end
-
# Looks up the 'track' entry in line. The old side (-) goes through find_xxx_worker!;
# the new side (+) delegates to TextUtils.find_key_for! instead.
def find_track!( line )
- find_xxx_worker!( 'track', line )
+ TextUtils.find_key_for!( 'track', line )
end
## todo/fix: pass in known_tracks as a parameter? why? why not?
- def match_track!( line )
- @known_tracks.each do |rec|
- key = rec[0]
- values = rec[1]
- match_xxx_worker!( 'track', line, key, values )
- end # each known_tracks
# Delegates to TextUtils.map_titles_for! with the name 'track', passing line and @known_tracks,
# and returns its result.
# NOTE(review): presumably tags known track titles inside line in place - confirm against TextUtils.
+ def map_track!( line )
+ TextUtils.map_titles_for!( 'track', line, @known_tracks )
+ end
+
# Delegates to TextUtils.find_key_for! with the name 'person' and returns its result.
# NOTE(review): presumably a key string or nil, like find_team! - confirm against TextUtils.
+ def find_person!( line )
+ TextUtils.find_key_for!( 'person', line )
+ end
+
# Delegates to TextUtils.map_titles_for! with the name 'person', passing line and @known_persons,
# and returns its result.
# NOTE(review): presumably tags known person titles inside line in place - confirm against TextUtils.
+ def map_person!( line )
+ TextUtils.map_titles_for!( 'person', line, @known_persons)
+ end
+
+
+
+ ## deprecated methods - use map_
# Old name kept for existing callers: forwards line to map_teams! and returns its result.
+ def match_teams!( line ) ## fix: rename to map_teams!! - remove match_teams!
+ ## todo: issue deprecation warning
+ map_teams!( line )
+ end # method match_teams!
+
# Old name kept for existing callers: forwards line to map_track! and returns its result.
+ def match_track!( line ) ## fix: rename to map_track!!!
+ ## todo: issue deprecation warning
+ map_track!( line )
end # method match_tracks!
end # module SportDb::FixtureHelpers