lib/sportdb/finders/date.rb in sportdb-models-1.14.0 vs lib/sportdb/finders/date.rb in sportdb-models-1.14.1

- old
+ new

@@ -1,14 +1,106 @@ # encoding: utf-8 -#### fix: move to textutils for reuse !!!!! +#### fix: move to textutils for reuse !!!!!!!!!! - why?? why not ?? module SportDb -class DateFinder +class DateFinderBase + + MONTH_EN_TO_MM = { + 'Jan' => '1', 'January' => '1', + 'Feb' => '2', 'February' => '2', + 'Mar' => '3', 'March' => '3', + 'Apr' => '4', 'April' => '4', + 'May' => '5', + 'Jun' => '6', 'June' => '6', + 'Jul' => '7', 'July' => '7', + 'Aug' => '8', 'August' => '8', + 'Sep' => '9', 'Sept' => '9', 'September' => '9', + 'Oct' => '10', 'October' => '10', + 'Nov' => '11', 'November' => '11', + 'Dec' => '12', 'December' =>'12' } + + MONTH_FR_TO_MM = { + 'Janvier' => '1', 'Janv' => '1', 'Jan' => '1', ## check janv in use?? + 'Février' => '2', 'Févr' => '2', 'Fév' => '2', ## check fevr in use??? + 'Mars' => '3', 'Mar' => '3', + 'Avril' => '4', 'Avri' => '4', 'Avr' => '4', ## check avri in use??? if not remove + 'Mai' => '5', + 'Juin' => '6', + 'Juillet' => '7', 'Juil' => '7', + 'Août' => '8', + 'Septembre' => '9', 'Sept' => '9', + 'Octobre' => '10', 'Octo' => '10', 'Oct' => '10', ### check octo in use?? + 'Novembre' => '11', 'Nove' => '11', 'Nov' => '11', ## check nove in use?? + 'Décembre' => '12', 'Déce' => '12', 'Déc' => '12' } ## check dece in use?? + + MONTH_ES_TO_MM = { + 'Ene' => '1', 'Enero' => '1', + 'Feb' => '2', + 'Mar' => '3', 'Marzo' => '3', + 'Abr' => '4', 'Abril' => '4', + 'May' => '5', 'Mayo' => '5', + 'Jun' => '6', 'Junio' => '6', + 'Jul' => '7', 'Julio' => '7', + 'Ago' => '8', 'Agosto' => '8', + 'Sep' => '9', 'Set' => '9', 'Sept' => '9', + 'Oct' => '10', + 'Nov' => '11', + 'Dic' => '12' } + +private + def calc_year( month, day, opts ) + start_at = opts[:start_at] + + logger.debug " [calc_year] ????-#{month}-#{day} -- start_at: #{start_at}" + + if month >= start_at.month + # assume same year as start_at event (e.g. 2013 for 2013/14 season) + start_at.year + else + # assume year+1 as start_at event (e.g. 2014 for 2013/14 season) + start_at.year+1 + end + end + + def parse_date_time( match_data, opts={} ) + + # convert regex match_data captures to hash + # - note: cannont use match_data like a hash (e.g. raises exception if key/name not present/found) + h = {} + # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym) + match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"?? + + ## puts "[parse_date_time] match_data:" + ## pp h + logger.debug " [parse_date_time] hash: >#{h.inspect}<" + + h[ :month ] = MONTH_EN_TO_MM[ h[:month_en] ] if h[:month_en] + h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es] + h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr] + + month = h[:month] + day = h[:day] + year = h[:year] || calc_year( month.to_i, day.to_i, opts ).to_s + + hours = h[:hours] || '12' # default to 12:00 for HH:MM (hours:minutes) + minutes = h[:minutes] || '00' + + value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i] + logger.debug " date: >#{value}<" + + DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + end + +end # class DateFinderBase + + +class DateFinder < DateFinderBase + include LogUtils::Logging # todo: make more generic for reuse ### fix: ### move to textutils @@ -25,23 +117,11 @@ 'Août|' + 'Septembre|Sept|' + 'Octobre|Octo|Oct|' + 'Novembre|Nove|Nov|' + 'Décembre|Déce|Déc' - MONTH_FR_TO_MM = { - 'Janvier' => '1', 'Janv' => '1', 'Jan' => '1', ## check janv in use?? - 'Février' => '2', 'Févr' => '2', 'Fév' => '2', ## check fevr in use??? - 'Mars' => '3', 'Mar' => '3', - 'Avril' => '4', 'Avri' => '4', 'Avr' => '4', ## check avri in use??? if not remove - 'Mai' => '5', - 'Juin' => '6', - 'Juillet' => '7', 'Juil' => '7', - 'Août' => '8', - 'Septembre' => '9', 'Sept' => '9', - 'Octobre' => '10', 'Octo' => '10', 'Oct' => '10', ### check octo in use?? - 'Novembre' => '11', 'Nove' => '11', 'Nov' => '11', ## check nove in use?? - 'Décembre' => '12', 'Déce' => '12', 'Déc' => '12' } ## check dece in use?? + WEEKDAY_FR = 'Lundi|Lun|L|' + 'Mardi|Mar|Ma|' + 'Mercredi|Mer|Me|' + 'Jeudi|Jeu|J|' + 'Vendredi|Ven|V|' + @@ -59,46 +139,31 @@ 'August|Aug|'+ 'September|Sept|Sep|'+ 'October|Oct|'+ 'November|Nov|'+ 'December|Dec' - MONTH_EN_TO_MM = { - 'Jan' => '1', 'January' => '1', - 'Feb' => '2', 'February' => '2', - 'Mar' => '3', 'March' => '3', - 'Apr' => '4', 'April' => '4', - 'May' => '5', - 'Jun' => '6', 'June' => '6', - 'Jul' => '7', 'July' => '7', - 'Aug' => '8', 'August' => '8', - 'Sep' => '9', 'Sept' => '9', 'September' => '9', - 'Oct' => '10', 'October' => '10', - 'Nov' => '11', 'November' => '11', - 'Dec' => '12', 'December' =>'12' } ### ## todo: add days ## 1. Sunday - Sun. 2. Monday - Mon. ## 3. Tuesday - Tu., Tue., or Tues. 4. Wednesday - Wed. ## 5. Thursday - Th., Thu., Thur., or Thurs. 6. Friday - Fri. ## 7. Saturday - Sat. - MONTH_ES = 'Enero|Ene|Feb|Marzo|Mar|Abril|Abr|Mayo|May|Junio|Jun|Julio|Jul|Agosto|Ago|Sept|Set|Sep|Oct|Nov|Dic' - MONTH_ES_TO_MM = { - 'Ene' => '1', 'Enero' => '1', - 'Feb' => '2', - 'Mar' => '3', 'Marzo' => '3', - 'Abr' => '4', 'Abril' => '4', - 'May' => '5', 'Mayo' => '5', - 'Jun' => '6', 'Junio' => '6', - 'Jul' => '7', 'Julio' => '7', - 'Ago' => '8', 'Agosto' => '8', - 'Sep' => '9', 'Set' => '9', 'Sept' => '9', - 'Oct' => '10', - 'Nov' => '11', - 'Dic' => '12' } + MONTH_ES = 'Enero|Ene|'+ + 'Feb|'+ + 'Marzo|Mar|'+ + 'Abril|Abr|'+ + 'Mayo|May|'+ + 'Junio|Jun|'+ + 'Julio|Jul|'+ + 'Agosto|Ago|'+ + 'Sept|Set|Sep|'+ + 'Oct|'+ + 'Nov|'+ + 'Dic' # todo/fix - add de and es too!! # note: in Austria - Jänner - in Deutschland Januar allow both ?? # MONTH_DE = 'J[aä]n|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez' @@ -240,10 +305,14 @@ \s (?<year>\d{4}) \b/x # e.g. Jun/12 w/ implied year and implied hours (set to 12:00) + # note: allow space too e.g Jun 12 -- check if conflicts w/ other formats??? (added for rsssf reader) + # -- fix: might eat french weekday mar 12 is mardi (mar) !!! see FR__ pattern + # fix: remove space again for now - and use simple en date reader or something!!! + ## was [\/ ] changed back to \/ EN__MONTH_DD__DATE_REGEX = /\b (?<month_en>#{MONTH_EN}) \/ (?<day>\d{1,2}) \b/x @@ -321,54 +390,57 @@ end return nil # no match found end -private - def calc_year( month, day, opts ) - start_at = opts[:start_at] +end # class DateFinder - logger.debug " [calc_year] ????-#{month}-#{day} -- start_at: #{start_at}" - if month >= start_at.month - # assume same year as start_at event (e.g. 2013 for 2013/14 season) - start_at.year - else - # assume year+1 as start_at event (e.g. 2014 for 2013/14 season) - start_at.year+1 - end - end +class RsssfDateFinder < DateFinderBase - def parse_date_time( match_data, opts={} ) - - # convert regex match_data captures to hash - # - note: cannont use match_data like a hash (e.g. raises exception if key/name not present/found) - h = {} - # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym) - match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"?? + include LogUtils::Logging - ## puts "[parse_date_time] match_data:" - ## pp h - logger.debug " [parse_date_time] hash: >#{h.inspect}<" + MONTH_EN = 'Jan|'+ + 'Feb|'+ + 'March|Mar|'+ + 'April|Apr|'+ + 'May|'+ + 'June|Jun|'+ + 'July|Jul|'+ + 'Aug|'+ + 'Sept|Sep|'+ + 'Oct|'+ + 'Nov|'+ + 'Dec' + + ## e.g. + ## [Jun 7] or [Aug 12] etc. - not MUST include brackets e.g. [] + ## + ## check add \b at the beginning and end - why?? why not?? working?? + EN__MONTH_DD__DATE_REGEX = /\[ + (?<month_en>#{MONTH_EN}) + \s + (?<day>\d{1,2}) + \]/x - h[ :month ] = MONTH_EN_TO_MM[ h[:month_en] ] if h[:month_en] - h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es] - h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr] - - month = h[:month] - day = h[:day] - year = h[:year] || calc_year( month.to_i, day.to_i, opts ).to_s - - hours = h[:hours] || '12' # default to 12:00 for HH:MM (hours:minutes) - minutes = h[:minutes] || '00' - - value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i] - logger.debug " date: >#{value}<" - - DateTime.strptime( value, '%Y-%m-%d %H:%M' ) + def find!( line, opts={} ) + # fix: use more lookahead for all required trailing spaces!!!!! + # fix: use <name capturing group> for month,day,year etc.!!! + + tag = '[EN_MONTH_DD]' + pattern = EN__MONTH_DD__DATE_REGEX + md = pattern.match( line ) + if md + date = parse_date_time( md, opts ) + ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not??? + ## fix: use md.begin(0), md.end(0) + line.sub!( md[0], tag ) + ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time) + return date + end + return nil # no match found end -end # class DateFinder - +end ## class RsssfDateFinder end # module SportDb