lib/sportdb/formats/package.rb in sportdb-formats-1.0.2 vs lib/sportdb/formats/package.rb in sportdb-formats-1.0.3
- old
+ new
@@ -1,9 +1,12 @@
module SportDb
class Package
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
+ ## e.g. .TXT and .txt
+
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
\.conf\.txt$
}x
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
@@ -24,26 +27,45 @@
CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
clubs\.props\.txt$
}x
+
+ ### season folder:
+ ## e.g. /2019-20 or
+ ## year-only e.g. /2019 or
+ ## /2016--france
+ SEASON_RE = %r{ (?:
+ \d{4}-\d{2}
+ | \d{4}(--[^/]+)?
+ )
+ }x
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
+
+
## note: if pattern includes directory add here
## (otherwise move to more "generic" datafile) - why? why not?
- MATCH_RE = %r{ /(?: \d{4}-\d{2} ## season folder e.g. /2019-20
- | \d{4}(--[^/]+)? ## season year-only folder e.g. /2019 or /2016--france
- )
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ #{SEASON}
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
}x
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ #{SEASON}
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
+ }x
+
+
+
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
def self.find( path, pattern )
datafiles = []
## check all txt files
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
pp candidates
candidates.each do |candidate|
datafiles << candidate if pattern.match( candidate )
end
@@ -64,10 +86,19 @@
def self.match_leagues( path ) LEAGUES_RE.match( path ); end
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
def self.match_conf( path ) CONF_RE.match( path ); end
+ def self.find_match( path, format: 'txt' )
+ if format == 'csv'
+ find( path, MATCH_CSV_RE )
+ else ## otherwise always assume txt for now
+ find( path, MATCH_RE )
+ end
+ end
+ ## add match_match and match_match_csv - why? why not?
+
class << self
alias_method :match_clubs?, :match_clubs
alias_method :clubs?, :match_clubs
alias_method :match_clubs_wiki?, :match_clubs_wiki
@@ -147,19 +178,118 @@
blk.call( entry )
end
end
def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
- def each_match( &blk ) each( pattern: MATCH_RE, &blk ); end
+ def each_match( format: 'txt', &blk )
+ if format == 'csv'
+ each( pattern: MATCH_CSV_RE, &blk );
+ else
+ each( pattern: MATCH_RE, &blk );
+ end
+ end
+ def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
## return all match datafile entries
- def match() ary=[]; each_match {|entry| ary << entry }; ary; end
+ def match( format: 'txt' )
+ ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
+ end
alias_method :matches, :match
+
+
+ ## todo/check: rename/change to match_by_dir - why? why not?
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
+ def match_by_season_dir( format: 'txt' )
+ ##
+ ## [["1950s/1956-57",
+ ## ["1950s/1956-57/1-division1.csv",
+ ## "1950s/1956-57/2-division2.csv",
+ ## "1950s/1956-57/3a-division3n.csv",
+ ## "1950s/1956-57/3b-division3s.csv"]],
+ ## ...]
+
+ h = {}
+ match( format: format ).each do |entry|
+ season_path = File.dirname( entry.name )
+
+ h[ season_path ] ||= []
+ h[ season_path ] << entry
+ end
+
+ ## todo/fix: - add sort entries by name - why? why not?
+ ## note: assume 1-,2- etc. gets us back sorted leagues
+ ## - use sort. (will not sort by default?)
+
+ h.to_a ## return as array (or keep hash) - why? why not?
+ end # method match_by_season_dir
+
+ def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
+
+ ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
+
+ ## note: fold all sames seasons (even if in different directories)
+ ## into same datafile list e.g.
+ ## ["1957/58",
+ ## ["1950s/1957-58/1-division1.csv",
+ ## "1950s/1957-58/2-division2.csv",
+ ## "1950s/1957-58/3a-division3n.csv",
+ ## "1950s/1957-58/3b-division3s.csv"]],
+ ## and
+ ## ["1957/58",
+ ## ["archives/1950s/1957-58/1-division1.csv",
+ ## "archives/1950s/1957-58/2-division2.csv",
+ ## "archives/1950s/1957-58/3a-division3n.csv",
+ ## "archives/1950s/1957-58/3b-division3s.csv"]],
+ ## should be together - why? why not?
+
+ ####
+ # Example package:
+ # [["2012/13", ["2012-13/1-proleague.csv"]],
+ # ["2013/14", ["2013-14/1-proleague.csv"]],
+ # ["2014/15", ["2014-15/1-proleague.csv"]],
+ # ["2015/16", ["2015-16/1-proleague.csv"]],
+ # ["2016/17", ["2016-17/1-proleague.csv"]],
+ # ["2017/18", ["2017-18/1-proleague.csv"]]]
+
+ ## todo/fix: (re)use a more generic filter instead of start for start of season only
+
+ ## todo/fix: use a "generic" filter_season helper for easy reuse
+ ## filter_season( clause, season_key )
+ ## or better filter = SeasonFilter.new( clause )
+ ## filter.skip? filter.include? ( season_sason_key )?
+ ## fiteer.before?( season_key ) etc.
+ ## find some good method names!!!!
+ season_start = start ? Import::Season.new( start ) : nil
+
+ h = {}
+ match( format: format ).each do |entry|
+ ## note: assume last directory in datafile path is the season part/key
+ season_q = File.basename( File.dirname( entry.name ))
+ season = Import::Season.new( season_q ) ## normalize season
+
+ ## skip if start season before this season
+ next if season_start && season_start.start_year > season.start_year
+
+ h[ season.key ] ||= []
+ h[ season.key ] << entry
+ end
+
+ ## todo/fix: - add sort entries by name - why? why not?
+ ## note: assume 1-,2- etc. gets us back sorted leagues
+ ## - use sort. (will not sort by default?)
+
+ ## sort by season
+ ## latest / newest first (and oldest last)
+
+ h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
+ r[0] <=> l[0]
+ end
+ end # method match_by_season
end # class Package
class DirPackage < Package
def initialize( path ) super( Datafile::DirPackage.new( path ) ); end