lib/sportdb/formats/package.rb in sportdb-formats-1.1.6 vs lib/sportdb/formats/package.rb in sportdb-formats-1.2.0
- old
+ new
@@ -1,374 +1,394 @@
-
-module SportDb
- class Package
-
- ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
- ## e.g. .TXT and .txt
- ## yes!! use /i option!!!!!
-
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- \.conf\.txt$
- }x
-
- ## leagues.txt or leagues_en.txt
- ## remove support for en.leagues.txt - why? why not?
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
- leagues
- (?:_[a-z0-9_-]+)?
- \.txt$
- }x
-
- ## seasons.txt or seasons_en.txt
- ## remove support for br.seasons.txt - why? why not?
- SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
- seasons
- (?:_[a-z0-9_-]+)?
- \.txt$
- }x
-
-
- ## clubs.txt or clubs_en.txt
- ## remove support for en.clubs.txt - why? why not?
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
- clubs
- (?:_[a-z0-9_-]+)?
- \.txt$
- }x
-
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
- clubs
- (?:_[a-z0-9_-]+)?
- \.wiki\.txt$
- }x
-
- ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
- CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
- clubs
- (?:_[a-z0-9_-]+)?
- \.props\.txt$
- }x
- CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
-
-
- CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
- clubs
- (?:_[a-z0-9_-]+)?
- \.history\.txt$
- }x
-
- ## teams.txt or teams_history.txt
- TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- teams
- (?:_[a-z0-9_-]+)?
- \.txt$
- }x
-
-
- ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
- ### season folder:
- ## e.g. /2019-20 or
- ## year-only e.g. /2019 or
- ## /2016--france
- SEASON_RE = %r{ (?:
- \d{4}-\d{2}
- | \d{4}(--[a-z0-9_-]+)?
- )
- }x
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
-
-
- ## note: if pattern includes directory add here
- ## (otherwise move to more "generic" datafile) - why? why not?
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- #{SEASON}
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
- }x
-
- MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
- #{SEASON}
- /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
- }x
-
- ### add "generic" pattern to find all csv datafiles
- CSV_RE = %r{ (?: ^|/ )
- [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
- }x
-
-
- ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
-
- def self.find( path, pattern )
- datafiles = []
-
- ## check all txt files
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
- candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
- pp candidates
- candidates.each do |candidate|
- datafiles << candidate if pattern.match( candidate )
- end
-
- pp datafiles
- datafiles
- end
-
-
- def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
- def self.match_teams( path ) TEAMS_RE.match( path ); end
-
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
- def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
-
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
- def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
-
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
-
- def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
- def self.match_seasons( path ) SEASONS_RE.match( path ); end
-
-
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
- def self.match_conf( path ) CONF_RE.match( path ); end
-
- def self.find_match( path, format: 'txt' )
- if format == 'csv'
- find( path, MATCH_CSV_RE )
- else ## otherwise always assume txt for now
- find( path, MATCH_RE )
- end
- end
- ## add match_match and match_match_csv - why? why not?
-
-
- class << self
- alias_method :match_teams?, :match_teams
- alias_method :teams?, :match_teams
-
- alias_method :match_clubs?, :match_clubs
- alias_method :clubs?, :match_clubs
-
- alias_method :match_clubs_wiki?, :match_clubs_wiki
- alias_method :clubs_wiki?, :match_clubs_wiki
-
- alias_method :match_clubs_history?, :match_clubs_history
- alias_method :clubs_history?, :match_clubs_history
-
- alias_method :match_club_props, :match_clubs_props
- alias_method :match_club_props?, :match_clubs_props
- alias_method :club_props?, :match_clubs_props
- alias_method :match_clubs_props?, :match_clubs_props
- alias_method :clubs_props?, :match_clubs_props
-
- alias_method :match_leagues?, :match_leagues
- alias_method :leagues?, :match_leagues
-
- alias_method :match_seasons?, :match_seasons
- alias_method :seasons?, :match_seasons
-
- alias_method :match_conf?, :match_conf
- alias_method :conf?, :match_conf
- end
-
-
- ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
- attr_accessor :include, :exclude
-
- ## private helpers - like select returns true for keeping and false for skipping entry
- def filter_clause( filter, entry )
- if filter.is_a?( String )
- entry.name.index( filter ) ? true : false
- elsif filter.is_a?( Regexp )
- filter.match( entry.name ) ? true : false
- else ## assume
- ## todo/check: pass in entry (and NOT entry.name) - why? why not?
- filter.call( entry )
- end
- end
-
- def filter( entry )
- if @include
- if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
- true ## todo/check: check for exclude here too - why? why not?
- else
- false
- end
- else
- if @exclude && filter_clause( @exclude, entry )
- false
- else
- true
- end
- end
- end
-
-
- def initialize( path_or_pack )
- @include = nil
- @exclude = nil
-
- if path_or_pack.is_a?( Datafile::Package )
- @pack = path_or_pack
- else ## assume it's a (string) path
- path = path_or_pack
- if !File.exist?( path ) ## file or directory
- puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
- exit 1
- end
-
- if File.directory?( path )
- @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
- elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
- @pack = Datafile::ZipPackage.new( path )
- else
- puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
- exit 1
- end
- end
- end
-
-
- def each( pattern:, &blk )
- @pack.each( pattern: pattern ) do |entry|
- next unless filter( entry ) ## lets you use include/exclude filters
- blk.call( entry )
- end
- end
-
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
- def each_match( format: 'txt', &blk )
- if format == 'csv'
- each( pattern: MATCH_CSV_RE, &blk );
- else
- each( pattern: MATCH_RE, &blk );
- end
- end
- def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
- def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
-
- def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
-
- def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
- def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
- def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
- def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
-
- def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
-
-
- ## return all match datafile entries
- def match( format: 'txt' )
- ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
- end
- alias_method :matches, :match
-
-
- ## todo/check: rename/change to match_by_dir - why? why not?
- ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
- def match_by_season_dir( format: 'txt' )
- ##
- ## [["1950s/1956-57",
- ## ["1950s/1956-57/1-division1.csv",
- ## "1950s/1956-57/2-division2.csv",
- ## "1950s/1956-57/3a-division3n.csv",
- ## "1950s/1956-57/3b-division3s.csv"]],
- ## ...]
-
- h = {}
- match( format: format ).each do |entry|
- season_path = File.dirname( entry.name )
-
- h[ season_path ] ||= []
- h[ season_path ] << entry
- end
-
- ## todo/fix: - add sort entries by name - why? why not?
- ## note: assume 1-,2- etc. gets us back sorted leagues
- ## - use sort. (will not sort by default?)
-
- h.to_a ## return as array (or keep hash) - why? why not?
- end # method match_by_season_dir
-
- def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
-
- ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
-
- ## note: fold all sames seasons (even if in different directories)
- ## into same datafile list e.g.
- ## ["1957/58",
- ## ["1950s/1957-58/1-division1.csv",
- ## "1950s/1957-58/2-division2.csv",
- ## "1950s/1957-58/3a-division3n.csv",
- ## "1950s/1957-58/3b-division3s.csv"]],
- ## and
- ## ["1957/58",
- ## ["archives/1950s/1957-58/1-division1.csv",
- ## "archives/1950s/1957-58/2-division2.csv",
- ## "archives/1950s/1957-58/3a-division3n.csv",
- ## "archives/1950s/1957-58/3b-division3s.csv"]],
- ## should be together - why? why not?
-
- ####
- # Example package:
- # [["2012/13", ["2012-13/1-proleague.csv"]],
- # ["2013/14", ["2013-14/1-proleague.csv"]],
- # ["2014/15", ["2014-15/1-proleague.csv"]],
- # ["2015/16", ["2015-16/1-proleague.csv"]],
- # ["2016/17", ["2016-17/1-proleague.csv"]],
- # ["2017/18", ["2017-18/1-proleague.csv"]]]
-
- ## todo/fix: (re)use a more generic filter instead of start for start of season only
-
- ## todo/fix: use a "generic" filter_season helper for easy reuse
- ## filter_season( clause, season_key )
- ## or better filter = SeasonFilter.new( clause )
- ## filter.skip? filter.include? ( season_sason_key )?
- ## fiteer.before?( season_key ) etc.
- ## find some good method names!!!!
- season_start = start ? Season( start ) : nil
-
- h = {}
- match( format: format ).each do |entry|
- ## note: assume last directory in datafile path is the season part/key
- season_q = File.basename( File.dirname( entry.name ))
- season = Season.parse( season_q ) ## normalize season
-
- ## skip if start season before this season
- next if season_start && season_start.start_year > season.start_year
-
- h[ season.key ] ||= []
- h[ season.key ] << entry
- end
-
- ## todo/fix: - add sort entries by name - why? why not?
- ## note: assume 1-,2- etc. gets us back sorted leagues
- ## - use sort. (will not sort by default?)
-
- ## sort by season
- ## latest / newest first (and oldest last)
-
- h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
- r[0] <=> l[0]
- end
- end # method match_by_season
- end # class Package
-
-
- class DirPackage < Package
- def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
- end
-
- class ZipPackage < Package
- def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
- end
-end # module SportDb
+
+module SportDb
+ class Package
+
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
+ ## e.g. .TXT and .txt
+ ## yes!! use /i option!!!!!
+
+ ## .conf.txt - datafile named exactly .conf.txt (note: starts with a dot);
+ ## matches top-level or inside any directory e.g. .conf.txt or austria/.conf.txt
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ \.conf\.txt$
+ }x
+
+ ## leagues.txt or leagues_en.txt (optional _suffix made up of a-z, 0-9, _ and -)
+ ## or with an optional 1-4 letter country code/key prefix e.g. eng.leagues.txt
+ ## note: lower-case only (no /i option for now)
+ ## remove support for en.leagues.txt - why? why not?
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
+ leagues
+ (?:_[a-z0-9_-]+)?
+ \.txt$
+ }x
+
+ ## seasons.txt or seasons_en.txt (optional _suffix made up of a-z, 0-9, _ and -)
+ ## or with an optional 1-4 letter country code/key prefix e.g. eng.seasons.txt
+ ## note: matches the seasons datafile - NOT a season folder (see SEASON_RE for that)
+ ## remove support for br.seasons.txt - why? why not?
+ SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
+ seasons
+ (?:_[a-z0-9_-]+)?
+ \.txt$
+ }x
+
+
+ ####
+ # de.stadiums.txt or stadiums.txt or stadiums_de.txt
+ GROUNDS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
+ stadiums
+ (?:_[a-z0-9_-]+)?
+ \.txt$
+ }x
+
+ PLAYERS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
+ players
+ (?:_[a-z0-9_-]+)?
+ \.txt$
+ }x
+
+
+ ## clubs.txt or clubs_en.txt (optional _suffix made up of a-z, 0-9, _ and -)
+ ## or with an optional 1-4 letter country code/key prefix e.g. eng.clubs.txt
+ ## note: does NOT match clubs.wiki.txt, clubs.props.txt or clubs.history.txt
+ ## (only .txt may follow the optional suffix)
+ ## remove support for en.clubs.txt - why? why not?
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
+ clubs
+ (?:_[a-z0-9_-]+)?
+ \.txt$
+ }x
+
+ ## clubs.wiki.txt or clubs_en.wiki.txt
+ ## or with an optional 1-4 letter country code/key prefix e.g. eng.clubs.wiki.txt
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
+ (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
+ clubs
+ (?:_[a-z0-9_-]+)?
+ \.wiki\.txt$
+ }x
+
+ ## clubs.props.txt or clubs_en.props.txt
+ ## or with an optional 1-4 letter country code/key prefix e.g. eng.clubs.props.txt
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
+ clubs
+ (?:_[a-z0-9_-]+)?
+ \.props\.txt$
+ }x
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?); same regexp object
+
+
+ ## clubs.history.txt or clubs_en.history.txt
+ ## or with an optional 1-4 letter country code/key prefix e.g. eng.clubs.history.txt
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
+ clubs
+ (?:_[a-z0-9_-]+)?
+ \.history\.txt$
+ }x
+
+ ## teams.txt or teams_history.txt (optional _suffix made up of a-z, 0-9, _ and -)
+ ## note: unlike the clubs/leagues/seasons patterns there is
+ ## NO optional country code/key prefix e.g. eng.teams.txt does NOT match
+ TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ teams
+ (?:_[a-z0-9_-]+)?
+ \.txt$
+ }x
+
+
+ ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
+ ### season folder:
+ ## e.g. /2019-20 or
+ ## year-only e.g. /2019 or
+ ## /2016--france
+ SEASON_RE = %r{ (?:
+ \d{4}-\d{2}
+ | \d{4}(--[a-z0-9_-]+)?
+ )
+ }x
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
+
+
+ ## note: if pattern includes directory add here
+ ## (otherwise move to more "generic" datafile) - why? why not?
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ #{SEASON}
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
+ }x
+
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
+ #{SEASON}
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
+ }x
+
+ ### add "generic" pattern to find all csv datafiles
+ ## note: NO season folder required (compare MATCH_CSV_RE);
+ ## matches any basename made up of a-z, 0-9, _, . and - with .csv extension
+ CSV_RE = %r{ (?: ^|/ )
+ [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
+ }x
+
+
+ ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
+
+ def self.find( path, pattern )
+ datafiles = []
+
+ ## check all txt files
+ ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
+ pp candidates
+ candidates.each do |candidate|
+ datafiles << candidate if pattern.match( candidate )
+ end
+
+ pp datafiles
+ datafiles
+ end
+
+
+ def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
+ def self.match_teams( path ) TEAMS_RE.match( path ); end
+
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
+
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
+
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
+
+ def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
+ def self.match_seasons( path ) SEASONS_RE.match( path ); end
+
+
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
+ def self.match_conf( path ) CONF_RE.match( path ); end
+
+ def self.find_match( path, format: 'txt' )
+ if format == 'csv'
+ find( path, MATCH_CSV_RE )
+ else ## otherwise always assume txt for now
+ find( path, MATCH_RE )
+ end
+ end
+ ## add match_match and match_match_csv - why? why not?
+
+
+ class << self
+ alias_method :match_teams?, :match_teams
+ alias_method :teams?, :match_teams
+
+ alias_method :match_clubs?, :match_clubs
+ alias_method :clubs?, :match_clubs
+
+ alias_method :match_clubs_wiki?, :match_clubs_wiki
+ alias_method :clubs_wiki?, :match_clubs_wiki
+
+ alias_method :match_clubs_history?, :match_clubs_history
+ alias_method :clubs_history?, :match_clubs_history
+
+ alias_method :match_club_props, :match_clubs_props
+ alias_method :match_club_props?, :match_clubs_props
+ alias_method :club_props?, :match_clubs_props
+ alias_method :match_clubs_props?, :match_clubs_props
+ alias_method :clubs_props?, :match_clubs_props
+
+ alias_method :match_leagues?, :match_leagues
+ alias_method :leagues?, :match_leagues
+
+ alias_method :match_seasons?, :match_seasons
+ alias_method :seasons?, :match_seasons
+
+ alias_method :match_conf?, :match_conf
+ alias_method :conf?, :match_conf
+ end
+
+
+ ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
+ ## include / exclude filters used by each (via filter) to keep or skip entries;
+ ##  a filter may be a String (substring of entry.name),
+ ##  a Regexp (matched against entry.name) or
+ ##  anything else that responds to call( entry )
+ ##  note: if include is set the exclude filter is NOT checked at all
+ attr_accessor :include, :exclude
+
+ ## private helpers - like select returns true for keeping and false for skipping entry
+ def filter_clause( filter, entry )
+ if filter.is_a?( String )
+ entry.name.index( filter ) ? true : false
+ elsif filter.is_a?( Regexp )
+ filter.match( entry.name ) ? true : false
+ else ## assume
+ ## todo/check: pass in entry (and NOT entry.name) - why? why not?
+ filter.call( entry )
+ end
+ end
+
+ def filter( entry )
+ if @include
+ if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
+ true ## todo/check: check for exclude here too - why? why not?
+ else
+ false
+ end
+ else
+ if @exclude && filter_clause( @exclude, entry )
+ false
+ else
+ true
+ end
+ end
+ end
+
+
+ ## Wrap a "generic" (low-level) datafile package.
+ ##
+ ## path_or_pack - a Datafile::Package (used as is) or
+ ##                a (string) path to a directory or to a file with .zip extension
+ ##
+ ## note: prints an error message and exits (exit 1)
+ ##   if the path does not exist or is neither a directory nor a .zip file
+ def initialize( path_or_pack )
+   @include = nil
+   @exclude = nil
+
+   if path_or_pack.is_a?( Datafile::Package )
+     @pack = path_or_pack
+     return
+   end
+
+   path = path_or_pack ## otherwise assume it's a (string) path
+   unless File.exist?( path ) ## file or directory
+     puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
+     exit 1
+   end
+
+   if File.directory?( path )
+     @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
+   elsif File.file?( path ) && File.extname( path ) == '.zip' ## note: extname includes dot (.) eg .zip
+     @pack = Datafile::ZipPackage.new( path )
+   else
+     puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
+     exit 1
+   end
+ end
+
+
+ def each( pattern:, &blk )
+ @pack.each( pattern: pattern ) do |entry|
+ next unless filter( entry ) ## lets you use include/exclude filters
+ blk.call( entry )
+ end
+ end
+
+ def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
+ def each_match( format: 'txt', &blk )
+ if format == 'csv'
+ each( pattern: MATCH_CSV_RE, &blk );
+ else
+ each( pattern: MATCH_RE, &blk );
+ end
+ end
+ def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
+ def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
+
+ def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
+
+ def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
+ def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
+ def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
+ def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
+
+ def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
+
+
+ def each_grounds( &blk ) each( pattern: GROUNDS_RE, &blk ); end
+ def each_players( &blk ) each( pattern: PLAYERS_RE, &blk ); end
+
+ ## return all match datafile entries
+ def match( format: 'txt' )
+ ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
+ end
+ alias_method :matches, :match
+
+
+ ## todo/check: rename/change to match_by_dir - why? why not?
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
+ def match_by_season_dir( format: 'txt' )
+ ##
+ ## [["1950s/1956-57",
+ ## ["1950s/1956-57/1-division1.csv",
+ ## "1950s/1956-57/2-division2.csv",
+ ## "1950s/1956-57/3a-division3n.csv",
+ ## "1950s/1956-57/3b-division3s.csv"]],
+ ## ...]
+
+ h = {}
+ match( format: format ).each do |entry|
+ season_path = File.dirname( entry.name )
+
+ h[ season_path ] ||= []
+ h[ season_path ] << entry
+ end
+
+ ## todo/fix: - add sort entries by name - why? why not?
+ ## note: assume 1-,2- etc. gets us back sorted leagues
+ ## - use sort. (will not sort by default?)
+
+ h.to_a ## return as array (or keep hash) - why? why not?
+ end # method match_by_season_dir
+
+ def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
+
+ ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
+
+ ## note: fold all sames seasons (even if in different directories)
+ ## into same datafile list e.g.
+ ## ["1957/58",
+ ## ["1950s/1957-58/1-division1.csv",
+ ## "1950s/1957-58/2-division2.csv",
+ ## "1950s/1957-58/3a-division3n.csv",
+ ## "1950s/1957-58/3b-division3s.csv"]],
+ ## and
+ ## ["1957/58",
+ ## ["archives/1950s/1957-58/1-division1.csv",
+ ## "archives/1950s/1957-58/2-division2.csv",
+ ## "archives/1950s/1957-58/3a-division3n.csv",
+ ## "archives/1950s/1957-58/3b-division3s.csv"]],
+ ## should be together - why? why not?
+
+ ####
+ # Example package:
+ # [["2012/13", ["2012-13/1-proleague.csv"]],
+ # ["2013/14", ["2013-14/1-proleague.csv"]],
+ # ["2014/15", ["2014-15/1-proleague.csv"]],
+ # ["2015/16", ["2015-16/1-proleague.csv"]],
+ # ["2016/17", ["2016-17/1-proleague.csv"]],
+ # ["2017/18", ["2017-18/1-proleague.csv"]]]
+
+ ## todo/fix: (re)use a more generic filter instead of start for start of season only
+
+ ## todo/fix: use a "generic" filter_season helper for easy reuse
+ ## filter_season( clause, season_key )
+ ## or better filter = SeasonFilter.new( clause )
+ ## filter.skip? filter.include? ( season_sason_key )?
+ ## fiteer.before?( season_key ) etc.
+ ## find some good method names!!!!
+ season_start = start ? Season( start ) : nil
+
+ h = {}
+ match( format: format ).each do |entry|
+ ## note: assume last directory in datafile path is the season part/key
+ season_q = File.basename( File.dirname( entry.name ))
+ season = Season.parse( season_q ) ## normalize season
+
+ ## skip if start season before this season
+ next if season_start && season_start.start_year > season.start_year
+
+ h[ season.key ] ||= []
+ h[ season.key ] << entry
+ end
+
+ ## todo/fix: - add sort entries by name - why? why not?
+ ## note: assume 1-,2- etc. gets us back sorted leagues
+ ## - use sort. (will not sort by default?)
+
+ ## sort by season
+ ## latest / newest first (and oldest last)
+
+ h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
+ r[0] <=> l[0]
+ end
+ end # method match_by_season
+ end # class Package
+
+
+ class DirPackage < Package
+ def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
+ end
+
+ class ZipPackage < Package
+ def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
+ end
+end # module SportDb