lib/sportdb/formats/package.rb in sportdb-formats-1.1.6 vs lib/sportdb/formats/package.rb in sportdb-formats-1.2.0

- old
+ new

@@ -1,374 +1,394 @@ - -module SportDb - class Package - - ## todo/fix: make all regexes case-insensitive with /i option - why? why not? - ## e.g. .TXT and .txt - ## yes!! use /i option!!!!! - - CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - \.conf\.txt$ - }x - - ## leagues.txt or leagues_en.txt - ## remove support for en.leagues.txt - why? why not? - LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt - leagues - (?:_[a-z0-9_-]+)? - \.txt$ - }x - - ## seasons.txt or seasons_en.txt - ## remove support for br.seasons.txt - why? why not? - SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt - seasons - (?:_[a-z0-9_-]+)? - \.txt$ - }x - - - ## clubs.txt or clubs_en.txt - ## remove support for en.clubs.txt - why? why not? - CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt - clubs - (?:_[a-z0-9_-]+)? - \.txt$ - }x - - CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/) - (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt - clubs - (?:_[a-z0-9_-]+)? - \.wiki\.txt$ - }x - - ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not? - CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt - clubs - (?:_[a-z0-9_-]+)? - \.props\.txt$ - }x - CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?) - - - CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt - clubs - (?:_[a-z0-9_-]+)? - \.history\.txt$ - }x - - ## teams.txt or teams_history.txt - TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - teams - (?:_[a-z0-9_-]+)? - \.txt$ - }x - - - ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!! - ### season folder: - ## e.g. /2019-20 or - ## year-only e.g. /2019 or - ## /2016--france - SEASON_RE = %r{ (?: - \d{4}-\d{2} - | \d{4}(--[a-z0-9_-]+)? - ) - }x - SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not? - - - ## note: if pattern includes directory add here - ## (otherwise move to more "generic" datafile) - why? why not? - MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - #{SEASON} - /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt - }x - - MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) - #{SEASON} - /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv - }x - - ### add "generic" pattern to find all csv datafiles - CSV_RE = %r{ (?: ^|/ ) - [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv - }x - - - ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not? - - def self.find( path, pattern ) - datafiles = [] - - ## check all txt files - ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *) - candidates = Dir.glob( "#{path}/**/{*,.*}.*" ) - pp candidates - candidates.each do |candidate| - datafiles << candidate if pattern.match( candidate ) - end - - pp datafiles - datafiles - end - - - def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end - def self.match_teams( path ) TEAMS_RE.match( path ); end - - def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end - def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end - def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end - - def self.match_clubs( path ) CLUBS_RE.match( path ); end - def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end - def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end - def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end - - def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end - def self.match_leagues( path ) LEAGUES_RE.match( path ); end - - def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end - def self.match_seasons( path ) SEASONS_RE.match( path ); end - - - def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end - def self.match_conf( path ) CONF_RE.match( path ); end - - def self.find_match( path, format: 'txt' ) - if format == 'csv' - find( path, MATCH_CSV_RE ) - else ## otherwise always assume txt for now - find( path, MATCH_RE ) - end - end - ## add match_match and match_match_csv - why? why not? - - - class << self - alias_method :match_teams?, :match_teams - alias_method :teams?, :match_teams - - alias_method :match_clubs?, :match_clubs - alias_method :clubs?, :match_clubs - - alias_method :match_clubs_wiki?, :match_clubs_wiki - alias_method :clubs_wiki?, :match_clubs_wiki - - alias_method :match_clubs_history?, :match_clubs_history - alias_method :clubs_history?, :match_clubs_history - - alias_method :match_club_props, :match_clubs_props - alias_method :match_club_props?, :match_clubs_props - alias_method :club_props?, :match_clubs_props - alias_method :match_clubs_props?, :match_clubs_props - alias_method :clubs_props?, :match_clubs_props - - alias_method :match_leagues?, :match_leagues - alias_method :leagues?, :match_leagues - - alias_method :match_seasons?, :match_seasons - alias_method :seasons?, :match_seasons - - alias_method :match_conf?, :match_conf - alias_method :conf?, :match_conf - end - - - ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not? - attr_accessor :include, :exclude - - ## private helpers - like select returns true for keeping and false for skipping entry - def filter_clause( filter, entry ) - if filter.is_a?( String ) - entry.name.index( filter ) ? true : false - elsif filter.is_a?( Regexp ) - filter.match( entry.name ) ? true : false - else ## assume - ## todo/check: pass in entry (and NOT entry.name) - why? why not? - filter.call( entry ) - end - end - - def filter( entry ) - if @include - if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword???? - true ## todo/check: check for exclude here too - why? why not? - else - false - end - else - if @exclude && filter_clause( @exclude, entry ) - false - else - true - end - end - end - - - def initialize( path_or_pack ) - @include = nil - @exclude = nil - - if path_or_pack.is_a?( Datafile::Package ) - @pack = path_or_pack - else ## assume it's a (string) path - path = path_or_pack - if !File.exist?( path ) ## file or directory - puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package" - exit 1 - end - - if File.directory?( path ) - @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package - elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip - @pack = Datafile::ZipPackage.new( path ) - else - puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required" - exit 1 - end - end - end - - - def each( pattern:, &blk ) - @pack.each( pattern: pattern ) do |entry| - next unless filter( entry ) ## lets you use include/exclude filters - blk.call( entry ) - end - end - - def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end - def each_match( format: 'txt', &blk ) - if format == 'csv' - each( pattern: MATCH_CSV_RE, &blk ); - else - each( pattern: MATCH_RE, &blk ); - end - end - def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end - def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end - - def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end - - def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end - def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end - def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end - def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end - - def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end - - - ## return all match datafile entries - def match( format: 'txt' ) - ary=[]; each_match( format: format ) {|entry| ary << entry }; ary; - end - alias_method :matches, :match - - - ## todo/check: rename/change to match_by_dir - why? why not? - ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not? - def match_by_season_dir( format: 'txt' ) - ## - ## [["1950s/1956-57", - ## ["1950s/1956-57/1-division1.csv", - ## "1950s/1956-57/2-division2.csv", - ## "1950s/1956-57/3a-division3n.csv", - ## "1950s/1956-57/3b-division3s.csv"]], - ## ...] - - h = {} - match( format: format ).each do |entry| - season_path = File.dirname( entry.name ) - - h[ season_path ] ||= [] - h[ season_path ] << entry - end - - ## todo/fix: - add sort entries by name - why? why not? - ## note: assume 1-,2- etc. gets us back sorted leagues - ## - use sort. (will not sort by default?) - - h.to_a ## return as array (or keep hash) - why? why not? - end # method match_by_season_dir - - def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not? - - ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not? - - ## note: fold all sames seasons (even if in different directories) - ## into same datafile list e.g. - ## ["1957/58", - ## ["1950s/1957-58/1-division1.csv", - ## "1950s/1957-58/2-division2.csv", - ## "1950s/1957-58/3a-division3n.csv", - ## "1950s/1957-58/3b-division3s.csv"]], - ## and - ## ["1957/58", - ## ["archives/1950s/1957-58/1-division1.csv", - ## "archives/1950s/1957-58/2-division2.csv", - ## "archives/1950s/1957-58/3a-division3n.csv", - ## "archives/1950s/1957-58/3b-division3s.csv"]], - ## should be together - why? why not? - - #### - # Example package: - # [["2012/13", ["2012-13/1-proleague.csv"]], - # ["2013/14", ["2013-14/1-proleague.csv"]], - # ["2014/15", ["2014-15/1-proleague.csv"]], - # ["2015/16", ["2015-16/1-proleague.csv"]], - # ["2016/17", ["2016-17/1-proleague.csv"]], - # ["2017/18", ["2017-18/1-proleague.csv"]]] - - ## todo/fix: (re)use a more generic filter instead of start for start of season only - - ## todo/fix: use a "generic" filter_season helper for easy reuse - ## filter_season( clause, season_key ) - ## or better filter = SeasonFilter.new( clause ) - ## filter.skip? filter.include? ( season_sason_key )? - ## fiteer.before?( season_key ) etc. - ## find some good method names!!!! - season_start = start ? Season( start ) : nil - - h = {} - match( format: format ).each do |entry| - ## note: assume last directory in datafile path is the season part/key - season_q = File.basename( File.dirname( entry.name )) - season = Season.parse( season_q ) ## normalize season - - ## skip if start season before this season - next if season_start && season_start.start_year > season.start_year - - h[ season.key ] ||= [] - h[ season.key ] << entry - end - - ## todo/fix: - add sort entries by name - why? why not? - ## note: assume 1-,2- etc. gets us back sorted leagues - ## - use sort. (will not sort by default?) - - ## sort by season - ## latest / newest first (and oldest last) - - h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not? - r[0] <=> l[0] - end - end # method match_by_season - end # class Package - - - class DirPackage < Package - def initialize( path ) super( Datafile::DirPackage.new( path ) ); end - end - - class ZipPackage < Package - def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end - end -end # module SportDb + +module SportDb + class Package + + ## todo/fix: make all regexes case-insensitive with /i option - why? why not? + ## e.g. .TXT and .txt + ## yes!! use /i option!!!!! + + CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + \.conf\.txt$ + }x + + ## leagues.txt or leagues_en.txt + ## remove support for en.leagues.txt - why? why not? + LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt + leagues + (?:_[a-z0-9_-]+)? + \.txt$ + }x + + ## seasons.txt or seasons_en.txt + ## remove support for br.seasons.txt - why? why not? + SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt + seasons + (?:_[a-z0-9_-]+)? + \.txt$ + }x + + + #### + # de.stadiums.txt or stadiums.txt or stadiums_de.txt + GROUNDS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt + stadiums + (?:_[a-z0-9_-]+)? + \.txt$ + }x + + PLAYERS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt + players + (?:_[a-z0-9_-]+)? + \.txt$ + }x + + + ## clubs.txt or clubs_en.txt + ## remove support for en.clubs.txt - why? why not? + CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt + clubs + (?:_[a-z0-9_-]+)? + \.txt$ + }x + + CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/) + (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt + clubs + (?:_[a-z0-9_-]+)? + \.wiki\.txt$ + }x + + ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not? + CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt + clubs + (?:_[a-z0-9_-]+)? + \.props\.txt$ + }x + CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?) + + + CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt + clubs + (?:_[a-z0-9_-]+)? + \.history\.txt$ + }x + + ## teams.txt or teams_history.txt + TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + teams + (?:_[a-z0-9_-]+)? + \.txt$ + }x + + + ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!! + ### season folder: + ## e.g. /2019-20 or + ## year-only e.g. /2019 or + ## /2016--france + SEASON_RE = %r{ (?: + \d{4}-\d{2} + | \d{4}(--[a-z0-9_-]+)? + ) + }x + SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not? + + + ## note: if pattern includes directory add here + ## (otherwise move to more "generic" datafile) - why? why not? + MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + #{SEASON} + /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt + }x + + MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/) + #{SEASON} + /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv + }x + + ### add "generic" pattern to find all csv datafiles + CSV_RE = %r{ (?: ^|/ ) + [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv + }x + + + ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not? + + def self.find( path, pattern ) + datafiles = [] + + ## check all txt files + ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *) + candidates = Dir.glob( "#{path}/**/{*,.*}.*" ) + pp candidates + candidates.each do |candidate| + datafiles << candidate if pattern.match( candidate ) + end + + pp datafiles + datafiles + end + + + def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end + def self.match_teams( path ) TEAMS_RE.match( path ); end + + def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end + def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end + def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end + + def self.match_clubs( path ) CLUBS_RE.match( path ); end + def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end + def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end + def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end + + def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end + def self.match_leagues( path ) LEAGUES_RE.match( path ); end + + def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end + def self.match_seasons( path ) SEASONS_RE.match( path ); end + + + def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end + def self.match_conf( path ) CONF_RE.match( path ); end + + def self.find_match( path, format: 'txt' ) + if format == 'csv' + find( path, MATCH_CSV_RE ) + else ## otherwise always assume txt for now + find( path, MATCH_RE ) + end + end + ## add match_match and match_match_csv - why? why not? + + + class << self + alias_method :match_teams?, :match_teams + alias_method :teams?, :match_teams + + alias_method :match_clubs?, :match_clubs + alias_method :clubs?, :match_clubs + + alias_method :match_clubs_wiki?, :match_clubs_wiki + alias_method :clubs_wiki?, :match_clubs_wiki + + alias_method :match_clubs_history?, :match_clubs_history + alias_method :clubs_history?, :match_clubs_history + + alias_method :match_club_props, :match_clubs_props + alias_method :match_club_props?, :match_clubs_props + alias_method :club_props?, :match_clubs_props + alias_method :match_clubs_props?, :match_clubs_props + alias_method :clubs_props?, :match_clubs_props + + alias_method :match_leagues?, :match_leagues + alias_method :leagues?, :match_leagues + + alias_method :match_seasons?, :match_seasons + alias_method :seasons?, :match_seasons + + alias_method :match_conf?, :match_conf + alias_method :conf?, :match_conf + end + + + ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not? + attr_accessor :include, :exclude + + ## private helpers - like select returns true for keeping and false for skipping entry + def filter_clause( filter, entry ) + if filter.is_a?( String ) + entry.name.index( filter ) ? true : false + elsif filter.is_a?( Regexp ) + filter.match( entry.name ) ? true : false + else ## assume + ## todo/check: pass in entry (and NOT entry.name) - why? why not? + filter.call( entry ) + end + end + + def filter( entry ) + if @include + if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword???? + true ## todo/check: check for exclude here too - why? why not? + else + false + end + else + if @exclude && filter_clause( @exclude, entry ) + false + else + true + end + end + end + + + def initialize( path_or_pack ) + @include = nil + @exclude = nil + + if path_or_pack.is_a?( Datafile::Package ) + @pack = path_or_pack + else ## assume it's a (string) path + path = path_or_pack + if !File.exist?( path ) ## file or directory + puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package" + exit 1 + end + + if File.directory?( path ) + @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package + elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip + @pack = Datafile::ZipPackage.new( path ) + else + puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required" + exit 1 + end + end + end + + + def each( pattern:, &blk ) + @pack.each( pattern: pattern ) do |entry| + next unless filter( entry ) ## lets you use include/exclude filters + blk.call( entry ) + end + end + + def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end + def each_match( format: 'txt', &blk ) + if format == 'csv' + each( pattern: MATCH_CSV_RE, &blk ); + else + each( pattern: MATCH_RE, &blk ); + end + end + def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end + def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end + + def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end + + def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end + def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end + def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end + def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end + + def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end + + + def each_grounds( &blk ) each( pattern: GROUNDS_RE, &blk ); end + def each_players( &blk ) each( pattern: PLAYERS_RE, &blk ); end + + ## return all match datafile entries + def match( format: 'txt' ) + ary=[]; each_match( format: format ) {|entry| ary << entry }; ary; + end + alias_method :matches, :match + + + ## todo/check: rename/change to match_by_dir - why? why not? + ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not? + def match_by_season_dir( format: 'txt' ) + ## + ## [["1950s/1956-57", + ## ["1950s/1956-57/1-division1.csv", + ## "1950s/1956-57/2-division2.csv", + ## "1950s/1956-57/3a-division3n.csv", + ## "1950s/1956-57/3b-division3s.csv"]], + ## ...] + + h = {} + match( format: format ).each do |entry| + season_path = File.dirname( entry.name ) + + h[ season_path ] ||= [] + h[ season_path ] << entry + end + + ## todo/fix: - add sort entries by name - why? why not? + ## note: assume 1-,2- etc. gets us back sorted leagues + ## - use sort. (will not sort by default?) + + h.to_a ## return as array (or keep hash) - why? why not? + end # method match_by_season_dir + + def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not? + + ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not? + + ## note: fold all sames seasons (even if in different directories) + ## into same datafile list e.g. + ## ["1957/58", + ## ["1950s/1957-58/1-division1.csv", + ## "1950s/1957-58/2-division2.csv", + ## "1950s/1957-58/3a-division3n.csv", + ## "1950s/1957-58/3b-division3s.csv"]], + ## and + ## ["1957/58", + ## ["archives/1950s/1957-58/1-division1.csv", + ## "archives/1950s/1957-58/2-division2.csv", + ## "archives/1950s/1957-58/3a-division3n.csv", + ## "archives/1950s/1957-58/3b-division3s.csv"]], + ## should be together - why? why not? + + #### + # Example package: + # [["2012/13", ["2012-13/1-proleague.csv"]], + # ["2013/14", ["2013-14/1-proleague.csv"]], + # ["2014/15", ["2014-15/1-proleague.csv"]], + # ["2015/16", ["2015-16/1-proleague.csv"]], + # ["2016/17", ["2016-17/1-proleague.csv"]], + # ["2017/18", ["2017-18/1-proleague.csv"]]] + + ## todo/fix: (re)use a more generic filter instead of start for start of season only + + ## todo/fix: use a "generic" filter_season helper for easy reuse + ## filter_season( clause, season_key ) + ## or better filter = SeasonFilter.new( clause ) + ## filter.skip? filter.include? ( season_sason_key )? + ## fiteer.before?( season_key ) etc. + ## find some good method names!!!! + season_start = start ? Season( start ) : nil + + h = {} + match( format: format ).each do |entry| + ## note: assume last directory in datafile path is the season part/key + season_q = File.basename( File.dirname( entry.name )) + season = Season.parse( season_q ) ## normalize season + + ## skip if start season before this season + next if season_start && season_start.start_year > season.start_year + + h[ season.key ] ||= [] + h[ season.key ] << entry + end + + ## todo/fix: - add sort entries by name - why? why not? + ## note: assume 1-,2- etc. gets us back sorted leagues + ## - use sort. (will not sort by default?) + + ## sort by season + ## latest / newest first (and oldest last) + + h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not? + r[0] <=> l[0] + end + end # method match_by_season + end # class Package + + + class DirPackage < Package + def initialize( path ) super( Datafile::DirPackage.new( path ) ); end + end + + class ZipPackage < Package + def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end + end +end # module SportDb