# -*- encoding: utf-8 -*-
require 'pathname'
require 'webgen/languages'
module Webgen
# == General Information
# A Path object provides information about a path that is used to create one or more nodes as well
# as methods for accessing the path's content. So a Path object always refers to a source path. In
# contrast, output paths are always strings and just specify the location where a specific node
# should be written to.
# Note the +path+ and +source_path+ attributes of a Path object:
# * The +source_path+ specifies a path string that was directly created by a Source object. Each
# Path object must have such a valid source path so that webgen can infer the Path that led to
# the creation of a Node object later.
# * In contrast, the +path+ attribute specifies the path that is used to create the canonical name
# (and by default the output path) of a Node object. Normally it is the same as the
# +source_path+ but can differ (e.g. when fragment nodes are created for page file nodes).
# A Path object can represent one of three different things: a directory, a file or a fragment. If
# the +path+ ends with a slash character, then the path object represents a directory, if the path
# contains a hash character anywhere, then the path object represents a fragment and else it
# represents a file. Have a look at the webgen manual to see the exact format of a path!
# == Relation to Source classes
# A webgen source class needs to derive a specialized path class from this class and implement an
# appropriate #changed? method that returns +true+ if the path's content has changed since the last
# webgen run.
class Path
# Helper class for easy access to the content of a path.
# This class is used so that the creation of the real IO object for #stream can be delayed until
# it is actually needed. This is done by not directly requiring the user of this class to supply
# the IO object, but by requiring a block that creates the real IO object.
class SourceIO
# Create a new SourceIO object. A block has to be specified that returns the to-be-wrapped IO
# object; the block is stored in @block and is called later with the requested open mode.
# Raises ArgumentError if no block is given.
def initialize(&block)
@block = block
raise ArgumentError, 'You need to provide a block which returns an IO object' if @block.nil?
# Provide direct access to the wrapped IO object by yielding it. After the method block
# returns the IO object is automatically closed.
# The parameter +mode+ specifies the mode in which the wrapped IO object should be opened.
# This can be used, for example, to open a file in binary mode (or specify a certain input
# encoding under Ruby 1.9).
# NOTE(review): only the creation of the IO object is visible in this excerpt; the yield and
# close steps described above are not shown here -- confirm against the complete file.
def stream(mode = 'r')
# The IO object is created on demand by calling the stored block with +mode+.
io = @block.call(mode)
# Return the whole content of the wrapped IO object as string. For a description of the
# parameter +mode+ see #stream.
def data(mode = 'r')
# Delegates to #stream and reads everything from the IO object handed to the block.
stream(mode) {|io| io.read}
# Make the given +path+ absolute by prepending the absolute directory path +base+ if necessary.
# Also resolves all '..' and '.' references in +path+.
#
# A +path+ that already starts with a slash is only cleaned up; +base+ is not prepended to it.
# Raises ArgumentError if +base+ does not start with a slash.
def self.make_absolute(base, path)
  # The check is anchored at the start of the string: a slash somewhere inside +base+
  # (e.g. 'rel/dir') does not make it an absolute path.
  raise(ArgumentError, 'base has to be an absolute path, ie. needs to start with a slash') unless base =~ /\A\//
  Pathname.new(path =~ /^\// ? path : File.join(base, path)).cleanpath.to_s
end
# Return +true+ if the given +path+ matches the given +pattern+ (trailing slashes of directories
# are not respected). For information on which patterns are supported, have a look at the
# documentation of File.fnmatch.
# Both arguments are converted with #to_s. The match is performed with the File.fnmatch flags
# FNM_DOTMATCH, FNM_CASEFOLD and FNM_PATHNAME.
def self.match(path, pattern)
# Drop one trailing slash from each argument, but leave the root path '/' untouched.
path = path.to_s.chomp('/') unless path == '/'
pattern = pattern.to_s.chomp('/') unless pattern == '/'
File.fnmatch(pattern, path, File::FNM_DOTMATCH|File::FNM_CASEFOLD|File::FNM_PATHNAME)
# Comparable supplies the comparison operators on top of #<=>.
include Comparable
# The full path for which this Path object was created.
attr_reader :path
# A string specifying the path that led to the creation of this path.
attr_reader :source_path
# The string specifying the parent path.
attr_reader :parent_path
# The canonical name of the path without the extension.
attr_accessor :basename
# The extension of the +path+.
attr_accessor :ext
# Extracted meta information for the path.
attr_accessor :meta_info
# Specifies whether this path should be used during the "tree update" phase of a webgen run or
# only later during node resolution. The value is read through #passive?.
attr_writer :passive
# Tells whether this path is passive, i.e. only used later during node resolution. The value
# defaults to +false+, which means the path is used during the "tree update" phase.
def passive?
  @passive
end
# Create a new Path object for +path+. The optional +source_path+ parameter specifies the path
# string that led to the creation of this path and defaults to +path+. The optional block needs
# to return an IO object for getting the content of the path; it is wrapped in a SourceIO object.
# The +path+ needs to be in a well defined format which can be looked up in the webgen manual.
# NOTE(review): no statement that stores or analyses +path+ is visible in this excerpt --
# presumably #analyse is invoked here; confirm against the complete file.
def initialize(path, source_path = path, &ioblock)
@meta_info = {}
# Without a block there is no content source, so @io stays nil.
@io = block_given? ? SourceIO.new(&ioblock) : nil
@source_path = source_path
@passive = false
# Mount this path at the mount point +mp+, optionally stripping +prefix+ from the parent path,
# and return the new path object.
# The parameters +mp+ and +prefix+ have to be absolute directory paths, ie. they have to start
# and end with a slash and must not contain any hash characters!
# Raises ArgumentError if +mp+ or a non-nil +prefix+ violates this format.
# Can't use self.class.new(...) here because the semantics of the sub constructors is not known,
# so the work is done on a duplicate whose instance variables are adjusted directly.
# NOTE(review): no +else+ keyword and no return statement are visible in this excerpt --
# confirm the branch structure and the returned value against the complete file.
def mount_at(mp, prefix = nil)
# The pattern matches a string that does not start with a slash, contains a hash character
# or does not end with a slash.
raise(ArgumentError, "The mount point (#{mp}) must be a valid directory path") if mp =~ /^[^\/]|#|[^\/]$/
raise(ArgumentError, "The strip prefix (#{prefix}) must be a valid directory path") if !prefix.nil? && prefix =~ /^[^\/]|#|[^\/]$/
temp = dup
# A nil prefix yields an empty pattern, so nothing is stripped in that case.
strip_re = /^#{Regexp.escape(prefix.to_s)}/
temp.instance_variable_set(:@path, temp.path.sub(strip_re, ''))
# A full re-analysis is requested when this path is the root path or when stripping the
# prefix left an empty path.
reanalyse = (@path == '/' || temp.path == '')
temp.instance_variable_set(:@path, File.join(mp, temp.path))
# Keep the source path in sync with the path only if both were identical before mounting.
temp.instance_variable_set(:@source_path, temp.path) if @path == @source_path
if reanalyse
temp.send(:analyse, temp.path)
temp.instance_variable_set(:@parent_path, File.join(mp, temp.parent_path.sub(strip_re, '')))
# Duplicate the path object. The copy receives its own duplicate of the meta information hash
# (created with Hash#dup), so adding or replacing entries in one hash does not change the other.
# NOTE(review): the statement returning +temp+ is not visible in this excerpt -- confirm
# against the complete file.
def dup
temp = super
temp.instance_variable_set(:@meta_info, @meta_info.dup)
# Has the content of this path changed since the last webgen run? This default implementation
# always returns +true+, a specialized sub class needs to override this behaviour!
# NOTE(review): the method body is not visible in this excerpt, so the stated return value is
# taken from the existing documentation only -- confirm against the complete file.
def changed?
# The SourceIO object associated with the path.
# A RuntimeError with the message "No IO object defined for the path ..." is raised by this
# method.
# NOTE(review): the branch returning @io and the +else+ keyword are not visible in this
# excerpt; as shown, the raise directly follows +if @io+ -- presumably it belongs to the
# else branch; confirm against the complete file.
def io
if @io
raise "No IO object defined for the path #{self}"
# The canonical name created from the +path+ (namely from the parts +basename+ and +ext+).
# The result is the basename, followed by a dot and the extension if the extension is not
# empty, followed by a slash if the path ends with a slash that is preceded by at least one
# character and the basename is not '/'.
def cn
@basename + (@ext.length > 0 ? '.' + @ext : '') + (@basename != '/' && @path =~ /.\/$/ ? '/' : '')
# Utility method for creating the lcn from the +cn+ and the language +lang+.
# The language is inserted as a separate dot-delimited part after the first part of +cn+, or
# after the second part if +cn+ starts with a dot (splitting such a name yields an empty
# first element).
# NOTE(review): the result for a nil +lang+ and the +else+ keyword are not visible in this
# excerpt -- confirm against the complete file.
def self.lcn(cn, lang)
if lang.nil?
cn.split('.').insert((cn =~ /^\./ ? 2 : 1), lang.to_s).join('.')
# The localized canonical name created from the +path+. It is built by the class method +lcn+
# from #cn and the 'lang' entry of the meta information.
def lcn
self.class.lcn(cn, @meta_info['lang'])
# The absolute canonical name of this path.
# For a path containing a hash character, a new path object is created from the parent path
# and its absolute canonical name is prepended to #cn; the other visible expression prepends
# the parent path string directly.
# NOTE(review): the +else+ keyword separating the two expressions is not visible in this
# excerpt -- confirm against the complete file.
def acn
if @path =~ /#/
self.class.new(@parent_path).acn + cn
@parent_path + cn
# The absolute localized canonical name of this path.
# For a path containing a hash character, a new path object is created from the parent path
# and its absolute localized canonical name is prepended to #lcn; the other visible expression
# prepends the parent path string directly.
# NOTE(review): the +else+ keyword separating the two expressions is not visible in this
# excerpt -- confirm against the complete file.
def alcn
if @path =~ /#/
self.class.new(@parent_path).alcn + lcn
@parent_path + lcn
# Equality -- Return +true+ if +other+ is a Path object with the same #path or if +other+ is a
# String equal to the #path. Else return +false+.
# The method is also available as #eql?.
# NOTE(review): the branch for all other kinds of objects is not visible in this excerpt --
# confirm against the complete file.
def ==(other)
if other.kind_of?(Path)
other.path == @path
elsif other.kind_of?(String)
other == @path
alias_method(:eql?, :==)
# Compare the #path of this object to other.path. The argument therefore has to respond to
# +path+; the result is whatever <=> returns for the two path values.
def <=>(other)
@path <=> other.path
# NOTE(review): the body of this method is not visible in this excerpt -- confirm against the
# complete file.
def hash #:nodoc:
# NOTE(review): the body of this method is not visible in this excerpt -- confirm against the
# complete file. The method is also available as #to_str.
def to_s #:nodoc:
alias_method :to_str, :to_s
# NOTE(review): the body of this method is not visible in this excerpt -- confirm against the
# complete file.
def inspect #:nodoc:
# Analyse the +path+ and fill the object with the extracted information.
# The kind of path is decided in this order: a path containing a hash character is checked
# first, then a path ending with a slash, then everything else.
# Raises a RuntimeError if the resulting basename is empty or '#', or if the path does not
# start with a slash.
# NOTE(review): the bodies of the three branches are not visible in this excerpt -- presumably
# they call #analyse_fragment, #analyse_directory and #analyse_file; confirm against the
# complete file.
def analyse(path)
@path = path
if @path =~ /#/
elsif @path =~ /\/$/
# The title is the basename with underscores and hyphens replaced by spaces, capitalized.
@meta_info['title'] = @basename.tr('_-', ' ').capitalize
# The extension defaults to an empty string if it has not been set.
@ext ||= ''
raise "The basename of a path may not be empty: #{@path}" if @basename.empty? || @basename == '#'
# Note that this check is performed on @path itself, not on @parent_path.
raise "The parent path must start with a slash: #{@path}" if @path !~ /^\// && @path != '/'
# Analyse the path assuming it is a directory.
# Sets @parent_path and @basename. The root path '/' gets an empty parent path; every other
# path gets its directory name with a trailing slash.
def analyse_directory
@parent_path = (@path == '/' ? '' : File.join(File.dirname(@path), '/'))
@basename = File.basename(@path)
# Pattern that splits a file name into its parts. The capture groups are:
# 1. an optional numeric prefix that is followed by a dot (used as sort information),
# 2. the basename (may start with a dot, contains no further dots),
# 3. an optional part of two or three word characters that is followed by another dot (the
#    candidate for a language code),
# 4. the optional rest after the next dot.
FILENAME_RE = /^(?:(\d+)\.)?(\.?[^.]*?)(?:\.(\w\w\w?)(?=\.))?(?:\.(.*))?$/

# Analyse the path assuming it is a file.
# Sets @parent_path, @basename and @ext and stores the entries 'sort_info' and 'lang' in the
# meta information.
def analyse_file
  @parent_path = File.join(File.dirname(@path), '/')
  match_data = FILENAME_RE.match(File.basename(@path))
  @meta_info['sort_info'] = (match_data[1].nil? ? nil : match_data[1].to_i)
  @basename = match_data[2]
  @meta_info['lang'] = Webgen::LanguageManager.language_for_code(match_data[3])
  # A language candidate for which no language was found is part of the extension and has to
  # stay separated from the rest by a dot (e.g. 'foo.xyz.html' has the extension 'xyz.html').
  ext_prefix = (@meta_info['lang'].nil? && !match_data[3].nil? ? match_data[3].to_s + '.' : '')
  @ext = ext_prefix + match_data[4].to_s
end
# Analyse the path assuming it is a fragment.
# Sets @parent_path to everything before the first hash character and @basename to the rest
# (including the hash character).
# Raises a RuntimeError if the parent path ends with a slash or if the path contains more than
# one hash character.
def analyse_fragment
@parent_path, @basename = @path.scan(/^(.*?)(#.*?)$/).first
raise "The parent path of a fragment path must be a file path and not a directory path: #{@path}" if @parent_path =~ /\/$/
raise "A fragment path must only contain one hash character: #{path}" if @path.count("#") > 1