lib/nanoc/data_sources/filesystem.rb in nanoc-4.1.6 vs lib/nanoc/data_sources/filesystem.rb in nanoc-4.2.0b1
- old
+ new
@@ -1,10 +1,53 @@
module Nanoc::DataSources
- # Provides functionality common across all filesystem data sources.
+ # The filesystem data source stores its items and layouts in nested
+ # directories. Each item or layout is represented by one or two files; if it
+ # is represented by a single file, the metadata can be contained in that file.
#
+ # The default root directory for items is the `content` directory; for
+ # layouts, this is the `layouts` directory. This can be overridden
+ # in the data source configuration:
+ #
+ # data_sources:
+ # - type: filesystem
+ # content_dir: items
+ # layouts_dir: layouts
+ #
+ # The metadata for items and layouts can be stored in a separate file with
+ # the same base name but with the `.yaml` extension. If such a file is
+ # found, metadata is read from that file. Alternatively, the content file
+ # itself can start with a metadata section: it can be stored at the top of
+ # the file, between `---` (three dashes) separators. For example:
+ #
+ # ---
+ # title: "Moo!"
+ # ---
+ # h1. Hello!
+ #
+ # The metadata section can be omitted. If the file does not start with
+ # three or five dashes, the entire file will be considered as content.
+ #
+ # The identifier of an item or layout is the filename itself, without the
+ # root directory (as determined by the `content_dir` or `layouts_dir`
+ # configuration attribute, for items and layouts respectively). For example:
+ #
+ # foo/bar/index.html → /foo/bar/index.html
+ # foo/bar.html → /foo/bar.html
+ #
+ # Note that each item must have a unique identifier. Nanoc will display an
+ # error if two items with the same identifier are found.
+ #
+ # The file extension does not determine the filters to run on items; the
+ # Rules file is used to specify processing instructions for each item.
+ #
+ # It is possible to set an explicit encoding that should be used when reading
+ # files. In the data source configuration, set `encoding` to an encoding
+ # understood by Ruby’s `Encoding`. If no encoding is set in the configuration,
+ # one will be inferred from the environment.
+ #
# @api private
- module Filesystem
+ class Filesystem < Nanoc::DataSource
# See {Nanoc::DataSource#up}.
def up
end
# See {Nanoc::DataSource#down}.
@@ -19,20 +62,76 @@
config.fetch(:layouts_dir, 'layouts')
end
# See {Nanoc::DataSource#items}.
def items
- load_objects(content_dir_name, 'item', Nanoc::Int::Item)
+ load_objects(content_dir_name, Nanoc::Int::Item)
end
# See {Nanoc::DataSource#layouts}.
def layouts
- load_objects(layouts_dir_name, 'layout', Nanoc::Int::Layout)
+ load_objects(layouts_dir_name, Nanoc::Int::Layout)
end
protected
+ class ProtoDocument
+ attr_reader :attributes
+ attr_reader :checksum_data
+ attr_reader :is_binary
+ alias binary? is_binary
+
+ def initialize(is_binary:, content: nil, filename: nil, attributes:, checksum_data: nil)
+ if content.nil? && filename.nil?
+ raise ArgumentError, '#initialize needs at least content or filename'
+ end
+
+ @is_binary = is_binary
+ @content = content
+ @filename = filename
+ @attributes = attributes
+ @checksum_data = checksum_data
+ end
+
+ def content
+ if binary?
+ raise ArgumentError, 'cannot fetch content of binary item'
+ else
+ @content
+ end
+ end
+
+ def filename
+ if binary?
+ @filename
+ else
+ raise ArgumentError, 'cannot fetch filename of non-binary item'
+ end
+ end
+ end
+
+ def read_proto_document(content_filename, meta_filename, klass)
+ is_binary = content_filename && !@site_config[:text_extensions].include?(File.extname(content_filename)[1..-1])
+
+ if is_binary && klass == Nanoc::Int::Item
+ meta = (meta_filename && YAML.load_file(meta_filename)) || {}
+
+ ProtoDocument.new(is_binary: true, filename: content_filename, attributes: meta)
+ elsif is_binary && klass == Nanoc::Int::Layout
+ raise "The layout file '#{content_filename}' is a binary file, but layouts can only be textual"
+ else
+ parse_result = parse(content_filename, meta_filename)
+
+ ProtoDocument.new(
+ is_binary: false,
+ content: parse_result.content,
+ attributes: parse_result.attributes,
+ checksum_data: "content=#{parse_result.content},meta=#{parse_result.attributes_data}",
+ )
+ end
+ end
+
# Creates instances of klass corresponding to the files in dir_name. The
# kind attribute indicates the kind of object that is being loaded and is
# used solely for debugging purposes.
#
# This particular implementation loads objects from a filesystem-based
@@ -40,80 +139,79 @@
# files. The content and meta-file are optional (but at least one of them
# needs to be present, obviously) and the content file can start with a
# metadata section.
#
# @see Nanoc::DataSources::Filesystem#load_objects
- def load_objects(dir_name, kind, klass)
+ def load_objects(dir_name, klass)
res = []
return [] if dir_name.nil?
all_split_files_in(dir_name).each do |base_filename, (meta_ext, content_exts)|
content_exts.each do |content_ext|
- # Get filenames
meta_filename = filename_for(base_filename, meta_ext)
content_filename = filename_for(base_filename, content_ext)
- # Read content and metadata
- is_binary = content_filename && !@site_config[:text_extensions].include?(File.extname(content_filename)[1..-1])
- if is_binary && klass == Nanoc::Int::Item
- meta = (meta_filename && YAML.load_file(meta_filename)) || {}
- content_or_filename = content_filename
- elsif is_binary && klass == Nanoc::Int::Layout
- raise "The layout file '#{content_filename}' is a binary file, but layouts can only be textual"
- else
- meta, content_or_filename = parse(content_filename, meta_filename, kind)
- end
+ proto_doc = read_proto_document(content_filename, meta_filename, klass)
- # Get attributes
- attributes = {
- filename: content_filename,
- content_filename: content_filename,
- meta_filename: meta_filename,
- extension: content_filename ? ext_of(content_filename)[1..-1] : nil,
- }.merge(meta)
+ content = content_for(proto_doc, content_filename)
+ attributes = attributes_for(proto_doc, content_filename, meta_filename)
+ identifier = identifier_for(content_filename, meta_filename, dir_name)
- # Get identifier
- if content_filename
- identifier = identifier_for_filename(content_filename[dir_name.length..-1])
- elsif meta_filename
- identifier = identifier_for_filename(meta_filename[dir_name.length..-1])
- else
- raise 'meta_filename and content_filename are both nil'
- end
+ res << klass.new(content, attributes, identifier, checksum_data: proto_doc.checksum_data)
+ end
+ end
- # Get modification times
- meta_mtime = meta_filename ? File.stat(meta_filename).mtime : nil
- content_mtime = content_filename ? File.stat(content_filename).mtime : nil
- if meta_mtime && content_mtime
- mtime = meta_mtime > content_mtime ? meta_mtime : content_mtime
- elsif meta_mtime
- mtime = meta_mtime
- elsif content_mtime
- mtime = content_mtime
- else
- raise 'meta_mtime and content_mtime are both nil'
- end
- attributes[:mtime] = mtime
+ res
+ end
- # Create content
- full_content_filename = content_filename && File.expand_path(content_filename)
- content =
- if is_binary
- Nanoc::Int::BinaryContent.new(full_content_filename)
- else
- Nanoc::Int::TextualContent.new(content_or_filename, filename: full_content_filename)
- end
+ def attributes_for(proto_doc, content_filename, meta_filename)
+ extra_attributes = {
+ filename: content_filename,
+ content_filename: content_filename,
+ meta_filename: meta_filename,
+ extension: content_filename ? ext_of(content_filename)[1..-1] : nil,
+ mtime: mtime_of(content_filename, meta_filename),
+ }
- # Create object
- res << klass.new(content, attributes, identifier)
- end
+ extra_attributes.merge(proto_doc.attributes)
+ end
+
+ def identifier_for(content_filename, meta_filename, dir_name)
+ if content_filename
+ identifier_for_filename(content_filename[dir_name.length..-1])
+ elsif meta_filename
+ identifier_for_filename(meta_filename[dir_name.length..-1])
+ else
+ raise 'meta_filename and content_filename are both nil'
end
+ end
- res
+ def content_for(proto_doc, content_filename)
+ full_content_filename = content_filename && File.expand_path(content_filename)
+
+ if proto_doc.binary?
+ Nanoc::Int::BinaryContent.new(full_content_filename)
+ else
+ Nanoc::Int::TextualContent.new(proto_doc.content, filename: full_content_filename)
+ end
end
+ def mtime_of(content_filename, meta_filename)
+ meta_mtime = meta_filename ? File.stat(meta_filename).mtime : nil
+ content_mtime = content_filename ? File.stat(content_filename).mtime : nil
+ if meta_mtime && content_mtime
+ meta_mtime > content_mtime ? meta_mtime : content_mtime
+ elsif meta_mtime
+ meta_mtime
+ elsif content_mtime
+ content_mtime
+ else
+ raise 'meta_mtime and content_mtime are both nil'
+ end
+ end
+
# e.g.
#
# {
# 'content/foo' => [ 'yaml', ['html', 'md'] ],
# 'content/bar' => [ 'yaml', [nil] ],
@@ -165,22 +263,34 @@
# period and an extension (which is what the
# {Nanoc::DataSources::FilesystemCompact} data source does), but other
# data sources may prefer to implement this differently (for example,
# {Nanoc::DataSources::FilesystemVerbose} doubles the last part of the
# basename before concatenating it with a period and the extension).
- def filename_for(_base_filename, _ext)
- raise NotImplementedError.new(
- "#{self.class} does not implement #filename_for",
- )
+ def filename_for(base_filename, ext)
+ if ext.nil?
+ nil
+ elsif ext.empty?
+ base_filename
+ else
+ base_filename + '.' + ext
+ end
end
# Returns the identifier that corresponds with the given filename, which
# can be the content filename or the meta filename.
- def identifier_for_filename(_filename)
- raise NotImplementedError.new(
- "#{self.class} does not implement #identifier_for_filename",
- )
+ def identifier_for_filename(filename)
+ if config[:identifier_type] == 'full'
+ return Nanoc::Identifier.new(filename)
+ end
+
+ regex =
+ if filename =~ /(^|\/)index(\.[^\/]+)?$/
+ @config && @config[:allow_periods_in_identifiers] ? /\/?(index)?(\.[^\/\.]+)?$/ : /\/?index(\.[^\/]+)?$/
+ else
+ @config && @config[:allow_periods_in_identifiers] ? /\.[^\/\.]+$/ : /\.[^\/]+$/
+ end
+ Nanoc::Identifier.new(filename.sub(regex, ''), type: :legacy)
end
# Returns the base name of filename, i.e. filename with the first or all
# extensions stripped off. By default, all extensions are stripped off,
# but when allow_periods_in_identifiers is set to true in the site
@@ -196,61 +306,80 @@
end
# Returns a regex that is used for determining the extension of a file
# name. The first match group will be the entire extension, including the
# leading period.
+ #
+ # @return [Regexp]
def extension_regex
if @config && @config[:allow_periods_in_identifiers]
/(\.[^\/\.]+$)/
else
/(\.[^\/]+$)/
end
end
- # Parses the file named `filename` and returns an array with its first
- # element a hash with the file's metadata, and with its second element the
- # file content itself.
- def parse(content_filename, meta_filename, _kind)
- # Read content and metadata from separate files
+ # @return [ParseResult]
+ def parse(content_filename, meta_filename)
if meta_filename
- content = content_filename ? read(content_filename) : ''
- meta_raw = read(meta_filename)
- begin
- meta = YAML.load(meta_raw) || {}
- rescue Exception => e
- raise "Could not parse YAML for #{meta_filename}: #{e.message}"
- end
- verify_meta(meta, meta_filename)
- return [meta, content]
+ parse_with_separate_meta_filename(content_filename, meta_filename)
+ else
+ parse_with_frontmatter(content_filename)
end
+ end
- # Read data
+ # @return [ParseResult]
+ def parse_with_separate_meta_filename(content_filename, meta_filename)
+ content = content_filename ? read(content_filename) : ''
+ meta_raw = read(meta_filename)
+ meta = parse_metadata(meta_raw, meta_filename)
+ ParseResult.new(content: content, attributes: meta, attributes_data: meta_raw)
+ end
+
+ # @return [ParseResult]
+ def parse_with_frontmatter(content_filename)
data = read(content_filename)
- # Check presence of metadata section
if data !~ /\A-{3,5}\s*$/
- return [{}, data]
+ return ParseResult.new(content: data, attributes: {}, attributes_data: '')
end
- # Split data
pieces = data.split(/^(-{5}|-{3})[ \t]*\r?\n?/, 3)
if pieces.size < 4
raise RuntimeError.new(
"The file '#{content_filename}' appears to start with a metadata section (three or five dashes at the top) but it does not seem to be in the correct format.",
)
end
- # Parse
+ meta = parse_metadata(pieces[2], content_filename)
+ content = pieces[4]
+
+ ParseResult.new(content: content, attributes: meta, attributes_data: pieces[2])
+ end
+
+ # @return [Hash]
+ def parse_metadata(data, filename)
begin
- meta = YAML.load(pieces[2]) || {}
+ meta = YAML.load(data) || {}
rescue Exception => e
- raise "Could not parse YAML for #{content_filename}: #{e.message}"
+ raise "Could not parse YAML for #{filename}: #{e.message}"
end
- verify_meta(meta, content_filename)
- content = pieces[4]
- # Done
- [meta, content]
+ verify_meta(meta, filename)
+
+ meta
+ end
+
+ class ParseResult
+ attr_reader :content
+ attr_reader :attributes
+ attr_reader :attributes_data
+
+ def initialize(content:, attributes:, attributes_data:)
+ @content = content
+ @attributes = attributes
+ @attributes_data = attributes_data
+ end
end
class InvalidMetadataError < Nanoc::Error
def initialize(filename, klass)
super("The file #{filename} has invalid metadata (expected key-value pairs, found #{klass} instead)")