lib/nanoc/data_sources/filesystem.rb in nanoc-4.1.6 vs lib/nanoc/data_sources/filesystem.rb in nanoc-4.2.0b1

- old
+ new

@@ -1,10 +1,53 @@ module Nanoc::DataSources - # Provides functionality common across all filesystem data sources. + # The filesystem data source stores its items and layouts in nested + # directories. Items and layouts are represented by one or two files; if it + # is represented using one file, the metadata can be contained in this file. # + # The default root directory for items is the `content` directory; for + # layouts, this is the `layouts` directory. This can be overridden + # in the data source configuration: + # + # data_sources: + # - type: filesystem + # content_dir: items + # layouts_dir: layouts + # + # The metadata for items and layouts can be stored in a separate file with + # the same base name but with the `.yaml` extension. If such a file is + # found, metadata is read from that file. Alternatively, the content file + # itself can start with a metadata section: it can be stored at the top of + # the file, between `---` (three dashes) separators. For example: + # + # --- + # title: "Moo!" + # --- + # h1. Hello! + # + # The metadata section can be omitted. If the file does not start with + # three or five dashes, the entire file will be considered as content. + # + # The identifier of items and layouts is the filename itself, without the + # root directory (as determined by the `content_dir` or `layouts_dir` + # configuration attribute, for items resp. layouts). For example: + # + # foo/bar/index.html → /foo/bar/index.html + # foo/bar.html → /foo/bar.html + # + # Note that each item must have a unique identifier. Nanoc will display an + # error if two items with the same identifier are found. + # + # The file extension does not determine the filters to run on items; the + # Rules file is used to specify processing instructions for each item. + # + # It is possible to set an explicit encoding that should be used when reading + # files. In the data source configuration, set `encoding` to an encoding + # understood by Ruby’s `Encoding`. 
If no encoding is set in the configuration, + # one will be inferred from the environment. + # # @api private - module Filesystem + class Filesystem < Nanoc::DataSource # See {Nanoc::DataSource#up}. def up end # See {Nanoc::DataSource#down}. @@ -19,20 +62,76 @@ config.fetch(:layouts_dir, 'layouts') end # See {Nanoc::DataSource#items}. def items - load_objects(content_dir_name, 'item', Nanoc::Int::Item) + load_objects(content_dir_name, Nanoc::Int::Item) end # See {Nanoc::DataSource#layouts}. def layouts - load_objects(layouts_dir_name, 'layout', Nanoc::Int::Layout) + load_objects(layouts_dir_name, Nanoc::Int::Layout) end protected + class ProtoDocument + attr_reader :attributes + attr_reader :checksum_data + attr_reader :is_binary + alias binary? is_binary + + def initialize(is_binary:, content: nil, filename: nil, attributes:, checksum_data: nil) + if content.nil? && filename.nil? + raise ArgumentError, '#initialize needs at least content or filename' + end + + @is_binary = is_binary + @content = content + @filename = filename + @attributes = attributes + @checksum_data = checksum_data + end + + def content + if binary? + raise ArgumentError, 'cannot fetch content of binary item' + else + @content + end + end + + def filename + if binary? 
+ @filename + else + raise ArgumentError, 'cannot fetch filename of non-binary item' + end + end + end + + def read_proto_document(content_filename, meta_filename, klass) + is_binary = content_filename && !@site_config[:text_extensions].include?(File.extname(content_filename)[1..-1]) + + if is_binary && klass == Nanoc::Int::Item + meta = (meta_filename && YAML.load_file(meta_filename)) || {} + + ProtoDocument.new(is_binary: true, filename: content_filename, attributes: meta) + elsif is_binary && klass == Nanoc::Int::Layout + raise "The layout file '#{content_filename}' is a binary file, but layouts can only be textual" + else + parse_result = parse(content_filename, meta_filename) + + ProtoDocument.new( + is_binary: false, + content: parse_result.content, + attributes: parse_result.attributes, + checksum_data: "content=#{parse_result.content},meta=#{parse_result.attributes_data}", + ) + end + end + # Creates instances of klass corresponding to the files in dir_name. The # kind attribute indicates the kind of object that is being loaded and is # used solely for debugging purposes. # # This particular implementation loads objects from a filesystem-based @@ -40,80 +139,79 @@ # files. The content and meta-file are optional (but at least one of them # needs to be present, obviously) and the content file can start with a # metadata section. # # @see Nanoc::DataSources::Filesystem#load_objects - def load_objects(dir_name, kind, klass) + def load_objects(dir_name, klass) res = [] return [] if dir_name.nil? 
all_split_files_in(dir_name).each do |base_filename, (meta_ext, content_exts)| content_exts.each do |content_ext| - # Get filenames meta_filename = filename_for(base_filename, meta_ext) content_filename = filename_for(base_filename, content_ext) - # Read content and metadata - is_binary = content_filename && !@site_config[:text_extensions].include?(File.extname(content_filename)[1..-1]) - if is_binary && klass == Nanoc::Int::Item - meta = (meta_filename && YAML.load_file(meta_filename)) || {} - content_or_filename = content_filename - elsif is_binary && klass == Nanoc::Int::Layout - raise "The layout file '#{content_filename}' is a binary file, but layouts can only be textual" - else - meta, content_or_filename = parse(content_filename, meta_filename, kind) - end + proto_doc = read_proto_document(content_filename, meta_filename, klass) - # Get attributes - attributes = { - filename: content_filename, - content_filename: content_filename, - meta_filename: meta_filename, - extension: content_filename ? ext_of(content_filename)[1..-1] : nil, - }.merge(meta) + content = content_for(proto_doc, content_filename) + attributes = attributes_for(proto_doc, content_filename, meta_filename) + identifier = identifier_for(content_filename, meta_filename, dir_name) - # Get identifier - if content_filename - identifier = identifier_for_filename(content_filename[dir_name.length..-1]) - elsif meta_filename - identifier = identifier_for_filename(meta_filename[dir_name.length..-1]) - else - raise 'meta_filename and content_filename are both nil' - end + res << klass.new(content, attributes, identifier, checksum_data: proto_doc.checksum_data) + end + end - # Get modification times - meta_mtime = meta_filename ? File.stat(meta_filename).mtime : nil - content_mtime = content_filename ? File.stat(content_filename).mtime : nil - if meta_mtime && content_mtime - mtime = meta_mtime > content_mtime ? 
meta_mtime : content_mtime - elsif meta_mtime - mtime = meta_mtime - elsif content_mtime - mtime = content_mtime - else - raise 'meta_mtime and content_mtime are both nil' - end - attributes[:mtime] = mtime + res + end - # Create content - full_content_filename = content_filename && File.expand_path(content_filename) - content = - if is_binary - Nanoc::Int::BinaryContent.new(full_content_filename) - else - Nanoc::Int::TextualContent.new(content_or_filename, filename: full_content_filename) - end + def attributes_for(proto_doc, content_filename, meta_filename) + extra_attributes = { + filename: content_filename, + content_filename: content_filename, + meta_filename: meta_filename, + extension: content_filename ? ext_of(content_filename)[1..-1] : nil, + mtime: mtime_of(content_filename, meta_filename), + } - # Create object - res << klass.new(content, attributes, identifier) - end + extra_attributes.merge(proto_doc.attributes) + end + + def identifier_for(content_filename, meta_filename, dir_name) + if content_filename + identifier_for_filename(content_filename[dir_name.length..-1]) + elsif meta_filename + identifier_for_filename(meta_filename[dir_name.length..-1]) + else + raise 'meta_filename and content_filename are both nil' end + end - res + def content_for(proto_doc, content_filename) + full_content_filename = content_filename && File.expand_path(content_filename) + + if proto_doc.binary? + Nanoc::Int::BinaryContent.new(full_content_filename) + else + Nanoc::Int::TextualContent.new(proto_doc.content, filename: full_content_filename) + end end + def mtime_of(content_filename, meta_filename) + meta_mtime = meta_filename ? File.stat(meta_filename).mtime : nil + content_mtime = content_filename ? File.stat(content_filename).mtime : nil + if meta_mtime && content_mtime + meta_mtime > content_mtime ? 
meta_mtime : content_mtime + elsif meta_mtime + meta_mtime + elsif content_mtime + content_mtime + else + raise 'meta_mtime and content_mtime are both nil' + end + end + # e.g. # # { # 'content/foo' => [ 'yaml', ['html', 'md'] ], # 'content/bar' => [ 'yaml', [nil] ], @@ -165,22 +263,34 @@ # period and an extension (which is what the # {Nanoc::DataSources::FilesystemCompact} data source does), but other # data sources may prefer to implement this differently (for example, # {Nanoc::DataSources::FilesystemVerbose} doubles the last part of the # basename before concatenating it with a period and the extension). - def filename_for(_base_filename, _ext) - raise NotImplementedError.new( - "#{self.class} does not implement #filename_for", - ) + def filename_for(base_filename, ext) + if ext.nil? + nil + elsif ext.empty? + base_filename + else + base_filename + '.' + ext + end end # Returns the identifier that corresponds with the given filename, which # can be the content filename or the meta filename. - def identifier_for_filename(_filename) - raise NotImplementedError.new( - "#{self.class} does not implement #identifier_for_filename", - ) + def identifier_for_filename(filename) + if config[:identifier_type] == 'full' + return Nanoc::Identifier.new(filename) + end + + regex = + if filename =~ /(^|\/)index(\.[^\/]+)?$/ + @config && @config[:allow_periods_in_identifiers] ? /\/?(index)?(\.[^\/\.]+)?$/ : /\/?index(\.[^\/]+)?$/ + else + @config && @config[:allow_periods_in_identifiers] ? /\.[^\/\.]+$/ : /\.[^\/]+$/ + end + Nanoc::Identifier.new(filename.sub(regex, ''), type: :legacy) end # Returns the base name of filename, i.e. filename with the first or all # extensions stripped off. By default, all extensions are stripped off, # but when allow_periods_in_identifiers is set to true in the site @@ -196,61 +306,80 @@ end # Returns a regex that is used for determining the extension of a file # name. 
The first match group will be the entire extension, including the # leading period. + # + # @return [Regex] def extension_regex if @config && @config[:allow_periods_in_identifiers] /(\.[^\/\.]+$)/ else /(\.[^\/]+$)/ end end - # Parses the file named `filename` and returns an array with its first - # element a hash with the file's metadata, and with its second element the - # file content itself. - def parse(content_filename, meta_filename, _kind) - # Read content and metadata from separate files + # @return [ParseResult] + def parse(content_filename, meta_filename) if meta_filename - content = content_filename ? read(content_filename) : '' - meta_raw = read(meta_filename) - begin - meta = YAML.load(meta_raw) || {} - rescue Exception => e - raise "Could not parse YAML for #{meta_filename}: #{e.message}" - end - verify_meta(meta, meta_filename) - return [meta, content] + parse_with_separate_meta_filename(content_filename, meta_filename) + else + parse_with_frontmatter(content_filename) end + end - # Read data + # @return [ParseResult] + def parse_with_separate_meta_filename(content_filename, meta_filename) + content = content_filename ? 
read(content_filename) : '' + meta_raw = read(meta_filename) + meta = parse_metadata(meta_raw, meta_filename) + ParseResult.new(content: content, attributes: meta, attributes_data: meta_raw) + end + + # @return [ParseResult] + def parse_with_frontmatter(content_filename) data = read(content_filename) - # Check presence of metadata section if data !~ /\A-{3,5}\s*$/ - return [{}, data] + return ParseResult.new(content: data, attributes: {}, attributes_data: '') end - # Split data pieces = data.split(/^(-{5}|-{3})[ \t]*\r?\n?/, 3) if pieces.size < 4 raise RuntimeError.new( "The file '#{content_filename}' appears to start with a metadata section (three or five dashes at the top) but it does not seem to be in the correct format.", ) end - # Parse + meta = parse_metadata(pieces[2], content_filename) + content = pieces[4] + + ParseResult.new(content: content, attributes: meta, attributes_data: pieces[2]) + end + + # @return [Hash] + def parse_metadata(data, filename) begin - meta = YAML.load(pieces[2]) || {} + meta = YAML.load(data) || {} rescue Exception => e - raise "Could not parse YAML for #{content_filename}: #{e.message}" + raise "Could not parse YAML for #{filename}: #{e.message}" end - verify_meta(meta, content_filename) - content = pieces[4] - # Done - [meta, content] + verify_meta(meta, filename) + + meta + end + + class ParseResult + attr_reader :content + attr_reader :attributes + attr_reader :attributes_data + + def initialize(content:, attributes:, attributes_data:) + @content = content + @attributes = attributes + @attributes_data = attributes_data + end end class InvalidMetadataError < Nanoc::Error def initialize(filename, klass) super("The file #{filename} has invalid metadata (expected key-value pairs, found #{klass} instead)")