module Nanoc::DataSources # The filesystem data source stores its items and layouts in nested # directories. Items and layouts are represented by one or two files; if it # is represented using one file, the metadata can be contained in this file. # # The default root directory for items is the `content` directory; for # layouts, this is the `layouts` directory. This can be overridden # in the data source configuration: # # data_sources: # - type: filesystem # content_dir: items # layouts_dir: layouts # # The metadata for items and layouts can be stored in a separate file with # the same base name but with the `.yaml` extension. If such a file is # found, metadata is read from that file. Alternatively, the content file # itself can start with a metadata section: it can be stored at the top of # the file, between `---` (three dashes) separators. For example: # # --- # title: "Moo!" # --- # h1. Hello! # # The metadata section can be omitted. If the file does not start with # three or five dashes, the entire file will be considered as content. # # The identifier of items and layouts is the filename itself, without the # root directory (as determined by the `content_dir` or `layouts_dir` # configuration attribute, for items resp. layouts). For example: # # foo/bar/index.html → /foo/bar/index.html # foo/bar.html → /foo/bar.html # # Note that each item must have an unique identifier. Nanoc will display an # error if two items with the same identifier are found. # # The file extension does not determine the filters to run on items; the # Rules file is used to specify processing instructors for each item. # # It is possible to set an explicit encoding that should be used when reading # files. In the data source configuration, set `encoding` to an encoding # understood by Ruby’s `Encoding`. If no encoding is set in the configuration, # one will be inferred from the environment. # # @api private class Filesystem < Nanoc::DataSource # See {Nanoc::DataSource#up}. def up; end # See {Nanoc::DataSource#down}. def down; end def content_dir_name config.fetch(:content_dir, 'content') end def layouts_dir_name config.fetch(:layouts_dir, 'layouts') end # See {Nanoc::DataSource#items}. def items load_objects(content_dir_name, Nanoc::Int::Item) end # See {Nanoc::DataSource#layouts}. def layouts load_objects(layouts_dir_name, Nanoc::Int::Layout) end protected class ProtoDocument attr_reader :attributes attr_reader :content_checksum_data attr_reader :attributes_checksum_data attr_reader :is_binary alias binary? is_binary def initialize(is_binary:, content: nil, filename: nil, attributes:, content_checksum_data: nil, attributes_checksum_data: nil) if content.nil? && filename.nil? raise ArgumentError, '#initialize needs at least content or filename' end @is_binary = is_binary @content = content @filename = filename @attributes = attributes @content_checksum_data = content_checksum_data @attributes_checksum_data = attributes_checksum_data end def content if binary? raise ArgumentError, 'cannot fetch content of binary item' else @content end end def filename if binary? @filename else raise ArgumentError, 'cannot fetch filename of non-binary item' end end end def read_proto_document(content_filename, meta_filename, klass) is_binary = content_filename && !@site_config[:text_extensions].include?(File.extname(content_filename)[1..-1]) if is_binary && klass == Nanoc::Int::Item meta = (meta_filename && YAML.load_file(meta_filename)) || {} ProtoDocument.new(is_binary: true, filename: content_filename, attributes: meta) elsif is_binary && klass == Nanoc::Int::Layout raise Errors::BinaryLayout.new(content_filename) else parse_result = parse(content_filename, meta_filename) ProtoDocument.new( is_binary: false, content: parse_result.content, attributes: parse_result.attributes, content_checksum_data: parse_result.content, attributes_checksum_data: parse_result.attributes_data, ) end end # Creates instances of klass corresponding to the files in dir_name. The # kind attribute indicates the kind of object that is being loaded and is # used solely for debugging purposes. # # This particular implementation loads objects from a filesystem-based # data source where content and attributes can be spread over two separate # files. The content and meta-file are optional (but at least one of them # needs to be present, obviously) and the content file can start with a # metadata section. # # @see Nanoc::DataSources::Filesystem#load_objects def load_objects(dir_name, klass) res = [] return [] if dir_name.nil? all_split_files_in(dir_name).each do |base_filename, (meta_ext, content_exts)| content_exts.each do |content_ext| meta_filename = filename_for(base_filename, meta_ext) content_filename = filename_for(base_filename, content_ext) proto_doc = read_proto_document(content_filename, meta_filename, klass) content = content_for(proto_doc, content_filename) attributes = attributes_for(proto_doc, content_filename, meta_filename) identifier = identifier_for(content_filename, meta_filename, dir_name) res << klass.new( content, attributes, identifier, content_checksum_data: proto_doc.content_checksum_data, attributes_checksum_data: proto_doc.attributes_checksum_data, ) end end res end def attributes_for(proto_doc, content_filename, meta_filename) extra_attributes = { filename: content_filename, content_filename: content_filename, meta_filename: meta_filename, extension: content_filename ? ext_of(content_filename)[1..-1] : nil, mtime: mtime_of(content_filename, meta_filename), } extra_attributes.merge(proto_doc.attributes) end def identifier_for(content_filename, meta_filename, dir_name) if content_filename identifier_for_filename(content_filename[dir_name.length..-1]) elsif meta_filename identifier_for_filename(meta_filename[dir_name.length..-1]) else raise 'meta_filename and content_filename are both nil' end end def content_for(proto_doc, content_filename) full_content_filename = content_filename && File.expand_path(content_filename) if proto_doc.binary? Nanoc::Int::BinaryContent.new(full_content_filename) else Nanoc::Int::TextualContent.new(proto_doc.content, filename: full_content_filename) end end def mtime_of(content_filename, meta_filename) meta_mtime = meta_filename ? File.stat(meta_filename).mtime : nil content_mtime = content_filename ? File.stat(content_filename).mtime : nil if meta_mtime && content_mtime meta_mtime > content_mtime ? meta_mtime : content_mtime elsif meta_mtime meta_mtime elsif content_mtime content_mtime else raise 'meta_mtime and content_mtime are both nil' end end # e.g. # # { # 'content/foo' => [ 'yaml', ['html', 'md'] ], # 'content/bar' => [ 'yaml', [nil] ], # 'content/qux' => [ nil, ['html'] ] # } def all_split_files_in(dir_name) by_basename = all_files_in(dir_name) .reject { |fn| fn =~ /(~|\.orig|\.rej|\.bak)$/ } .group_by { |fn| basename_of(fn) } all = {} by_basename.each_pair do |basename, filenames| # Divide meta_filenames = filenames.select { |fn| ext_of(fn) == '.yaml' } content_filenames = filenames.select { |fn| ext_of(fn) != '.yaml' } # Check number of files per type unless [0, 1].include?(meta_filenames.size) raise Errors::MultipleMetaFiles.new(meta_filenames, basename) end unless config[:identifier_type] == 'full' unless [0, 1].include?(content_filenames.size) raise Errors::MultipleContentFiles.new(meta_filenames, basename) end end all[basename] = [] all[basename][0] = meta_filenames[0] ? 'yaml' : nil all[basename][1] = content_filenames.any? ? content_filenames.map { |fn| ext_of(fn)[1..-1] || '' } : [nil] end all end # Returns all files in the given directory and directories below it. def all_files_in(dir_name) Nanoc::DataSources::Filesystem::Tools.all_files_in(dir_name, config[:extra_files]) end # Returns the filename for the given base filename and the extension. # # If the extension is nil, this function should return nil as well. # # A simple implementation would simply concatenate the base filename, a # period and an extension (which is what the # {Nanoc::DataSources::FilesystemCompact} data source does), but other # data sources may prefer to implement this differently (for example, # {Nanoc::DataSources::FilesystemVerbose} doubles the last part of the # basename before concatenating it with a period and the extension). def filename_for(base_filename, ext) if ext.nil? nil elsif ext.empty? base_filename else base_filename + '.' + ext end end # Returns the identifier that corresponds with the given filename, which # can be the content filename or the meta filename. def identifier_for_filename(filename) if config[:identifier_type] == 'full' return Nanoc::Identifier.new(filename) end regex = if filename =~ /(^|\/)index(\.[^\/]+)?$/ @config && @config[:allow_periods_in_identifiers] ? /\/?(index)?(\.[^\/\.]+)?$/ : /\/?index(\.[^\/]+)?$/ else @config && @config[:allow_periods_in_identifiers] ? /\.[^\/\.]+$/ : /\.[^\/]+$/ end Nanoc::Identifier.new(filename.sub(regex, ''), type: :legacy) end # Returns the base name of filename, i.e. filename with the first or all # extensions stripped off. By default, all extensions are stripped off, # but when allow_periods_in_identifiers is set to true in the site # configuration, only the last extension will be stripped . def basename_of(filename) filename.sub(extension_regex, '') end # Returns the extension(s) of filename. Supports multiple extensions. # Includes the leading period. def ext_of(filename) filename =~ extension_regex ? Regexp.last_match[1] : '' end # Returns a regex that is used for determining the extension of a file # name. The first match group will be the entire extension, including the # leading period. # # @return [Regex] def extension_regex if @config && @config[:allow_periods_in_identifiers] /(\.[^\/\.]+$)/ else /(\.[^\/]+$)/ end end # @return [ParseResult] def parse(content_filename, meta_filename) if meta_filename parse_with_separate_meta_filename(content_filename, meta_filename) else parse_with_frontmatter(content_filename) end end # @return [ParseResult] def parse_with_separate_meta_filename(content_filename, meta_filename) content = content_filename ? read(content_filename) : '' meta_raw = read(meta_filename) meta = parse_metadata(meta_raw, meta_filename) ParseResult.new(content: content, attributes: meta, attributes_data: meta_raw) end # @return [ParseResult] def parse_with_frontmatter(content_filename) data = read(content_filename) if data !~ /\A-{3,5}\s*$/ return ParseResult.new(content: data, attributes: {}, attributes_data: '') end pieces = data.split(/^(-{5}|-{3})[ \t]*\r?\n?/, 3) if pieces.size < 4 raise Errors::InvalidFormat.new(content_filename) end meta = parse_metadata(pieces[2], content_filename) content = pieces[4] ParseResult.new(content: content, attributes: meta, attributes_data: pieces[2]) end # @return [Hash] def parse_metadata(data, filename) begin meta = YAML.load(data) || {} rescue => e raise Errors::UnparseableMetadata.new(filename, e) end verify_meta(meta, filename) meta end class ParseResult attr_reader :content attr_reader :attributes attr_reader :attributes_data def initialize(content:, attributes:, attributes_data:) @content = content @attributes = attributes @attributes_data = attributes_data end end def verify_meta(meta, filename) return if meta.is_a?(Hash) raise Errors::InvalidMetadata.new(filename, meta.class) end # Reads the content of the file with the given name and returns a string # in UTF-8 encoding. The original encoding of the string is derived from # the default external encoding, but this can be overridden by the # “encoding” configuration attribute in the data source configuration. def read(filename) # Read begin data = File.read(filename) rescue => e raise Errors::FileUnreadable.new(filename, e) end # Fix if data.respond_to?(:encode!) if @config && @config[:encoding] original_encoding = Encoding.find(@config[:encoding]) data.force_encoding(@config[:encoding]) else original_encoding = data.encoding end begin data.encode!('UTF-8') rescue raise Errors::InvalidEncoding.new(filename, original_encoding) end unless data.valid_encoding? raise Errors::InvalidEncoding.new(filename, original_encoding) end end # Remove UTF-8 BOM (ugly) data.delete!("\xEF\xBB\xBF") data end end end require_relative 'filesystem/tools' require_relative 'filesystem/errors'