lib/nanoc3/data_sources/filesystem.rb in nanoc3-3.0.9 vs lib/nanoc3/data_sources/filesystem.rb in nanoc3-3.1.0a1

- old
+ new

# encoding: utf-8

module Nanoc3
  module DataSources

    # Provides functionality common across all filesystem data sources.
    #
    # This module implements the parts that do not depend on how a concrete
    # data source lays out its files (walking the directory tree, pairing
    # meta files with content files, parsing embedded metadata). Including
    # classes are expected to supply their own {#create_object},
    # {#filename_for} and {#identifier_for_filename}; the versions defined
    # here only raise NotImplementedError.
    module Filesystem

      # The VCS that will be called when adding, deleting and moving files.
      # If no VCS has been set, or if the VCS has been set to `nil`, a dummy
      # VCS is created, remembered and returned, so this never returns `nil`.
      #
      # @return [Nanoc3::Extra::VCS] The VCS that will be used.
      def vcs
        @vcs ||= Nanoc3::Extra::VCSes::Dummy.new
      end
      attr_writer :vcs

      # See {Nanoc3::DataSource#up}.
      def up
      end

      # See {Nanoc3::DataSource#down}.
      def down
      end

      # See {Nanoc3::DataSource#setup}.
      def setup
        # Create directories (relative to the current working directory) and
        # register each of them with the VCS.
        %w( content layouts lib ).each do |dir|
          FileUtils.mkdir_p(dir)
          vcs.add(dir)
        end
      end

      # See {Nanoc3::DataSource#items}.
      def items
        load_objects('content', 'item', Nanoc3::Item)
      end

      # See {Nanoc3::DataSource#layouts}.
      def layouts
        load_objects('layouts', 'layout', Nanoc3::Layout)
      end

      # See {Nanoc3::DataSource#create_item}.
      def create_item(content, attributes, identifier, params={})
        create_object('content', content, attributes, identifier, params)
      end

      # See {Nanoc3::DataSource#create_layout}.
      def create_layout(content, attributes, identifier, params={})
        create_object('layouts', content, attributes, identifier, params)
      end

    private

      # Creates a new object (item or layout) on disk in dir_name according to
      # the given identifier. The file will have its attributes taken from the
      # attributes hash argument and its content from the content argument.
      #
      # This default implementation does nothing but raise; the including
      # data source has to override it.
      #
      # @raise [NotImplementedError] always
      def create_object(dir_name, content, attributes, identifier, params={})
        # The method name is spelled out literally: there is no `name` local
        # or method to interpolate here.
        raise NotImplementedError,
          "#{self.class} does not implement #create_object"
      end

      # Creates instances of klass corresponding to the files in dir_name. The
      # kind attribute indicates the kind of object that is being loaded and is
      # used solely for debugging purposes (it ends up in error messages).
      #
      # This particular implementation loads objects from a filesystem-based
      # data source where content and attributes can be spread over two
      # separate files. The content file and the meta file are both optional
      # (but at least one of them needs to be present) and the content file
      # can start with a metadata section.
      #
      # Every object is built with `klass.new(content, attributes, identifier,
      # mtime)`, where mtime is the most recent modification time of the files
      # that make up the object.
      #
      # @raise [RuntimeError] if neither a meta file nor a content file can be
      #   determined for an entry
      def load_objects(dir_name, kind, klass)
        all_split_files_in(dir_name).map do |base_filename, (meta_ext, content_ext)|
          # Get filenames
          meta_filename    = filename_for(base_filename, meta_ext)
          content_filename = filename_for(base_filename, content_ext)

          # Bail out before touching the disk: parsing with two nil filenames
          # would otherwise fail with an unhelpful TypeError from File.read.
          source_filename = meta_filename || content_filename
          if source_filename.nil?
            raise RuntimeError, "meta_filename and content_filename are both nil"
          end

          # Read content and metadata
          meta, content = parse(content_filename, meta_filename, kind)

          # Get attributes; values from the metadata win over the built-in ones
          attributes = {
            :filename         => content_filename,
            :content_filename => content_filename,
            :meta_filename    => meta_filename,
            :extension        => content_filename ? ext_of(content_filename)[1..-1] : nil,
            # WARNING :file is deprecated; please create a File object manually
            # using the :content_filename or :meta_filename attributes.
            # TODO [in nanoc 4.0] remove me
            :file             => content_filename ? Nanoc3::Extra::FileProxy.new(content_filename) : nil
          }.merge(meta)

          # Get identifier from the path relative to dir_name (the meta file
          # takes precedence over the content file)
          identifier = identifier_for_filename(source_filename[(dir_name.length+1)..-1])

          # Get modification time: the newest of the files that exist
          existing_filenames = [ meta_filename, content_filename ].compact
          mtime = existing_filenames.map { |fn| File.stat(fn).mtime }.max

          # Create object
          klass.new(content, attributes, identifier, mtime)
        end
      end

      # Finds all items/layouts/... in the given base directory. Returns a hash
      # in which the keys are the file's dirname + basenames, and the values a
      # pair consisting of the metafile extension and the content file
      # extension. The meta file extension or the content file extension can be
      # nil, but not both. Backup files are ignored. For example:
      #
      #   {
      #     'content/foo' => [ 'yaml', 'html' ],
      #     'content/bar' => [ 'yaml', nil    ],
      #     'content/qux' => [ nil,    'html' ]
      #   }
      #
      # @raise [RuntimeError] if more than one meta file, or more than one
      #   content file, maps onto the same base filename
      def all_split_files_in(dir_name)
        # Get all good file names (regular files that are not backups)
        candidates = Dir[dir_name + '/**/*'].select { |fn| File.file?(fn) }
        candidates.reject! { |fn| fn =~ /(~|\.orig|\.rej|\.bak)$/ }

        # Group by identifier
        grouped_filenames = candidates.group_by { |fn| basename_of(fn) }

        # Convert each group into a metafile/content file extension tuple
        split_files = {}
        grouped_filenames.each_pair do |base_filename, group|
          # Divide
          meta_filenames, content_filenames = group.partition { |fn| ext_of(fn) == '.yaml' }

          # Check number of files per type
          if meta_filenames.size > 1
            raise RuntimeError, "Found #{meta_filenames.size} meta files for #{base_filename}; expected 0 or 1"
          end
          if content_filenames.size > 1
            raise RuntimeError, "Found #{content_filenames.size} content files for #{base_filename}; expected 0 or 1"
          end

          # Convert to extnames without the leading period.
          # NOTE(review): a content file without any extension yields nil
          # here, i.e. it looks the same as "no content file" - confirm that
          # this is intended.
          split_files[base_filename] = [
            meta_filenames.empty?    ? nil : ext_of(meta_filenames[0])[1..-1],
            content_filenames.empty? ? nil : ext_of(content_filenames[0])[1..-1]
          ]
        end

        # Done
        split_files
      end

      # Returns the filename for the given base filename and the extension.
      #
      # If the extension is nil, this function should return nil as well.
      #
      # A simple implementation would simply concatenate the base filename, a
      # period and an extension (which is what the
      # {Nanoc3::DataSources::FilesystemCompact} data source does), but other
      # data sources may prefer to implement this differently (for example,
      # {Nanoc3::DataSources::FilesystemVerbose} doubles the last part of the
      # basename before concatenating it with a period and the extension).
      #
      # @raise [NotImplementedError] always, unless overridden
      def filename_for(base_filename, ext)
        raise NotImplementedError,
          "#{self.class} does not implement #filename_for"
      end

      # Returns the identifier that corresponds with the given filename, which
      # can be the content filename or the meta filename.
      #
      # @raise [NotImplementedError] always, unless overridden
      def identifier_for_filename(filename)
        raise NotImplementedError,
          "#{self.class} does not implement #identifier_for_filename"
      end

      # Returns the base name of filename, i.e. filename with the first or all
      # extensions stripped off. By default, all extensions are stripped off,
      # but when allow_periods_in_identifiers is set to true in the site
      # configuration, only the last extension will be stripped.
      def basename_of(filename)
        filename.sub(extension_regex, '')
      end

      # Returns the extension(s) of filename. Supports multiple extensions.
      # Includes the leading period. Returns an empty string when the filename
      # has no extension.
      def ext_of(filename)
        filename[extension_regex, 1] || ''
      end

      # Returns a regex that is used for determining the extension of a file
      # name. The first match group will be the entire extension, including the
      # leading period.
      def extension_regex
        if @config && @config[:allow_periods_in_identifiers]
          # Last extension only
          /(\.[^\/\.]+$)/
        else
          # Everything from the first period in the last path component
          /(\.[^\/]+$)/
        end
      end

      # Reads the metadata and the content of a single object.
      #
      # When meta_filename is given, the metadata is read from that file and
      # the content (if any) is taken verbatim from content_filename.
      # Otherwise the content file is inspected: if its first non-blank line
      # starts with three or five dashes, the text up to the next such line is
      # parsed as YAML metadata and the remainder, stripped of surrounding
      # whitespace, is the content. If not, the whole file is the content.
      #
      # NOTE(review): YAML.load / YAML.load_file may instantiate arbitrary
      # objects on older YAML libraries; this presumably only ever sees the
      # site author's own files - confirm before feeding it untrusted data.
      #
      # @return [Array] a pair: the metadata hash and the content string
      #
      # @raise [RuntimeError] if a metadata section is opened but never closed
      def parse(content_filename, meta_filename, kind)
        # Read content and metadata from separate files
        if meta_filename
          content = content_filename ? File.read(content_filename) : ''
          meta    = YAML.load_file(meta_filename) || {}

          return [ meta, content ]
        end

        # Read data
        data = File.read(content_filename)

        # Check presence of metadata section. Only a separator on the first
        # non-blank line counts; a `---` further down (a Markdown rule, say)
        # is ordinary content and must not turn the file into a parse error.
        if data !~ /\A\s*^(-{5}|-{3})/
          return [ {}, data ]
        end

        # Split data into: leading blanks, separator, metadata, separator,
        # content (possibly containing further separators)
        pieces = data.split(/^(-{5}|-{3})/)
        if pieces.size < 4
          raise RuntimeError.new(
            "The file '#{content_filename}' does not seem to be a nanoc #{kind}"
          )
        end

        # Parse
        meta    = YAML.load(pieces[2]) || {}
        content = pieces[4..-1].join.strip

        # Done
        [ meta, content ]
      end

    end

  end
end