lib/rubysl/pstore/pstore.rb in rubysl-pstore-1.0.0 vs lib/rubysl/pstore/pstore.rb in rubysl-pstore-2.0.0

- old
+ new

@@ -1,205 +1,235 @@ # = PStore -- Transactional File Storage for Ruby Objects # # pstore.rb - # originally by matz # documentation by Kev Jackson and James Edward Gray II +# improved by Hongli Lai # # See PStore for documentation. require "fileutils" require "digest/md5" +require "thread" # # PStore implements a file based persistence mechanism based on a Hash. User # code can store hierarchies of Ruby objects (values) into the data store file -# by name (keys). An object hierarchy may be just a single object. User code +# by name (keys). An object hierarchy may be just a single object. User code # may later read values back from the data store or even update data, as needed. -# +# # The transactional behavior ensures that any changes succeed or fail together. # This can be used to ensure that the data store is not left in a transitory # state, where some values were updated but others were not. -# -# Behind the scenes, Ruby objects are stored to the data store file with -# Marshal. That carries the usual limitations. Proc objects cannot be +# +# Behind the scenes, Ruby objects are stored to the data store file with +# Marshal. That carries the usual limitations. Proc objects cannot be # marshalled, for example. # # == Usage example: -# +# # require "pstore" -# +# # # a mock wiki object... # class WikiPage # def initialize( page_name, author, contents ) # @page_name = page_name # @revisions = Array.new -# +# # add_revision(author, contents) # end -# +# # attr_reader :page_name -# +# # def add_revision( author, contents ) # @revisions << { :created => Time.now, # :author => author, # :contents => contents } # end -# +# # def wiki_page_references # [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/) # end -# +# # # ... # end -# +# # # create a new page... # home_page = WikiPage.new( "HomePage", "James Edward Gray II", # "A page about the JoysOfDocumentation..." ) -# +# # # then we want to update page data and the index together, or not at all... # wiki = PStore.new("wiki_pages.pstore") # wiki.transaction do # begin transaction; do all of this or none of it # # store page... # wiki[home_page.page_name] = home_page # # ensure that an index has been created... # wiki[:wiki_index] ||= Array.new # # update wiki index... # wiki[:wiki_index].push(*home_page.wiki_page_references) # end # commit changes to wiki data store file -# +# # ### Some time later... ### -# +# # # read wiki data... # wiki.transaction(true) do # begin read-only transaction, no changes allowed # wiki.roots.each do |data_root_name| # p data_root_name # p wiki[data_root_name] # end # end # +# == Transaction modes +# +# By default, file integrity is only ensured as long as the operating system +# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an +# I/O error occurs while PStore is writing to its file, then the file will +# become corrupted. +# +# You can prevent this by setting <em>pstore.ultra_safe = true</em>. +# However, this results in a minor performance loss, and only works on platforms +# that support atomic file renames. Please consult the documentation for +# +ultra_safe+ for details. +# +# Needless to say, if you're storing valuable data with PStore, then you should +# backup the PStore files from time to time. class PStore binmode = defined?(File::BINARY) ? File::BINARY : 0 RDWR_ACCESS = File::RDWR | File::CREAT | binmode RD_ACCESS = File::RDONLY | binmode WR_ACCESS = File::WRONLY | File::CREAT | File::TRUNC | binmode # The error type thrown by all PStore methods. class Error < StandardError end - # - # To construct a PStore object, pass in the _file_ path where you would like + # Whether PStore should do its best to prevent file corruptions, even when under + # unlikely-to-occur error conditions such as out-of-space conditions and other + # unusual OS filesystem errors. Setting this flag comes at the price in the form + # of a performance loss. + # + # This flag only has effect on platforms on which file renames are atomic (e.g. + # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false. + attr_accessor :ultra_safe + + # + # To construct a PStore object, pass in the _file_ path where you would like # the data to be stored. - # - def initialize(file) + # + # PStore objects are always reentrant. But if _thread_safe_ is set to true, + # then it will become thread-safe at the cost of a minor performance hit. + # + def initialize(file, thread_safe = false) dir = File::dirname(file) unless File::directory? dir raise PStore::Error, format("directory %s does not exist", dir) end if File::exist? file and not File::readable? file raise PStore::Error, format("file %s not readable", file) end - @transaction = false @filename = file @abort = false + @ultra_safe = false + @thread_safe = thread_safe + @lock = Mutex.new end # Raises PStore::Error if the calling code is not in a PStore#transaction. def in_transaction - raise PStore::Error, "not in transaction" unless @transaction + raise PStore::Error, "not in transaction" unless @lock.locked? end - # + # # Raises PStore::Error if the calling code is not in a PStore#transaction or # if the code is in a read-only PStore#transaction. - # + # def in_transaction_wr() in_transaction() raise PStore::Error, "in read-only transaction" if @rdonly end private :in_transaction, :in_transaction_wr # - # Retrieves a value from the PStore file data, by _name_. The hierarchy of + # Retrieves a value from the PStore file data, by _name_. The hierarchy of # Ruby objects stored under that root _name_ will be returned. - # + # # *WARNING*: This method is only valid in a PStore#transaction. It will # raise PStore::Error if called at any other time. # def [](name) in_transaction @table[name] end # - # This method is just like PStore#[], save that you may also provide a - # _default_ value for the object. In the event the specified _name_ is not - # found in the data store, your _default_ will be returned instead. If you do - # not specify a default, PStore::Error will be raised if the object is not + # This method is just like PStore#[], save that you may also provide a + # _default_ value for the object. In the event the specified _name_ is not + # found in the data store, your _default_ will be returned instead. If you do + # not specify a default, PStore::Error will be raised if the object is not # found. - # + # # *WARNING*: This method is only valid in a PStore#transaction. It will # raise PStore::Error if called at any other time. # def fetch(name, default=PStore::Error) in_transaction unless @table.key? name - if default==PStore::Error - raise PStore::Error, format("undefined root name `%s'", name) + if default == PStore::Error + raise PStore::Error, format("undefined root name `%s'", name) else - return default + return default end end @table[name] end # # Stores an individual Ruby object or a hierarchy of Ruby objects in the data # store file under the root _name_. Assigning to a _name_ already in the data # store clobbers the old data. - # + # # == Example: - # + # # require "pstore" - # + # # store = PStore.new("data_file.pstore") # store.transaction do # begin transaction # # load some data into the store... # store[:single_object] = "My data..." # store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"], # "James Gray" => ["erb.rb", "pstore.rb"] } # end # commit changes to data store file - # + # # *WARNING*: This method is only valid in a PStore#transaction and it cannot # be read-only. It will raise PStore::Error if called at any other time. # def []=(name, value) in_transaction_wr() @table[name] = value end # # Removes an object hierarchy from the data store, by _name_. - # + # # *WARNING*: This method is only valid in a PStore#transaction and it cannot # be read-only. It will raise PStore::Error if called at any other time. # def delete(name) in_transaction_wr() @table.delete name end # # Returns the names of all object hierarchies currently in the store. - # + # # *WARNING*: This method is only valid in a PStore#transaction. It will # raise PStore::Error if called at any other time. # def roots in_transaction @table.keys end # # Returns true if the supplied _name_ is currently in the data store. - # + # # *WARNING*: This method is only valid in a PStore#transaction. It will # raise PStore::Error if called at any other time. # def root?(name) in_transaction @@ -211,26 +241,26 @@ end # # Ends the current PStore#transaction, committing any changes to the data # store immediately. - # + # # == Example: - # + # # require "pstore" - # + # # store = PStore.new("data_file.pstore") # store.transaction do # begin transaction # # load some data into the store... # store[:one] = 1 # store[:two] = 2 - # + # # store.commit # end transaction here, committing changes - # + # # store[:three] = 3 # this change is never reached # end - # + # # *WARNING*: This method is only valid in a PStore#transaction. It will # raise PStore::Error if called at any other time. # def commit in_transaction @@ -238,25 +268,25 @@ throw :pstore_abort_transaction end # # Ends the current PStore#transaction, discarding any changes to the data # store. - # + # # == Example: - # + # # require "pstore" - # + # # store = PStore.new("data_file.pstore") # store.transaction do # begin transaction # store[:one] = 1 # this change is not applied, see below... # store[:two] = 2 # this change is not applied, see below... - # + # # store.abort # end transaction here, discard all changes - # + # # store[:three] = 3 # this change is never reached # end - # + # # *WARNING*: This method is only valid in a PStore#transaction. It will # raise PStore::Error if called at any other time. # def abort in_transaction @@ -264,112 +294,219 @@ throw :pstore_abort_transaction end # # Opens a new transaction for the data store. Code executed inside a block - # passed to this method may read and write data to and from the data store + # passed to this method may read and write data to and from the data store # file. - # + # # At the end of the block, changes are committed to the data store - # automatically. You may exit the transaction early with a call to either + # automatically. You may exit the transaction early with a call to either # PStore#commit or PStore#abort. See those methods for details about how - # changes are handled. Raising an uncaught Exception in the block is + # changes are handled. Raising an uncaught Exception in the block is # equivalent to calling PStore#abort. - # + # # If _read_only_ is set to +true+, you will only be allowed to read from the # data store during the transaction and any attempts to change the data will # raise a PStore::Error. - # + # # Note that PStore does not support nested transactions. # - def transaction(read_only=false) # :yields: pstore - raise PStore::Error, "nested transaction" if @transaction - begin + def transaction(read_only = false, &block) # :yields: pstore + value = nil + raise PStore::Error, "nested transaction" if !@thread_safe && @lock.locked? + @lock.synchronize do @rdonly = read_only @abort = false - @transaction = true - value = nil - new_file = @filename + ".new" + file = open_and_lock_file(@filename, read_only) + if file + begin + @table, checksum, original_data_size = load_data(file, read_only) - content = nil - unless read_only - file = File.open(@filename, RDWR_ACCESS) - file.flock(File::LOCK_EX) - commit_new(file) if FileTest.exist?(new_file) - content = file.read() + catch(:pstore_abort_transaction) do + value = yield(self) + end + + if !@abort && !read_only + save_data(checksum, original_data_size, file) + end + ensure + file.close if !file.closed? + end else + # This can only occur if read_only == true. + @table = {} + catch(:pstore_abort_transaction) do + value = yield(self) + end + end + end + value + rescue ThreadError + raise PStore::Error, "nested transaction" + end + + private + # Constant for relieving Ruby's garbage collector. + EMPTY_STRING = "" + EMPTY_MARSHAL_DATA = Marshal.dump({}) + EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA) + + # + # Open the specified filename (either in read-only mode or in + # read-write mode) and lock it for reading or writing. + # + # The opened File object will be returned. If _read_only_ is true, + # and the file does not exist, then nil will be returned. + # + # All exceptions are propagated. + # + def open_and_lock_file(filename, read_only) + if read_only + begin + file = File.new(filename, RD_ACCESS) begin - file = File.open(@filename, RD_ACCESS) file.flock(File::LOCK_SH) - content = (File.open(new_file, RD_ACCESS) {|n| n.read} rescue file.read()) - rescue Errno::ENOENT - content = "" + return file + rescue + file.close + raise end + rescue Errno::ENOENT + return nil end + else + file = File.new(filename, RDWR_ACCESS) + file.flock(File::LOCK_EX) + return file + end + end - if content != "" - @table = load(content) - if !read_only - size = content.size - md5 = Digest::MD5.digest(content) + # Load the given PStore file. + # If +read_only+ is true, the unmarshalled Hash will be returned. + # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled + # Hash, an MD5 checksum of the data, and the size of the data. + def load_data(file, read_only) + if read_only + begin + table = load(file) + if !table.is_a?(Hash) + raise Error, "PStore file seems to be corrupted." end + rescue EOFError + # This seems to be a newly-created file. + table = {} + end + table + else + data = file.read + if data.empty? + # This seems to be a newly-created file. + table = {} + checksum = empty_marshal_checksum + size = empty_marshal_data.size else - @table = {} + table = load(data) + checksum = Digest::MD5.digest(data) + size = data.size + if !table.is_a?(Hash) + raise Error, "PStore file seems to be corrupted." + end end - content = nil # unreference huge data + data.replace(EMPTY_STRING) + [table, checksum, size] + end + end - begin - catch(:pstore_abort_transaction) do - value = yield(self) - end - rescue Exception - @abort = true - raise - ensure - if !read_only and !@abort - tmp_file = @filename + ".tmp" - content = dump(@table) - if !md5 || size != content.size || md5 != Digest::MD5.digest(content) - File.open(tmp_file, WR_ACCESS) {|t| t.write(content)} - File.rename(tmp_file, new_file) - commit_new(file) - end - content = nil # unreference huge data - end + def on_windows? + is_windows = RUBY_PLATFORM =~ /mswin/ || + RUBY_PLATFORM =~ /mingw/ || + RUBY_PLATFORM =~ /bccwin/ || + RUBY_PLATFORM =~ /wince/ + self.class.__send__(:define_method, :on_windows?) do + is_windows + end + is_windows + end + + # Check whether Marshal.dump supports the 'canonical' option. This option + # makes sure that Marshal.dump always dumps data structures in the same order. + # This is important because otherwise, the checksums that we generate may differ. + def marshal_dump_supports_canonical_option? + begin + Marshal.dump(nil, -1, true) + result = true + rescue + result = false + end + self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) do + result + end + result + end + + def save_data(original_checksum, original_file_size, file) + # We only want to save the new data if the size or checksum has changed. + # This results in less filesystem calls, which is good for performance. + if marshal_dump_supports_canonical_option? + new_data = Marshal.dump(@table, -1, true) + else + new_data = dump(@table) + end + new_checksum = Digest::MD5.digest(new_data) + + if new_data.size != original_file_size || new_checksum != original_checksum + if @ultra_safe && !on_windows? + # Windows doesn't support atomic file renames. + save_data_with_atomic_file_rename_strategy(new_data, file) + else + save_data_with_fast_strategy(new_data, file) end + end + + new_data.replace(EMPTY_STRING) + end + + def save_data_with_atomic_file_rename_strategy(data, file) + temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}" + temp_file = File.new(temp_filename, WR_ACCESS) + begin + temp_file.flock(File::LOCK_EX) + temp_file.write(data) + temp_file.flush + File.rename(temp_filename, @filename) + rescue + File.unlink(temp_file) rescue nil + raise ensure - @table = nil - @transaction = false - file.close if file + temp_file.close end - value end - # This method is just a wrapped around Marshal.dump. + def save_data_with_fast_strategy(data, file) + file.rewind + file.truncate(0) + file.write(data) + end + + + # This method is just a wrapped around Marshal.dump + # to allow subclass overriding used in YAML::Store. def dump(table) # :nodoc: Marshal::dump(table) end # This method is just a wrapped around Marshal.load. + # to allow subclass overriding used in YAML::Store. def load(content) # :nodoc: Marshal::load(content) end - # This method is just a wrapped around Marshal.load. - def load_file(file) # :nodoc: - Marshal::load(file) + def empty_marshal_data + EMPTY_MARSHAL_DATA end - - private - # Commits changes to the data store file. - def commit_new(f) - f.truncate(0) - f.rewind - new_file = @filename + ".new" - File.open(new_file, RD_ACCESS) do |nf| - FileUtils.copy_stream(nf, f) - end - File.unlink(new_file) + def empty_marshal_checksum + EMPTY_MARSHAL_CHECKSUM end end # :enddoc: