module DocStorage
# The +MultiPartDocument+ class represents a document consisting of several
# simple documents (see the +SimpleDocument+ class documentation for a
# description), loosely based on the MIME multipart message format. It is
# suitable for storing multiple documents containing a text associated with
# some metadata (e.g. blog comments, each with an author and a publication
# date). The +MultiPartDocument+ class allows you to create the document
# programmatically, parse it from a file, manipulate its structure and save it
# to a file.
#
# == Document Format
#
# In serialized form, a multipart document looks like this:
#
# Boundary: =====
#
# --=====
# Author: Fan
# Datetime: 2009-11-01 20:07:15
#
# Your article is really great!
# --=====
# Author: Critic
# Datetime: 2009-11-01 20:10:54
#
# Your article sucks!
#
# The document is composed of one or more simple documents, separated by a
# _boundary_ -- a line beginning with "--" and containing a predefined
# boundary string. The first document is a _prologue_ and it defines
# the boundary string (without the "--" prefix) in its "Boundary" header. All
# other headers of the prologue are ignored and so is its body. Remaining
# documents are the _parts_ of the multipart document. Documents without any
# parts are perfectly legal; however, the prologue with the boundary definition
# must always be present.
#
# == Example Usage
#
# require "lib/doc_storage"
#
# # Create a new document with two parts
# document = DocStorage::MultiPartDocument.new([
# DocStorage::SimpleDocument.new(
# {
# "Title" => "Finishing the documentation",
# "Priority" => "urgent"
# },
# "We should finish the documentation ASAP."
# ),
# DocStorage::SimpleDocument.new(
# {
# "Title" => "Finishing the code",
# "Priority" => "more urgent"
# },
# "But we should finish the code first!"
# ),
# ])
#
# # Parse a file
# document = File.open("examples/multipart.txt", "r") do |f|
# DocStorage::MultiPartDocument.parse(f)
# end
#
# # Document manipulation
# document.parts << DocStorage::SimpleDocument.new(
# {
# "Author" => "Middle man",
# "Datetime" => "2009-11-01 21:15:33",
# },
# "I think your article is neither good nor bad."
# )
#
# # Save the modified document
# File.open("examples/multipart_modified.txt", "w") do |f|
# f.write(document)
# end
class MultiPartDocument
  # Characters a generated boundary string is drawn from.
  BOUNDARY_CHARS = (("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a).freeze

  # Length (in characters) of a generated boundary string.
  BOUNDARY_LENGTH = 64

  # document parts (+Array+ of DocStorage::SimpleDocument)
  attr_accessor :parts

  class << self
    # Parses a multipart document from its serialized form and returns a new
    # +MultiPartDocument+ instance.
    #
    # The +source+ can be either an +IO+-like object or a +String+. In the
    # latter case, it is assumed that the string contains a serialized
    # document (not a file name).
    #
    # If any syntax error occurs, a +SyntaxError+ exception is raised. This
    # can happen when parsing the prologue or parts and an invalid header is
    # encountered, the headers are not terminated (no empty line separating
    # headers and body is parsed before the end of file) or if no "Boundary"
    # header is found in the prologue.
    #
    # See the +MultiPartDocument+ class documentation for a detailed
    # document format description.
    def parse(source)
      # Strings are wrapped so that the rest of the parser only has to deal
      # with IO-like objects.
      parse_from_io(source.is_a?(String) ? StringIO.new(source) : source)
    end

    private

    # Reads the prologue and then all the parts from +io+ (an +IO+-like
    # object responding to +eof?+) and returns a new +MultiPartDocument+
    # built from the parts.
    #
    # The actual parsing is delegated to +SimpleDocument.parse+: the prologue
    # is parsed with the +:detect+ argument and only its "Boundary" header is
    # used here; every part is then parsed with that boundary string until
    # the end of +io+ is reached.
    def parse_from_io(io)
      prologue = SimpleDocument.parse(io, :detect)
      boundary = prologue.headers["Boundary"]

      parts = []
      parts << SimpleDocument.parse(io, boundary) until io.eof?

      MultiPartDocument.new(parts)
    end
  end

  # Creates a new +MultiPartDocument+ with given parts. When +parts+ is
  # omitted, the document starts without any parts.
  def initialize(parts = [])
    @parts = parts
  end

  # Tests if two documents are equal, i.e. whether they have the same class
  # and equal parts (in the == sense).
  def ==(other)
    other.instance_of?(self.class) && @parts == other.parts
  end

  # Returns string representation of this document. The result is in format
  # described in the +MultiPartDocument+ class documentation.
  #
  # The boundary is a random alphanumeric string. It is guaranteed not to
  # occur anywhere in the serialized parts, so a part can never be split by
  # its own content when the result is parsed again.
  def to_s
    # Serialize every part exactly once; the same strings are used both for
    # the collision check and for the final output.
    serialized_parts = @parts.map { |part| part.to_s }
    boundary = generate_boundary(serialized_parts)

    SimpleDocument.new({"Boundary" => boundary}, "").to_s +
      serialized_parts.map { |part| "--#{boundary}\n#{part}" }.join("\n")
  end

  private

  # Returns a random boundary string of +BOUNDARY_LENGTH+ characters that is
  # not a substring of any string in +serialized_parts+.
  #
  # Rejecting any occurrence of the boundary (not only a whole "--boundary"
  # line) is stricter than necessary, but it is safe regardless of how the
  # parser recognizes boundary lines. A collision is extremely improbable, so
  # in practice the loop finishes on its first iteration.
  def generate_boundary(serialized_parts)
    loop do
      candidate = Array.new(BOUNDARY_LENGTH) {
        BOUNDARY_CHARS[rand(BOUNDARY_CHARS.length)]
      }.join("")

      return candidate unless serialized_parts.any? { |s| s.include?(candidate) }
    end
  end
end
end