lib/ezid/metadata.rb in ezid-client-0.1.0 vs lib/ezid/metadata.rb in ezid-client-0.1.1

- old
+ new

@@ -1,79 +1,151 @@ require "forwardable" module Ezid + # + # EZID metadata collection for an identifier + # + # @api public class Metadata extend Forwardable + include Enumerable + # The metadata elements hash attr_reader :elements - def_delegators :elements, :[], :[]=, :empty?, :to_h, :to_a - ERC_PROFILE = "erc" - DC_PROFILE = "dc" - DATACITE_PROFILE = "datacite" - CROSSREF_PROFILE = "crossref" + def_delegators :elements, :each, :empty?, :[], :[]= - STATUS_PUBLIC = "public" - STATUS_RESERVED = "reserved" - STATUS_UNAVAILABLE = "unavailable" + # EZID metadata profiles + PROFILES = %w( erc dc datacite crossref ) - # Internal metadata elements + # Public status + PUBLIC = "public" + + # Reserved status + RESERVED = "reserved" + + # Unavailable status + UNAVAILABLE = "unavailable" + + # EZID identifier status values + STATUS_VALUES = [PUBLIC, RESERVED, UNAVAILABLE].freeze + + # EZID internal read-only metadata elements INTERNAL_READONLY_ELEMENTS = %w( _owner _ownergroup _created _updated _shadows _shadowedby _datacenter ).freeze + + # EZID internal writable metadata elements INTERNAL_READWRITE_ELEMENTS = %w( _coowners _target _profile _status _export _crossref ).freeze + + # EZID internal metadata elements INTERNAL_ELEMENTS = (INTERNAL_READONLY_ELEMENTS + INTERNAL_READWRITE_ELEMENTS).freeze + + # Internal metadata element which are datetime values + # @note EZID outputs datetime info as epoch seconds. + DATETIME_ELEMENTS = %w( _created _updated ).freeze - ANVL_SEPARATOR = ": ".freeze + # EZID metadata field/value separator + ANVL_SEPARATOR = ": " - # Creates a reader method for each internal metadata element - INTERNAL_ELEMENTS.each do |element| - reader = element.sub("_", "").to_sym - define_method(reader) do - self[element] - end - end + # Characters to escape on output to EZID + ESCAPE_RE = /[%:\r\n]/ - # Creates a writer method for each writable internal metadata element - INTERNAL_READWRITE_ELEMENTS.each do |element| - writer = "#{element.sub('_', '')}=".to_sym - define_method(writer) do |value| - self[element] = value - end - end + # Character sequence to unescape from EZID + UNESCAPE_RE = /%\h\h/ - # @param data [Hash, String, Ezid::Metadata] EZID metadata + # A comment line + COMMENT_RE = /^#.*(\r?\n)?/ + + # A line continuation + LINE_CONTINUATION_RE = /\r?\n\s+/ + + # A line ending + LINE_ENDING_RE = /\r?\n/ + def initialize(data={}) @elements = coerce(data) end - # @todo escape \n, \r and % - # @todo force UTF-8 + # Output metadata in EZID ANVL format # @see http://ezid.cdlib.org/doc/apidoc.html#request-response-bodies + # @return [String] the ANVL output def to_anvl - to_a.map { |pair| pair.join(ANVL_SEPARATOR) }.join("\n") + lines = map { |element| element.map { |e| escape(e) }.join(ANVL_SEPARATOR) } + lines.join("\n").force_encoding(Encoding::UTF_8) end def to_s to_anvl end - # Add metadata + # Adds metadata to the collection + # @param data [String, Hash, Ezid::Metadata] the data to add + # @return [Ezid::Metadata] the updated metadata def update(data) elements.update(coerce(data)) + self end + # method_missing is used to provide internal element readers and writers + def method_missing(name, *args) + if INTERNAL_ELEMENTS.include?(element = "_#{name}") + reader(element) + elsif name.to_s.end_with?("=") && INTERNAL_READWRITE_ELEMENTS.include?(element = "_#{name}".sub("=", "")) + writer(element, args.first) + else + super + end + end + private + def reader(element) + value = self[element] + return Time.at(value.to_i) if DATETIME_ELEMENTS.include?(element) && !value.nil? + value + end + + def writer(element, value) + self[element] = value + end + # Coerce data into a Hash of elements - # @todo unescape - # @see {#to_anvl} def coerce(data) begin - data.to_h + stringify_keys(data.to_h) rescue NoMethodError - # This does not account for comments and continuation lines - # http://ezid.cdlib.org/doc/apidoc.html#request-response-bodies - data.split(/\r?\n/).map { |line| line.split(ANVL_SEPARATOR, 2) }.to_h + coerce_string(data) end + end + + def stringify_keys(hsh) + hsh.keys.map(&:to_s).zip(hsh.values).to_h + end + + # Escape value for sending to EZID host + # @see http://ezid.cdlib.org/doc/apidoc.html#request-response-bodies + # @param value [String] the value to escape + # @return [String] the escaped value + def escape(value) + value.gsub(ESCAPE_RE) { |m| URI.encode(m) } + end + + # Unescape value from EZID host (or other source) + # @see http://ezid.cdlib.org/doc/apidoc.html#request-response-bodies + # @param value [String] the value to unescape + # @return [String] the unescaped value + def unescape(value) + value.gsub(UNESCAPE_RE) { |m| URI.decode(m) } + end + + # Coerce a string of metadata (e.g., from EZID host) into a Hash + # @param data [String] the string to coerce + # @return [Hash] the hash of coerced data + def coerce_string(data) + data.gsub(COMMENT_RE, "") + .gsub(LINE_CONTINUATION_RE, " ") + .split(LINE_ENDING_RE) + .map { |line| line.split(ANVL_SEPARATOR, 2).map { |v| unescape(v).strip } } + .to_h end end end