# -*- encoding: utf-8; frozen_string_literal: true -*-
#
#--
# This file is part of HexaPDF.
#
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
# Copyright (C) 2014-2022 Thomas Leitner
#
# HexaPDF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License version 3 as
# published by the Free Software Foundation with the addition of the
# following permission added to Section 15 as permitted in Section 7(a):
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
# INFRINGEMENT OF THIRD PARTY RIGHTS.
#
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with HexaPDF. If not, see .
#
# The interactive user interfaces in modified source and object code
# versions of HexaPDF must display Appropriate Legal Notices, as required
# under Section 5 of the GNU Affero General Public License version 3.
#
# In accordance with Section 7(b) of the GNU Affero General Public
# License, a covered work must retain the producer line in every PDF that
# is created or manipulated using HexaPDF.
#
# If the GNU Affero General Public License doesn't fit your need,
# commercial licenses are available at .
#++
require 'time'
require 'date'
require 'hexapdf/object'
require 'hexapdf/pdf_array'
require 'hexapdf/rectangle'
require 'hexapdf/configuration'
require 'hexapdf/utils/pdf_doc_encoding'
module HexaPDF
# A mixin used by Dictionary that implements the infrastructure and classes for defining fields.
#
# The class responsible for holding the field information is the Field class. Additionally, each
# field object is automatically assigned a stateless converter object that knows if data read
# from a PDF file potentially needs to be converted into a standard format before use.
#
# The methods that need to be implemented by such stateless converter objects are:
#
# usable_for?(type)::
# Should return +true+ if the converter is usable for the given type.
#
# additional_types::
# Should return +nil+, a single type class or an array of type classes which will additionally
# be allowed for the field.
#
# convert(data, type, document)::
# Should return the +converted+ data if conversion is possible and +nil+ otherwise. The +type+
# argument is the result of the Field#type method call and +document+ is the HexaPDF::Document
# for which the data should be converted.
module DictionaryFields
# This constant should *always* be used for boolean fields.
Boolean = [TrueClass, FalseClass].freeze
# PDFByteString is used for defining fields with strings in binary encoding.
PDFByteString = Class.new { private_class_method :new }
# PDFDate is used for defining fields which store a date object as a string.
PDFDate = Class.new { private_class_method :new }
# A dictionary field contains information about one field of a structured PDF object and this
# information comes directly from the PDF specification.
#
# By incorporating this field information into HexaPDF it is possible to do many things
# automatically, like checking for the correct minimum PDF version to use or converting a date
# from its string representation to a Time object.
class Field
# Returns the list of available converter objects.
#
# See ::converter_for for information on how this list is used.
def self.converters
@converters ||= []
end
# Returns the converter for the given +type+ specification.
#
# The converter list is checked for a suitable converter from the front to the back. So if
# two converters could potentially be used for the same type, the one that appears earlier
# is used.
def self.converter_for(type)
@converters.find {|converter| converter.usable_for?(type) }
end
# Returns +true+ if the value for this field needs to be an indirect object, +false+ if it
# needs to be a direct object or +nil+ if it can be either.
attr_reader :indirect
# Returns an array with the allowed values for this field, or +nil+ if the values are not
# constrained.
attr_reader :allowed_values
# Returns the PDF version that is required for this field.
attr_reader :version
# Create a new Field object. See Dictionary::define_field for information on the arguments.
#
# Depending on the +type+ entry an appropriate field converter object is chosen from the
# available converters.
def initialize(type, required: false, default: nil, indirect: nil, allowed_values: nil,
version: nil)
@type = [type].flatten
@type_mapped = false
@required, @default, @indirect, @version = required, default, indirect, version
@allowed_values = allowed_values && [allowed_values].flatten
@converters = @type.map {|t| self.class.converter_for(t) }.compact
end
# Returns the array with valid types for this field.
def type
return @type if @type_mapped
@type.concat(@converters.flat_map(&:additional_types).compact)
@type.map! do |type|
if type.kind_of?(Symbol)
HexaPDF::GlobalConfiguration.constantize('object.type_map', type)
else
type
end
end
@type.uniq!
@type_mapped = true
@type
end
# Returns +true+ if this field is required.
def required?
@required
end
# Returns +true+ if a default value is available.
def default?
!@default.nil?
end
# Returns a duplicated default value, automatically taking unduplicatable classes into
# account.
def default
@default.dup
end
# Returns +true+ if the given object is valid for this field.
def valid_object?(obj)
type.any? {|t| obj.kind_of?(t) } ||
(obj.kind_of?(HexaPDF::Object) && type.any? {|t| obj.value.kind_of?(t) })
end
# Converts the data into a useful object if possible. Otherwise returns +nil+.
def convert(data, document)
@converters.each do |converter|
result = converter.convert(data, type, document)
return result unless result.nil?
end
nil
end
end
# Converter module for fields of type Dictionary and its subclasses. The first class in the
# type array of the field is used for the conversion.
module DictionaryConverter
# This converter is used when either a Symbol is provided as +type+ (for lazy loading) or
# when the type is a class derived from the Dictionary class.
def self.usable_for?(type)
type.kind_of?(Symbol) ||
(type.respond_to?(:ancestors) && type.ancestors.include?(HexaPDF::Dictionary))
end
# Dictionary fields can also contain simple hashes.
def self.additional_types
Hash
end
# Wraps the given data value in the PDF specific type class if it can be converted. Otherwise
# returns +nil+.
def self.convert(data, type, document)
return if data.kind_of?(type.first) ||
!(data.kind_of?(Hash) || data.kind_of?(HexaPDF::Dictionary)) ||
(type.first <= HexaPDF::Stream && (data.kind_of?(Hash) || data.data.stream.nil?))
document.wrap(data, type: type.first)
end
end
# Converter module for fields of type PDFArray.
module ArrayConverter
# This converter is usable if the +type+ is PDFArray.
def self.usable_for?(type)
type == PDFArray
end
# PDFArray fields can also contain simple arrays.
def self.additional_types
Array
end
# Wraps a given array in the PDFArray class. Otherwise returns +nil+.
def self.convert(data, _type, document)
return unless data.kind_of?(Array)
document.wrap(data, type: PDFArray)
end
end
# Converter module for string fields to automatically convert a string into UTF-8 encoding.
module StringConverter
# This converter is usable if the +type+ is the String class.
def self.usable_for?(type)
type == String
end
# :nodoc:
def self.additional_types
end
# Converts the string into UTF-8 encoding, assuming it is a binary string. Otherwise +nil+ is
# returned.
def self.convert(str, _type, document)
return unless str.kind_of?(String) && str.encoding == Encoding::BINARY
if str.getbyte(0) == 254 && str.getbyte(1) == 255
str = str[2..-1].force_encoding(Encoding::UTF_16BE)
if str.valid_encoding?
str.encode!(Encoding::UTF_8)
else
document.config['document.on_invalid_string'].call(str)
end
else
Utils::PDFDocEncoding.convert_to_utf8(str)
end
end
end
# Converter module for binary string fields to automatically convert a string into binary
# encoding.
module PDFByteStringConverter
# This converter is usable if the +type+ is PDFByteString.
def self.usable_for?(type)
type == PDFByteString
end
# :nodoc:
def self.additional_types
String
end
# Converts the string into binary encoding, assuming it is a non-binary string. Otherwise
# returns +nil+.
def self.convert(str, _type, _document)
return if !str.kind_of?(String) || str.encoding == Encoding::BINARY
str.dup.force_encoding(Encoding::BINARY)
end
end
# Converter module for handling PDF date fields since they are stored as strings.
#
# The ISO PDF specification differs from Adobe's specification in respect to the supported
# date format. When converting from a date string to a Time object, this is taken into
# account.
#
# See: PDF1.7 s7.9.4, ADB1.7 3.8.3
module DateConverter
# This converter is usable if the +type+ is PDFDate.
def self.usable_for?(type)
type == PDFDate
end
# A date field may contain a string in PDF format, or a Time, Date or DateTime object.
def self.additional_types
[String, Time, Date, DateTime]
end
# :nodoc:
DATE_RE = /\AD:(\d{4})(\d\d)?(\d\d)?(\d\d)?(\d\d)?(\d\d)?([Z+-])?(?:(\d\d)(?:'|'([0-5]\d)'?|\z)?)?\z/n
# Checks if the given object is a string and converts into a Time object if possible.
# Otherwise returns +nil+.
def self.convert(str, _type, _document)
return unless str.kind_of?(String) && (m = str.match(DATE_RE))
utc_offset = (m[7].nil? || m[7] == 'Z' ? 0 : "#{m[7]}#{m[8]}:#{m[9] || '00'}")
Time.new(m[1].to_i, (m[2] ? m[2].to_i : 1), (m[3] ? m[3].to_i : 1),
m[4].to_i, m[5].to_i, m[6].to_i, utc_offset)
end
end
# Converter module for file specification fields. A file specification in string format is
# converted to the corresponding file specification dictionary.
module FileSpecificationConverter
# This converter is only used for the :Filespec type.
def self.usable_for?(type)
type == :Filespec
end
# Filespecs can also be simple hashes or strings.
def self.additional_types
[Hash, String]
end
# Converts a string file specification or a hash into a full file specification. Otherwise
# returns +nil+.
def self.convert(data, type, document)
return if data.kind_of?(type.first) ||
!(data.kind_of?(Hash) || data.kind_of?(HexaPDF::Dictionary) || data.kind_of?(String))
data = {F: data} if data.kind_of?(String)
document.wrap(data, type: type.first)
end
end
# Converter module for fields of type Rectangle.
module RectangleConverter
# This converter is usable if the +type+ is Rectangle.
def self.usable_for?(type)
type == Rectangle
end
# Rectangle fields can also contain simple arrays.
def self.additional_types
Array
end
# Wraps a given array in the Rectangle class. Otherwise returns +nil+.
def self.convert(data, _type, document)
return unless data.kind_of?(Array) || data.kind_of?(HexaPDF::PDFArray)
data.empty? ? document.wrap(nil) : document.wrap(data, type: Rectangle)
end
end
# Converter module for fields of type Integer.
module IntegerConverter
# This converter is usable if the +type+ is Integer.
def self.usable_for?(type)
type == Integer
end
# :nodoc:
def self.additional_types
end
# Converts a Float value into an Integer if the float is equal to its integer value. Otherwise
# returns +nil+
def self.convert(data, _type, _document)
return unless data.kind_of?(Float) && data == data.to_i
data.to_i
end
end
Field.converters.replace([FileSpecificationConverter, DictionaryConverter, ArrayConverter,
StringConverter, PDFByteStringConverter, DateConverter,
RectangleConverter, IntegerConverter])
end
end