=begin
This file is part of Origami, PDF manipulation framework for Ruby
Copyright (C) 2016 Guillaume Delugré.
Origami is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Origami is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with Origami. If not, see .
=end
require 'date'
module Origami
#
# Module common to String objects.
#
module String
module Encoding
class EncodingError < Error #:nodoc:
end
module PDFDocEncoding
CHARMAP =
[
"\x00\x00", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd",
"\xff\xfd", "\x00\x09", "\x00\x0a", "\xff\xfd", "\x00\x0c", "\x00\x0d", "\xff\xfd", "\xff\xfd",
"\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd", "\xff\xfd",
"\x02\xd8", "\x02\xc7", "\x02\xc6", "\x02\xd9", "\x02\xdd", "\x02\xdb", "\x02\xda", "\x02\xdc",
"\x00\x20", "\x00\x21", "\x00\x22", "\x00\x23", "\x00\x24", "\x00\x25", "\x00\x26", "\x00\x27",
"\x00\x28", "\x00\x29", "\x00\x2a", "\x00\x2b", "\x00\x2c", "\x00\x2d", "\x00\x2e", "\x00\x2f",
"\x00\x30", "\x00\x31", "\x00\x32", "\x00\x33", "\x00\x34", "\x00\x35", "\x00\x36", "\x00\x37",
"\x00\x38", "\x00\x39", "\x00\x3a", "\x00\x3b", "\x00\x3c", "\x00\x3d", "\x00\x3e", "\x00\x3f",
"\x00\x40", "\x00\x41", "\x00\x42", "\x00\x43", "\x00\x44", "\x00\x45", "\x00\x46", "\x00\x47",
"\x00\x48", "\x00\x49", "\x00\x4a", "\x00\x4b", "\x00\x4c", "\x00\x4d", "\x00\x4e", "\x00\x4f",
"\x00\x50", "\x00\x51", "\x00\x52", "\x00\x53", "\x00\x54", "\x00\x55", "\x00\x56", "\x00\x57",
"\x00\x58", "\x00\x59", "\x00\x5a", "\x00\x5b", "\x00\x5c", "\x00\x5d", "\x00\x5e", "\x00\x5f",
"\x00\x60", "\x00\x61", "\x00\x62", "\x00\x63", "\x00\x64", "\x00\x65", "\x00\x66", "\x00\x67",
"\x00\x68", "\x00\x69", "\x00\x6a", "\x00\x6b", "\x00\x6c", "\x00\x6d", "\x00\x6e", "\x00\x6f",
"\x00\x70", "\x00\x71", "\x00\x72", "\x00\x73", "\x00\x74", "\x00\x75", "\x00\x76", "\x00\x77",
"\x00\x78", "\x00\x79", "\x00\x7a", "\x00\x7b", "\x00\x7c", "\x00\x7d", "\x00\x7e", "\xff\xfd",
"\x20\x22", "\x20\x20", "\x20\x21", "\x20\x26", "\x20\x14", "\x20\x13", "\x01\x92", "\x20\x44",
"\x20\x39", "\x20\x3a", "\x22\x12", "\x20\x30", "\x20\x1e", "\x20\x1c", "\x20\x1d", "\x20\x18",
"\x20\x19", "\x20\x1a", "\x21\x22", "\xfb\x01", "\xfb\x02", "\x01\x41", "\x01\x52", "\x01\x60",
"\x01\x78", "\x01\x7d", "\x01\x31", "\x01\x42", "\x01\x53", "\x01\x61", "\x01\x7e", "\xff\xfd",
"\x20\xac", "\x00\xa1", "\x00\xa2", "\x00\xa3", "\x00\xa4", "\x00\xa5", "\x00\xa6", "\x00\xa7",
"\x00\xa8", "\x00\xa9", "\x00\xaa", "\x00\xab", "\x00\xac", "\xff\xfd", "\x00\xae", "\x00\xaf",
"\x00\xb0", "\x00\xb1", "\x00\xb2", "\x00\xb3", "\x00\xb4", "\x00\xb5", "\x00\xb6", "\x00\xb7",
"\x00\xb8", "\x00\xb9", "\x00\xba", "\x00\xbb", "\x00\xbc", "\x00\xbd", "\x00\xbe", "\x00\xbf",
"\x00\xc0", "\x00\xc1", "\x00\xc2", "\x00\xc3", "\x00\xc4", "\x00\xc5", "\x00\xc6", "\x00\xc7",
"\x00\xc8", "\x00\xc9", "\x00\xca", "\x00\xcb", "\x00\xcc", "\x00\xcd", "\x00\xce", "\x00\xcf",
"\x00\xd0", "\x00\xd1", "\x00\xd2", "\x00\xd3", "\x00\xd4", "\x00\xd5", "\x00\xd6", "\x00\xd7",
"\x00\xd8", "\x00\xd9", "\x00\xda", "\x00\xdb", "\x00\xdc", "\x00\xdd", "\x00\xde", "\x00\xdf",
"\x00\xe0", "\x00\xe1", "\x00\xe2", "\x00\xe3", "\x00\xe4", "\x00\xe5", "\x00\xe6", "\x00\xe7",
"\x00\xe8", "\x00\xe9", "\x00\xea", "\x00\xeb", "\x00\xec", "\x00\xed", "\x00\xee", "\x00\xef",
"\x00\xf0", "\x00\xf1", "\x00\xf2", "\x00\xf3", "\x00\xf4", "\x00\xf5", "\x00\xf6", "\x00\xf7",
"\x00\xf8", "\x00\xf9", "\x00\xfa", "\x00\xfb", "\x00\xfc", "\x00\xfd", "\x00\xfe", "\x00\xff"
].map(&:b)
def PDFDocEncoding.to_utf16be(pdfdocstr)
utf16bestr = UTF16BE::BOM.dup
pdfdocstr.each_byte do |byte|
utf16bestr << CHARMAP[byte]
end
utf16bestr.force_encoding('binary')
end
def PDFDocEncoding.to_pdfdoc(str)
str
end
end
module UTF16BE
BOM = "\xFE\xFF".b
def UTF16BE.to_utf16be(str)
str
end
def UTF16BE.to_pdfdoc(str)
pdfdoc = []
i = 2
while i < str.size
char = PDFDocEncoding::CHARMAP.index(str[i,2])
raise EncodingError, "Can't convert UTF16-BE character to PDFDocEncoding" if char.nil?
pdfdoc << char
i = i + 2
end
pdfdoc.pack("C*")
end
end
end
include Origami::Object
attr_accessor :encoding
def initialize(str) #:nodoc:
super(str.force_encoding('binary'))
detect_encoding
end
#
# Convert String object to an UTF8 encoded Ruby string.
#
def to_utf8
detect_encoding
utf16 = self.encoding.to_utf16be(self.value)
utf16.slice!(0, Encoding::UTF16BE::BOM.size)
utf16.encode("utf-8", "utf-16be")
end
#
# Convert String object to an UTF16-BE encoded binary Ruby string.
#
def to_utf16be
detect_encoding
self.encoding.to_utf16be(self.value)
end
#
# Convert String object to a PDFDocEncoding encoded binary Ruby string.
#
def to_pdfdoc
detect_encoding
self.encoding.to_pdfdoc(self.value)
end
def detect_encoding #:nodoc:
if self.value[0,2] == Encoding::UTF16BE::BOM
@encoding = Encoding::UTF16BE
else
@encoding = Encoding::PDFDocEncoding
end
end
end
class InvalidHexaStringObjectError < InvalidObjectError #:nodoc:
end
#
# Class representing an hexadecimal-writen String Object.
#
class HexaString < ::String
include String
TOKENS = %w{ < > } #:nodoc:
@@regexp_open = Regexp.new(WHITESPACES + TOKENS.first)
@@regexp_close = Regexp.new(TOKENS.last)
#
# Creates a new PDF hexadecimal String.
# _str_:: The string value.
#
def initialize(str = "")
unless str.is_a?(::String)
raise TypeError, "Expected type String, received #{str.class}."
end
super(str)
end
def self.parse(stream, _parser = nil) #:nodoc:
scanner = Parser.init_scanner(stream)
offset = scanner.pos
if scanner.skip(@@regexp_open).nil?
raise InvalidHexaStringObjectError, "Hexadecimal string shall start with a '#{TOKENS.first}' token"
end
hexa = scanner.scan_until(@@regexp_close)
if hexa.nil?
raise InvalidHexaStringObjectError, "Hexadecimal string shall end with a '#{TOKENS.last}' token"
end
begin
decoded = Filter::ASCIIHex.decode(hexa.chomp!(TOKENS.last))
rescue Filter::InvalidASCIIHexStringError => e
raise InvalidHexaStringObjectError, e.message
end
hexastr = HexaString.new(decoded)
hexastr.file_offset = offset
hexastr
end
def to_s(eol: $/) #:nodoc:
super(TOKENS.first + Filter::ASCIIHex.encode(to_str) + TOKENS.last, eol: eol)
end
#
# Converts self to a literal String.
#
def to_literal
LiteralString.new(self.value)
end
def value
self.decrypt! if self.is_a?(Encryption::EncryptedString) and not @decrypted
to_str
end
end
class InvalidLiteralStringObjectError < InvalidObjectError #:nodoc:
end
#
# Class representing a literal String Object.
#
class LiteralString < ::String
include String
TOKENS = %w{ ( ) } #:nodoc:
@@regexp_open = Regexp.new(WHITESPACES + Regexp.escape(TOKENS.first))
@@regexp_close = Regexp.new(Regexp.escape(TOKENS.last))
#
# Creates a new PDF String.
# _str_:: The string value.
#
def initialize(str = "")
unless str.is_a?(::String)
raise TypeError, "Expected type String, received #{str.class}."
end
super(str)
end
def self.parse(stream, _parser = nil) #:nodoc:
scanner = Parser.init_scanner(stream)
offset = scanner.pos
unless scanner.skip(@@regexp_open)
raise InvalidLiteralStringObjectError, "No literal string start token found"
end
result = ""
depth = 0
while depth != 0 or scanner.peek(1) != TOKENS.last do
raise InvalidLiteralStringObjectError, "Non-terminated string" if scanner.eos?
c = scanner.get_byte
case c
when "\\"
if scanner.match?(/\d{1,3}/)
oct = scanner.peek(3).oct.chr
scanner.pos += 3
result << oct
elsif scanner.match?(/((\r?\n)|(\r\n?))/)
scanner.skip(/((\r?\n)|(\r\n?))/)
next
else
flag = scanner.get_byte
case flag
when "n" then result << "\n"
when "r" then result << "\r"
when "t" then result << "\t"
when "b" then result << "\b"
when "f" then result << "\f"
else
result << flag
end
end
when "(" then
depth = depth + 1
result << c
when ")" then
depth = depth - 1
result << c
else
result << c
end
end
unless scanner.skip(@@regexp_close)
raise InvalidLiteralStringObjectError, "Byte string shall be terminated with '#{TOKENS.last}'"
end
# Try to cast as a Date object if possible.
if result[0, 2] == 'D:'
begin
date = Date.parse(result)
date.file_offset = offset
return date
rescue InvalidDateError
end
end
bytestr = self.new(result)
bytestr.file_offset = offset
bytestr
end
def to_s(eol: $/) #:nodoc:
super(TOKENS.first + expand + TOKENS.last, eol: eol)
end
#
# Converts self to HexaString
#
def to_hex
HexaString.new(self.value)
end
#
# Returns a standard String representation.
#
def value
self.decrypt! if self.is_a?(Encryption::EncryptedString) and not @decrypted
to_str
end
private
def expand #:nodoc:
self.gsub(/[\n\r\t\b\f()\\]/,
"\n" => "\\n",
"\r" => "\\r",
"\t" => "\\t",
"\b" => "\\b",
"\f" => "\\f",
"\\" => "\\\\",
"(" => "\\(",
")" => "\\)")
end
end
class InvalidDateError < Error #:nodoc:
end
#
# Class representing a Date string.
#
class Date < LiteralString #:nodoc:
REGEXP_TOKEN =
/D: # Date header
(?\d{4}) # Year
(?\d{2})? # Month
(?\d{2})? # Day
(?\d{2})? # Hour
(?\d{2})? # Minute
(?\d{2})? # Second
(?:
(?[\+\-Z]) # UT relationship
(?\d{2}) # UT hour offset
('(?\d{2}))? # UT minute offset
)?
/x
attr_reader :year, :month, :day, :hour, :min, :sec, :utc_offset
def initialize(year:, month: 1, day: 1, hour: 0, min: 0, sec: 0, utc_offset: 0)
@year, @month, @day, @hour, @min, @sec = year, month, day, hour, min, sec
@utc_offset = utc_offset
date = "D:%04d%02d%02d%02d%02d%02d" % [year, month, day, hour, min, sec ]
if utc_offset == 0
date << "Z00'00"
else
date << (if utc_offset < 0 then '-' else '+' end)
off_hours, off_secs = utc_offset.abs.divmod(3600)
off_mins = off_secs / 60
date << "%02d'%02d" % [ off_hours, off_mins ]
end
super(date)
end
def to_datetime
::DateTime.new(@year, @month, @day, @hour, @min, @sec, (@utc_offset / 3600).to_s)
end
def self.parse(str) #:nodoc:
raise InvalidDateError, "Not a valid Date string" unless str =~ REGEXP_TOKEN
date =
{
year: $~['year'].to_i
}
date[:month] = $~['month'].to_i if $~['month']
date[:day] = $~['day'].to_i if $~['day']
date[:hour] = $~['hour'].to_i if $~['hour']
date[:min] = $~['min'].to_i if $~['min']
date[:sec] = $~['sec'].to_i if $~['sec']
if %w[+ -].include?($~['ut'])
utc_offset = $~['ut_hour_off'].to_i * 3600 + $~['ut_min_off'].to_i * 60
utc_offset = -utc_offset if $~['ut'] == '-'
date[:utc_offset] = utc_offset
end
Origami::Date.new(**date)
end
#
# Returns current Date String in UTC time.
#
def self.now
now = Time.now.utc
date =
{
year: now.strftime("%Y").to_i,
month: now.strftime("%m").to_i,
day: now.strftime("%d").to_i,
hour: now.strftime("%H").to_i,
min: now.strftime("%M").to_i,
sec: now.strftime("%S").to_i,
utc_offset: now.utc_offset
}
Origami::Date.new(**date)
end
end
end