require 'net/http'
require 'net/https'
require 'tempfile'
require 'stringio'
module Cul
module Fedora
module Image
# Output templates used by map_image_properties. Each *_TEMPLATE is handed to
# sprintf together with the single value it describes.
WIDTH_TEMPLATE = "%s"
LENGTH_TEMPLATE = "%s"
XSAMPLING_TEMPLATE = "%s"
YSAMPLING_TEMPLATE = "%s"
# Fixed strings emitted for the sampling unit of an image.
# NOTE(review): all three are empty strings here - confirm that is intended.
SAMPLINGUNIT_CM = ""
SAMPLINGUNIT_IN = ""
SAMPLINGUNIT_NA = ""
# Maps the :sampling_unit symbols produced by the analyzers to their output string.
UNITS = {
  :inch => SAMPLINGUNIT_IN,
  :cm => SAMPLINGUNIT_CM
}
SIZE_TEMPLATE = "%s"
# Magic bytes: the leading bytes that identify each supported image format,
# stored as arrays of integer byte values.
# 2 byte signatures
BITMAP = [0x42,0x4d] # "BM"
JPEG = [0xff,0xd8]
# 4 byte signatures
# "II" marks little-endian byte order and "MM" big-endian; the two names were
# previously attached to the wrong signatures.
TIFF_LE = [0x49,0x49,0x2A,0] # "II*\x00"
TIFF_BE = [0x4d,0x4d,0,0x2A] # "MM\x00*"
GIF = [0x47,0x49,0x46,0x38] # "GIF8"
# 8 byte signatures
PNG = [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a]
# Inspects an image and returns a hash of the properties that could be read.
#
# file_name - path of a local file, or an http:// / https:// URL. A URL is
#             downloaded into a temporary file that is removed afterwards.
# debug     - passed through to the format-specific analyzer.
#
# The result always contains :size (bytes on disk). When the leading bytes match
# one of the signature constants, the hash returned by the matching analyze_*
# method is merged in. Unrecognised content only prints the header bytes.
#
# Raises whatever File.open raises for an unreadable path, and the error raised
# by Net::HTTPResponse#value when a download does not answer with a 2xx status.
def analyze_image(file_name, debug=false)
  result = {}
  file = nil
  temp_file = nil
  begin
    if file_name =~ /\Ahttps?:\/\//
      uri = URI.parse(file_name)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == 'https')
      response = http.request_get(uri.request_uri)
      response.value # raises unless the response is a 2xx success
      temp_file = Tempfile.new("image-download")
      temp_file.binmode
      temp_file.write(response.body)
      temp_file.flush
      temp_file.rewind
      file = temp_file
    else
      file = File.open(file_name,'rb')
    end
    result[:size] = File.size(file.path)
    # Read the signature as integer byte values so it can be compared with the
    # signature constants regardless of how the Ruby version represents a byte.
    header = (file.read(8) || '').unpack('C*')
    # every analyzer expects to start at the beginning of the file
    file.rewind
    case
    when header[0..1] == BITMAP
      result.merge!(analyze_bitmap(file,debug))
    when header[0..1] == JPEG
      result.merge!(analyze_jpeg(file,debug))
    when header[0..3] == TIFF_LE, header[0..3] == TIFF_BE
      result.merge!(analyze_tiff(file,debug))
    when header[0..3] == GIF
      result.merge!(analyze_gif(file,debug))
    when header == PNG
      result.merge!(analyze_png(file,debug))
    else
      msg = header.collect {|c| c.to_s(16) + ' ' }.join
      puts "\nUnmatched header bytes: " + msg
    end
  ensure
    if temp_file
      temp_file.close! # close and delete the download
    elsif file
      file.close
    end
  end
  # return hash
  result
end
# Renders the properties gathered by analyze_image as an array of strings.
#
# att_hash - hash that may contain :size, :width, :length, :x_sampling,
#            :y_sampling and :sampling_unit.
#
# Each present key is formatted with its own *_TEMPLATE via sprintf, in the
# order listed above. :sampling_unit contributes the matching UNITS string and
# is skipped when the unit is not a key of UNITS. Returns the array of strings.
def map_image_properties(att_hash)
  templates = [
    [:size, SIZE_TEMPLATE],
    [:width, WIDTH_TEMPLATE],
    [:length, LENGTH_TEMPLATE],
    # x sampling uses its own template (the y template was applied here before)
    [:x_sampling, XSAMPLING_TEMPLATE],
    [:y_sampling, YSAMPLING_TEMPLATE]
  ]
  result = []
  templates.each do |key, template|
    result.push(sprintf(template, att_hash[key])) if att_hash.has_key?(key)
  end
  if att_hash.has_key?(:sampling_unit) && UNITS.has_key?(att_hash[:sampling_unit])
    result.push(UNITS[att_hash[:sampling_unit]])
  end
  result
end
protected
# Reads the dimensions of a GIF image.
#
# file  - IO positioned at the start of the GIF data; 13 bytes are consumed.
# debug - accepted for symmetry with the other analyzers; not used here.
#
# Width and height are the little-endian 16-bit values at byte offsets 6 and 8.
# Returns a hash with :width, :length and :mime.
def analyze_gif(file,debug)
  screen_descriptor = file.read(13)
  {
    :width => screen_descriptor[6, 2].unpack('v').first,
    :length => screen_descriptor[8, 2].unpack('v').first,
    :mime => 'image/gif'
  }
end
# Reads dimensions and pixel density from a PNG stream.
#
# file  - IO positioned at the start of the PNG signature.
# debug - accepted for symmetry with the other analyzers; not used here.
#
# Walks the chunk list (4-byte big-endian data length, 4-byte type, data,
# 4-byte CRC). IHDR supplies :width and :length. pHYs supplies :x_sampling and
# :y_sampling; when its unit byte is 1 the values are pixels per metre, so they
# are divided by 100 and :sampling_unit is set to :cm. Every other chunk is
# skipped in full. Parsing stops at IEND or at the first truncated chunk.
# Returns a hash that always contains :mime.
def analyze_png(file,debug)
  result = {:mime => 'image/png'}
  file.read(8) # skip signature
  while (len_bytes = file.read(4)) && len_bytes.length == 4
    length = len_bytes.unpack('N')[0]
    ctype = file.read(4)
    break unless ctype && ctype.length == 4
    case ctype
    when 'IHDR'
      val = file.read(length)
      break unless val && val.length >= 8
      result[:width], result[:length] = val[0, 8].unpack('NN')
      file.seek(4,IO::SEEK_CUR) # skip CRC
    when 'pHYs'
      val = file.read(length)
      break unless val && val.length >= 9
      xsam, ysam, unit = val[0, 9].unpack('NNC')
      if unit == 1
        result[:sampling_unit] = :cm
        xsam = xsam/100
        ysam = ysam/100
      end
      result[:x_sampling] = xsam
      result[:y_sampling] = ysam
      file.seek(4,IO::SEEK_CUR) # skip CRC
    when 'IEND'
      break
    else
      # skip the chunk data as well as its CRC so the next header stays aligned
      file.seek(length + 4,IO::SEEK_CUR)
    end
  end
  result
end
# Reads dimensions and resolution from a Windows bitmap.
#
# file  - IO positioned at the start of the bitmap ("BM").
# debug - accepted for symmetry with the other analyzers; not used here.
#
# Width and height are read at offset 0x12 and the horizontal / vertical
# resolution at offset 0x26, all as little-endian 32-bit values. The height
# field is signed (negative for top-down bitmaps), so its absolute value is
# reported. Resolution is converted from pixels per metre to pixels per
# centimetre by integer division. Returns a hash with :mime, :sampling_unit,
# :x_sampling, :y_sampling, :width and :length.
# NOTE(review): assumes a 40-byte-or-larger info header - confirm the older
# 12-byte core header never reaches this method.
def analyze_bitmap(file,debug)
  result = {}
  file.seek(0x12,IO::SEEK_CUR)
  width, length = file.read(8).unpack('VV')
  # reinterpret the unsigned read as a signed 32-bit value, then drop the sign
  length -= 0x100000000 if length >= 0x80000000
  length = length.abs
  file.seek(0x0c,IO::SEEK_CUR)
  xsam, ysam = file.read(8).unpack('VV')
  xsam /= 100 # ppm -> ppc
  ysam /= 100 # ppm -> ppc
  result[:mime] = 'image/bmp'
  result[:sampling_unit] = :cm
  result[:x_sampling] = xsam
  result[:y_sampling] = ysam
  result[:width] = width
  result[:length] = length
  result
end
=begin
TIFF Format notes taken from http://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf
Header format:
Bytes 0-1: BOM. [0x49,0x49] = LittleEndian, [0x4d,0x4d] = BigEndian
Bytes 2-3: Format marker (42) in the byte order indicated previously
Bytes 4-7: Byte offset of the first IFD, relative to file beginning
IFD format:
Bytes 0-1: Number of 12-byte IFD Entries
[IFD Entries]
Bytes -4 - -1: Byte offset of next IFD, or 0 if none remain
IFD Entry Format:
Bytes 0-1: Tag
Bytes 2-3: Type
1 = BYTE (unsigned 8-bit integer)
2 = ASCII (8 bit byte containing 7-bit char code)
3 = SHORT (16-bit unsigned integer)
4 = LONG (32-bit unsigned integer)
5 = RATIONAL (Two LONGs, first is numerator, second denominator)
Bytes 4-7: Num values of indicated Type
Bytes 8-11: Value offset
=end
# Reads the TIFF header and delegates the IFD walk to analyze_exif.
#
# file  - IO positioned at the start of the TIFF data.
# debug - passed through to analyze_exif.
#
# The first two bytes give the byte order ("II" = little-endian, anything else
# is treated as big-endian), the next two are skipped, and the following four
# hold the offset of the first IFD in that byte order. Returns a hash with
# :mime plus whatever analyze_exif extracts; only :mime when the header is
# truncated.
def analyze_tiff(file,debug)
  result = {:mime=>'image/tiff'}
  # compare string with string; comparing against an array of byte values never matched
  littleEndian = (file.read(2) == 'II')
  file.seek(2,IO::SEEK_CUR) # skip the format marker (42)
  offset_bytes = file.read(4)
  return result unless offset_bytes && offset_bytes.length == 4
  nextIFD = offset_bytes.unpack(littleEndian ? 'V' : 'N')[0]
  result.merge!(analyze_exif(file,nextIFD,littleEndian,debug))
  result
end
# Markers that stand alone: RST0-RST7, SOI and EOI carry no length or payload.
JPEG_NO_PAYLOAD = (0xd0..0xd9)
# Application segments APP0-APP15.
JPEG_APP = (0xe0..0xef)
# Kept unchanged for existing references; analyze_jpeg treats every marker
# outside JPEG_NO_PAYLOAD (and TEM) as length-prefixed and does not consult it.
JPEG_VARIABLE_PAYLOAD = [0xc0,0xc2,0xc4,0xda,0xdb,0xfe]
# Reads dimensions and pixel density from a JPEG stream.
#
# file  - IO positioned at the start of the JPEG data (the SOI marker).
# debug - when true, reports JFIF / EXIF segments on stdout and is passed on
#         to analyze_exif.
#
# Walks the marker segments that precede the image data. Each segment is a
# 0xFF byte, a marker byte and, unless the marker stands alone, a 2-byte
# big-endian length that counts itself followed by length - 2 payload bytes.
#   * APP0 starting with "JFIF\0" gives :x_sampling, :y_sampling and, for unit
#     1 or 2, :sampling_unit (:inch or :cm).
#   * APP1 starting with "Exif\0\0" wraps a TIFF header; its IFDs are parsed by
#     analyze_exif with offsets relative to that embedded header.
#   * Start-of-frame segments (0xC0-0xCF except 0xC4, 0xC8, 0xCC) give :length
#     and :width.
# The walk ends at SOS (entropy-coded data follows), at EOI, at end of input or
# as soon as the bytes no longer look like a marker. Returns the hash, which
# always contains :mime.
def analyze_jpeg(file,debug)
  result = {:mime => 'image/jpeg'}
  while (header = file.read(2)) && header.length == 2
    prefix, marker = header.unpack('CC')
    break unless prefix == 0xff # not at a marker: lost sync
    # a marker may be preceded by any number of 0xFF fill bytes
    while marker == 0xff
      fill = file.read(1)
      marker = fill ? fill.unpack('C')[0] : nil
    end
    break if marker.nil? || marker == 0xd9 || marker == 0xda
    next if JPEG_NO_PAYLOAD.member?(marker) || marker == 0x01
    len_bytes = file.read(2)
    break unless len_bytes && len_bytes.length == 2
    len = len_bytes.unpack('n')[0] - 2 # the length field counts its own two bytes
    break if len < 0
    if marker == 0xe0 # APP0 segment - JFIF
      payload = file.read(len) || ''
      if payload[0, 5] == "JFIF\0" && payload.length >= 12
        puts "JFIF file segment detected" if debug
        unit, x_sample, y_sample = payload[7, 5].unpack('Cnn')
        result[:x_sampling] = x_sample
        result[:y_sampling] = y_sample
        if unit == 1
          result[:sampling_unit] = :inch
        elsif unit == 2
          result[:sampling_unit] = :cm
        end
      end
    elsif marker == 0xe1 # APP1 segment - EXIF
      payload = file.read(len) || ''
      if payload[0, 6] == "Exif\0\0"
        puts "EXIF file segment detected" if debug
        tiff = StringIO.new(payload[6..-1])
        little_endian = (tiff.read(2) == 'II')
        tiff.seek(2,IO::SEEK_CUR) # skip the format marker (42)
        offset_bytes = tiff.read(4)
        if offset_bytes && offset_bytes.length == 4
          first_ifd = offset_bytes.unpack(little_endian ? 'V' : 'N')[0]
          result.merge!(analyze_exif(tiff,first_ifd,little_endian,debug))
        end
      end
    elsif (0xc0..0xcf).member?(marker) && ![0xc4,0xc8,0xcc].include?(marker)
      payload = file.read(len) || ''
      if payload.length >= 5
        # payload: sample precision (1 byte), height (2 bytes), width (2 bytes), ...
        length, width = payload[1, 4].unpack('nn')
        result[:width] = width
        result[:length] = length
      end
    else
      file.seek(len,IO::SEEK_CUR)
    end
  end
  result
end
# Walks a chain of TIFF image file directories (IFDs) and extracts the image
# properties this module reports.
#
# file         - seekable IO; every offset is relative to its start.
# nextIFD      - offset of the first IFD; 0 means there is none.
# littleEndian - true when multi-byte values are stored little-endian.
# debug        - when true, prints every directory entry and reports entries
#                whose type is not handled.
#
# Entry types 1-5 (BYTE, ASCII, SHORT, LONG, RATIONAL) are decoded; values that
# fit in four bytes are taken from the entry itself, larger ones are read from
# the offset the entry points at. When a tag occurs in several IFDs the first
# occurrence is kept. A RATIONAL with a zero denominator decodes to nil. The
# walk stops at a zero offset, at truncated data, or when an offset repeats.
#
# Returns a hash that may contain :width (tag 0x100), :length (0x101),
# :x_sampling (0x11a), :y_sampling (0x11b) and :sampling_unit (0x128: 2 = :inch,
# 3 = :cm). Raises RuntimeError when RATIONAL data ends in the middle of a value.
def analyze_exif(file,nextIFD,littleEndian,debug=false)
  result = {}
  short_fmt = littleEndian ? 'v' : 'n'
  long_fmt = littleEndian ? 'V' : 'N'
  type_sizes = {1 => 1, 2 => 1, 3 => 2, 4 => 4, 5 => 8} # bytes per value
  entries = {}
  visited = {}
  until nextIFD.nil? || nextIFD == 0 || visited[nextIFD]
    visited[nextIFD] = true # a repeated offset would otherwise loop forever
    file.seek(nextIFD,IO::SEEK_SET)
    count_bytes = file.read(2)
    break unless count_bytes && count_bytes.length == 2
    numEntries = count_bytes.unpack(short_fmt)[0]
    numEntries.times do
      entry = file.read(12)
      break unless entry && entry.length == 12
      tag, ttype, numValues = entry.unpack(short_fmt + short_fmt + long_fmt)
      valueOffsetBytes = entry[8, 4]
      valueOffset = valueOffsetBytes.unpack(long_fmt)[0]
      if (debug)
        puts "\ntag : #{tag.to_s(16)} ttype: #{ttype.to_s(16)} numValues: #{numValues} valueOffset: #{valueOffset}"
      end
      nextEntry = file.tell()
      value_size = type_sizes[ttype]
      if value_size.nil?
        puts "Unknown tag type: #{ttype}" if debug
      elsif numValues > 0
        total = value_size * numValues
        if total > 4
          file.seek(valueOffset,IO::SEEK_SET)
          raw = file.read(total) || ''
        else
          # small values are stored left-justified inside the offset field
          raw = valueOffsetBytes[0, total]
        end
        case ttype
        when 1 # unsigned bytes
          values = raw.unpack('C*')
        when 2 # ASCII, one single-character string per byte
          values = raw.unpack('C*').collect {|c| c.chr }
        when 3
          values = raw.unpack(short_fmt + '*')
        when 4
          values = raw.unpack(long_fmt + '*')
        else
          # RATIONAL: a sequence of pairs of 32-bit integers, numerator and denominator
          if (raw.length % 8) != 0
            raise "Unexpected end of bytestream when reading EXIF data"
          end
          pairs = raw.unpack(long_fmt + '*')
          values = []
          (0...pairs.length).step(2) do |ix|
            denominator = pairs[ix + 1]
            values << (denominator == 0 ? nil : pairs[ix].quo(denominator))
          end
        end
        entries[tag] = values unless entries.has_key?(tag)
      end
      file.seek(nextEntry,IO::SEEK_SET)
    end
    link = file.read(4)
    nextIFD = (link && link.length == 4) ? link.unpack(long_fmt)[0] : 0
  end
  [[0x0100, :width], [0x0101, :length], [0x011a, :x_sampling], [0x011b, :y_sampling]].each do |wanted, key|
    value = entries[wanted] && entries[wanted][0]
    result[key] = value unless value.nil?
  end
  unit_key = entries[0x0128] && entries[0x0128][0]
  if (unit_key == 2)
    result[:sampling_unit] = :inch
  elsif (unit_key == 3)
    result[:sampling_unit] = :cm
  end
  result
end
end
end
end