#! ruby
# -*- coding: UTF-8 -*-
#
# It was translated into Ruby script by whiteleaf.
#
# original source code:
# kindlestrip.py v.1.35 http://www.mobileread.com/forums/showthread.php?t=96903
#
# This script strips the penultimate record from a Mobipocket file.
# This is useful because the current KindleGen adds a compressed copy
# of the source files used in this record, making the ebook produced
# about twice as big as it needs to be.
#
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
#
# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
# With enhancements by Kevin Hendricks, KevinH on mobileread.com
#
# Changelog
# 1.00 - Initial version
# 1.10 - Added an option to output the stripped data
# 1.20 - Added check for source files section (thanks Piquan)
# 1.30 - Added prelim Support for K8 style mobis
# 1.31 - removed the SRCS section but kept a 0 size entry for it
# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
# 1.33 - now uses and modifies mobiheader SRCS and CNT
# 1.34 - added credit for Kevin Hendricks
# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records
# Version of the kindlestrip.py release this port tracks; shown in the
# command-line banner.
KINDLESTRIP_VERSION = "1.35"
# Error raised by SectionStripper when the input is not a BOOKMOBI file or
# does not contain a usable SRCS section. The command-line entry point
# rescues it and prints its message.
class StripException < StandardError
end
# Removes the SRCS section(s) that KindleGen embeds in a Mobipocket file and
# rewrites the Palm database record table and the MOBI header so that they
# describe the shortened file.
#
# Construct it with the raw bytes of a .mobi file; the stripped image is then
# available from #get_result and the removed source archive from
# #get_stripped_data / #get_header.
class SectionStripper
  # Byte offset of the first entry of the Palm database record table.
  RECORD_TABLE_OFFSET = 78
  # Size in bytes of one record-table entry (32-bit offset + 32-bit flags/id).
  RECORD_ENTRY_SIZE = 8
  # Offset inside the MOBI header of the (SRCS section number, SRCS count) pair.
  SRCS_FIELD_OFFSET = 0xe0

  # Returns the bytes of one section of the stripped file.
  #
  # @param section [Integer] zero-based section index in the stripped file
  # @return [String] binary contents of that section
  def load_section(section)
    off, endoff = section_bounds(section)
    @data_file[off...endoff]
  end

  # Overwrites bytes of the stripped file in place (the file size is kept).
  #
  # @param off [Integer] absolute byte offset at which to start writing
  # @param _new [String] replacement bytes
  def patch(off, _new)
    bytes = _new.b
    @data_file = @data_file[0, off] + bytes + @data_file[off + bytes.bytesize..-1]
  end

  # Deletes +len+ bytes starting at absolute offset +off+.
  #
  # NOTE: the cached section offsets used by #load_section and #patch_section
  # are not adjusted by this call.
  #
  # @param off [Integer] absolute byte offset of the first byte to drop
  # @param len [Integer] number of bytes to drop
  def strip(off, len)
    @data_file = @data_file[0, off] + @data_file[off + len..-1]
  end

  # Overwrites bytes inside a single section.
  #
  # @param section [Integer] zero-based section index in the stripped file
  # @param _new [String] replacement bytes
  # @param in_off [Integer] offset of the write relative to the section start
  # @raise [RuntimeError] if the write would run past the end of the section
  def patch_section(section, _new, in_off = 0)
    off, endoff = section_bounds(section)
    unless off + in_off + _new.bytesize <= endoff
      raise "patch does not fit inside section #{section}"
    end
    patch(off + in_off, _new)
  end

  # Adjusts EXTH record 121 (a section-number pointer used by K8 files) so it
  # still refers to the same section after the SRCS sections are removed.
  #
  # Parsing of the EXTH block is best effort: any error raised while walking
  # it is swallowed and the header is returned as it stood at that point.
  #
  # @param srcs_secnum [Integer] index of the first removed SRCS section
  # @param srcs_cnt [Integer] number of removed SRCS sections
  # @param mobiheader [String] bytes of section 0 (not modified in place)
  # @return [String] the header, with record 121 decremented when it pointed
  #   at or beyond the removed sections
  def updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
    mobi_length, = mobiheader[0x14...0x18].unpack("N")
    exth_flag, = mobiheader[0x80...0x84].unpack("N")
    begin
      if (exth_flag & 0x40) != 0
        exth_start = 16 + mobi_length
        exth = mobiheader[exth_start..-1]
        if exth.length >= 4 && exth[0, 4] == "EXTH"
          nitems, = exth[8...12].unpack("N")
          pos = 12
          nitems.times do
            type, size = exth[pos...pos + 8].unpack("NN")
            # A record is at least its own 8-byte header; anything smaller is
            # corrupt and would keep the loop from advancing.
            break unless size && size >= 8
            if type == 121
              boundaryptr, = exth[pos + 8...pos + size].unpack("N")
              if srcs_secnum <= boundaryptr
                value_at = exth_start + pos + 8
                mobiheader = mobiheader[0, value_at] +
                             [boundaryptr - srcs_cnt].pack("N") +
                             mobiheader[value_at + 4..-1]
              end
            end
            pos += size
          end
        end
      end
    rescue StandardError
      # Deliberately ignored: a malformed EXTH block must not abort stripping.
    end
    mobiheader
  end

  # Parses +datain+, removes its SRCS sections and builds the stripped image.
  #
  # @param datain [String] complete contents of a Mobipocket file
  # @param verbose [Boolean] print progress messages to stdout when true
  # @raise [StripException] if the data is not a BOOKMOBI file, has no SRCS
  #   section, or its SRCS pointer is inconsistent
  def initialize(datain, verbose = true)
    @verbose = verbose
    # Work on a binary copy so every slice below is byte-based even when the
    # caller passes a string tagged with a multi-byte encoding.
    datain = datain.b
    if datain.bytesize < RECORD_TABLE_OFFSET || datain[0x3C, 8] != "BOOKMOBI"
      raise StripException, "invalid file format"
    end
    @num_sections, = datain[76, 2].unpack("n")
    if datain.bytesize < RECORD_TABLE_OFFSET + @num_sections * RECORD_ENTRY_SIZE
      raise StripException, "invalid file format"
    end
    # Section 0 is the MOBI header, so a SRCS section needs at least two sections.
    if @num_sections < 2
      raise StripException, "File doesn't contain the sources section."
    end

    # get mobiheader and check SRCS section number and count
    offset0 = record_offset(datain, 0)
    offset1 = record_offset(datain, 1)
    mobiheader = datain[offset0...offset1]
    # Headers too short to hold the SRCS fields cannot reference a SRCS section.
    if mobiheader.nil? || mobiheader.bytesize < SRCS_FIELD_OFFSET + 8
      raise StripException, "File doesn't contain the sources section."
    end
    srcs_secnum, srcs_cnt = mobiheader.unpack("@#{SRCS_FIELD_OFFSET}NN")
    if srcs_secnum == 0xffffffff || srcs_cnt == 0
      raise StripException, "File doesn't contain the sources section."
    end
    puts "Found SRCS section number %d, and count %d" % [srcs_secnum, srcs_cnt] if @verbose

    # find its offset and length
    srcs_end = srcs_secnum + srcs_cnt
    if srcs_end > @num_sections
      raise StripException, "SRCS section range is outside the section table."
    end
    srcs_offset = record_offset(datain, srcs_secnum)
    # When the SRCS run is the last thing in the file there is no following
    # table entry; the run then extends to the end of the data.
    next_offset = srcs_end < @num_sections ? record_offset(datain, srcs_end) : datain.bytesize
    srcs_length = next_offset - srcs_offset
    if datain[srcs_offset, 4] != "SRCS"
      raise StripException, "SRCS section num does not point to SRCS."
    end
    puts " beginning at offset %0x and ending at offset %0x" % [srcs_offset, next_offset] if @verbose

    remaining = @num_sections - srcs_cnt
    # it appears bytes 68-71 always contain (2*num_sections) + 1
    # this is not documented anyplace at all but it appears to be some sort of next
    # available unique_id used to identify specific sections in the palm db
    out = datain[0, 68]
    out << [remaining * 2 + 1].pack("N")
    out << datain[72, 4]
    # write out the number of sections reduced by srcs_cnt
    out << [remaining].pack("n")

    # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
    # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 )
    delta = -RECORD_ENTRY_SIZE * srcs_cnt
    srcs_secnum.times do |i|
      offset, flgval = datain.unpack("@#{RECORD_TABLE_OFFSET + i * RECORD_ENTRY_SIZE}NN")
      out << [offset + delta, flgval].pack("NN")
    end

    # for every record after the srcs_cnt SRCS records we must start it
    # earlier by 8*srcs_cnt + the length of the srcs sections themselves)
    delta -= srcs_length
    (srcs_end...@num_sections).each do |i|
      out << [record_offset(datain, i) + delta, 2 * (i - srcs_cnt)].pack("NN")
    end

    # now pad it out to begin right at the first offset
    # typically this is 2 bytes of nulls
    first_offset = record_offset(out, 0)
    out << "\0" * (first_offset - out.bytesize)
    # now finally add on every thing up to the original src_offset
    out << datain[offset0...srcs_offset]
    # and everything afterwards
    out << datain[next_offset..-1]

    # store away the SRCS section in case the user wants it output
    @stripped_data_header = datain[srcs_offset, 16]
    @stripped_data = datain[srcs_offset + 16...next_offset]

    # update the number of sections count and cache the new record table
    @data_file = out
    @num_sections = remaining
    @sections = read_section_table

    # update the srcs_secnum and srcs_cnt in the mobiheader
    offset0, offset1 = section_bounds(0)
    mobiheader = @data_file[offset0...offset1]
    mobiheader[SRCS_FIELD_OFFSET, 8] = [0xffffffff, 0].pack("NN")
    # if K8 mobi, handle metadata 121 in old mobiheader
    mobiheader = updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
    @data_file[offset0...offset1] = mobiheader
    puts "done" if @verbose
  end

  # @return [String] the stripped file image
  def get_result
    @data_file
  end

  # @return [String] the removed SRCS data without its first 16 bytes
  def get_stripped_data
    @stripped_data
  end

  # @return [String] the first 16 bytes of the removed SRCS data
  def get_header
    @stripped_data_header
  end

  # Reads +infile+, strips it and writes the result to +outfile+.
  #
  # @param infile [String] path of the Mobipocket file to read
  # @param outfile [String, nil] path to write; +infile+ is overwritten when nil
  # @param verbose [Boolean] forwarded to #initialize
  # @return [SectionStripper] the stripper holding the result
  # @raise [StripException] see #initialize
  def self.strip(infile, outfile = nil, verbose = true)
    outfile ||= infile
    stripped_file = new(File.binread(infile), verbose)
    File.binwrite(outfile, stripped_file.get_result)
    stripped_file
  end

  private

  # Reads the 32-bit offset stored in record-table entry +index+ of +data+.
  def record_offset(data, index)
    data.unpack("@#{RECORD_TABLE_OFFSET + index * RECORD_ENTRY_SIZE}N").first
  end

  # Decodes the record table of the stripped file into [offset, flags] pairs.
  def read_section_table
    Array.new(@num_sections) do |i|
      @data_file.unpack("@#{RECORD_TABLE_OFFSET + i * RECORD_ENTRY_SIZE}NN")
    end
  end

  # Returns [start, end) byte offsets of +section+; the last section runs to
  # the end of the file.
  def section_bounds(section)
    off = @sections.fetch(section)[0]
    endoff = section + 1 == @num_sections ? @data_file.bytesize : @sections[section + 1][0]
    [off, endoff]
  end
end
# Command-line entry point:
#   <script> <infile> <outfile> [<strippeddatafile>]
# Writes the stripped book to <outfile> and, when a third argument is given,
# the removed SRCS payload to <strippeddatafile>.
if __FILE__ == $0
  puts "KindleStrip v#{KINDLESTRIP_VERSION}. " +
       "Written 2010-2012 by Paul Durrant and Kevin Hendricks."
  if ARGV.length < 2 || ARGV.length > 3
    puts "Strips the Sources record from Mobipocket ebooks"
    puts "For ebooks generated using KindleGen 1.1 and later that add the source"
    puts "Usage:"
    puts "    %s <infile> <outfile> <strippeddatafile>" % File.basename(__FILE__)
    puts "<strippeddatafile> is optional."
    exit 1
  end

  infile, outfile, stripped_data_file = ARGV
  begin
    stripped_file = SectionStripper.strip(infile, outfile)
    # puts "Header Bytes: " + stripped_file.get_header.unpack("H*").first
    File.binwrite(stripped_data_file, stripped_file.get_stripped_data) if stripped_data_file
  rescue StripException, SystemCallError => e
    # SystemCallError covers unreadable/unwritable paths (Errno::*), so those
    # are reported the same way as format problems instead of as a backtrace.
    warn "Error: #{e.message}"
    exit 1
  end
end