lib/kindlestrip.rb in narou-1.6.4 vs lib/kindlestrip.rb in narou-1.7.0

- old
+ new

@@ -1,249 +1,249 @@ -#! ruby -# -*- coding: utf-8 -*- -# -# It was translated into Ruby script by whiteleaf. -# -# original source code: -# kindlestrip.py v.1.35 http://www.mobileread.com/forums/showthread.php?t=96903 -# -# This script strips the penultimate record from a Mobipocket file. -# This is useful because the current KindleGen add a compressed copy -# of the source files used in this record, making the ebook produced -# about twice as big as it needs to be. -# -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# For more information, please refer to <http://unlicense.org/> -# -# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com -# With enhancements by Kevin Hendricks, KevinH on mobileread.com -# -# Changelog -# 1.00 - Initial version -# 1.10 - Added an option to output the stripped data -# 1.20 - Added check for source files section (thanks Piquan) -# 1.30 - Added prelim Support for K8 style mobis -# 1.31 - removed the SRCS section but kept a 0 size entry for it -# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed -# 1.33 - now uses and modifies mobiheader SRCS and CNT -# 1.34 - added credit for Kevin Hendricks -# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records - -KINDLESTRIP_VERSION = '1.35' - -class StripException < StandardError; end - -class SectionStripper - def load_section(section) - if section + 1 == @num_sections - endoff = @data_file.length - else - endoff = @sections[section + 1][0] - end - off = @sections[section][0] - @data_file[off...endoff] - end - - def patch(off, _new) - @data_file = @data_file[0, off] + new + @data_file[off + _new.length .. -1] - end - - def strip(off, len) - @data_file = @data_file[0, off] + @data_file[off + len .. -1] - end - - def patch_section(section, _new, in_off = 0) - if section + 1 == @num_sections - endoff = @data_file.length - else - endoff = @sections[section + 1][0] - end - raise unless off + in_off + _new.length <= endoff - patch(off + in_off, _new) - end - - def updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) - mobi_length, = mobiheader[0x14...0x18].unpack("N") - exth_flag, = mobiheader[0x80...0x84].unpack("N") - exth = "NONE" - begin - if exth_flag & 0x40 != 0 - exth = mobiheader[16 + mobi_length .. -1] - if exth.length >= 4 && exth[0, 4] == "EXTH" - nitems, = exth[8...12].unpack("N") - pos = 12 - nitems.times do - type, size = exth[pos ... pos + 8].unpack("NN") - #puts "#{type}, #{size}" - if type == 121 - boundaryptr, = exth[pos + 8 ... pos + size].unpack("N") - if srcs_secnum <= boundaryptr - boundaryptr -= srcs_cnt - prefix = mobiheader[0, 16 + mobi_length + pos + 8] - suffix = mobiheader[16 + mobi_length + pos + 8 + 4 .. -1] - nval = [boundaryptr].pack("N") - mobiheader = prefix + nval + suffix - end - end - pos += size - end - end - end - rescue - end - mobiheader - end - - def initialize(datain, verbose = true) - @verbose = verbose - if datain[0x3C...0x3C+8] != "BOOKMOBI" - raise StripException, "invalid file format" - end - @num_sections, = datain[76...78].unpack("n") - - # get mobiheader and check SRCS section number and count - offset0, = datain.unpack("@78N") - offset1, = datain.unpack("@86N") - mobiheader = datain[offset0 ... offset1] - srcs_secnum, srcs_cnt = mobiheader.unpack("@224NN") - if srcs_secnum == 0xffffffff || srcs_cnt == 0 - raise StripException, "File doesn't contain the sources section." - end - - puts "Found SRCS section number %d, and count %d" % [srcs_secnum, srcs_cnt] if @verbose - # find its offset and length - _next = srcs_secnum + srcs_cnt - srcs_offset, = datain.unpack("@#{78+srcs_secnum*8}NN") - next_offset, = datain.unpack("@#{78+_next*8}NN") - srcs_length = next_offset - srcs_offset - if datain[srcs_offset ... srcs_offset+4] != "SRCS" - raise StripException, "SRCS section num does not point to SRCS." - end - puts " beginning at offset %0x and ending at offset %0x" % [srcs_offset, srcs_length] if @verbose - - # it appears bytes 68-71 always contain (2*num_sections) + 1 - # this is not documented anyplace at all but it appears to be some sort of next - # available unique_id used to identify specific sections in the palm db - @data_file = datain[0, 68] + [(@num_sections - srcs_cnt) * 2 + 1].pack("N") - @data_file += datain[72...76] - - # write out the number of sections reduced by srtcs_cnt - @data_file = @data_file + [@num_sections - srcs_cnt].pack("n") - - # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table - # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 ) - delta = -8 * srcs_cnt - srcs_secnum.times do |i| - offset, flgval = datain.unpack("@#{78+i*8}NN") - offset += delta - @data_file += [offset].pack("N") + [flgval].pack("N") - end - - # for every record after the srcs_cnt SRCS records we must start it - # earlier by 8*srcs_cnt + the length of the srcs sections themselves) - delta = delta - srcs_length - (srcs_secnum + srcs_cnt ... @num_sections).each do |i| - offset, = datain.unpack("@#{78+i*8}NN") - offset += delta - flgval = 2 * (i - srcs_cnt) - @data_file += [offset].pack("N") + [flgval].pack("N") - end - - # now pad it out to begin right at the first offset - # typically this is 2 bytes of nulls - first_offset, = @data_file.unpack("@78NN") - @data_file += "\0" * (first_offset - @data_file.length) - - # now finally add on every thing up to the original src_offset - @data_file += datain[offset0...srcs_offset] - - # and everything afterwards - @data_file += datain[srcs_offset + srcs_length .. -1] - - #store away the SRCS section in case the user wants it output - @stripped_data_header = datain[srcs_offset ... srcs_offset + 16] - @stripped_data = datain[srcs_offset + 16 ... srcs_offset + srcs_length] - - # update the number of sections count - @num_section = @num_sections - srcs_cnt - - # update the srcs_secnum and srcs_cnt in the mobiheader - offset0, = @data_file.unpack("@78NN") - offset1, = @data_file.unpack("@86NN") - mobiheader = @data_file[offset0 ... offset1] - mobiheader = mobiheader[0, 0xe0] + [-1].pack("N") + [0].pack("N") + mobiheader[0xe8 .. -1] - - # if K8 mobi, handle metadata 121 in old mobiheader - mobiheader = updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) - @data_file = @data_file[0, offset0] + mobiheader + @data_file[offset1 .. -1] - puts "done" if @verbose - end - - def get_result - @data_file - end - - def get_stripped_data - @stripped_data - end - - def get_header - @stripped_data_header - end - - def self.strip(infile, outfile = nil, verbose = true) - outfile = infile unless outfile - data_file = File.binread(infile) - stripped_file = new(data_file, verbose) - File.binwrite(outfile, stripped_file.get_result) - stripped_file - end -end - -if __FILE__ == $0 - puts "KndleStrip v#{KINDLESTRIP_VERSION}. " + - "Written 2010-2012 by Paul Durrant and Kevin Hendricks." - if ARGV.length < 2 || ARGV.length > 3 - puts "Strips the Sources record from Mobipocket ebooks" - puts "For ebooks generated using KindleGen 1.1 and later that add the source" - puts "Usage:" - puts " %s <infile> <outfile> <strippeddatafile>" % File.basename(__FILE__) - puts "<strippeddatafile> is optional." - exit 1 - else - infile = ARGV[0] - outfile = ARGV[1] - begin - stripped_file = SectionStripper.strip(infile, outfile) - #print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()) - if ARGV.length == 3 - File.binwrite(ARGV[2], stripped_file.get_stripped_data) - end - rescue StripException => e - warn "Error: #{e.message}" - exit 1 - end - end -end +#! ruby +# -*- coding: utf-8 -*- +# +# It was translated into Ruby script by whiteleaf. +# +# original source code: +# kindlestrip.py v.1.35 http://www.mobileread.com/forums/showthread.php?t=96903 +# +# This script strips the penultimate record from a Mobipocket file. +# This is useful because the current KindleGen add a compressed copy +# of the source files used in this record, making the ebook produced +# about twice as big as it needs to be. +# +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to <http://unlicense.org/> +# +# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com +# With enhancements by Kevin Hendricks, KevinH on mobileread.com +# +# Changelog +# 1.00 - Initial version +# 1.10 - Added an option to output the stripped data +# 1.20 - Added check for source files section (thanks Piquan) +# 1.30 - Added prelim Support for K8 style mobis +# 1.31 - removed the SRCS section but kept a 0 size entry for it +# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed +# 1.33 - now uses and modifies mobiheader SRCS and CNT +# 1.34 - added credit for Kevin Hendricks +# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records + +KINDLESTRIP_VERSION = '1.35' + +class StripException < StandardError; end + +class SectionStripper + def load_section(section) + if section + 1 == @num_sections + endoff = @data_file.length + else + endoff = @sections[section + 1][0] + end + off = @sections[section][0] + @data_file[off...endoff] + end + + def patch(off, _new) + @data_file = @data_file[0, off] + new + @data_file[off + _new.length .. -1] + end + + def strip(off, len) + @data_file = @data_file[0, off] + @data_file[off + len .. -1] + end + + def patch_section(section, _new, in_off = 0) + if section + 1 == @num_sections + endoff = @data_file.length + else + endoff = @sections[section + 1][0] + end + raise unless off + in_off + _new.length <= endoff + patch(off + in_off, _new) + end + + def updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) + mobi_length, = mobiheader[0x14...0x18].unpack("N") + exth_flag, = mobiheader[0x80...0x84].unpack("N") + exth = "NONE" + begin + if exth_flag & 0x40 != 0 + exth = mobiheader[16 + mobi_length .. -1] + if exth.length >= 4 && exth[0, 4] == "EXTH" + nitems, = exth[8...12].unpack("N") + pos = 12 + nitems.times do + type, size = exth[pos ... pos + 8].unpack("NN") + #puts "#{type}, #{size}" + if type == 121 + boundaryptr, = exth[pos + 8 ... pos + size].unpack("N") + if srcs_secnum <= boundaryptr + boundaryptr -= srcs_cnt + prefix = mobiheader[0, 16 + mobi_length + pos + 8] + suffix = mobiheader[16 + mobi_length + pos + 8 + 4 .. -1] + nval = [boundaryptr].pack("N") + mobiheader = prefix + nval + suffix + end + end + pos += size + end + end + end + rescue + end + mobiheader + end + + def initialize(datain, verbose = true) + @verbose = verbose + if datain[0x3C...0x3C+8] != "BOOKMOBI" + raise StripException, "invalid file format" + end + @num_sections, = datain[76...78].unpack("n") + + # get mobiheader and check SRCS section number and count + offset0, = datain.unpack("@78N") + offset1, = datain.unpack("@86N") + mobiheader = datain[offset0 ... offset1] + srcs_secnum, srcs_cnt = mobiheader.unpack("@224NN") + if srcs_secnum == 0xffffffff || srcs_cnt == 0 + raise StripException, "File doesn't contain the sources section." + end + + puts "Found SRCS section number %d, and count %d" % [srcs_secnum, srcs_cnt] if @verbose + # find its offset and length + _next = srcs_secnum + srcs_cnt + srcs_offset, = datain.unpack("@#{78+srcs_secnum*8}NN") + next_offset, = datain.unpack("@#{78+_next*8}NN") + srcs_length = next_offset - srcs_offset + if datain[srcs_offset ... srcs_offset+4] != "SRCS" + raise StripException, "SRCS section num does not point to SRCS." + end + puts " beginning at offset %0x and ending at offset %0x" % [srcs_offset, srcs_length] if @verbose + + # it appears bytes 68-71 always contain (2*num_sections) + 1 + # this is not documented anyplace at all but it appears to be some sort of next + # available unique_id used to identify specific sections in the palm db + @data_file = datain[0, 68] + [(@num_sections - srcs_cnt) * 2 + 1].pack("N") + @data_file += datain[72...76] + + # write out the number of sections reduced by srtcs_cnt + @data_file = @data_file + [@num_sections - srcs_cnt].pack("n") + + # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table + # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 ) + delta = -8 * srcs_cnt + srcs_secnum.times do |i| + offset, flgval = datain.unpack("@#{78+i*8}NN") + offset += delta + @data_file += [offset].pack("N") + [flgval].pack("N") + end + + # for every record after the srcs_cnt SRCS records we must start it + # earlier by 8*srcs_cnt + the length of the srcs sections themselves) + delta = delta - srcs_length + (srcs_secnum + srcs_cnt ... @num_sections).each do |i| + offset, = datain.unpack("@#{78+i*8}NN") + offset += delta + flgval = 2 * (i - srcs_cnt) + @data_file += [offset].pack("N") + [flgval].pack("N") + end + + # now pad it out to begin right at the first offset + # typically this is 2 bytes of nulls + first_offset, = @data_file.unpack("@78NN") + @data_file += "\0" * (first_offset - @data_file.length) + + # now finally add on every thing up to the original src_offset + @data_file += datain[offset0...srcs_offset] + + # and everything afterwards + @data_file += datain[srcs_offset + srcs_length .. -1] + + #store away the SRCS section in case the user wants it output + @stripped_data_header = datain[srcs_offset ... srcs_offset + 16] + @stripped_data = datain[srcs_offset + 16 ... srcs_offset + srcs_length] + + # update the number of sections count + @num_section = @num_sections - srcs_cnt + + # update the srcs_secnum and srcs_cnt in the mobiheader + offset0, = @data_file.unpack("@78NN") + offset1, = @data_file.unpack("@86NN") + mobiheader = @data_file[offset0 ... offset1] + mobiheader = mobiheader[0, 0xe0] + [-1].pack("N") + [0].pack("N") + mobiheader[0xe8 .. -1] + + # if K8 mobi, handle metadata 121 in old mobiheader + mobiheader = updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) + @data_file = @data_file[0, offset0] + mobiheader + @data_file[offset1 .. -1] + puts "done" if @verbose + end + + def get_result + @data_file + end + + def get_stripped_data + @stripped_data + end + + def get_header + @stripped_data_header + end + + def self.strip(infile, outfile = nil, verbose = true) + outfile = infile unless outfile + data_file = File.binread(infile) + stripped_file = new(data_file, verbose) + File.binwrite(outfile, stripped_file.get_result) + stripped_file + end +end + +if __FILE__ == $0 + puts "KndleStrip v#{KINDLESTRIP_VERSION}. " + + "Written 2010-2012 by Paul Durrant and Kevin Hendricks." + if ARGV.length < 2 || ARGV.length > 3 + puts "Strips the Sources record from Mobipocket ebooks" + puts "For ebooks generated using KindleGen 1.1 and later that add the source" + puts "Usage:" + puts " %s <infile> <outfile> <strippeddatafile>" % File.basename(__FILE__) + puts "<strippeddatafile> is optional." + exit 1 + else + infile = ARGV[0] + outfile = ARGV[1] + begin + stripped_file = SectionStripper.strip(infile, outfile) + #print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()) + if ARGV.length == 3 + File.binwrite(ARGV[2], stripped_file.get_stripped_data) + end + rescue StripException => e + warn "Error: #{e.message}" + exit 1 + end + end +end