lib/kindlestrip.rb in narou-1.6.4 vs lib/kindlestrip.rb in narou-1.7.0
- old
+ new
@@ -1,249 +1,249 @@
-#! ruby
-# -*- coding: utf-8 -*-
-#
-# It was translated into Ruby script by whiteleaf.
-#
-# original source code:
-# kindlestrip.py v.1.35 http://www.mobileread.com/forums/showthread.php?t=96903
-#
-# This script strips the penultimate record from a Mobipocket file.
-# This is useful because the current KindleGen add a compressed copy
-# of the source files used in this record, making the ebook produced
-# about twice as big as it needs to be.
-#
-#
-# This is free and unencumbered software released into the public domain.
-#
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-#
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# For more information, please refer to <http://unlicense.org/>
-#
-# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
-# With enhancements by Kevin Hendricks, KevinH on mobileread.com
-#
-# Changelog
-# 1.00 - Initial version
-# 1.10 - Added an option to output the stripped data
-# 1.20 - Added check for source files section (thanks Piquan)
-# 1.30 - Added prelim Support for K8 style mobis
-# 1.31 - removed the SRCS section but kept a 0 size entry for it
-# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
-# 1.33 - now uses and modifies mobiheader SRCS and CNT
-# 1.34 - added credit for Kevin Hendricks
-# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records
-
-KINDLESTRIP_VERSION = '1.35'
-
-class StripException < StandardError; end
-
-class SectionStripper
- def load_section(section)
- if section + 1 == @num_sections
- endoff = @data_file.length
- else
- endoff = @sections[section + 1][0]
- end
- off = @sections[section][0]
- @data_file[off...endoff]
- end
-
- def patch(off, _new)
- @data_file = @data_file[0, off] + new + @data_file[off + _new.length .. -1]
- end
-
- def strip(off, len)
- @data_file = @data_file[0, off] + @data_file[off + len .. -1]
- end
-
- def patch_section(section, _new, in_off = 0)
- if section + 1 == @num_sections
- endoff = @data_file.length
- else
- endoff = @sections[section + 1][0]
- end
- raise unless off + in_off + _new.length <= endoff
- patch(off + in_off, _new)
- end
-
- def updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
- mobi_length, = mobiheader[0x14...0x18].unpack("N")
- exth_flag, = mobiheader[0x80...0x84].unpack("N")
- exth = "NONE"
- begin
- if exth_flag & 0x40 != 0
- exth = mobiheader[16 + mobi_length .. -1]
- if exth.length >= 4 && exth[0, 4] == "EXTH"
- nitems, = exth[8...12].unpack("N")
- pos = 12
- nitems.times do
- type, size = exth[pos ... pos + 8].unpack("NN")
- #puts "#{type}, #{size}"
- if type == 121
- boundaryptr, = exth[pos + 8 ... pos + size].unpack("N")
- if srcs_secnum <= boundaryptr
- boundaryptr -= srcs_cnt
- prefix = mobiheader[0, 16 + mobi_length + pos + 8]
- suffix = mobiheader[16 + mobi_length + pos + 8 + 4 .. -1]
- nval = [boundaryptr].pack("N")
- mobiheader = prefix + nval + suffix
- end
- end
- pos += size
- end
- end
- end
- rescue
- end
- mobiheader
- end
-
- def initialize(datain, verbose = true)
- @verbose = verbose
- if datain[0x3C...0x3C+8] != "BOOKMOBI"
- raise StripException, "invalid file format"
- end
- @num_sections, = datain[76...78].unpack("n")
-
- # get mobiheader and check SRCS section number and count
- offset0, = datain.unpack("@78N")
- offset1, = datain.unpack("@86N")
- mobiheader = datain[offset0 ... offset1]
- srcs_secnum, srcs_cnt = mobiheader.unpack("@224NN")
- if srcs_secnum == 0xffffffff || srcs_cnt == 0
- raise StripException, "File doesn't contain the sources section."
- end
-
- puts "Found SRCS section number %d, and count %d" % [srcs_secnum, srcs_cnt] if @verbose
- # find its offset and length
- _next = srcs_secnum + srcs_cnt
- srcs_offset, = datain.unpack("@#{78+srcs_secnum*8}NN")
- next_offset, = datain.unpack("@#{78+_next*8}NN")
- srcs_length = next_offset - srcs_offset
- if datain[srcs_offset ... srcs_offset+4] != "SRCS"
- raise StripException, "SRCS section num does not point to SRCS."
- end
- puts " beginning at offset %0x and ending at offset %0x" % [srcs_offset, srcs_length] if @verbose
-
- # it appears bytes 68-71 always contain (2*num_sections) + 1
- # this is not documented anyplace at all but it appears to be some sort of next
- # available unique_id used to identify specific sections in the palm db
- @data_file = datain[0, 68] + [(@num_sections - srcs_cnt) * 2 + 1].pack("N")
- @data_file += datain[72...76]
-
- # write out the number of sections reduced by srtcs_cnt
- @data_file = @data_file + [@num_sections - srcs_cnt].pack("n")
-
- # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
- # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 )
- delta = -8 * srcs_cnt
- srcs_secnum.times do |i|
- offset, flgval = datain.unpack("@#{78+i*8}NN")
- offset += delta
- @data_file += [offset].pack("N") + [flgval].pack("N")
- end
-
- # for every record after the srcs_cnt SRCS records we must start it
- # earlier by 8*srcs_cnt + the length of the srcs sections themselves)
- delta = delta - srcs_length
- (srcs_secnum + srcs_cnt ... @num_sections).each do |i|
- offset, = datain.unpack("@#{78+i*8}NN")
- offset += delta
- flgval = 2 * (i - srcs_cnt)
- @data_file += [offset].pack("N") + [flgval].pack("N")
- end
-
- # now pad it out to begin right at the first offset
- # typically this is 2 bytes of nulls
- first_offset, = @data_file.unpack("@78NN")
- @data_file += "\0" * (first_offset - @data_file.length)
-
- # now finally add on every thing up to the original src_offset
- @data_file += datain[offset0...srcs_offset]
-
- # and everything afterwards
- @data_file += datain[srcs_offset + srcs_length .. -1]
-
- #store away the SRCS section in case the user wants it output
- @stripped_data_header = datain[srcs_offset ... srcs_offset + 16]
- @stripped_data = datain[srcs_offset + 16 ... srcs_offset + srcs_length]
-
- # update the number of sections count
- @num_section = @num_sections - srcs_cnt
-
- # update the srcs_secnum and srcs_cnt in the mobiheader
- offset0, = @data_file.unpack("@78NN")
- offset1, = @data_file.unpack("@86NN")
- mobiheader = @data_file[offset0 ... offset1]
- mobiheader = mobiheader[0, 0xe0] + [-1].pack("N") + [0].pack("N") + mobiheader[0xe8 .. -1]
-
- # if K8 mobi, handle metadata 121 in old mobiheader
- mobiheader = updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
- @data_file = @data_file[0, offset0] + mobiheader + @data_file[offset1 .. -1]
- puts "done" if @verbose
- end
-
- def get_result
- @data_file
- end
-
- def get_stripped_data
- @stripped_data
- end
-
- def get_header
- @stripped_data_header
- end
-
- def self.strip(infile, outfile = nil, verbose = true)
- outfile = infile unless outfile
- data_file = File.binread(infile)
- stripped_file = new(data_file, verbose)
- File.binwrite(outfile, stripped_file.get_result)
- stripped_file
- end
-end
-
-if __FILE__ == $0
- puts "KndleStrip v#{KINDLESTRIP_VERSION}. " +
- "Written 2010-2012 by Paul Durrant and Kevin Hendricks."
- if ARGV.length < 2 || ARGV.length > 3
- puts "Strips the Sources record from Mobipocket ebooks"
- puts "For ebooks generated using KindleGen 1.1 and later that add the source"
- puts "Usage:"
- puts " %s <infile> <outfile> <strippeddatafile>" % File.basename(__FILE__)
- puts "<strippeddatafile> is optional."
- exit 1
- else
- infile = ARGV[0]
- outfile = ARGV[1]
- begin
- stripped_file = SectionStripper.strip(infile, outfile)
- #print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader())
- if ARGV.length == 3
- File.binwrite(ARGV[2], stripped_file.get_stripped_data)
- end
- rescue StripException => e
- warn "Error: #{e.message}"
- exit 1
- end
- end
-end
+#! ruby
+# -*- coding: utf-8 -*-
+#
+# It was translated into Ruby script by whiteleaf.
+#
+# original source code:
+# kindlestrip.py v.1.35 http://www.mobileread.com/forums/showthread.php?t=96903
+#
+# This script strips the penultimate record from a Mobipocket file.
+# This is useful because the current KindleGen add a compressed copy
+# of the source files used in this record, making the ebook produced
+# about twice as big as it needs to be.
+#
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# For more information, please refer to <http://unlicense.org/>
+#
+# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
+# With enhancements by Kevin Hendricks, KevinH on mobileread.com
+#
+# Changelog
+# 1.00 - Initial version
+# 1.10 - Added an option to output the stripped data
+# 1.20 - Added check for source files section (thanks Piquan)
+# 1.30 - Added prelim Support for K8 style mobis
+# 1.31 - removed the SRCS section but kept a 0 size entry for it
+# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
+# 1.33 - now uses and modifies mobiheader SRCS and CNT
+# 1.34 - added credit for Kevin Hendricks
+# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records
+
# Version of the kindlestrip.py script this Ruby port corresponds to.
# Frozen so the shared constant cannot be mutated in place by callers.
KINDLESTRIP_VERSION = '1.35'.freeze

# Raised when the input is not a Mobipocket file or carries no SRCS section.
class StripException < StandardError; end
+
# Removes the SRCS (source archive) records that KindleGen embeds in a
# Mobipocket file, then rewrites the PalmDB section table and the MOBI header
# so the remaining sections stay addressable.
#
# Typical use:
#   SectionStripper.strip("book.mobi")            # strip in place
#   SectionStripper.new(bytes, false).get_result  # strip an in-memory image
class SectionStripper
  # Byte offset of the first section table entry in the PalmDB header.
  SECTION_TABLE_OFFSET = 78
  # Size in bytes of one section table entry (two 32-bit big-endian values).
  SECTION_ENTRY_SIZE = 8
  # Offset inside section 0 of the (SRCS section number, SRCS count) pair.
  SRCS_FIELD_OFFSET = 0xe0

  # Returns the bytes of section +section+ of the stripped file.
  # The last section extends to the end of the data.
  def load_section(section)
    off, endoff = section_bounds(section)
    @data_file[off...endoff]
  end

  # Overwrites the bytes starting at +off+ with +_new+; the total length of
  # the data is unchanged.
  def patch(off, _new)
    replacement = _new.b
    @data_file = @data_file[0, off] + replacement +
                 @data_file[(off + replacement.bytesize)..-1]
  end

  # Deletes +len+ bytes starting at +off+. The section table is not adjusted,
  # so offsets of later sections are stale afterwards.
  def strip(off, len)
    @data_file = @data_file[0, off] + @data_file[(off + len)..-1]
  end

  # Overwrites part of section +section+ with +_new+, starting +in_off+ bytes
  # into the section.
  #
  # Raises StripException when the replacement would run past the section end.
  def patch_section(section, _new, in_off = 0)
    off, endoff = section_bounds(section)
    unless off + in_off + _new.bytesize <= endoff
      raise StripException, "patch does not fit inside section #{section}"
    end
    patch(off + in_off, _new)
  end

  # Adjusts EXTH record 121 (a section pointer found in K8-style files) when
  # it refers to a section at or after the removed SRCS records.
  #
  # srcs_secnum:: index of the first removed section
  # srcs_cnt::    number of removed sections
  # mobiheader::  bytes of section 0
  #
  # Returns the (possibly patched) header. Parsing is best-effort: a malformed
  # EXTH block leaves the header as it was at the point of failure.
  def updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
    mobi_length, = mobiheader[0x14...0x18].unpack("N")
    exth_flag, = mobiheader[0x80...0x84].unpack("N")
    exth_start = 16 + mobi_length
    begin
      if exth_flag & 0x40 != 0
        exth = mobiheader[exth_start..-1]
        if exth.length >= 4 && exth[0, 4] == "EXTH"
          nitems, = exth[8...12].unpack("N")
          pos = 12
          nitems.times do
            type, size = exth[pos, 8].unpack("NN")
            # A record is at least its own 8-byte header; anything smaller
            # means the block is corrupt and would stall the scan.
            break unless size && size >= 8
            if type == 121
              boundaryptr, = exth[(pos + 8)...(pos + size)].unpack("N")
              if srcs_secnum <= boundaryptr
                value_at = exth_start + pos + 8
                mobiheader = mobiheader[0, value_at] +
                             [boundaryptr - srcs_cnt].pack("N") +
                             mobiheader[(value_at + 4)..-1]
              end
            end
            pos += size
          end
        end
      end
    rescue StandardError
      # Deliberately ignored: the EXTH fix-up is optional and must not abort
      # the strip when the metadata block cannot be parsed.
    end
    mobiheader
  end

  # Parses +datain+ (the complete bytes of a Mobipocket file), removes the
  # SRCS sections and keeps the result for #get_result.
  #
  # datain::  file contents; treated as raw bytes regardless of its encoding
  # verbose:: when true, progress messages are written with +puts+
  #
  # Raises StripException when the data is not a BOOKMOBI file, is truncated,
  # or does not contain a SRCS section.
  def initialize(datain, verbose = true)
    @verbose = verbose
    # All offsets below are byte offsets, so force a binary view of the input.
    datain = datain.b unless datain.encoding == Encoding::BINARY
    if datain.bytesize < SECTION_TABLE_OFFSET || datain[0x3C, 8] != "BOOKMOBI"
      raise StripException, "invalid file format"
    end
    @num_sections, = datain.unpack("@76n")
    table_end = SECTION_TABLE_OFFSET + @num_sections * SECTION_ENTRY_SIZE
    if @num_sections.zero? || datain.bytesize < table_end
      raise StripException, "invalid file format"
    end

    # get mobiheader and check SRCS section number and count
    offset0 = section_offset(datain, 0)
    offset1 = @num_sections > 1 ? section_offset(datain, 1) : datain.bytesize
    mobiheader = datain[offset0...offset1]
    if mobiheader.nil? || mobiheader.bytesize < SRCS_FIELD_OFFSET + 8
      # Header too short to carry the SRCS fields at all.
      raise StripException, "File doesn't contain the sources section."
    end
    srcs_secnum, srcs_cnt = mobiheader.unpack("@#{SRCS_FIELD_OFFSET}NN")
    if srcs_secnum == 0xffffffff || srcs_cnt == 0
      raise StripException, "File doesn't contain the sources section."
    end

    puts "Found SRCS section number %d, and count %d" % [srcs_secnum, srcs_cnt] if @verbose
    # find its offset and length
    _next = srcs_secnum + srcs_cnt
    if srcs_secnum.zero? || _next > @num_sections
      raise StripException, "SRCS section num does not point to SRCS."
    end
    srcs_offset = section_offset(datain, srcs_secnum)
    # When the SRCS records are the last sections there is no following table
    # entry; they then run to the end of the file.
    next_offset = _next == @num_sections ? datain.bytesize : section_offset(datain, _next)
    srcs_length = next_offset - srcs_offset
    if datain[srcs_offset, 4] != "SRCS"
      raise StripException, "SRCS section num does not point to SRCS."
    end
    if @verbose
      puts " beginning at offset %0x and ending at offset %0x" % [srcs_offset, srcs_offset + srcs_length]
    end

    remaining = @num_sections - srcs_cnt

    # it appears bytes 68-71 always contain (2*num_sections) + 1
    # this is not documented anyplace at all but it appears to be some sort of next
    # available unique_id used to identify specific sections in the palm db
    out = datain[0, 68]
    out << [remaining * 2 + 1].pack("N")
    out << datain[72, 4]

    # write out the number of sections reduced by srcs_cnt
    out << [remaining].pack("n")

    # The table shrinks by one 8-byte entry per removed section, so every
    # section before the SRCS records starts that many bytes earlier.
    delta = -SECTION_ENTRY_SIZE * srcs_cnt
    srcs_secnum.times do |i|
      offset, flgval = section_entry(datain, i)
      out << [offset + delta, flgval].pack("NN")
    end

    # Sections after the SRCS records additionally move up by the length of
    # the removed data, and their second table field is renumbered.
    delta -= srcs_length
    (_next...@num_sections).each do |i|
      offset, = section_entry(datain, i)
      out << [offset + delta, 2 * (i - srcs_cnt)].pack("NN")
    end

    # now pad it out to begin right at the first offset
    # typically this is 2 bytes of nulls
    first_offset = section_offset(out, 0)
    out << "\0" * (first_offset - out.bytesize)

    # everything up to the original SRCS offset, then everything after it
    out << datain[offset0...srcs_offset]
    out << datain[(srcs_offset + srcs_length)..-1]

    # store away the SRCS section in case the user wants it output
    @stripped_data_header = datain[srcs_offset, 16]
    @stripped_data = datain[(srcs_offset + 16)...(srcs_offset + srcs_length)]

    # update the number of sections count
    @num_sections = remaining

    # mark the mobiheader as having no SRCS section any more
    offset0 = section_offset(out, 0)
    offset1 = remaining > 1 ? section_offset(out, 1) : out.bytesize
    mobiheader = out[offset0...offset1]
    mobiheader = mobiheader[0, SRCS_FIELD_OFFSET] + [0xffffffff, 0].pack("NN") +
                 mobiheader[(SRCS_FIELD_OFFSET + 8)..-1]

    # if K8 mobi, handle metadata 121 in old mobiheader
    mobiheader = updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
    @data_file = out[0, offset0] + mobiheader + out[offset1..-1]

    # Index of [offset, second field] pairs used by load_section/patch_section.
    @sections = Array.new(@num_sections) { |i| section_entry(@data_file, i) }
    puts "done" if @verbose
  end

  # Returns the stripped file contents.
  def get_result
    @data_file
  end

  # Returns the removed SRCS data without its first 16 bytes.
  def get_stripped_data
    @stripped_data
  end

  # Returns the first 16 bytes of the removed SRCS data.
  def get_header
    @stripped_data_header
  end

  # Reads +infile+, strips it and writes the result to +outfile+
  # (or back to +infile+ when +outfile+ is nil).
  #
  # Returns the SectionStripper instance so callers can fetch the removed data.
  # Raises StripException as described for #initialize.
  def self.strip(infile, outfile = nil, verbose = true)
    outfile ||= infile
    stripped_file = new(File.binread(infile), verbose)
    File.binwrite(outfile, stripped_file.get_result)
    stripped_file
  end

  private

  # Returns the two 32-bit values of section table entry +index+ in +data+.
  def section_entry(data, index)
    data.unpack("@#{SECTION_TABLE_OFFSET + index * SECTION_ENTRY_SIZE}NN")
  end

  # Returns the start offset recorded for section +index+ in +data+.
  def section_offset(data, index)
    section_entry(data, index).first
  end

  # Returns [start, end) byte offsets of +section+ within the stripped data.
  def section_bounds(section)
    endoff = if section + 1 == @num_sections
               @data_file.length
             else
               @sections[section + 1][0]
             end
    [@sections[section][0], endoff]
  end
end
+
# Command-line entry point:
#   kindlestrip.rb <infile> <outfile> [<strippeddatafile>]
# Writes the stripped book to <outfile> and, when a third argument is given,
# the removed SRCS payload to <strippeddatafile>. Exits with status 1 on a
# usage error, an unstrippable file, or an I/O failure.
if __FILE__ == $0
  puts "KindleStrip v#{KINDLESTRIP_VERSION}. " +
       "Written 2010-2012 by Paul Durrant and Kevin Hendricks."
  if ARGV.length < 2 || ARGV.length > 3
    puts "Strips the Sources record from Mobipocket ebooks"
    puts "For ebooks generated using KindleGen 1.1 and later that add the source"
    puts "Usage:"
    puts " %s <infile> <outfile> <strippeddatafile>" % File.basename(__FILE__)
    puts "<strippeddatafile> is optional."
    exit 1
  end

  infile, outfile, stripped_path = ARGV
  begin
    stripped_file = SectionStripper.strip(infile, outfile)
    File.binwrite(stripped_path, stripped_file.get_stripped_data) if stripped_path
  rescue StripException, SystemCallError => e
    # SystemCallError covers unreadable input / unwritable output paths so
    # they are reported like format errors instead of as a backtrace.
    warn "Error: #{e.message}"
    exit 1
  end
end