#! ruby
# -*- coding: utf-8 -*-
#
# It was translated into Ruby script by whiteleaf.
#
# original source code:
# kindlestrip.py v.1.35 http://www.mobileread.com/forums/showthread.php?t=96903
#
# This script strips the penultimate record from a Mobipocket file.
# This is useful because the current KindleGen adds a compressed copy
# of the source files used in this record, making the ebook produced
# about twice as big as it needs to be.
#
#
# This is free and unencumbered software released into the public domain.
# 
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
# 
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
# 
# For more information, please refer to <http://unlicense.org/>
#
# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
# With enhancements by Kevin Hendricks, KevinH on mobileread.com
#
# Changelog
#  1.00 - Initial version
#  1.10 - Added an option to output the stripped data
#  1.20 - Added check for source files section (thanks Piquan)
#  1.30 - Added prelim Support for K8 style mobis
#  1.31 - removed the SRCS section but kept a 0 size entry for it
#  1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
#  1.33 - now uses and modifies mobiheader SRCS and CNT
#  1.34 - added credit for Kevin Hendricks
#  1.35 - fixed bug when more than one compilation (SRCS/CMET) records

# Version of the original kindlestrip.py release this port corresponds to;
# it is interpolated into the banner printed by the command-line entry point.
KINDLESTRIP_VERSION = "1.35"

# Raised by SectionStripper when the input cannot be stripped: the data is
# not a BOOKMOBI file, it has no sources section, or the SRCS section number
# does not point at an SRCS record.
class StripException < StandardError
end

# Removes the SRCS records (the compressed copy of the source files that
# KindleGen embeds) from a Mobipocket file held in memory, and rewrites the
# PalmDB header, the section table and the MOBI header so that the remaining
# sections stay consistent.
#
# All the work happens in the constructor. Afterwards the rebuilt file is
# available from #get_result and the removed payload from #get_stripped_data.
class SectionStripper
  # Offset of the first section-table entry. Each entry is SECTION_ENTRY_SIZE
  # bytes: a 32-bit big-endian offset followed by a 32-bit value (flgval).
  SECTION_TABLE_OFFSET = 78
  SECTION_ENTRY_SIZE = 8
  # Offset, inside the MOBI header record, of the SRCS section number; the
  # 32-bit SRCS section count follows it directly.
  SRCS_FIELD_OFFSET = 0xe0
  # SRCS section number stored when the file has no SRCS section.
  NO_SECTION = 0xffffffff

  # Returns the bytes of section number +section+ of the rebuilt file.
  # Raises IndexError when +section+ is out of range.
  def load_section(section)
    off, endoff = section_bounds(section)
    @data_file[off...endoff]
  end

  # Overwrites the bytes starting at absolute offset +off+ with +_new+.
  # The length of the file does not change.
  def patch(off, _new)
    @data_file = @data_file[0, off] + _new + @data_file[off + _new.length .. -1]
  end

  # Removes +len+ bytes starting at absolute offset +off+.
  # The section offsets cached by the constructor are not adjusted.
  def strip(off, len)
    @data_file = @data_file[0, off] + @data_file[off + len .. -1]
  end

  # Overwrites part of section +section+ with +_new+, starting +in_off+ bytes
  # into the section. Raises ArgumentError when the replacement would run
  # past the end of the section.
  def patch_section(section, _new, in_off = 0)
    off, endoff = section_bounds(section)
    unless off + in_off + _new.length <= endoff
      raise ArgumentError, "patch does not fit inside section #{section}"
    end
    patch(off + in_off, _new)
  end

  # Adjusts EXTH record 121 inside +mobiheader+ after +srcs_cnt+ sections
  # starting at +srcs_secnum+ have been removed: when the stored section
  # number is at or after +srcs_secnum+ it is decreased by +srcs_cnt+.
  # Returns the (possibly modified) header; the argument is not mutated.
  #
  # NOTE(review): a stored value of 0xffffffff would also be decremented;
  # confirm whether that sentinel can occur in record 121.
  def updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
    mobi_length, = mobiheader[0x14, 4].unpack("N")
    exth_flag, = mobiheader[0x80, 4].unpack("N")
    begin
      if exth_flag & 0x40 != 0
        exth_start = 16 + mobi_length
        exth = mobiheader[exth_start .. -1]
        if exth.length >= 4 && exth[0, 4] == "EXTH"
          nitems, = exth[8, 4].unpack("N")
          pos = 12
          nitems.times do
            type, size = exth[pos, 8].unpack("NN")
            if type == 121
              boundaryptr, = exth[pos + 8 ... pos + size].unpack("N")
              if srcs_secnum <= boundaryptr
                # replace the 4-byte value in place; the header length is unchanged
                value_at = exth_start + pos + 8
                mobiheader = mobiheader[0, value_at] +
                             [boundaryptr - srcs_cnt].pack("N") +
                             mobiheader[value_at + 4 .. -1]
              end
            end
            pos += size
          end
        end
      end
    rescue StandardError
      # Deliberately best effort: a malformed EXTH block must not abort the
      # strip, so whatever has been updated so far is returned.
    end
    mobiheader
  end

  # Builds the stripped file from +datain+, the complete contents of a
  # Mobipocket file. Progress messages are printed to stdout when +verbose+
  # is true.
  #
  # Raises StripException when the data is not a BOOKMOBI file, when it has
  # no sources section, or when the SRCS section number does not point at an
  # SRCS record.
  def initialize(datain, verbose = true)
    @verbose = verbose
    # Every offset below is a byte offset, so work on a binary-encoded copy.
    datain = datain.dup.force_encoding(Encoding::BINARY)
    if datain.length < SECTION_TABLE_OFFSET || datain[0x3C, 8] != "BOOKMOBI"
      raise StripException, "invalid file format"
    end
    @num_sections, = datain[76, 2].unpack("n")
    table_end = SECTION_TABLE_OFFSET + SECTION_ENTRY_SIZE * @num_sections
    if @num_sections.zero? || datain.length < table_end
      raise StripException, "invalid file format"
    end

    # get mobiheader and check SRCS section number and count
    offset0, = table_entry(datain, 0)
    offset1 = @num_sections > 1 ? table_entry(datain, 1)[0] : datain.length
    mobiheader = datain[offset0...offset1]
    if mobiheader.nil? || mobiheader.length < SRCS_FIELD_OFFSET + 8
      # a header too short to hold the SRCS fields cannot reference SRCS data
      raise StripException, "File doesn't contain the sources section."
    end
    srcs_secnum, srcs_cnt = mobiheader.unpack("@#{SRCS_FIELD_OFFSET}NN")
    if srcs_secnum == NO_SECTION || srcs_cnt == 0
      raise StripException, "File doesn't contain the sources section."
    end

    puts "Found SRCS section number %d, and count %d" % [srcs_secnum, srcs_cnt] if @verbose
    # find its offset and length
    srcs_end = srcs_secnum + srcs_cnt
    if srcs_end > @num_sections
      raise StripException, "SRCS section num does not point to SRCS."
    end
    srcs_offset, = table_entry(datain, srcs_secnum)
    # when the SRCS records are the last sections they run to the end of the file
    next_offset = srcs_end < @num_sections ? table_entry(datain, srcs_end)[0] : datain.length
    srcs_length = next_offset - srcs_offset
    if datain[srcs_offset, 4] != "SRCS"
      raise StripException, "SRCS section num does not point to SRCS."
    end
    puts "   beginning at offset %0x and ending at offset %0x" % [srcs_offset, next_offset] if @verbose

    remaining = @num_sections - srcs_cnt

    # it appears bytes 68-71 always contain (2*num_sections) + 1
    # this is not documented anyplace at all but it appears to be some sort of next
    # available unique_id used to identify specific sections in the palm db
    rebuilt = datain[0, 68] + [remaining * 2 + 1].pack("N")
    rebuilt << datain[72, 4]

    # write out the number of sections reduced by srcs_cnt
    rebuilt << [remaining].pack("n")

    # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
    # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8)
    delta = -SECTION_ENTRY_SIZE * srcs_cnt
    srcs_secnum.times do |i|
      offset, flgval = table_entry(datain, i)
      rebuilt << [offset + delta, flgval].pack("NN")
    end

    # for every record after the srcs_cnt SRCS records we must start it
    # earlier by 8*srcs_cnt + the length of the srcs sections themselves
    delta -= srcs_length
    (srcs_end...@num_sections).each do |i|
      offset, = table_entry(datain, i)
      rebuilt << [offset + delta, 2 * (i - srcs_cnt)].pack("NN")
    end

    # now pad it out to begin right at the first offset
    # typically this is 2 bytes of nulls
    first_offset, = table_entry(rebuilt, 0)
    rebuilt << "\0" * (first_offset - rebuilt.length)

    # now finally add on every thing up to the original srcs_offset
    rebuilt << datain[offset0...srcs_offset]

    # and everything afterwards
    rebuilt << datain[srcs_offset + srcs_length .. -1]

    # store away the SRCS section in case the user wants it output
    @stripped_data_header = datain[srcs_offset, 16]
    @stripped_data = datain[srcs_offset + 16 ... srcs_offset + srcs_length]

    # from here on the instance describes the rebuilt file
    @data_file = rebuilt
    @num_sections = remaining
    @sections = Array.new(@num_sections) { |i| table_entry(@data_file, i) }

    # update the srcs_secnum and srcs_cnt in the mobiheader
    offset0, offset1 = section_bounds(0)
    mobiheader = @data_file[offset0...offset1]
    mobiheader = mobiheader[0, SRCS_FIELD_OFFSET] + [NO_SECTION, 0].pack("NN") +
                 mobiheader[SRCS_FIELD_OFFSET + 8 .. -1]

    # if K8 mobi, handle metadata 121 in old mobiheader
    mobiheader = updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
    @data_file = @data_file[0, offset0] + mobiheader + @data_file[offset1 .. -1]
    puts "done" if @verbose
  end

  # Returns the complete contents of the rebuilt file.
  def get_result
    @data_file
  end

  # Returns the removed SRCS data without its first 16 bytes.
  def get_stripped_data
    @stripped_data
  end

  # Returns the first 16 bytes of the removed SRCS data.
  def get_header
    @stripped_data_header
  end

  # Reads +infile+, strips it and writes the result to +outfile+ (or back to
  # +infile+ when +outfile+ is nil). Returns the SectionStripper so the caller
  # can retrieve the removed data.
  def self.strip(infile, outfile = nil, verbose = true)
    outfile ||= infile
    data_file = File.binread(infile)
    stripped_file = new(data_file, verbose)
    File.binwrite(outfile, stripped_file.get_result)
    stripped_file
  end

  private

  # Returns [offset, flgval] of entry +index+ of the section table in +data+.
  def table_entry(data, index)
    data.unpack("@#{SECTION_TABLE_OFFSET + index * SECTION_ENTRY_SIZE}NN")
  end

  # Returns [start, end) byte offsets of section +section+ in the rebuilt
  # file; the last section runs to the end of the file.
  def section_bounds(section)
    off = @sections.fetch(section)[0]
    endoff = section + 1 == @num_sections ? @data_file.length : @sections.fetch(section + 1)[0]
    [off, endoff]
  end
end

# Command-line entry point, run only when this file is executed directly:
#   <script> <infile> <outfile> [<strippeddatafile>]
# Writes the stripped book to <outfile> and, when a third argument is given,
# the removed SRCS payload to <strippeddatafile>. Exits with status 1 on a
# usage error or when stripping fails.
if __FILE__ == $0
  puts "KindleStrip v#{KINDLESTRIP_VERSION}. " +
       "Written 2010-2012 by Paul Durrant and Kevin Hendricks."
  unless ARGV.length.between?(2, 3)
    puts "Strips the Sources record from Mobipocket ebooks"
    puts "For ebooks generated using KindleGen 1.1 and later that add the source"
    puts "Usage:"
    puts "    %s <infile> <outfile> <strippeddatafile>" % File.basename(__FILE__)
    puts "<strippeddatafile> is optional."
    exit 1
  end

  # the third element is nil when only two arguments were given
  infile, outfile, stripped_data_file = ARGV
  begin
    stripped_file = SectionStripper.strip(infile, outfile)
    if stripped_data_file
      File.binwrite(stripped_data_file, stripped_file.get_stripped_data)
    end
  rescue StripException, SystemCallError => e
    # SystemCallError covers unreadable input / unwritable output files, so
    # they are reported the same way as format errors instead of a backtrace
    warn "Error: #{e.message}"
    exit 1
  end
end