lib/epub/publication/package/manifest.rb in epub-parser-0.1.6 vs lib/epub/publication/package/manifest.rb in epub-parser-0.1.7

- old
+ new

@@ -1,7 +1,8 @@ require 'set' require 'enumerabler' +require 'rchardet' require 'epub/constants' require 'epub/parser/content_document' module EPUB module Publication @@ -89,12 +90,24 @@ rootfile = manifest.package.book.ocf.container.rootfile.full_path Addressable::URI.unescape(rootfile + href.normalize.request_uri) end def read - Zip::Archive.open(manifest.package.book.epub_file) {|zip| + raw_content = Zip::Archive.open(manifest.package.book.epub_file) {|zip| zip.fopen(entry_name).read } + # CharDet.detect doesn't raise Encoding::CompatibilityError + # that is caused when trying to compare CharDet's internal + # ASCII-8BIT RegExp with a String with another encoding + # because Zip::File#read returns a String with encoding ASCII-8BIT. + # So, no need to rescue the error here. + encoding = CharDet.detect(raw_content)['encoding'] + if encoding + raw_content.force_encoding(encoding) + else + warn "No encoding detected for #{entry_name}. Set to ASCII-8BIT" if $DEBUG || $VERBOSE + raw_content + end end def xhtml? media_type == 'application/xhtml+xml' end