lib/klipbook/clippings_parser.rb in klipbook-0.2.1 vs lib/klipbook/clippings_parser.rb in klipbook-0.3.0

- old
+ new

@@ -1,5 +1,7 @@
+# encoding: UTF-8
+
 module Klipbook
   class ClippingsParser
     def extract_clippings_from(file_text)
       clippings_text_from(file_text).map { |clipping_text| build_clipping_from(clipping_text) }.compact
     end
@@ -27,19 +29,24 @@
       {
         title: extract_title(title_line),
         author: extract_author(title_line),
         type: extract_type(metadata),
         location: extract_location(metadata),
+        page: extract_page(metadata),
         added_on: extract_added_date(metadata),
         text: extract_text(text_lines)
       }
     end
 
+    def strip_control_characters(file_text)
+      file_text.gsub("\r", '').gsub("\xef\xbb\xbf", '')
+    end
+
     private
 
     def clippings_text_from(file_text)
-      file_text.gsub("\r", '').split('==========')
+      strip_control_characters(file_text).split('==========')
     end
 
     def valid_metadata?(metadata)
       metadata.match(/^-.*Added on/)
     end
@@ -70,9 +77,18 @@
       match = metadata.scan(/Loc(ation|\.) ([0-9]+-?)/)
 
       return nil if match.empty?
 
       location = match.first[1]
+      location.to_i
+    end
+
+    def extract_page(metadata)
+      match = metadata.scan(/Page (\d+)/)
+
+      return nil if match.empty?
+
+      location = match.first.first
       location.to_i
     end
 
     def extract_text(text_lines)
       text_lines.join('').rstrip