lib/klipbook/clippings_parser.rb in klipbook-0.2.1 vs lib/klipbook/clippings_parser.rb in klipbook-0.3.0
- old
+ new
@@ -1,5 +1,7 @@
+# encoding: UTF-8
+
module Klipbook
class ClippingsParser
# Turns the raw text of a clippings file into a list of parsed clippings.
#
# file_text - the contents of the clippings file.
#
# Each piece returned by clippings_text_from is handed to build_clipping_from;
# pieces for which that call returns nil are dropped from the result.
def extract_clippings_from(file_text)
  parsed = clippings_text_from(file_text).map do |chunk|
    build_clipping_from(chunk)
  end
  parsed.compact
end
@@ -27,19 +29,24 @@
{
title: extract_title(title_line),
author: extract_author(title_line),
type: extract_type(metadata),
location: extract_location(metadata),
+ page: extract_page(metadata),
added_on: extract_added_date(metadata),
text: extract_text(text_lines)
}
end
# Returns a copy of file_text with every carriage return ("\r") and every
# occurrence of the byte sequence EF BB BF (the UTF-8 byte order mark) removed.
# gsub is used, so file_text itself is not modified.
# NOTE(review): this sits above the `private` keyword in this listing, so it
# is callable from outside the class -- confirm that is intended.
+ def strip_control_characters(file_text)
+ file_text.gsub("\r", '').gsub("\xef\xbb\xbf", '')
+ end
+
private
# Splits the file text on the literal '==========' separator and returns the
# resulting array of pieces.
# The removed (-) line stripped only "\r" before splitting; the added (+) line
# delegates the clean-up to strip_control_characters instead.
def clippings_text_from(file_text)
- file_text.gsub("\r", '').split('==========')
+ strip_control_characters(file_text).split('==========')
end
# Tests whether metadata contains a line that begins with "-" and, later on
# that same line, the text "Added on".
#
# Returns the MatchData for the first such line, or nil when there is none,
# so callers should rely only on the truthiness of the result.
def valid_metadata?(metadata)
  added_on_pattern = /^-.*Added on/
  metadata.match(added_on_pattern)
end
@@ -70,9 +77,18 @@
match = metadata.scan(/Loc(ation|\.) ([0-9]+-?)/)
return nil if match.empty?
location = match.first[1]
+ location.to_i
+ end
+
# Pulls the page number out of a metadata string.
#
# Scans metadata for "Page <digits>" and uses the first occurrence.
# Returns those digits converted with to_i, or nil when the text does not
# contain "Page " followed by at least one digit.
+ def extract_page(metadata)
+ match = metadata.scan(/Page (\d+)/)
+
+ return nil if match.empty?
+
# scan yields one array of captures per hit, hence first.first.
# NOTE(review): despite its name, this local holds the page digits, not a
# location.
+ location = match.first.first
location.to_i
end
def extract_text(text_lines)
text_lines.join('').rstrip