lib/kindle_highlights/client.rb in kindle-highlights-0.0.8 vs lib/kindle_highlights/client.rb in kindle-highlights-1.0.0
- old
+ new
@@ -1,50 +1,93 @@
module KindleHighlights
class Client
- attr_reader :books
-
- def initialize(email_address, password)
- @email_address = email_address
- @password = password
-
- setup_mechanize_agent
- load_books_from_kindle_account
+ # Raised by conditionally_sign_in_to_amazon when the page returned after
+ # submitting the sign-in form contains a CAPTCHA image.
+ class CaptchaError < StandardError; end
+
+ # Writer for the agent used for every request. When an agent is assigned,
+ # the private mechanize_agent reader returns it instead of building one.
+ attr_writer :mechanize_agent
+ # Page returned by a successful sign-in. While it is non-nil, the sign-in
+ # step is skipped.
+ attr_accessor :kindle_logged_in_page
+
+ # Stores the credentials and agent options; no request is made here.
+ #
+ # @param email_address value assigned to the sign-in form's email field
+ # @param password value assigned to the sign-in form's password field
+ # @param mechanize_options [Hash] attribute name => value pairs, each
+ #   assigned on the default agent through its `name=` setter
+ def initialize(email_address:, password:, mechanize_options: {})
+ @email_address = email_address
+ @password = password
+ @mechanize_options = mechanize_options
end
-
+
+ # Returns the account's books, loading them on first use and caching the
+ # result in @books for later calls.
+ def books
+   @books = load_books_from_kindle_account unless @books
+   @books
+ end
+
+ # Fetches every highlight recorded for one book.
+ #
+ # Signs in first if needed, then requests the highlights endpoint in batches
+ # of BATCH_SIZE, advancing the cursor by BATCH_SIZE after each batch.
+ #
+ # @param asin value interpolated into the request's `asin` query parameter
+ # @return [Array] the concatenated "items" entries of every fetched batch
def highlights_for(asin)
- highlights = @mechanize_agent.get("https://kindle.amazon.com/kcw/highlights?asin=#{asin}&cursor=0&count=1000")
- json = JSON.parse(highlights.body)
- json["items"]
+ conditionally_sign_in_to_amazon
+
+ cursor = 0
+ highlights = []
+
+ loop do
+ # The response includes a `hasMore` field, but at the time of this writing it is always `false`,
+ # so paging continues until a batch comes back with no items.
+ page = mechanize_agent.get("https://kindle.amazon.com/kcw/highlights?asin=#{asin}&cursor=#{cursor}&count=#{BATCH_SIZE}")
+ items = JSON.parse(page.body).fetch("items", [])
+
+ break unless items.any?
+
+ highlights.concat(items)
+ cursor += BATCH_SIZE
+ end
+ highlights
end
-
+
private
-
+
+ # Private accessors (declared after `private`) for the values stored by initialize.
+ attr_accessor :email_address, :password, :mechanize_options
+
+ # Signs in to Amazon unless a logged-in page is already stored.
+ #
+ # Fetches KINDLE_LOGIN_PAGE, fills the form matched by SIGNIN_FORM_IDENTIFIER
+ # with the stored credentials and submits it. The resulting page is kept in
+ # @kindle_logged_in_page so later calls return immediately.
+ #
+ # @raise [CaptchaError] when the post-sign-in page contains `#ap_captcha_img`
+ def conditionally_sign_in_to_amazon
+   return unless @kindle_logged_in_page.nil?
+
+   signin_page = mechanize_agent.get(KINDLE_LOGIN_PAGE)
+   signin_form = signin_page.form(SIGNIN_FORM_IDENTIFIER)
+   signin_form.email = email_address
+   signin_form.password = password
+   post_signin_page = mechanize_agent.submit(signin_form)
+
+   if post_signin_page.search("#ap_captcha_img").any?
+     # link_with returns nil when no link matches. Fall back to the CAPTCHA
+     # page's own address so CaptchaError is still raised rather than a
+     # NoMethodError on nil.
+     challenge_link = post_signin_page.link_with(text: /See a new challenge/)
+     resolution_url = (challenge_link ? challenge_link.resolved_uri : post_signin_page.uri).to_s
+     raise CaptchaError, "Received a CAPTCHA while attempting to sign in to your Amazon account. You will need to resolve this manually at #{resolution_url}"
+   end
+
+   @kindle_logged_in_page = post_signin_page
+ end
+
+ # Builds the ASIN => title map for every book listed in the account.
+ #
+ # Signs in if needed, follows the "Your Books" link from the logged-in page,
+ # then walks the listing page by page through its "Next" link.
+ #
+ # @return [Hash] book titles keyed by ASIN
def load_books_from_kindle_account
- @books = Hash.new
- signin_page = @mechanize_agent.get(KINDLE_LOGIN_PAGE)
-
- signin_form = signin_page.form(SIGNIN_FORM_IDENTIFIER)
- signin_form.email = @email_address
- signin_form.password = @password
-
- kindle_logged_in_page = @mechanize_agent.submit(signin_form)
- highlights_page = @mechanize_agent.click(kindle_logged_in_page.link_with(:text => /Your Books/))
+ conditionally_sign_in_to_amazon
+ books = {}
+ highlights_page = mechanize_agent.click(kindle_logged_in_page.link_with(text: /Your Books/))
+
loop do
- books = highlights_page.search(".//td[@class='titleAndAuthor']")
- books.each do |book|
- asin = book.search("a").first.attributes["href"].value.split("/").last
- title = book.search("a").first.inner_html
- @books[asin] = title
+ highlights_page.search(".//td[@class='titleAndAuthor']").each do |book|
+ asin_and_title_element = book.search("a").first
+ # The ASIN is the last path segment of the link's href; the title is the link's inner HTML.
+ asin = asin_and_title_element.attributes.fetch("href").value.split("/").last
+ title = asin_and_title_element.inner_html
+ books[asin] = title
end
+
+ # No "Next" link means this was the last listing page.
break if highlights_page.link_with(text: /Next/).nil?
- highlights_page = @mechanize_agent.click(highlights_page.link_with(text: /Next/))
+ highlights_page = mechanize_agent.click(highlights_page.link_with(text: /Next/))
end
+ books
end
-
- def setup_mechanize_agent
- @mechanize_agent = Mechanize.new
- @mechanize_agent.user_agent_alias = 'Windows Mozilla'
- @mechanize_agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+
+ # Memoized agent: returns @mechanize_agent when it is already set (for
+ # example through the public writer); otherwise builds a default one with
+ # initialize_mechanize_agent and caches it.
+ def mechanize_agent
+ @mechanize_agent ||= initialize_mechanize_agent
end
+
+ # Builds the default Mechanize agent used when none has been injected.
+ #
+ # The caller-supplied mechanize_options are applied after the defaults set
+ # here, so an option that targets the same attribute replaces the default.
+ #
+ # @return [Mechanize] the configured agent
+ def initialize_mechanize_agent
+   Mechanize.new.tap do |agent|
+     agent.user_agent_alias = 'Windows Mozilla'
+     # NOTE(review): VERIFY_NONE turns off TLS certificate verification on the
+     # connection that carries the account credentials. Kept unchanged for
+     # backward compatibility; consider defaulting to VERIFY_PEER.
+     agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+
+     # public_send restricts option keys to the agent's public setters.
+     mechanize_options.each do |mech_attr, value|
+       agent.public_send("#{mech_attr}=", value)
+     end
+   end
+ end
end
-end
\ No newline at end of file
+end