lib/kindle_highlights/client.rb in kindle-highlights-1.0.2 vs lib/kindle_highlights/client.rb in kindle-highlights-2.0.0

- old (kindle-highlights 1.0.2)
+ new (kindle-highlights 2.0.0)

@@ -1,53 +1,70 @@ module KindleHighlights class Client class CaptchaError < StandardError; end class AuthenticationError < StandardError; end + class AsinNotFoundError < StandardError; end - attr_writer :mechanize_agent - attr_accessor :kindle_logged_in_page + KINDLE_LOGIN_PAGE = "https://read.amazon.com/notebook" + SIGNIN_FORM_IDENTIFIER = "signIn" + MAX_AUTH_RETRIES = 2 + attr_writer :mechanize_agent, :kindle_logged_in_page + def initialize(email_address:, password:, mechanize_options: {}) - @email_address = email_address - @password = password + @email_address = email_address + @password = password @mechanize_options = mechanize_options + @retries = 0 + @kindle_logged_in_page = nil end def books @books ||= load_books_from_kindle_account end def highlights_for(asin) - conditionally_sign_in_to_amazon + if book = books.detect { |book| book.asin == asin } + book.highlights_from_amazon + else + raise AsinNotFoundError, "Book with ASIN #{asin} not found." + end + end - cursor = 0 - highlights = [] + private - loop do - # This endpoint includes a `hasMore` field. Unfortunately at the time of this writing is always `false`. - page = mechanize_agent.get("https://kindle.amazon.com/kcw/highlights?asin=#{asin}&cursor=#{cursor}&count=#{BATCH_SIZE}") - items = JSON.parse(page.body).fetch("items", []) + attr_accessor :email_address, :password, :mechanize_options + attr_reader :kindle_logged_in_page - break unless items.any? 
+ def mechanize_agent + @mechanize_agent ||= initialize_mechanize_agent + end - highlights.concat(items) - cursor += BATCH_SIZE + def initialize_mechanize_agent + mechanize_agent = Mechanize.new + mechanize_agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys.grep(/\A(Linux|Mac|Windows)/).sample + mechanize_agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE + + mechanize_options.each do |mech_attr, value| + mechanize_agent.send("#{mech_attr}=", value) end - highlights + mechanize_agent end - private + def load_books_from_kindle_account + conditionally_sign_in_to_amazon - attr_accessor :email_address, :password, :mechanize_options + kindle_library.map do |book| + unless book.attributes["id"].blank? + Book.from_html_elements(html_element: book, mechanize_agent: mechanize_agent) + end + end.compact + end def conditionally_sign_in_to_amazon - if @kindle_logged_in_page.nil? - signin_page = mechanize_agent.get(KINDLE_LOGIN_PAGE) - signin_form = signin_page.form(SIGNIN_FORM_IDENTIFIER) - signin_form.email = email_address - signin_form.password = password - post_signin_page = mechanize_agent.submit(signin_form) + if login? + post_signin_page = login_via_mechanize if post_signin_page.search("#ap_captcha_img").any? resolution_url = post_signin_page.link_with(text: /See a new challenge/).resolved_uri.to_s raise CaptchaError, "Received a CAPTCHA while attempting to sign in to your Amazon account. You will need to resolve this manually at #{resolution_url}" elsif post_signin_page.search("#message_error > p").any? @@ -55,43 +72,34 @@ raise AuthenticationError, "Unable to sign in, received error: '#{amazon_error}'" else @kindle_logged_in_page = post_signin_page end end + rescue AuthenticationError + retry unless too_many_retries? 
end - def load_books_from_kindle_account - conditionally_sign_in_to_amazon + def kindle_library + @kindle_library ||= @kindle_logged_in_page.search("div#kp-notebook-library").children + end - books = {} - highlights_page = mechanize_agent.click(kindle_logged_in_page.link_with(text: /Your Books/)) + def login_via_mechanize + signin_page = mechanize_agent.get(KINDLE_LOGIN_PAGE) + signin_form = signin_page.form(SIGNIN_FORM_IDENTIFIER) + signin_form.email = email_address + signin_form.password = password + mechanize_agent.submit(signin_form) + end - loop do - highlights_page.search(".//td[@class='titleAndAuthor']").each do |book| - asin_and_title_element = book.search("a").first - asin = asin_and_title_element.attributes.fetch("href").value.split("/").last - title = asin_and_title_element.inner_html - books[asin] = title - end - - break if highlights_page.link_with(text: /Next/).nil? - highlights_page = mechanize_agent.click(highlights_page.link_with(text: /Next/)) - end - books + def login? + @kindle_logged_in_page.blank? end - def mechanize_agent - @mechanize_agent ||= initialize_mechanize_agent + def too_many_retries? + retry! == MAX_AUTH_RETRIES end - def initialize_mechanize_agent - mechanize_agent = Mechanize.new - mechanize_agent.user_agent_alias = 'Windows Mozilla' - mechanize_agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE - - mechanize_options.each do |mech_attr, value| - mechanize_agent.send("#{mech_attr}=", value) - end - mechanize_agent + def retry! + retries += 1 end end end