diff --git a/.gitignore b/.gitignore index 72f9d94..8c142ad 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ .gemspec config/*.yml .DS_Store +application.yml +Gemfile.lock diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 875eb0d..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,47 +0,0 @@ -PATH - remote: . - specs: - kindle-highlights (1.0.2) - mechanize (>= 2.7.2) - -GEM - remote: https://rubygems.org/ - specs: - domain_name (0.5.20170404) - unf (>= 0.0.5, < 1.0.0) - http-cookie (1.0.3) - domain_name (~> 0.5) - mechanize (2.7.3) - domain_name (~> 0.5, >= 0.5.1) - http-cookie (~> 1.0) - mime-types (~> 2.0) - net-http-digest_auth (~> 1.1, >= 1.1.1) - net-http-persistent (~> 2.5, >= 2.5.2) - nokogiri (~> 1.4) - ntlm-http (~> 0.1, >= 0.1.1) - webrobots (>= 0.0.9, < 0.2) - mime-types (2.99) - mini_portile2 (2.2.0) - minitest (5.8.4) - net-http-digest_auth (1.4) - net-http-persistent (2.9.4) - nokogiri (1.8.0) - mini_portile2 (~> 2.2.0) - ntlm-http (0.1.1) - rake (10.5.0) - unf (0.1.4) - unf_ext - unf_ext (0.0.7.4) - webrobots (0.1.1) - -PLATFORMS - ruby - -DEPENDENCIES - bundler (~> 1.3) - kindle-highlights! - minitest (~> 5.0) - rake - -BUNDLED WITH - 1.15.3 diff --git a/README.md b/README.md index 42d2e4b..ccad4e1 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,6 @@ A Ruby gem for collecting your Kindle highlights. * Ruby `2.1.0` or greater * An Amazon Kindle account -Note: Version `0.0.8` of `kindle-highlights` is the last version which is compatible with older -versions of Ruby. For documentation on how to use that -version, see [the release](https://github.com/speric/kindle-highlights/releases/tag/v0.0.8). 
- ### Install ``` gem install kindle-highlights @@ -26,65 +22,64 @@ to sign into your Amazon Kindle account: ```ruby require 'kindle_highlights' -kindle = KindleHighlights::Client.new(email_address: "email.address@gmail.com", password: "password") +kindle = KindleHighlights::Client.new( + email_address: "email.address@gmail.com", + password: "password" +) ``` ### Fetching a list of your Kindle books Use the `books` method to get a listing of all your Kindle books. This method -returns a hash, keyed on the ASIN, with the title as the value: +returns a collection of `KindleHighlights::Book` objects: ```ruby kindle.books #=> -{ - "B002JCSCO8" => "The Art of the Commonplace: The Agrarian Essays of Wendell Berry", - "B0049SPHC0" => "Calvinistic Concept of Culture, The", - "B003HNOB34" => "The Collected Works of William Butler Yeats (Unexpurgated Edition) (Halcyon Classics)", - "B000JMKZX6" => "The Essays of Arthur Schopenhauer; On Human Nature", - "B005CQ2ZE6" => "From the Garden to the City", - "B0082ZJFCO" => "The Golden Sayings of Epictetus", - "B000SEGEKI" => "The Pragmatic Programmer: From Journeyman to Master", - "B009D6AGOM" => "The Rare Jewel of Christian Contentment", - "B00E25KVLW" => "Ruby on Rails 4.0 Guide", - "B004X5RLBY" => "The Seven Lamps of Architecture", - "B0032UWX1O" => "The Westminster Confession of Faith", - "B0026772N8" => "Zen and the Art of Motorcycle Maintenance" -} +[ + , + , + +] ``` ### Fetching all highlights for a single book To get only the highlights for a specific book, use the `highlights_for` method, passing -in the book's Amazon ASIN as the only method parameter: +in the book's Amazon ASIN as the only method parameter. 
This method returns a collection of +`KindleHighlights::Highlight` objects: ```ruby kindle.highlights_for("B005CQ2ZE6") #=> [ - { - "asin" => "B005CQ2ZE6", - "customerId" => "...", - "embeddedId" => "From_the_Garden_to_the_City:420E805A", - "endLocation" => 29591, - "highlight" => "One of the most dangerous things you can believe in this world is that technology is neutral.", - "howLongAgo" => "1 year ago", - "startLocation" => 29496, - "timestamp" => 1320901233000 - }, - { - "asin" => "B005CQ2ZE6", - "customerId" => "...", - "embeddedId" => "From_the_Garden_to_the_City:420E805A", - "endLocation" => 54220, - "highlight" => "While God's words are eternal and unchanging, the tools we use to access those words do change, and those changes in technology also bring subtle changes to the practice of worship. When we fail to recognize the impact of such technological change, we run the risk of allowing our tools to dictate our methods. Technology should not dictate our values or our methods. Rather, we must use technology out of our convictions and values.", - "howLongAgo" => "1 year ago", - "startLocation" => 53780, - "timestamp" => 1321038422000 - } + ] ``` +Additionally, each book has its own `highlights_from_amazon` method: + +``` +book = kindle.books.first +book.highlights_from_amazon +``` + ### Advanced Usage This gem uses [mechanize](https://github.com/sparklemotion/mechanize) to interact with Amazon's Kindle pages. You can override any of the default mechanize settings (see `lib/kindle_highlights/client.rb`) by passing your settings to the initializer: @@ -138,4 +133,4 @@ kindle = KindleHighlights::Client.new( ### Copyright -Copyright (c) 2011-2016 Eric Farkas. See MIT-LICENSE for details. +Copyright (c) 2011-2018 Eric Farkas. See MIT-LICENSE for details. 
diff --git a/kindle_highlights.gemspec b/kindle_highlights.gemspec
index 785c0ba..3391fb4 100644
--- a/kindle_highlights.gemspec
+++ b/kindle_highlights.gemspec
@@ -1,18 +1,20 @@
 Gem::Specification.new do |s|
   s.name = "kindle-highlights"
-  s.version = "1.0.2"
+  s.version = "2.0.0"
   s.summary = "Kindle highlights"
   s.description = "Until there is a Kindle API, this will suffice."
   s.authors = ["Eric Farkas"]
   s.email = "eric@prudentiadigital.com"
-  s.files = ["lib/kindle_highlights.rb", "lib/kindle_highlights/client.rb"]
+  s.files = `git ls-files -- lib/*`.split("\n")
+  s.files += ["MIT-LICENSE"]
   s.homepage = "https://github.com/speric/kindle-highlights"
   s.license = "MIT"
 
   s.required_ruby_version = ">= 2.1.0"
 
-  s.add_runtime_dependency "mechanize", ">= 2.7.2"
+  s.add_runtime_dependency "mechanize", ">= 2.7.5"
+  s.add_runtime_dependency "activesupport"
   s.add_development_dependency "rake"
   s.add_development_dependency "bundler", "~> 1.3"
   s.add_development_dependency "minitest", "~> 5.0"
 end
diff --git a/lib/kindle_highlights.rb b/lib/kindle_highlights.rb
index 2617d0f..86539b4 100644
--- a/lib/kindle_highlights.rb
+++ b/lib/kindle_highlights.rb
@@ -1,10 +1,8 @@
 require 'rubygems'
 require 'mechanize'
-require 'json'
-require 'kindle_highlights/client'
+require 'active_support/core_ext/object/blank'
+require 'active_support/core_ext/string/filters'
 
-module KindleHighlights
-  KINDLE_LOGIN_PAGE = "http://kindle.amazon.com/login"
-  SIGNIN_FORM_IDENTIFIER = "signIn"
-  BATCH_SIZE = 200
-end
+require_relative './kindle_highlights/client'
+require_relative './kindle_highlights/book'
+require_relative './kindle_highlights/highlight'
diff --git a/lib/kindle_highlights/book.rb b/lib/kindle_highlights/book.rb
new file mode 100644
index 0000000..0f90ed1
--- /dev/null
+++ b/lib/kindle_highlights/book.rb
@@ -0,0 +1,67 @@
+module KindleHighlights
+  # A book from the Kindle notebook library, identified by its ASIN.
+  class Book
+    attr_accessor :asin, :author, :title
+
+    # Builds a Book from one element of the notebook library listing.
+    # The ASIN comes from the element's "id" attribute, the title from its
+    # first h2, and the author from the text after the last ":" in its first p.
+    def self.from_html_elements(html_element:, mechanize_agent:)
+      new(
+        mechanize_agent: mechanize_agent,
+        asin: html_element.attributes["id"].value.squish,
+        title: html_element.children.search("h2").first.text.squish,
+        author: html_element.children.search("p").first.text.split(":").last.strip.squish
+      )
+    end
+
+    # mechanize_agent is optional; without it no highlights can be fetched.
+    def initialize(asin:, author:, title:, mechanize_agent: nil)
+      @asin = asin
+      @author = author
+      @title = title
+      @mechanize_agent = mechanize_agent
+    end
+
+    def to_s
+      "#{title} by #{author}"
+    end
+
+    # Shows every instance variable except the mechanize agent.
+    def inspect
+      "<#{self.class}: #{inspectable_vars}>"
+    end
+
+    # Returns this book's highlights as Highlight objects, fetched once and
+    # memoized. Returns [] when the book was built without a mechanize agent.
+    def highlights_from_amazon
+      return [] unless mechanize_agent.present?
+
+      @highlights ||= fetch_highlights_from_amazon
+    end
+
+    private
+
+    attr_reader :mechanize_agent
+
+    # Loads the notebook page for this ASIN and turns each div under
+    # div#kp-notebook-annotations that contains a div.kp-notebook-highlight
+    # into a Highlight.
+    def fetch_highlights_from_amazon
+      mechanize_agent
+        .get("https://read.amazon.com/kp/notebook?captcha_verified=1&asin=#{asin}&contentLimitState=&")
+        .search("div#kp-notebook-annotations")
+        .children
+        .select { |child| child.name == "div" }
+        .select { |child| child.children.search("div.kp-notebook-highlight").first.present? }
+        .map { |html_elements| Highlight.from_html_elements(book: self, html_elements: html_elements) }
+    end
+
+    def inspectable_vars
+      instance_variables
+        .reject { |ivar| ivar == :@mechanize_agent }
+        .map { |ivar| "#{ivar}=#{instance_variable_get(ivar).inspect}" }
+        .join(", ")
+    end
+  end
+end
diff --git a/lib/kindle_highlights/client.rb b/lib/kindle_highlights/client.rb
index cb4c725..9bfcd61 100644
--- a/lib/kindle_highlights/client.rb
+++ b/lib/kindle_highlights/client.rb
@@ -2,16 +2,20 @@ module KindleHighlights
   class Client
     class CaptchaError < StandardError; end
     class AuthenticationError < StandardError; end
+    class AsinNotFoundError < StandardError; end
 
-    MAX_AUTH_RETRIES = 2
+    KINDLE_LOGIN_PAGE      = "https://read.amazon.com/notebook"
+    SIGNIN_FORM_IDENTIFIER = "signIn"
+    MAX_AUTH_RETRIES       = 2
 
-    attr_writer :mechanize_agent
-    attr_accessor :kindle_logged_in_page
+    attr_writer :mechanize_agent, :kindle_logged_in_page
 
     def initialize(email_address:, password:, mechanize_options: {})
-      @email_address = email_address
-      @password = password
+      @email_address     = email_address
+      @password          = password
       @mechanize_options = mechanize_options
+      @retries           = 0
+      @kindle_logged_in_page = nil
     end
 
     def books
@@ -19,37 +23,46 @@ def books
     end
 
     def highlights_for(asin)
-      conditionally_sign_in_to_amazon
+      if (book = books.find { |candidate| candidate.asin == asin })
+        book.highlights_from_amazon
+      else
+        raise AsinNotFoundError, "Book with ASIN #{asin} not found."
+      end
+    end
 
-      cursor = 0
-      highlights = []
+    private
 
-      loop do
-        # This endpoint includes a `hasMore` field. Unfortunately at the time of this writing is always `false`.
-        page = mechanize_agent.get("https://kindle.amazon.com/kcw/highlights?asin=#{asin}&cursor=#{cursor}&count=#{BATCH_SIZE}")
-        items = JSON.parse(page.body).fetch("items", [])
+    attr_accessor :email_address, :password, :mechanize_options
+    attr_reader :kindle_logged_in_page
 
-        break unless items.any?
+    def mechanize_agent
+      @mechanize_agent ||= initialize_mechanize_agent
+    end
 
-        highlights.concat(items)
-        cursor += BATCH_SIZE
+    def initialize_mechanize_agent
+      mechanize_agent = Mechanize.new
+      mechanize_agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys.grep(/\A(Linux|Mac|Windows)/).sample
+      mechanize_agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE # NOTE(review): disables TLS certificate verification
+
+      mechanize_options.each do |mech_attr, value|
+        mechanize_agent.send("#{mech_attr}=", value)
       end
-      highlights
+      mechanize_agent
     end
 
-    private
+    def load_books_from_kindle_account
+      conditionally_sign_in_to_amazon
 
-    attr_accessor :email_address, :password, :mechanize_options
+      kindle_library.map do |book|
+        unless book.attributes["id"].blank?
+          Book.from_html_elements(html_element: book, mechanize_agent: mechanize_agent)
+        end
+      end.compact
+    end
 
     def conditionally_sign_in_to_amazon
-      retries ||= 0
-
-      if @kindle_logged_in_page.nil?
-        signin_page = mechanize_agent.get(KINDLE_LOGIN_PAGE)
-        signin_form = signin_page.form(SIGNIN_FORM_IDENTIFIER)
-        signin_form.email = email_address
-        signin_form.password = password
-        post_signin_page = mechanize_agent.submit(signin_form)
+      if login?
+        post_signin_page = login_via_mechanize
 
         if post_signin_page.search("#ap_captcha_img").any?
           resolution_url = post_signin_page.link_with(text: /See a new challenge/).resolved_uri.to_s
@@ -62,42 +75,31 @@ def conditionally_sign_in_to_amazon
         end
       end
     rescue AuthenticationError
-      retry unless (retries += 1) == MAX_AUTH_RETRIES
+      retry unless too_many_retries?
     end
 
-    def load_books_from_kindle_account
-      conditionally_sign_in_to_amazon
-
-      books = {}
-      highlights_page = mechanize_agent.click(kindle_logged_in_page.link_with(text: /Your Books/))
-
-      loop do
-        highlights_page.search(".//td[@class='titleAndAuthor']").each do |book|
-          asin_and_title_element = book.search("a").first
-          asin = asin_and_title_element.attributes.fetch("href").value.split("/").last
-          title = asin_and_title_element.inner_html
-          books[asin] = title
-        end
+    def kindle_library
+      @kindle_library ||= @kindle_logged_in_page.search("div#kp-notebook-library").children
+    end
 
-        break if highlights_page.link_with(text: /Next/).nil?
-        highlights_page = mechanize_agent.click(highlights_page.link_with(text: /Next/))
-      end
-      books
+    def login_via_mechanize
+      signin_page = mechanize_agent.get(KINDLE_LOGIN_PAGE)
+      signin_form = signin_page.form(SIGNIN_FORM_IDENTIFIER)
+      signin_form.email = email_address
+      signin_form.password = password
+      mechanize_agent.submit(signin_form)
     end
 
-    def mechanize_agent
-      @mechanize_agent ||= initialize_mechanize_agent
+    def login?
+      @kindle_logged_in_page.blank?
     end
 
-    def initialize_mechanize_agent
-      mechanize_agent = Mechanize.new
-      mechanize_agent.user_agent_alias = Mechanize::AGENT_ALIASES.keys.grep(/\A(Linux|Mac|Windows)/).sample
-      mechanize_agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+    def too_many_retries?
+      retry! == MAX_AUTH_RETRIES
+    end
 
-      mechanize_options.each do |mech_attr, value|
-        mechanize_agent.send("#{mech_attr}=", value)
-      end
-      mechanize_agent
+    def retry!
+      @retries += 1 # bumps the counter set to 0 in initialize; returns the new count
     end
   end
 end
diff --git a/lib/kindle_highlights/highlight.rb b/lib/kindle_highlights/highlight.rb
new file mode 100644
index 0000000..2a6aa85
--- /dev/null
+++ b/lib/kindle_highlights/highlight.rb
@@ -0,0 +1,27 @@
+module KindleHighlights
+  # A single highlighted passage, tagged with the ASIN of its book.
+  class Highlight
+    attr_accessor :asin, :text, :location
+
+    # Builds a Highlight from one annotation element: text from its first
+    # div.kp-notebook-highlight, location from the "value" attribute of its
+    # first input#kp-annotation-location.
+    def self.from_html_elements(book:, html_elements:)
+      new(
+        asin: book.asin,
+        text: html_elements.children.search("div.kp-notebook-highlight").first.text.squish,
+        location: html_elements.children.search("input#kp-annotation-location").first.attributes["value"].value
+      )
+    end
+
+    def initialize(asin:, text:, location:)
+      @asin = asin
+      @text = text
+      @location = location
+    end
+
+    def to_s
+      text
+    end
+  end
+end
diff --git a/test/fetching_all_highlights_for_a_book_test.rb b/test/fetching_all_highlights_for_a_book_test.rb
deleted file mode 100644
index 9c88ca3..0000000
--- a/test/fetching_all_highlights_for_a_book_test.rb
+++ /dev/null
@@ -1,90 +0,0 @@
-require 'kindle_highlights'
-require 'minitest/autorun'
-
-class FetchingAllHighlightsForABookTest < Minitest::Test
-  def setup
-    @kindle = KindleHighlights::Client.new(
-      email_address: "amazon@example.com",
-      password: "letmein"
-    )
-    @kindle.mechanize_agent = FakeMechanizeAgentForSingleBook.new
-    @kindle.kindle_logged_in_page = ""
-  end
-
-  def test_fetching_all_quotes_from_a_kindle_book
-    quotes = @kindle.highlights_for("B003XDUCEU")
-
-    assert_equal 2, quotes.count
-
-    assert_equal "B003XDUCEU", quotes.first["asin"]
-    assert_equal "CUS_ID", quotes.first["customerId"]
-    assert_equal "CR!SDMDM6529H7S511VQ7R627N2J874:EE63BD4D", quotes.first["embeddedId"]
-    assert_equal 40116, quotes.first["endLocation"]
-    assert_equal "A good manager creates opportunity, but it’s your responsibility to take it.", quotes.first["highlight"]
-    assert_equal "3 hours ago", quotes.first["howLongAgo"]
-    assert_equal 40041, quotes.first["startLocation"]
-    assert_equal 1457525264000, quotes.first["timestamp"]
-
-    assert_equal
"B003XDUCEU", quotes.last["asin"] - assert_equal "CUS_ID", quotes.last["customerId"] - assert_equal "CR!SDMDM6529H7S511VQ7R627N2J874:EE63BD4D", quotes.last["embeddedId"] - assert_equal 43633, quotes.last["endLocation"] - assert_equal "Any task, big or small, that has landed on your plate and you failed to complete is eroding your reputation.", quotes.last["highlight"] - assert_equal "3 hours ago", quotes.last["howLongAgo"] - assert_equal 43536, quotes.last["startLocation"] - assert_equal 1457525368000, quotes.last["timestamp"] - end - - class FakeMechanizeAgentForSingleBook - def get(asin_url) - FakeResponse.new(asin_url) - end - - class FakeResponse - def initialize(asin_url) - @asin_url = asin_url - end - - def body - if @asin_url =~ /cursor=0/ - response_with_items - else - response_without_items - end.to_json - end - - def response_without_items - { - "items" => [] - } - end - - def response_with_items - { - "items" => [ - { - "asin" => "B003XDUCEU", - "customerId" => "CUS_ID", - "embeddedId" => "CR!SDMDM6529H7S511VQ7R627N2J874:EE63BD4D", - "endLocation" => 40116, - "highlight" => "A good manager creates opportunity, but it’s your responsibility to take it.", - "howLongAgo" => "3 hours ago", - "startLocation" => 40041, - "timestamp" => 1457525264000 - }, - { - "asin" => "B003XDUCEU", - "customerId" => "CUS_ID", - "embeddedId" => "CR!SDMDM6529H7S511VQ7R627N2J874:EE63BD4D", - "endLocation" => 43633, - "highlight" => "Any task, big or small, that has landed on your plate and you failed to complete is eroding your reputation.", - "howLongAgo" => "3 hours ago", - "startLocation" => 43536, - "timestamp" => 1457525368000 - } - ] - } - end - end - end -end diff --git a/test/fetching_all_kindle_books_test.rb b/test/fetching_all_kindle_books_test.rb deleted file mode 100644 index 6822b4c..0000000 --- a/test/fetching_all_kindle_books_test.rb +++ /dev/null @@ -1,113 +0,0 @@ -require 'kindle_highlights' -require 'minitest/autorun' - -class FetchingAllKindleBooksTest < 
Minitest::Test - def setup - @kindle = KindleHighlights::Client.new( - email_address: "amazon@example.com", - password: "letmein" - ) - @kindle.mechanize_agent = FakeMechanizeAgentForAllBooks.new - end - - def test_fetching_all_books_from_a_kindle_account - assert_equal ({ "B003XDUCEU" => "Being Geek: The Software Developer's Career Handbook" }), @kindle.books - end - - class FakeMechanizeAgentForAllBooks - def get(login_page) - FakeAmazonSignInPage.new - end - - def submit(signin_form) - html_page <<-BODY - - - - - Amazon Kindle: Home - - - - - - BODY - end - - def click(logged_in_page) - html_page <<-BODY - - - - Amazon Kindle: Your Books - All (Kindle Only) - head - -
-
- - - - - - - - - - - - - - - - - -
BookReading StatusYour RatingMake reading status & rating publicPublic Notes: Make yours publicRemove From List
- Book - - Being Geek: The Software Developer's Career Handbook
- Michael Lopp -
-
-
-
-
-
- -
-
-
-
-
- - - BODY - end - - def html_page(raw_html) - Mechanize::Page.new(URI('http://foo'), nil, raw_html, 200, Mechanize.new) - end - - class FakeAmazonSignInPage - def form(form_id) - FakeAmazonSignInForm.new - end - - class FakeAmazonSignInForm - attr_writer :email, :password - end - end - end -end diff --git a/test/fetching_books_and_highlights_test.rb b/test/fetching_books_and_highlights_test.rb new file mode 100644 index 0000000..ba18df6 --- /dev/null +++ b/test/fetching_books_and_highlights_test.rb @@ -0,0 +1,114 @@ +require 'kindle_highlights' +require 'minitest/autorun' + +class FetchingBooksAndHighlightsTest < Minitest::Test + def setup + @kindle = KindleHighlights::Client.new( + email_address: "amazon@example.com", + password: "letmein" + ) + @kindle.mechanize_agent = FakeMechanizeAgent.new + @kindle.kindle_logged_in_page = Mechanize::Page.new( + URI('http://foo'), + nil, + raw_signin_page, + 200, + Mechanize.new + ) + end + + def test_fetching_books_from_kindle_account + assert_equal 1, @kindle.books.count + + book = @kindle.books.first + assert_equal "B000XUAETY", book.asin + assert_equal "James R. Mcdonough", book.author + assert_equal "Platoon Leader: A Memoir of Command in Combat", book.title + end + + def test_fetching_highlights_for_a_book + highlights = @kindle.highlights_for("B000XUAETY") + assert_equal 1, highlights.count + + highlight = highlights.first + assert_equal "306", highlight.location + assert_equal "Destiny is not born of decision; it is born of uncontrollable circumstances.", highlight.text + assert_equal "B000XUAETY", highlight.asin + end + + def test_fetching_highlights_for_a_non_existing_asin + assert_raises KindleHighlights::Client::AsinNotFoundError do + @kindle.highlights_for("BADASIN") + end + end + + def raw_signin_page + <<-BODY + + BODY + end + + class FakeMechanizeAgent + def get(_) + Mechanize::Page.new( + URI('http://foo'), + nil, + raw_quotes_page, + 200, + Mechanize.new + ) + end + + def raw_quotes_page + <<-BODY +
+
+
+
+ +
+
+
+
+ + Destiny is not born of decision; it is born of uncontrollable circumstances. + +
+
+
+
+
+
+
+
+ BODY + end + end +end