lib/shep/session.rb in shep-0.1.0.pre.alpha0 vs lib/shep/session.rb in shep-0.2.0.pre.alpha0

- old
+ new

@@ -9,12 +9,13 @@ module Shep using Assert # Represents a connection to a Mastodon (or equivalent) server. # - # @attr_reader [Logger] logger The logger object - # @attr_reader [String] host The Server's hostname + # @attr_reader [Logger] logger The logger object + # @attr_reader [String] host The Server's hostname + # @attr_reader [String] user_agent User-Agent string; frozen # # ## Conventions # # `fetch_*` methods retrieve a single Mastodon object, an `Entity` # subinstance. @@ -22,53 +23,141 @@ # `each_*` methods retrieve multiple objects, also `Entity` # subinstances. If called with a block, the block is evaluated on # each item in turn and the block's result is ignored. Otherwise, # it returns an `Enumerator` which can be used in the usual ways. # - # `each_*` will automatically paginate through the available items; - # for example, + # Some examples: # - # statuses = session.each_public_status.to_a + # # Evaluate a block on each status + # session.each_status(account) { |status| do_thing(status) } # - # will retrieve the server's entire public timeline and put it in an - # array. (Note: don't do this.) The `limit:` keyword option will - # set an upper limit on the number of items retrieved (but note that - # the method may retrieve more from the API endpoint). + # # Retrieve the last 100 statuses in an array + # statuses = session.each_status(account, limit: 100).to_a # + # # Retrieve the last 200 statuses via an enumerator and do + # # extra transformation on the result before collecting + # # them in an array. + # statuses = session.each_status(account, limit: 200) + # .select{|status| BESTIES.include? status.account.username } + # .map{|status| status.id} + # .to_a + # + # The actual web API "paginates" the output. That is, it returns + # the first 40 (or so) items and then provides a link to the next + # chunk. Shep's `each_*` methods handle this for you automatically. + # This means that unless you use `limit:`, the `each_*` methods will + # retrieve **all** available items, at least until you reach the + # rate limit (see below). + # + # Note that it is safe to leave an `each_*` methods block with + # `break`, `return`, an exception, or any other such mechanism. + # # The remaining Mastodon API methods will in some way modify the # state of the server and return an Entity subinstance on success. # # All API calls throw an exception on failure. + # + # ## Rate Limits + # + # Mastodon servers restrict the number of times you can use a + # specific endpoint within a time period as a way to prevent abuse. + # Shep provides several tools for handling these limits gracefully. + # + # 1. The method {rate_limit} will return a Struct that tells you how + # many requests you have left and when the count is reset. + # + # 2. If a rate limit is exceeded, the method will throw an + # {Error::RateLimit} exception instead of an ordinary + # {Error::Http} exception. + # + # 3. If the Session is created with argument `rate_limit_retry:` set + # to true, the Session will instead wait out the reset time and + # try again. + # + # If you enable the wait-and-retry mechanism, you can also provide a + # hook function (i.e. a thing that responds to `call`) via + # constructor argument `retry_hook:`. This is called with one + # argument, the result of {rate_limit} for the limited API endpoint, + # immediately before Shep starts waiting for the limit to reset. + # + # The built-in wait time takes the callback's execution time into + # account so it's possible to use the callback to do your own + # waiting and use that time more productively. + # + # Alternately, all of the `each_*` methods have a `limit:` parameter + # so it's easy to avoid making too many API calls and many have a + # `max_id:` parameter that allows you to continue where you left off. + # class Session - attr_reader :logger, :host + attr_reader :logger, :host, :user_agent # Initialize a new {Session}. # - # @param host [String] Hostname of the server - # @param token [String] Bearer token; optional - # @param logger [Logger] The logger or mode; optional - # @param debug_http [Boolean] Enable `Net::HTTP` debugging; insecure! + # @param host [String] Hostname of the server + # @param token [String] Bearer token; optional # + # @param user_agent [String] User-Agent string to use + # @param ua_comment [String] Comment part of User-Agent string + # + # @param rate_limit_retry [Boolean] Handle request limits by waiting + # for the count to reset and trying + # again. + # + # @param retry_hook [Proc] One-argument hook function to call + # before waiting for the rate limit + # to reset + # + # @param logger [Logger] The logger or mode; optional + # @param debug_http [Boolean] Enable `Net::HTTP` debugging; + # **insecure!** + # + # By default, the User-Agent header is set to the gem's + # identifier, but may be overridden with the `user_agent` + # parameter. It is your responsibility to make sure it is + # formatted correctly. You can also append comment text to the + # given User-Agent string with `ua_comment`; this lets you add a + # comment to the default text. + # # Parameter `logger` may be a `Logger` object, `nil`, or a # `Symbol` whose value is the name of one of the supported log # levels. In the latter case, a new Logger is created and set to # that level. If `nil` is given, a dummy `Logger` is created and # used. # # If `debug_http` is true, compression is disabled and the # transactions are sent to `STDERR` via - # `Net::HTTP.set_debug_output`. **WARNING:** this opens a serious - # security hole and should not be used in production. + # `Net::HTTP.set_debug_output`. + # + # **WARNING:** this opens a serious security hole and should not + # be used in production. def initialize(host:, - token: nil, - logger: nil, - debug_http: false) + token: nil, + user_agent: "ShepRubyGem/#{Shep::Version}", + ua_comment: nil, + + rate_limit_retry: false, + retry_hook: nil, + + logger: nil, + debug_http: false) @host = host @token = token @logger = init_logger(logger) - @rate_limit = Struct.new(:limit, :remaining, :reset).new + + @user_agent = user_agent + @user_agent += " #{ua_comment}" if ua_comment + @user_agent.freeze + + @rate_limit_retry = rate_limit_retry + @retry_hook = retry_hook + @debug_http = debug_http + + @rate_limit = Struct.new(:limit, :remaining, :reset).new + + raise Error::Caller.new("retry_hook: must a callable or nil") unless + @retry_hook == nil || @retry_hook.respond_to?(:call) end private def init_logger(logger_arg) @@ -86,11 +175,11 @@ end public - # Return the rate limit information as of the last operation. + # Return the rate limit information from the last REST request. # # The result is a Struct with the following fields: # # * limit Integer - Number of allowed requests per time period # * remaining Integer - Number of requests you have left @@ -99,10 +188,14 @@ # Note that different types of operations have different rate # limits. For example, most endpoints can be called up to 300 # times within 5 minutes but no more than 30 media uploads are # allowed within a 30 minute time period. # + # Note also that some Shep methods will perform multiple API + # requests; this is only ever the rate limit information from the + # latest of these. + # # @see https://docs.joinmastodon.org/api/rate-limits/ # # @return [Struct.new(:limit, :remaining, :reset)] def rate_limit = @rate_limit.dup.freeze @@ -148,18 +241,23 @@ # Fetch user details by username. # # The username must belong to a user on the current server. # - # @param handle [String] the account's username **with** the + # @param handle [String] the account's username with or without the # leading '@' character (e.g. @benoitmandelbot) # - # @return [Entity::Account] # + # @return [Entity::Account, nil] The Account or nil if it can't be found. + # # @see https://docs.joinmastodon.org/methods/accounts/#get def fetch_account_by_username(handle) return rest_get("accounts/lookup", Entity::Account, {acct: handle}) + rescue Error::Http => oopsie + # As a special case, return nil if the lookup fails + return nil if oopsie.response.is_a?(Net::HTTPNotFound) + raise oopsie end # Fetch an individual notification by ID. # # Requires a token with sufficient permissions. @@ -231,22 +329,22 @@ status.media_attachments.each { |ma| outfile = File.join(media_dir, File.basename(ma.url.path)) if !refetch && File.exist?(outfile) @logger.info "Found '#{outfile}'; skipping." - success = true else tmp = File.join(media_dir, SecureRandom.uuid + '.tmp') - success = basic_get_binary(ma.url, tmp) - if success + begin + basic_get_binary(ma.url, tmp) FileUtils.mv(tmp, outfile) - else + rescue Error::Http => e FileUtils.rm(tmp, force: true) + raise e end end - media[ma.url.to_s] = success ? outfile : nil + media[ma.url.to_s] = outfile } return [status, media] end @@ -298,10 +396,12 @@ # @param account_id [String] The ID of the account # # @param limit [Integer] Maximum number of accounts to # retrieve # + # @param max_id [String] retrieve results older than this ID. + # # @param only_media [Boolean] If true, filter for statuses # with media # # @param exclude_replies [Boolean] If true, exclude replies # @@ -319,10 +419,11 @@ # @return [Enumerator] if block is not given, otherwise self # # @see https://docs.joinmastodon.org/methods/accounts/#statuses def each_status(account_id, limit: nil, + max_id: "", only_media: false, exclude_replies: false, exclude_reblogs: false, pinned: false, tagged: nil, @@ -335,10 +436,12 @@ # # May require a token depending on the instance's settings. # # @param limit [Integer] Max. items to retrieve. # + # @param max_id [String] retrieve results older than this ID. + # # @param local [Boolean] Retrieve only local statuses # # @param remote [Boolean] Retrieve only remote statuses # # @param only_media [Boolean] Retrieve only statuses with media @@ -349,10 +452,11 @@ # # @return [Enumerator] if block is not given, otherwise self # # @see https://docs.joinmastodon.org/methods/timelines/#public def each_public_status(limit: nil, + max_id: "", local: false, remote: false, only_media: false, &block) query = magically_get_caller_kwargs(binding, method(__method__)) @@ -371,10 +475,12 @@ # @param hashtag_s [String, Array<String>] # Hashtag(s) to retrieve. # # @param limit [Integer] maximum number of items to retrieve # + # @param max_id [String] return results older than this ID. + # # @param local [Boolean] retrieve only local statuses # # @param remote [Boolean] retrieve only remote statuses # # @param only_media [Boolean] retrieve only media status @@ -390,10 +496,11 @@ # @return [Enumerator] if block is not given, otherwise self # # @see https://docs.joinmastodon.org/methods/timelines/#tag def each_tag_status(hashtag_s, limit: nil, + max_id: "", local: false, remote: false, only_media: false, all: [], none: [], @@ -420,25 +527,32 @@ # Retrieve each Entity::Status in the home timeline. # # Requires token. # - # @param limit [Integer] , - # @param local [Boolean] , - # @param remote [Boolean] , - # @param only_media [Boolean] , + # @param limit [Integer] maximum number of items to retrieve # + # @param max_id [String] retrieve results older than this ID. + # + # @param local [Boolean] retrieve only local statuses + # + # @param remote [Boolean] retrieve only remote statuses + # + # @param only_media [Boolean] retrieve only media status + # + # # @yield [item] # # @yieldparam [Entity::Status] # # @return [Enumerator] if block is not given, otherwise self # # # @see https://docs.joinmastodon.org/methods/timelines/#home def each_home_status(limit: nil, local: false, + max_id: "", remote: false, only_media: false, &block) query = magically_get_caller_kwargs(binding, method(__method__)) rest_get_seq("timelines/home", Entity::Status, query, block) @@ -486,10 +600,22 @@ end # Retrieve each notification. # + # Requires a bearer token. + # + # Notification types are indicated by of the following symbols: + # + # `:mention`, `:status`, `:reblog`, `:follow`, `:follow_request` + # `:favourite`, `:poll`, `:update`, `:admin.sign_up`, or + # `:admin.report` + # + # This method will throw an `Error::Caller` exception if an + # unknown value is used. + # + # # @param types [Array<String>] list of notifications types to # enumerate; others are ignoredn # # @param exclude_types [Array<String>] types of notifications to exclude # @@ -512,13 +638,17 @@ &block) allowed_notifications = %i{mention status reblog follow follow_request favourite poll update admin.sign_up admin.report} - # Ensure valid filter values - types.uniq! - exclude_types.uniq! + # Remove duplicates and convert strings to symbols + [types, exclude_types].each{|param| + param.map!{|item| item.intern} + param.uniq! + } + + # Now, ensure there are no incorrect notification types. (types + exclude_types).each{|filter| assert("Unknown notification type: #{filter}") { allowed_notifications.include?(filter.intern) } } @@ -548,10 +678,12 @@ # @param spoiler_text [String] Content warning if non-empty string. # Also sets `sensitive` to true. # # @param language [String] ISO language code # + # @return [Entity::Status] The new status. + # # @see https://docs.joinmastodon.org/methods/statuses/#create def post_status(text, visibility: :private, media_ids: [], spoiler_text: "", @@ -667,16 +799,18 @@ url = rest_uri("notifications/#{id}/dismiss", {}) basic_rest_post_or_put(url, {}) return nil end - private + # - # High-level REST support + # High(ish)-level REST support # + private + # Extract all of the keyword arguments and their values from the # caller's context. (The context is 'cbinding', which must be a # call to method 'cmethod'.) # # The results are returned as a hash mapping keyword to value. @@ -709,11 +843,11 @@ return result_klass.from(result_obj) end def rest_delete(path, result_klass) uri = rest_uri(path, {}) - result_obj, _ = basic_rest_get_or_delete(uri, also_delete: true) + result_obj, _ = basic_rest_get_or_delete(uri, is_delete: true) return result_klass.from(result_obj) end # We do the block+enumerator thing here as a placeholder for # pagination. @@ -764,11 +898,11 @@ return result_klass.from(result) end # - # Low-level REST support + # Low(ish)-level REST support # # Given a hash of query arguments, return an array of key-value # pairs. # @@ -802,55 +936,95 @@ version = v2 ? "v2" : "v1" return URI("https://#{@host}/api/#{version}/#{path}#{args}") end - def basic_rest_get_or_delete(uri, also_delete: false) + def parse_link_header(hdr) + result = {} + return result unless hdr # could be nil + + for link in hdr.split(', ') + md = link.match(/^<([^>]+)>; rel="([^"]+)"/) + assert{md} + + result[ md[2] ] = md[1] + end + + return result + end + + # Perform a GET or DELETE operation. (They have mostly the same + # structure, so we combine the functionality here.) + def basic_rest_get_or_delete(url, is_delete: false) headers = headers_for(:get) - # Do the thing. - response = http_get_or_delete(uri, headers, also_delete) - update_rate_limit(response) + request = is_delete ? + Net::HTTP::Delete.new(url, headers) : + Net::HTTP::Get.new(url, headers) - result = response.body() if response.class.body_permitted? - raise Error::Http.new(response) if response.is_a? Net::HTTPClientError + response = http_operation(request) - result_obj = JSON.parse(result) + result = JSON.parse(response.body) - if result.is_a?(Hash) && result_obj.has_key?("error") - raise Error::Server.new(result_obj["error"]) + if result.is_a?(Hash) && result.has_key?("error") + raise Error::Server.new(result["error"]) end link = parse_link_header(response["link"]) - return [result_obj, link] + return [result, link] rescue JSON::JSONError => e raise Error::Remote.new("Error parsing result JSON: #{e}") end - def http_get_or_delete(uri, headers, is_delete) - @logger.debug("#{is_delete ? "Deleting" : "Requesting"} #{uri} " + - "(token: #{!!@token})") + # Retrieve the resource (assumed to be binary) at 'uri' and write + # it to 'filename'. For now, returns false if the request + # returned an error code and true on success. + def basic_get_binary(url, filename) + request = Net::HTTP::Get.new(url, headers_for(:get)) - response = Net::HTTP.start( - uri.hostname, - uri.port, - use_ssl: uri.scheme == 'https' - ) { |http| - next is_delete ? - http.delete(uri.path, headers) : - http.request_get(uri, headers) - } + @logger.debug("Output file is #{filename}") + File.open(filename, "wb") { |outfile| http_operation(request, outfile) } + @logger.debug("Done (#{filename})") + end - @logger.debug("Response: #{response}") - return response + def basic_rest_post_or_put(url, formdata, is_put: false) + headers = headers_for(:post) + + # Select request type + request = is_put ? + Net::HTTP::Put.new(url, headers) : + Net::HTTP::Post.new(url, headers) + + # Set the parameters + enctype = 'multipart/form-data' if formdata.is_a?(Array) + enctype = 'application/x-www-form-urlencoded' if + formdata.is_a?(Hash) + enctype or + raise Error::Caller.new("Unknown formdate type: #{formdata.class}") + request.set_form(formdata, enctype) + + # Do the deed + response = http_operation(request) + + result = JSON.parse(response.body) + + if result.is_a?(Hash) && result.has_key?("error") + raise Error::Server.new(result["error"]) + end + + return result + rescue JSON::JSONError => e + raise Error::Remote.new("Error parsing result JSON: #{e}") end def headers_for(method) headers = {} headers["Authorization"] = "Bearer #{@token}" if @token + headers['User-Agent'] = @user_agent + if method == :post extras = { "Indempotency-Key": SecureRandom.uuid, "Content-Type": "application/json", } @@ -858,113 +1032,85 @@ end return headers end - def update_rate_limit(response) - @rate_limit.limit = @rate_limit.remaining = @rate_limit.reset = nil + def http_operation(request, output_handle = nil) + url = request.uri - @rate_limit.limit = response['X-RateLimit-Limit'].to_i - @rate_limit.remaining = response['X-RateLimit-Remaining'].to_i + while true + http = Net::HTTP.new(url.hostname, url.port) + http.use_ssl = (url.scheme == 'https') - reset = response['X-RateLimit-Reset'] - @rate_limit.reset = Time.iso8601(reset) if reset + if @debug_http + http.set_debug_output(STDERR) + request['Accept-Encoding'] = 'identity' + end - @logger.debug "Rate limit: #{rate_limit_desc}" - end + http.start do |http| + @logger.debug("Request #{request}; (token: #{!!@token})") - # Retrieve the resource (assumed to be binary) at 'uri' and write - # it to 'filename'. For now, returns false if the request - # returned an error code and true on success. - def basic_get_binary(uri, filename) - headers = headers_for(:get) + # We have to invoke 'request' with a block argument to get the + # response because that's the only way we can get at it before + # it's downloaded the entire response into RAM. + http.request(request) do |response| + @logger.debug("Response: #{response}") - @logger.debug("Requesting #{uri} (token: #{!!@token})") - Net::HTTP.get_response(uri, headers) {|response| - @logger.debug("Response: #{response.code}") + update_rate_limit(response) - update_rate_limit(response) + #raise_http_exception_if_error(response) - if response.is_a? Net::HTTPClientError - @logger.warn("Got response #{response} for #{uri}.") - return false - end + if response.is_a?(Net::HTTPClientError) + # Finish any body reading that may have been in + # progress. + response.read_body() - @logger.debug("Writing body to #{filename}") - File.open(filename, "wb") { |outfile| - response.read_body { |chunk| - @logger.debug(" Writing #{chunk.size} bytes...") - outfile.write(chunk) - } - } - @logger.debug("Done (#{filename})") - } + # Special case: too many requests. We may throw an + # exception or wait until it resets and try again. + if response.is_a?(Net::HTTPTooManyRequests) + handle_rate_limit_reached(response) + next # if we get here, we're trying again + end - return true - end + raise Error::Http.new(response) + end - def parse_link_header(hdr) - result = {} - return result unless hdr # could be nil + # read_body will write the response body to output_handle if + # it's a handle or keep it internally if it's nil. + response.read_body(output_handle) - for link in hdr.split(', ') - md = link.match(/^<([^>]+)>; rel="([^"]+)"/) - assert{md} - - result[ md[2] ] = md[1] + return response + end + end end - - return result end + def handle_rate_limit_reached(response) + raise Error::RateLimit.new(response) unless @rate_limit_retry - def basic_rest_post_or_put(uri, formdata, is_put: false) - headers = headers_for(:post) + # Call the retry hook first. + @retry_hook.call(self.rate_limit().dup) if @retry_hook - # Do the thing. - @logger.debug("Posting #{uri} (token: #{!!@token})") - response = http_post_form(uri, headers, formdata, is_put) - @logger.debug("Response: #{response}") - - update_rate_limit(response) - - result = response.body() if response.class.body_permitted? - raise Error::Http.new(response) if response.is_a? Net::HTTPClientError - - result_obj = JSON.parse(result) - - if result.is_a?(Hash) && result_obj.has_key?("error") - raise Error::Server.new(result_obj["error"]) + # Now, wait out any remaining elapsed time. + while true + delay = (@rate_limit.reset - Time.now) + 2 + break if delay <= 0 + @logger.info "Sleeping for #{delay.round} seconds." + sleep delay end - - return result_obj - rescue JSON::JSONError => e - raise Error::Remote.new("Error parsing result JSON: #{e}") end - def http_post_form(url, headers, formdata, is_put) - request = is_put ? - Net::HTTP::Put.new(url, headers) : - Net::HTTP::Post.new(url, headers) + def update_rate_limit(response) + @rate_limit.limit = @rate_limit.remaining = @rate_limit.reset = nil - enctype = 'multipart/form-data' if formdata.is_a?(Array) - enctype = 'application/x-www-form-urlencoded' if - formdata.is_a?(Hash) - enctype or - raise Error::Caller.new("Unknown formdate type: #{formdata.class}") - request.set_form(formdata, enctype) + @rate_limit.limit = response['X-RateLimit-Limit'].to_i + @rate_limit.remaining = response['X-RateLimit-Remaining'].to_i + reset = response['X-RateLimit-Reset'] + @rate_limit.reset = Time.iso8601(reset) if reset - http = Net::HTTP.new(url.hostname, url.port) - http.use_ssl = (url.scheme == 'https') - - if @debug_http - http.set_debug_output(STDERR) - request['Accept-Encoding'] = 'identity' - end - - return http.start {|http| - http.request(request) - } + @logger.debug "Rate limit: #{rate_limit_desc}" end + end + end