lib/shep/session.rb in shep-0.1.0.pre.alpha0 vs lib/shep/session.rb in shep-0.2.0.pre.alpha0
- old
+ new
@@ -9,12 +9,13 @@
module Shep
using Assert
# Represents a connection to a Mastodon (or equivalent) server.
#
- # @attr_reader [Logger] logger The logger object
- # @attr_reader [String] host The Server's hostname
+ # @attr_reader [Logger] logger The logger object
+ # @attr_reader [String] host The Server's hostname
+ # @attr_reader [String] user_agent User-Agent string; frozen
#
# ## Conventions
#
# `fetch_*` methods retrieve a single Mastodon object, an `Entity`
# subinstance.
@@ -22,53 +23,141 @@
# `each_*` methods retrieve multiple objects, also `Entity`
# subinstances. If called with a block, the block is evaluated on
# each item in turn and the block's result is ignored. Otherwise,
# it returns an `Enumerator` which can be used in the usual ways.
#
- # `each_*` will automatically paginate through the available items;
- # for example,
+ # Some examples:
#
- # statuses = session.each_public_status.to_a
+ # # Evaluate a block on each status
+ # session.each_status(account) { |status| do_thing(status) }
#
- # will retrieve the server's entire public timeline and put it in an
- # array. (Note: don't do this.) The `limit:` keyword option will
- # set an upper limit on the number of items retrieved (but note that
- # the method may retrieve more from the API endpoint).
+ # # Retrieve the last 100 statuses in an array
+ # statuses = session.each_status(account, limit: 100).to_a
#
+ # # Retrieve the last 200 statuses via an enumerator and do
+ # # extra transformation on the result before collecting
+ # # them in an array.
+ # statuses = session.each_status(account, limit: 200)
+ # .select{|status| BESTIES.include? status.account.username }
+ # .map{|status| status.id}
+ # .to_a
+ #
+ # The actual web API "paginates" the output. That is, it returns
+ # the first 40 (or so) items and then provides a link to the next
+ # chunk. Shep's `each_*` methods handle this for you automatically.
+ # This means that unless you use `limit:`, the `each_*` methods will
+ # retrieve **all** available items, at least until you reach the
+ # rate limit (see below).
+ #
+ # Note that it is safe to leave an `each_*` methods block with
+ # `break`, `return`, an exception, or any other such mechanism.
+ #
# The remaining Mastodon API methods will in some way modify the
# state of the server and return an Entity subinstance on success.
#
# All API calls throw an exception on failure.
+ #
+ # ## Rate Limits
+ #
+ # Mastodon servers restrict the number of times you can use a
+ # specific endpoint within a time period as a way to prevent abuse.
+ # Shep provides several tools for handling these limits gracefully.
+ #
+ # 1. The method {rate_limit} will return a Struct that tells you how
+ # many requests you have left and when the count is reset.
+ #
+ # 2. If a rate limit is exceeded, the method will throw an
+ # {Error::RateLimit} exception instead of an ordinary
+ # {Error::Http} exception.
+ #
+ # 3. If the Session is created with argument `rate_limit_retry:` set
+ # to true, the Session will instead wait out the reset time and
+ # try again.
+ #
+ # If you enable the wait-and-retry mechanism, you can also provide a
+ # hook function (i.e. a thing that responds to `call`) via
+ # constructor argument `retry_hook:`. This is called with one
+ # argument, the result of {rate_limit} for the limited API endpoint,
+ # immediately before Shep starts waiting for the limit to reset.
+ #
+ # The built-in wait time takes the callback's execution time into
+ # account so it's possible to use the callback to do your own
+ # waiting and use that time more productively.
+ #
+ # Alternately, all of the `each_*` methods have a `limit:` parameter
+ # so it's easy to avoid making too many API calls and many have a
+ # `max_id:` parameter that allows you to continue where you left off.
+ #
class Session
- attr_reader :logger, :host
+ attr_reader :logger, :host, :user_agent
# Initialize a new {Session}.
#
- # @param host [String] Hostname of the server
- # @param token [String] Bearer token; optional
- # @param logger [Logger] The logger or mode; optional
- # @param debug_http [Boolean] Enable `Net::HTTP` debugging; insecure!
+ # @param host [String] Hostname of the server
+ # @param token [String] Bearer token; optional
#
+ # @param user_agent [String] User-Agent string to use
+ # @param ua_comment [String] Comment part of User-Agent string
+ #
+ # @param rate_limit_retry [Boolean] Handle request limits by waiting
+ # for the count to reset and trying
+ # again.
+ #
+ # @param retry_hook [Proc] One-argument hook function to call
+ # before waiting for the rate limit
+ # to reset
+ #
+ # @param logger [Logger] The logger or mode; optional
+ # @param debug_http [Boolean] Enable `Net::HTTP` debugging;
+ # **insecure!**
+ #
+ # By default, the User-Agent header is set to the gem's
+ # identifier, but may be overridden with the `user_agent`
+ # parameter. It is your responsibility to make sure it is
+ # formatted correctly. You can also append comment text to the
+ # given User-Agent string with `ua_comment`; this lets you add a
+ # comment to the default text.
+ #
# Parameter `logger` may be a `Logger` object, `nil`, or a
# `Symbol` whose value is the name of one of the supported log
# levels. In the latter case, a new Logger is created and set to
# that level. If `nil` is given, a dummy `Logger` is created and
# used.
#
# If `debug_http` is true, compression is disabled and the
# transactions are sent to `STDERR` via
- # `Net::HTTP.set_debug_output`. **WARNING:** this opens a serious
- # security hole and should not be used in production.
+ # `Net::HTTP.set_debug_output`.
+ #
+ # **WARNING:** this opens a serious security hole and should not
+ # be used in production.
def initialize(host:,
- token: nil,
- logger: nil,
- debug_http: false)
+ token: nil,
+ user_agent: "ShepRubyGem/#{Shep::Version}",
+ ua_comment: nil,
+
+ rate_limit_retry: false,
+ retry_hook: nil,
+
+ logger: nil,
+ debug_http: false)
@host = host
@token = token
@logger = init_logger(logger)
- @rate_limit = Struct.new(:limit, :remaining, :reset).new
+
+ @user_agent = user_agent
+ @user_agent += " #{ua_comment}" if ua_comment
+ @user_agent.freeze
+
+ @rate_limit_retry = rate_limit_retry
+ @retry_hook = retry_hook
+
@debug_http = debug_http
+
+ @rate_limit = Struct.new(:limit, :remaining, :reset).new
+
+ raise Error::Caller.new("retry_hook: must a callable or nil") unless
+ @retry_hook == nil || @retry_hook.respond_to?(:call)
end
private
def init_logger(logger_arg)
@@ -86,11 +175,11 @@
end
public
- # Return the rate limit information as of the last operation.
+ # Return the rate limit information from the last REST request.
#
# The result is a Struct with the following fields:
#
# * limit Integer - Number of allowed requests per time period
# * remaining Integer - Number of requests you have left
@@ -99,10 +188,14 @@
# Note that different types of operations have different rate
# limits. For example, most endpoints can be called up to 300
# times within 5 minutes but no more than 30 media uploads are
# allowed within a 30 minute time period.
#
+ # Note also that some Shep methods will perform multiple API
+ # requests; this is only ever the rate limit information from the
+ # latest of these.
+ #
# @see https://docs.joinmastodon.org/api/rate-limits/
#
# @return [Struct.new(:limit, :remaining, :reset)]
def rate_limit = @rate_limit.dup.freeze
@@ -148,18 +241,23 @@
# Fetch user details by username.
#
# The username must belong to a user on the current server.
#
- # @param handle [String] the account's username **with** the
+ # @param handle [String] the account's username with or without the
# leading '@' character (e.g. @benoitmandelbot)
#
- # @return [Entity::Account]
#
+ # @return [Entity::Account, nil] The Account or nil if it can't be found.
+ #
# @see https://docs.joinmastodon.org/methods/accounts/#get
def fetch_account_by_username(handle)
return rest_get("accounts/lookup", Entity::Account, {acct: handle})
+ rescue Error::Http => oopsie
+ # As a special case, return nil if the lookup fails
+ return nil if oopsie.response.is_a?(Net::HTTPNotFound)
+ raise oopsie
end
# Fetch an individual notification by ID.
#
# Requires a token with sufficient permissions.
@@ -231,22 +329,22 @@
status.media_attachments.each { |ma|
outfile = File.join(media_dir, File.basename(ma.url.path))
if !refetch && File.exist?(outfile)
@logger.info "Found '#{outfile}'; skipping."
- success = true
else
tmp = File.join(media_dir, SecureRandom.uuid + '.tmp')
- success = basic_get_binary(ma.url, tmp)
- if success
+ begin
+ basic_get_binary(ma.url, tmp)
FileUtils.mv(tmp, outfile)
- else
+ rescue Error::Http => e
FileUtils.rm(tmp, force: true)
+ raise e
end
end
- media[ma.url.to_s] = success ? outfile : nil
+ media[ma.url.to_s] = outfile
}
return [status, media]
end
@@ -298,10 +396,12 @@
# @param account_id [String] The ID of the account
#
# @param limit [Integer] Maximum number of accounts to
# retrieve
#
+ # @param max_id [String] retrieve results older than this ID.
+ #
# @param only_media [Boolean] If true, filter for statuses
# with media
#
# @param exclude_replies [Boolean] If true, exclude replies
#
@@ -319,10 +419,11 @@
# @return [Enumerator] if block is not given, otherwise self
#
# @see https://docs.joinmastodon.org/methods/accounts/#statuses
def each_status(account_id,
limit: nil,
+ max_id: "",
only_media: false,
exclude_replies: false,
exclude_reblogs: false,
pinned: false,
tagged: nil,
@@ -335,10 +436,12 @@
#
# May require a token depending on the instance's settings.
#
# @param limit [Integer] Max. items to retrieve.
#
+ # @param max_id [String] retrieve results older than this ID.
+ #
# @param local [Boolean] Retrieve only local statuses
#
# @param remote [Boolean] Retrieve only remote statuses
#
# @param only_media [Boolean] Retrieve only statuses with media
@@ -349,10 +452,11 @@
#
# @return [Enumerator] if block is not given, otherwise self
#
# @see https://docs.joinmastodon.org/methods/timelines/#public
def each_public_status(limit: nil,
+ max_id: "",
local: false,
remote: false,
only_media: false,
&block)
query = magically_get_caller_kwargs(binding, method(__method__))
@@ -371,10 +475,12 @@
# @param hashtag_s [String, Array<String>]
# Hashtag(s) to retrieve.
#
# @param limit [Integer] maximum number of items to retrieve
#
+ # @param max_id [String] return results older than this ID.
+ #
# @param local [Boolean] retrieve only local statuses
#
# @param remote [Boolean] retrieve only remote statuses
#
# @param only_media [Boolean] retrieve only media status
@@ -390,10 +496,11 @@
# @return [Enumerator] if block is not given, otherwise self
#
# @see https://docs.joinmastodon.org/methods/timelines/#tag
def each_tag_status(hashtag_s,
limit: nil,
+ max_id: "",
local: false,
remote: false,
only_media: false,
all: [],
none: [],
@@ -420,25 +527,32 @@
# Retrieve each Entity::Status in the home timeline.
#
# Requires token.
#
- # @param limit [Integer] ,
- # @param local [Boolean] ,
- # @param remote [Boolean] ,
- # @param only_media [Boolean] ,
+ # @param limit [Integer] maximum number of items to retrieve
#
+ # @param max_id [String] retrieve results older than this ID.
+ #
+ # @param local [Boolean] retrieve only local statuses
+ #
+ # @param remote [Boolean] retrieve only remote statuses
+ #
+ # @param only_media [Boolean] retrieve only media status
+ #
+ #
# @yield [item]
#
# @yieldparam [Entity::Status]
#
# @return [Enumerator] if block is not given, otherwise self
#
#
# @see https://docs.joinmastodon.org/methods/timelines/#home
def each_home_status(limit: nil,
local: false,
+ max_id: "",
remote: false,
only_media: false,
&block)
query = magically_get_caller_kwargs(binding, method(__method__))
rest_get_seq("timelines/home", Entity::Status, query, block)
@@ -486,10 +600,22 @@
end
# Retrieve each notification.
#
+ # Requires a bearer token.
+ #
+ # Notification types are indicated by of the following symbols:
+ #
+ # `:mention`, `:status`, `:reblog`, `:follow`, `:follow_request`
+ # `:favourite`, `:poll`, `:update`, `:admin.sign_up`, or
+ # `:admin.report`
+ #
+ # This method will throw an `Error::Caller` exception if an
+ # unknown value is used.
+ #
+ #
# @param types [Array<String>] list of notifications types to
# enumerate; others are ignoredn
#
# @param exclude_types [Array<String>] types of notifications to exclude
#
@@ -512,13 +638,17 @@
&block)
allowed_notifications = %i{mention status reblog follow follow_request
favourite poll update admin.sign_up
admin.report}
- # Ensure valid filter values
- types.uniq!
- exclude_types.uniq!
+ # Remove duplicates and convert strings to symbols
+ [types, exclude_types].each{|param|
+ param.map!{|item| item.intern}
+ param.uniq!
+ }
+
+ # Now, ensure there are no incorrect notification types.
(types + exclude_types).each{|filter|
assert("Unknown notification type: #{filter}") {
allowed_notifications.include?(filter.intern)
}
}
@@ -548,10 +678,12 @@
# @param spoiler_text [String] Content warning if non-empty string.
# Also sets `sensitive` to true.
#
# @param language [String] ISO language code
#
+ # @return [Entity::Status] The new status.
+ #
# @see https://docs.joinmastodon.org/methods/statuses/#create
def post_status(text,
visibility: :private,
media_ids: [],
spoiler_text: "",
@@ -667,16 +799,18 @@
url = rest_uri("notifications/#{id}/dismiss", {})
basic_rest_post_or_put(url, {})
return nil
end
- private
+
#
- # High-level REST support
+ # High(ish)-level REST support
#
+ private
+
# Extract all of the keyword arguments and their values from the
# caller's context. (The context is 'cbinding', which must be a
# call to method 'cmethod'.)
#
# The results are returned as a hash mapping keyword to value.
@@ -709,11 +843,11 @@
return result_klass.from(result_obj)
end
def rest_delete(path, result_klass)
uri = rest_uri(path, {})
- result_obj, _ = basic_rest_get_or_delete(uri, also_delete: true)
+ result_obj, _ = basic_rest_get_or_delete(uri, is_delete: true)
return result_klass.from(result_obj)
end
# We do the block+enumerator thing here as a placeholder for
# pagination.
@@ -764,11 +898,11 @@
return result_klass.from(result)
end
#
- # Low-level REST support
+ # Low(ish)-level REST support
#
# Given a hash of query arguments, return an array of key-value
# pairs.
#
@@ -802,55 +936,95 @@
version = v2 ? "v2" : "v1"
return URI("https://#{@host}/api/#{version}/#{path}#{args}")
end
- def basic_rest_get_or_delete(uri, also_delete: false)
+ def parse_link_header(hdr)
+ result = {}
+ return result unless hdr # could be nil
+
+ for link in hdr.split(', ')
+ md = link.match(/^<([^>]+)>; rel="([^"]+)"/)
+ assert{md}
+
+ result[ md[2] ] = md[1]
+ end
+
+ return result
+ end
+
+ # Perform a GET or DELETE operation. (They have mostly the same
+ # structure, so we combine the functionality here.)
+ def basic_rest_get_or_delete(url, is_delete: false)
headers = headers_for(:get)
- # Do the thing.
- response = http_get_or_delete(uri, headers, also_delete)
- update_rate_limit(response)
+ request = is_delete ?
+ Net::HTTP::Delete.new(url, headers) :
+ Net::HTTP::Get.new(url, headers)
- result = response.body() if response.class.body_permitted?
- raise Error::Http.new(response) if response.is_a? Net::HTTPClientError
+ response = http_operation(request)
- result_obj = JSON.parse(result)
+ result = JSON.parse(response.body)
- if result.is_a?(Hash) && result_obj.has_key?("error")
- raise Error::Server.new(result_obj["error"])
+ if result.is_a?(Hash) && result.has_key?("error")
+ raise Error::Server.new(result["error"])
end
link = parse_link_header(response["link"])
- return [result_obj, link]
+ return [result, link]
rescue JSON::JSONError => e
raise Error::Remote.new("Error parsing result JSON: #{e}")
end
- def http_get_or_delete(uri, headers, is_delete)
- @logger.debug("#{is_delete ? "Deleting" : "Requesting"} #{uri} " +
- "(token: #{!!@token})")
+ # Retrieve the resource (assumed to be binary) at 'uri' and write
+ # it to 'filename'. For now, returns false if the request
+ # returned an error code and true on success.
+ def basic_get_binary(url, filename)
+ request = Net::HTTP::Get.new(url, headers_for(:get))
- response = Net::HTTP.start(
- uri.hostname,
- uri.port,
- use_ssl: uri.scheme == 'https'
- ) { |http|
- next is_delete ?
- http.delete(uri.path, headers) :
- http.request_get(uri, headers)
- }
+ @logger.debug("Output file is #{filename}")
+ File.open(filename, "wb") { |outfile| http_operation(request, outfile) }
+ @logger.debug("Done (#{filename})")
+ end
- @logger.debug("Response: #{response}")
- return response
+ def basic_rest_post_or_put(url, formdata, is_put: false)
+ headers = headers_for(:post)
+
+ # Select request type
+ request = is_put ?
+ Net::HTTP::Put.new(url, headers) :
+ Net::HTTP::Post.new(url, headers)
+
+ # Set the parameters
+ enctype = 'multipart/form-data' if formdata.is_a?(Array)
+ enctype = 'application/x-www-form-urlencoded' if
+ formdata.is_a?(Hash)
+ enctype or
+ raise Error::Caller.new("Unknown formdate type: #{formdata.class}")
+ request.set_form(formdata, enctype)
+
+ # Do the deed
+ response = http_operation(request)
+
+ result = JSON.parse(response.body)
+
+ if result.is_a?(Hash) && result.has_key?("error")
+ raise Error::Server.new(result["error"])
+ end
+
+ return result
+ rescue JSON::JSONError => e
+ raise Error::Remote.new("Error parsing result JSON: #{e}")
end
def headers_for(method)
headers = {}
headers["Authorization"] = "Bearer #{@token}" if @token
+ headers['User-Agent'] = @user_agent
+
if method == :post
extras = {
"Indempotency-Key": SecureRandom.uuid,
"Content-Type": "application/json",
}
@@ -858,113 +1032,85 @@
end
return headers
end
- def update_rate_limit(response)
- @rate_limit.limit = @rate_limit.remaining = @rate_limit.reset = nil
+ def http_operation(request, output_handle = nil)
+ url = request.uri
- @rate_limit.limit = response['X-RateLimit-Limit'].to_i
- @rate_limit.remaining = response['X-RateLimit-Remaining'].to_i
+ while true
+ http = Net::HTTP.new(url.hostname, url.port)
+ http.use_ssl = (url.scheme == 'https')
- reset = response['X-RateLimit-Reset']
- @rate_limit.reset = Time.iso8601(reset) if reset
+ if @debug_http
+ http.set_debug_output(STDERR)
+ request['Accept-Encoding'] = 'identity'
+ end
- @logger.debug "Rate limit: #{rate_limit_desc}"
- end
+ http.start do |http|
+ @logger.debug("Request #{request}; (token: #{!!@token})")
- # Retrieve the resource (assumed to be binary) at 'uri' and write
- # it to 'filename'. For now, returns false if the request
- # returned an error code and true on success.
- def basic_get_binary(uri, filename)
- headers = headers_for(:get)
+ # We have to invoke 'request' with a block argument to get the
+ # response because that's the only way we can get at it before
+ # it's downloaded the entire response into RAM.
+ http.request(request) do |response|
+ @logger.debug("Response: #{response}")
- @logger.debug("Requesting #{uri} (token: #{!!@token})")
- Net::HTTP.get_response(uri, headers) {|response|
- @logger.debug("Response: #{response.code}")
+ update_rate_limit(response)
- update_rate_limit(response)
+ #raise_http_exception_if_error(response)
- if response.is_a? Net::HTTPClientError
- @logger.warn("Got response #{response} for #{uri}.")
- return false
- end
+ if response.is_a?(Net::HTTPClientError)
+ # Finish any body reading that may have been in
+ # progress.
+ response.read_body()
- @logger.debug("Writing body to #{filename}")
- File.open(filename, "wb") { |outfile|
- response.read_body { |chunk|
- @logger.debug(" Writing #{chunk.size} bytes...")
- outfile.write(chunk)
- }
- }
- @logger.debug("Done (#{filename})")
- }
+ # Special case: too many requests. We may throw an
+ # exception or wait until it resets and try again.
+ if response.is_a?(Net::HTTPTooManyRequests)
+ handle_rate_limit_reached(response)
+ next # if we get here, we're trying again
+ end
- return true
- end
+ raise Error::Http.new(response)
+ end
- def parse_link_header(hdr)
- result = {}
- return result unless hdr # could be nil
+ # read_body will write the response body to output_handle if
+ # it's a handle or keep it internally if it's nil.
+ response.read_body(output_handle)
- for link in hdr.split(', ')
- md = link.match(/^<([^>]+)>; rel="([^"]+)"/)
- assert{md}
-
- result[ md[2] ] = md[1]
+ return response
+ end
+ end
end
-
- return result
end
+ def handle_rate_limit_reached(response)
+ raise Error::RateLimit.new(response) unless @rate_limit_retry
- def basic_rest_post_or_put(uri, formdata, is_put: false)
- headers = headers_for(:post)
+ # Call the retry hook first.
+ @retry_hook.call(self.rate_limit().dup) if @retry_hook
- # Do the thing.
- @logger.debug("Posting #{uri} (token: #{!!@token})")
- response = http_post_form(uri, headers, formdata, is_put)
- @logger.debug("Response: #{response}")
-
- update_rate_limit(response)
-
- result = response.body() if response.class.body_permitted?
- raise Error::Http.new(response) if response.is_a? Net::HTTPClientError
-
- result_obj = JSON.parse(result)
-
- if result.is_a?(Hash) && result_obj.has_key?("error")
- raise Error::Server.new(result_obj["error"])
+ # Now, wait out any remaining elapsed time.
+ while true
+ delay = (@rate_limit.reset - Time.now) + 2
+ break if delay <= 0
+ @logger.info "Sleeping for #{delay.round} seconds."
+ sleep delay
end
-
- return result_obj
- rescue JSON::JSONError => e
- raise Error::Remote.new("Error parsing result JSON: #{e}")
end
- def http_post_form(url, headers, formdata, is_put)
- request = is_put ?
- Net::HTTP::Put.new(url, headers) :
- Net::HTTP::Post.new(url, headers)
+ def update_rate_limit(response)
+ @rate_limit.limit = @rate_limit.remaining = @rate_limit.reset = nil
- enctype = 'multipart/form-data' if formdata.is_a?(Array)
- enctype = 'application/x-www-form-urlencoded' if
- formdata.is_a?(Hash)
- enctype or
- raise Error::Caller.new("Unknown formdate type: #{formdata.class}")
- request.set_form(formdata, enctype)
+ @rate_limit.limit = response['X-RateLimit-Limit'].to_i
+ @rate_limit.remaining = response['X-RateLimit-Remaining'].to_i
+ reset = response['X-RateLimit-Reset']
+ @rate_limit.reset = Time.iso8601(reset) if reset
- http = Net::HTTP.new(url.hostname, url.port)
- http.use_ssl = (url.scheme == 'https')
-
- if @debug_http
- http.set_debug_output(STDERR)
- request['Accept-Encoding'] = 'identity'
- end
-
- return http.start {|http|
- http.request(request)
- }
+ @logger.debug "Rate limit: #{rate_limit_desc}"
end
+
end
+
end