lib/hako/schedulers/ecs.rb in hako-1.5.1 vs lib/hako/schedulers/ecs.rb in hako-1.5.2

- old
+ new

@@ -433,22 +433,36 @@
  end
  # @param [Array<Hash>] definitions
  # @return [Array<Boolean, Aws::ECS::Types::TaskDefinition]
  def register_task_definition_for_oneshot(definitions)
-   family = "#{@app_id}-oneshot"
-   current_task_definition = describe_task_definition(family)
-   if task_definition_changed?(definitions, current_task_definition)
-     new_task_definition = ecs_client.register_task_definition(
-       family: family,
-       task_role_arn: @task_role_arn,
-       container_definitions: definitions,
-       volumes: volumes_definition,
-     ).task_definition
-     [true, new_task_definition]
-   else
-     [false, current_task_definition]
+   10.times do |i|
+     begin
+       family = "#{@app_id}-oneshot"
+       current_task_definition = describe_task_definition(family)
+       if task_definition_changed?(definitions, current_task_definition)
+         new_task_definition = ecs_client.register_task_definition(
+           family: family,
+           task_role_arn: @task_role_arn,
+           container_definitions: definitions,
+           volumes: volumes_definition,
+         ).task_definition
+         return [true, new_task_definition]
+       else
+         return [false, current_task_definition]
+       end
+     rescue Aws::ECS::Errors::ClientException => e
+       if e.message.include?('Too many concurrent attempts to create a new revision of the specified family')
+         Hako.logger.error(e.message)
+         interval = 2**i + rand(0.0..10.0)
+         Hako.logger.error("Retrying register_task_definition_for_oneshot after #{interval} seconds")
+         sleep(interval)
+       else
+         raise e
+       end
+     end
    end
+   raise Error.new('Unable to register task definition for oneshot due to too many client errors')
  end
  # @return [Hash]
  def volumes_definition
    @volumes.map do |name, volume|