lib/hako/schedulers/ecs.rb in hako-1.3.0 vs lib/hako/schedulers/ecs.rb in hako-1.3.1
- old
+ new
@@ -813,52 +813,65 @@
end
raise "Unable to find rollback target. #{task_definition.task_definition_arn} is INACTIVE?"
end
# Lower and upper bounds, in seconds, of the wait interval used by
# on_no_tasks_started between polls of the Auto Scaling group.
+ MIN_ASG_INTERVAL = 1
+ MAX_ASG_INTERVAL = 120
# Waits until has_capacity? reports that the cluster's container instances can
# hold task_definition, incrementing the desired capacity of the Auto Scaling
# group named by @autoscaling_group_for_oneshot by one each time the group is
# settled but capacity is still missing.
#
# The loop has no iteration limit: it only ends by returning true or raising.
#
# @param [Aws::ECS::Types::TaskDefinition] task_definition
# @return [Boolean] true if the capacity is reserved, false when
#   @autoscaling_group_for_oneshot is not set (nothing is attempted)
# @raise [Error] if the configured Auto Scaling group is not returned by
#   describe_auto_scaling_groups
def on_no_tasks_started(task_definition)
unless @autoscaling_group_for_oneshot
return false
end
autoscaling = Aws::AutoScaling::Client.new
# Current wait between polls, in seconds; doubled on throttling and halved
# after each successful describe, always kept within
# MIN_ASG_INTERVAL..MAX_ASG_INTERVAL.
+ interval = MIN_ASG_INTERVAL
loop do
- asg = autoscaling.describe_auto_scaling_groups(auto_scaling_group_names: [@autoscaling_group_for_oneshot]).auto_scaling_groups[0]
+ begin
+ asg = autoscaling.describe_auto_scaling_groups(auto_scaling_group_names: [@autoscaling_group_for_oneshot]).auto_scaling_groups[0]
+ rescue Aws::AutoScaling::Errors::Throttling => e
# Throttled by the AutoScaling API: back off (the interval is doubled before
# sleeping, capped at MAX_ASG_INTERVAL) and restart the loop iteration.
+ Hako.logger.error(e)
+ interval = [interval * 2, MAX_ASG_INTERVAL].min
+ Hako.logger.info("Retrying after #{interval} seconds...")
+ sleep interval
+ next
+ end
unless asg
raise Error.new("AutoScaling Group '#{@autoscaling_group_for_oneshot}' does not exist")
end
# Fetch the details of every container instance listed for the cluster.
container_instances = ecs_client.list_container_instances(cluster: @cluster).flat_map { |c| ecs_client.describe_container_instances(cluster: @cluster, container_instances: c.container_instance_arns).container_instances }
if has_capacity?(task_definition, container_instances)
Hako.logger.info("There's remaining capacity. Start retrying...")
return true
end
# The describe call succeeded, so relax the backoff: halve the interval
# (integer division), never going below MIN_ASG_INTERVAL.
+ interval = [interval / 2, MIN_ASG_INTERVAL].max
# Check autoscaling group health
# Do not scale out while the number of InService instances differs from the
# group's desired capacity; wait and poll again instead.
current = asg.instances.count { |i| i.lifecycle_state == 'InService' }
if asg.desired_capacity != current
- Hako.logger.debug("#{asg.auto_scaling_group_name} isn't in desired state. desired_capacity=#{asg.desired_capacity} in-service instances=#{current}")
- sleep 1
+ Hako.logger.debug("#{asg.auto_scaling_group_name} isn't in desired state. desired_capacity=#{asg.desired_capacity} in-service instances=#{current}. Retry after #{interval} seconds")
+ sleep interval
next
end
# Check out-of-service instances
# Start from every instance id in the group and remove those that appear as
# container instances; whatever remains is not yet registered in the cluster.
out_instances = asg.instances.map(&:instance_id)
container_instances.each do |ci|
out_instances.delete(ci.ec2_instance_id)
end
unless out_instances.empty?
- Hako.logger.debug("There's instances that is running but not registered as container instances: #{out_instances}")
- sleep 1
+ Hako.logger.debug("There's instances that is running but not registered as container instances: #{out_instances}. Retry after #{interval} seconds")
+ sleep interval
next
end
# Scale out
# The group is settled and all its instances are registered, yet capacity is
# still missing: request one more instance, then wait before polling again.
desired = current + 1
Hako.logger.info("Increment desired_capacity of #{asg.auto_scaling_group_name} from #{current} to #{desired}")
autoscaling.set_desired_capacity(auto_scaling_group_name: asg.auto_scaling_group_name, desired_capacity: desired)
+ sleep interval
end
end
# @param [Aws::ECS::Types::TaskDefinition] task_definition
# @param [Array<Aws::ECS::Types::ContainerInstance>] container_instances