# Copyright:: Copyright (c) 2018 eGlobalTech, Inc., all rights reserved
#
# Licensed under the BSD-3 license (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License in the root of the project or at
#
#     http://egt-labs.com/mu/LICENSE.html
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

module MU
  class Cloud
    class AWS
      # A ContainerCluster as configured in {MU::Config::BasketofKittens::container_clusters}
      class ContainerCluster < MU::Cloud::ContainerCluster

        # Initialize this cloud resource object. Calling +super+ will invoke the initializer defined under {MU::Cloud}, which should set the attributes listed in {MU::Cloud::PUBLIC_ATTRS} as well as applicable dependency shortcuts, like +@vpc+, for us.
        # @param args [Hash]: Hash of named arguments passed via Ruby's double-splat
        def initialize(**args)
          super

          if @cloud_id and args[:from_cloud_desc]
            if args[:from_cloud_desc].class.name == "Aws::ECS::Types::Cluster"
              @config['flavor'] = "ECS" # XXX but we need to tell when it's Fargate
            elsif args[:from_cloud_desc].class.name == "Aws::EKS::Types::Cluster"
              # XXX but we need to tell when it's Fargate
              @config['flavor'] = "EKS"
            end
          end

          @mu_name ||= @deploy.getResourceName(@config["name"])
        end

        # Called automatically by {MU::Deploy#createResources}
        def create
          if @config['flavor'] == "EKS" or (@config['flavor'] == "Fargate" and !@config['containers'])
            subnet_ids = mySubnets.map { |s| s.cloud_id }

            params = {
              :name => @mu_name,
              :version => @config['kubernetes']['version'],
              :role_arn => @deploy.findLitterMate(name: @config['name']+"controlplane", type: "roles").arn,
              :resources_vpc_config => {
                :security_group_ids => myFirewallRules.map { |fw| fw.cloud_id },
                :subnet_ids => subnet_ids
              }
            }
            if @config['logging'] and @config['logging'].size > 0
              params[:logging] = {
                :cluster_logging => [
                  {
                    :types => @config['logging'],
                    :enabled => true
                  }
                ]
              }
            end
            params.delete(:version) if params[:version] == "latest"

            on_retry = Proc.new { |e|
              # soul-crushing, yet effective
              if e.message.match(/because (#{Regexp.quote(@config['region'])}[a-z]), the targeted availability zone, does not currently have sufficient capacity/)
                bad_az = Regexp.last_match(1)
                deletia = []
                mySubnets.each { |subnet|
                  deletia << subnet.cloud_id if subnet.az == bad_az
                }
                raise e if deletia.empty?
                MU.log "#{bad_az} does not have EKS capacity.
Dropping unsupported subnets from ContainerCluster '#{@config['name']}' and retrying.", MU::NOTICE, details: deletia deletia.each { |subnet| params[:resources_vpc_config][:subnet_ids].delete(subnet) } end } MU.retrier([Aws::EKS::Errors::UnsupportedAvailabilityZoneException, Aws::EKS::Errors::InvalidParameterException], on_retry: on_retry, max: subnet_ids.size) { MU.log "Creating EKS cluster #{@mu_name}", details: params MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).create_cluster(params) } @cloud_id = @mu_name loop_if = Proc.new { cloud_desc(use_cache: false).status != "ACTIVE" } MU.retrier(ignoreme: [Aws::EKS::Errors::ResourceNotFoundException], wait: 30, max: 60, loop_if: loop_if) { |retries, _wait| if cloud_desc.status == "FAILED" raise MuError, "EKS cluster #{@mu_name} had FAILED status" end if retries > 0 and (retries % 3) == 0 and cloud_desc.status != "ACTIVE" MU.log "Waiting for EKS cluster #{@mu_name} to become active (currently #{cloud_desc.status})", MU::NOTICE end } MU.log "Creation of EKS cluster #{@mu_name} complete" else MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).create_cluster( cluster_name: @mu_name ) @cloud_id = @mu_name end end # Called automatically by {MU::Deploy#createResources} def groom # EKS or Fargate-EKS: do Kubernetes things if @config['flavor'] == "EKS" or (@config['flavor'] == "Fargate" and !@config['containers']) # This will be needed if a loadbalancer has never been created in # this account; EKS applications might want one, but will fail in # confusing ways if this hasn't been done. begin MU::Cloud::AWS.iam(credentials: @config['credentials']).create_service_linked_role( aws_service_name: "elasticloadbalancing.amazonaws.com" ) rescue ::Aws::IAM::Errors::InvalidInput end apply_kubernetes_tags create_fargate_kubernetes_profile if @config['flavor'] == "Fargate" apply_kubernetes_resources elsif @config['flavor'] != "Fargate" manage_ecs_workers end # ECS: manage containers/services/tasks if @config['flavor'] != "EKS" and @config['containers'] # Reorganize things so that we have services and task definitions # mapped to the set of containers they must contain tasks = {} @config['containers'].each { |c| service_name = c['service'] ? @mu_name+"-"+c['service'].upcase : @mu_name tasks[service_name] ||= [] tasks[service_name] << c } existing_svcs = list_ecs_services tasks.each_pair { |service_name, containers| role_arn = nil container_definitions, role, lbs = get_ecs_container_definitions(containers) role_arn ||= role cpu_total = mem_total = 0 containers.each { |c| cpu_total += c['cpu'] mem_total += c['memory'] } cpu_total = 2 if cpu_total == 0 mem_total = 2 if mem_total == 0 task_def = register_ecs_task(container_definitions, service_name, cpu_total, mem_total, role_arn: role_arn) create_update_ecs_service(task_def, service_name, lbs, existing_svcs) existing_svcs << service_name } if tasks.size > 0 tasks_failing = false MU.retrier(wait: 15, max: 10, loop_if: Proc.new { tasks_failing }){ |retries, _wait| tasks_failing = !MU::Cloud::AWS::ContainerCluster.tasksRunning?(@mu_name, log: (retries > 0), region: @config['region'], credentials: @config['credentials']) } if tasks_failing MU.log "Not all tasks successfully launched in cluster #{@mu_name}", MU::WARN end end end end # Returns true if all tasks in the given ECS/Fargate cluster are in the # RUNNING state. 
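        # @example Check whether a deployed cluster's tasks have settled (illustrative cluster name)
        #   MU::Cloud::AWS::ContainerCluster.tasksRunning?("mydeploy-dev-mycluster", region: "us-east-1")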
# @param cluster [String]: The cluster to check # @param log [Boolean]: Output the state of each task to Mu's logger facility # @param region [String] # @param credentials [String] # @return [Boolean] def self.tasksRunning?(cluster, log: true, region: MU.myRegion, credentials: nil) services = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_services( cluster: cluster ).service_arns.map { |s| s.sub(/.*?:service\/([^\/:]+?)$/, '\1') } tasks_defined = [] begin listme = services.slice!(0, (services.length >= 10 ? 10 : services.length)) if services.size > 0 tasks_defined.concat( MU::Cloud::AWS.ecs(region: region, credentials: credentials).describe_services( cluster: cluster, services: listme ).services.map { |s| s.task_definition } ) end end while services.size > 0 containers = {} tasks_defined.each { |t| taskdef = MU::Cloud::AWS.ecs(region: region, credentials: credentials).describe_task_definition( task_definition: t.sub(/^.*?:task-definition\/([^\/:]+)$/, '\1') ) taskdef.task_definition.container_definitions.each { |c| containers[c.name] = {} } } tasks = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_tasks( cluster: cluster, desired_status: "RUNNING" ).task_arns tasks.concat(MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_tasks( cluster: cluster, desired_status: "STOPPED" ).task_arns) begin sample = tasks.slice!(0, (tasks.length >= 100 ? 100 : tasks.length)) break if sample.size == 0 task_ids = sample.map { |task_arn| task_arn.sub(/^.*?:task\/([a-f0-9\-]+)$/, '\1') } MU::Cloud::AWS.ecs(region: region, credentials: credentials).describe_tasks( cluster: cluster, tasks: task_ids ).tasks.each { |t| t.containers.each { |c| containers[c.name] ||= {} containers[c.name][t.desired_status] ||= { "reasons" => [] } [t.stopped_reason, c.reason].each { |r| next if r.nil? containers[c.name][t.desired_status]["reasons"] << r } containers[c.name][t.desired_status]["reasons"].uniq! 
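              # Keep only the newest record per container and desired status, so a
              # stale failure doesn't mask the task's current state.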
if !containers[c.name][t.desired_status]['time'] or t.created_at > containers[c.name][t.desired_status]['time'] MU.log c.name, MU::NOTICE, details: t containers[c.name][t.desired_status] = { "time" => t.created_at, "status" => c.last_status, "reasons" => containers[c.name][t.desired_status]["reasons"] } end } } end while tasks.size > 0 to_return = true containers.each_pair { |name, states| if !states["RUNNING"] or states["RUNNING"]["status"] != "RUNNING" to_return = false if states["STOPPED"] and states["STOPPED"]["status"] MU.log "Container #{name} has failures", MU::WARN, details: states["STOPPED"] if log elsif states["RUNNING"] and states["RUNNING"]["status"] MU.log "Container #{name} not currently running", MU::NOTICE, details: states["RUNNING"] if log else MU.log "Container #{name} in unknown state", MU::WARN, details: states["STOPPED"] if log end else MU.log "Container #{name} running", details: states["RUNNING"] if log end } to_return end @cloud_desc_cache = nil # Return the cloud layer descriptor for this EKS/ECS/Fargate cluster # @return [OpenStruct] def cloud_desc(use_cache: true) return @cloud_desc_cache if @cloud_desc_cache and use_cache return nil if !@cloud_id @cloud_desc_cache = if @config['flavor'] == "EKS" or (@config['flavor'] == "Fargate" and !@config['containers']) resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_cluster( name: @cloud_id ) resp.cluster else resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).describe_clusters( clusters: [@cloud_id] ) resp.clusters.first end @cloud_desc_cache end # Canonical Amazon Resource Number for this resource # @return [String] def arn if @config['flavor'] == "EKS" cloud_desc.arn else cloud_desc.cluster_arn end end # Return the metadata for this ContainerCluster # @return [Hash] def notify deploy_struct = MU.structToHash(cloud_desc) deploy_struct['cloud_id'] = @mu_name deploy_struct["region"] = @config['region'] if @config['flavor'] == "EKS" deploy_struct["max_pods"] = @config['kubernetes']['max_pods'].to_s # XXX if FargateKS, get the Fargate Profile artifact end return deploy_struct end @@eks_versions = {} @@eks_version_semaphores = {} # Use the AWS SSM API to fetch the current version of the Amazon Linux # ECS-optimized AMI, so we can use it as a default AMI for ECS deploys. 
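        # @example Look up the current default ECS-optimized AMI for a region (illustrative)
        #   MU::Cloud::AWS::ContainerCluster.getStandardImage("ECS", "us-east-1")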
        # @param flavor [String]: ECS or EKS
        # @param region [String]: Target AWS region
        # @param version [String]: Version of Kubernetes, if +flavor+ is set to +EKS+
        # @param gpu [Boolean]: Whether to request an image with GPU support
        def self.getStandardImage(flavor = "ECS", region = MU.myRegion, version: nil, gpu: false)
          resp = if flavor == "ECS"
            MU::Cloud::AWS.ssm(region: region).get_parameters(
              names: ["/aws/service/#{flavor.downcase}/optimized-ami/amazon-linux/recommended"]
            )
          else
            @@eks_version_semaphores[region] ||= Mutex.new
            @@eks_version_semaphores[region].synchronize {
              if !@@eks_versions[region]
                @@eks_versions[region] ||= []
                versions = {}
                resp = MU::Cloud::AWS.ssm(region: region).get_parameters_by_path(
                  path: "/aws/service/#{flavor.downcase}/optimized-ami",
                  recursive: true,
                  max_results: 10 # as high as it goes, ugh
                )
                resp.parameters.each { |p|
                  p.name.match(/\/aws\/service\/eks\/optimized-ami\/([^\/]+?)\//)
                  versions[Regexp.last_match[1]] = true
                }
                @@eks_versions[region] = versions.keys.sort { |a, b| MU.version_sort(a, b) }
              end
            }
            if !version or version == "latest"
              version = @@eks_versions[region].last
            end
            MU::Cloud::AWS.ssm(region: region).get_parameters(
              names: ["/aws/service/#{flavor.downcase}/optimized-ami/#{version}/amazon-linux-2#{gpu ? "-gpu" : ""}/recommended"]
            )
          end

          if resp and resp.parameters and resp.parameters.size > 0
            image_details = JSON.parse(resp.parameters.first.value)
            return image_details['image_id']
          end

          nil
        end

        @@supported_eks_region_cache = []
        @@eks_region_semaphore = Mutex.new
        # Return the list of regions where we know EKS is supported.
        def self.EKSRegions(credentials = nil)
          @@eks_region_semaphore.synchronize {
            if @@supported_eks_region_cache and !@@supported_eks_region_cache.empty?
              return @@supported_eks_region_cache
            end

            start = Time.now
            # the SSM API is painfully slow for large result sets, so thread
            # these and do them in parallel
            @@supported_eks_region_cache = []
            region_threads = []
            MU::Cloud::AWS.listRegions(credentials: credentials).each { |region|
              region_threads << Thread.new(region) { |r|
                r_start = Time.now
                ami = getStandardImage("EKS", r)
                @@supported_eks_region_cache << r if ami
              }
            }
            region_threads.each { |t| t.join }

            @@supported_eks_region_cache
          }
        end

        # Does this resource type exist as a global (cloud-wide) artifact, or
        # is it localized to a region/zone?
        # @return [Boolean]
        def self.isGlobal?
          false
        end

        # Denote whether this resource implementation is experimental, ready for
        # testing, or ready for production use.
        def self.quality
          MU::Cloud::RELEASE
        end

        # Remove all container_clusters associated with the currently loaded deployment.
# @param noop [Boolean]: If true, will only print what would be done # @param ignoremaster [Boolean]: If true, will remove resources not flagged as originating from this Mu server # @param region [String]: The cloud provider region # @return [void] def self.cleanup(noop: false, deploy_id: MU.deploy_id, ignoremaster: false, region: MU.curRegion, credentials: nil, flags: {}) MU.log "AWS::ContainerCluster.cleanup: need to support flags['known']", MU::DEBUG, details: flags MU.log "Placeholder: AWS ContainerCluster artifacts do not support tags, so ignoremaster cleanup flag has no effect", MU::DEBUG, details: ignoremaster purge_ecs_clusters(noop: noop, region: region, credentials: credentials, deploy_id: deploy_id) purge_eks_clusters(noop: noop, region: region, credentials: credentials, deploy_id: deploy_id) end def self.purge_eks_clusters(noop: false, region: MU.curRegion, credentials: nil, deploy_id: MU.deploy_id) resp = begin MU::Cloud::AWS.eks(credentials: credentials, region: region).list_clusters rescue Aws::EKS::Errors::AccessDeniedException # EKS isn't actually live in this region, even though SSM lists # base images for it if @@supported_eks_region_cache @@supported_eks_region_cache.delete(region) end return end return if !resp or !resp.clusters resp.clusters.each { |cluster| if cluster.match(/^#{deploy_id}-/) desc = MU::Cloud::AWS.eks(credentials: credentials, region: region).describe_cluster( name: cluster ).cluster profiles = MU::Cloud::AWS.eks(region: region, credentials: credentials).list_fargate_profiles( cluster_name: cluster ) if profiles and profiles.fargate_profile_names profiles.fargate_profile_names.each { |profile| MU.log "Deleting Fargate EKS profile #{profile}" next if noop MU::Cloud::AWS::ContainerCluster.purge_fargate_profile(profile, cluster, region, credentials) } end remove_kubernetes_tags(cluster, desc, region: region, credentials: credentials, noop: noop) MU.log "Deleting EKS Cluster #{cluster}" next if noop MU::Cloud::AWS.eks(credentials: credentials, region: region).delete_cluster( name: cluster ) status = nil loop_if = Proc.new { status != "FAILED" } MU.retrier(ignoreme: [Aws::EKS::Errors::ResourceNotFoundException], wait: 60){ |retries, _wait| status = MU::Cloud::AWS.eks(credentials: credentials, region: region).describe_cluster( name: cluster ).cluster.status if retries > 0 and (retries % 3) == 0 MU.log "Waiting for EKS cluster #{cluster} to finish deleting (status #{status})", MU::NOTICE end } # MU::Cloud.resourceClass("AWS", "Server").removeIAMProfile(cluster) end } end private_class_method :purge_eks_clusters def self.purge_ecs_clusters(noop: false, region: MU.curRegion, credentials: nil, deploy_id: MU.deploy_id) start = Time.now resp = MU::Cloud::AWS.ecs(credentials: credentials, region: region).list_clusters return if !resp or !resp.cluster_arns or resp.cluster_arns.empty? 
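          # Walk every ECS cluster in this region, but only touch the ones whose
          # names carry our deploy_id prefix.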
resp.cluster_arns.each { |arn| if arn.match(/:cluster\/(#{deploy_id}[^:]+)$/) cluster = Regexp.last_match[1] svc_resp = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_services( cluster: arn ) if svc_resp and svc_resp.service_arns svc_resp.service_arns.each { |svc_arn| svc_name = svc_arn.gsub(/.*?:service\/(.*)/, '\1') MU.log "Deleting Service #{svc_name} from ECS Cluster #{cluster}" next if noop MU::Cloud::AWS.ecs(region: region, credentials: credentials).delete_service( cluster: arn, service: svc_name, force: true # man forget scaling up and down if we're just deleting the cluster ) } end instances = MU::Cloud::AWS.ecs(credentials: credentials, region: region).list_container_instances({ cluster: cluster }) if instances instances.container_instance_arns.each { |instance_arn| uuid = instance_arn.sub(/^.*?:container-instance\//, "") MU.log "Deregistering instance #{uuid} from ECS Cluster #{cluster}" next if noop resp = MU::Cloud::AWS.ecs(credentials: credentials, region: region).deregister_container_instance({ cluster: cluster, container_instance: uuid, force: true, }) } end MU.log "Deleting ECS Cluster #{cluster}" next if noop MU.retrier([Aws::ECS::Errors::ClusterContainsTasksException], wait: 5){ # TODO de-register container instances MU::Cloud::AWS.ecs(credentials: credentials, region: region).delete_cluster( cluster: cluster ) } end } tasks = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_task_definitions( family_prefix: deploy_id ) if tasks and tasks.task_definition_arns tasks.task_definition_arns.each { |arn| MU.log "Deregistering Fargate task definition #{arn}" if !noop MU::Cloud::AWS.ecs(region: region, credentials: credentials).deregister_task_definition( task_definition: arn ) end } end end private_class_method :purge_ecs_clusters # Locate an existing container_cluster. # @return [Hash]: The cloud provider's complete descriptions of matching container_clusters. def self.find(**args) found = {} if args[:cloud_id] resp = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).describe_clusters(clusters: [args[:cloud_id]]) if resp.clusters and resp.clusters.size > 0 found[args[:cloud_id]] = resp.clusters.first else # XXX misses due to name collision are possible here desc = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).describe_cluster(name: args[:cloud_id]) found[args[:cloud_id]] = desc.cluster if desc and desc.cluster end else next_token = nil begin resp = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).list_clusters(next_token: next_token) break if !resp or !resp.cluster_arns next_token = resp.next_token names = resp.cluster_arns.map { |a| a.sub(/.*?:cluster\//, '') } descs = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).describe_clusters(clusters: names) if descs and descs.clusters descs.clusters.each { |c| found[c.cluster_name] = c } end end while next_token # XXX name collision is possible here next_token = nil begin resp = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).list_clusters(next_token: next_token) break if !resp or !resp.clusters resp.clusters.each { |c| desc = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).describe_cluster(name: c) found[c] = desc.cluster if desc and desc.cluster } next_token = resp.next_token rescue Aws::EKS::Errors::AccessDeniedException # not all regions support EKS end while next_token end found end # Cloud-specific configuration properties. 
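        # @example A minimal Basket of Kittens stanza that exercises this schema (illustrative sketch; field names come from the schema below)
        #   container_clusters:
        #   - name: mycluster
        #     flavor: Fargate
        #     kubernetes:
        #       version: latest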
# @param _config [MU::Config]: The calling MU::Config object # @return [Array]: List of required fields, and json-schema Hash of cloud-specific configuration parameters for this resource def self.schema(_config) toplevel_required = [] schema = { "flavor" => { "enum" => ["ECS", "EKS", "Fargate", "Kubernetes"], "type" => "string", "description" => "The AWS container platform to deploy", "default" => "Fargate" }, "kubernetes_pools" => { "default" => [ [ { "namespace" => "default" }, { "namespace" => "gitlab-managed-apps" } ] ], "type" => "array", "description" => "Fargate Kubernetes worker pools, with namespace/label selectors for targeting pods. Specifying multiple pools will create and attach multiple Fargate Profiles to this EKS cluster. Our default behavior is to create one pool (one Fargate Profile) that will match any pod and deploy it to the +default+ namespace.", "items" => { "type" => "array", "items" => { "type" => "object", "description" => "A namespace/label selector for a Fargate EKS Profile. See also https://docs.aws.amazon.com/cli/latest/reference/eks/create-fargate-profile.html", "properties" => { "namespace" => { "type" => "string", "default" => "default", "description" => "The Kubernetes namespace into which pods matching our labels should be deployed." }, "labels" => { "type" => "object", "description" => "Key/value pairs of Kubernetes labels, which a pod must have in order to be deployed to this pool. A pod must match all labels." } } } } }, "kubernetes" => { "default" => { "version" => "latest" } }, "gpu" => { "type" => "boolean", "default" => false, "description" => "Enable worker nodes with GPU capabilities" }, "platform" => { "description" => "The platform to choose for worker nodes." }, "ami_id" => { "type" => "string", "description" => "The Amazon EC2 AMI on which to base this cluster's container hosts. Will use the default appropriate for the platform, if not specified. Only valid for EKS and ECS flavors." }, "run_list" => { "type" => "array", "items" => { "type" => "string", "description" => "An extra Chef run list entry, e.g. role[rolename] or recipe[recipename]s, to be run on worker nodes. Only valid for EKS and ECS flavors." } }, "ingress_rules" => { "type" => "array", "items" => MU::Config::FirewallRule.ruleschema, "default" => [ { "egress" => true, "port" => 443, "hosts" => [ "0.0.0.0/0" ] } ] }, "logging" => { "type" => "array", "default" => ["authenticator", "api"], "items" => { "type" => "string", "description" => "Cluster CloudWatch logs to enable for EKS clusters.", "enum" => ["api", "audit", "authenticator", "controllerManager", "scheduler"] } }, "volumes" => { "type" => "array", "items" => { "description" => "Define one or more volumes which can then be referenced by the +mount_points+ parameter inside +containers+. +docker+ volumes are not valid for Fargate clusters. See also https://docs.aws.amazon.com/AmazonECS/latest/developerguide/using_data_volumes.html", "type" => "object", "required" => ["name", "type"], "properties" => { "name" => { "type" => "string", "description" => "Name this volume so it can be referenced by containers." }, "type" => { "type" => "string", "enum" => ["docker", "host"] }, "docker_volume_configuration" => { "type" => "object", "default" => { "autoprovision" => true, "driver" => "local" }, "description" => "This parameter is specified when you are using +docker+ volumes. Docker volumes are only supported when you are using the EC2 launch type. 
To use bind mounts, specify a +host+ volume instead.", "properties" => { "autoprovision" => { "type" => "boolean", "description" => "Create the Docker volume if it does not already exist.", "default" => true }, "driver" => { "type" => "string", "description" => "The Docker volume driver to use. Note that Windows containers can only use the +local+ driver. This parameter maps to +Driver+ in the Create a volume section of the Docker Remote API and the +xxdriver+ option to docker volume create." }, "labels" => { "description" => "Custom metadata to add to your Docker volume.", "type" => "object" }, "driver_opts" => { "description" => "A map of Docker driver-specific options passed through. This parameter maps to +DriverOpts+ in the Create a volume section of the Docker Remote API and the +xxopt+ option to docker volume create .", "type" => "object" }, } }, "host_volume_source_path" => { "type" => "string", "description" => "If specified, and the +type+ of this volume is +host+, data will be stored in the container host in this location and will persist after containers associated with it stop running." } } } }, "containers" => { "type" => "array", "items" => { "type" => "object", "description" => "A container image to run on this cluster.", "required" => ["name", "image"], "properties" => { "name" => { "type" => "string", "description" => "The name of a container. If you are linking multiple containers together in a task definition, the name of one container can be entered in the +links+ of another container to connect the containers. This parameter maps to +name+ in the Create a container section of the Docker Remote API and the +--name+ option to docker run." }, "service" => { "type" => "string", "description" => "The Service of which this container will be a component. Default behavior, if unspecified, is to create a service with the name of this container definition and assume they map 1:1." }, "image" => { "type" => "string", "description" => "A Docker image to run, as a shorthand name for a public Dockerhub image or a full URL to a private container repository (+repository-url/image:tag+ or repository-url/image@digest). See +repository_credentials+ to specify authentication for a container repository.", }, "cpu" => { "type" => "integer", "default" => 256, "description" => "CPU to allocate for this container/task. This parameter maps to +CpuShares+ in the Create a container section of the Docker Remote API and the +--cpu-shares+ option to docker run. Not all +cpu+ and +memory+ combinations are valid, particularly when using Fargate, see https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html" }, "memory" => { "type" => "integer", "default" => 512, "description" => "Hard limit of memory to allocate for this container/task. Not all +cpu+ and +memory+ combinations are valid, particularly when using Fargate, see https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html" }, "memory_reservation" => { "type" => "integer", "default" => 512, "description" => "Soft limit of memory to allocate for this container/task. This parameter maps to +MemoryReservation+ in the Create a container section of the Docker Remote API and the +--memory-reservation+ option to docker run." }, "role" => MU::Config::Role.reference, "essential" => { "type" => "boolean", "description" => "Flag this container as essential or non-essential to its parent task. 
If the container fails and is marked essential, the parent task will also be marked as failed.", "default" => true }, "hostname" => { "type" => "string", "description" => "Set this container's local hostname. If not specified, will inherit the name of the parent task. Not valid for Fargate clusters. This parameter maps to +Hostname+ in the Create a container section of the Docker Remote API and the +--hostname+ option to docker run." }, "user" => { "type" => "string", "description" => "The system-level user to use when executing commands inside this container" }, "working_directory" => { "type" => "string", "description" => "The working directory in which to run commands inside the container." }, "disable_networking" => { "type" => "boolean", "description" => "This parameter maps to +NetworkDisabled+ in the Create a container section of the Docker Remote API." }, "privileged" => { "type" => "boolean", "description" => "When this parameter is true, the container is given elevated privileges on the host container instance (similar to the root user). This parameter maps to +Privileged+ in the Create a container section of the Docker Remote API and the +--privileged+ option to docker run. Not valid for Fargate clusters." }, "readonly_root_filesystem" => { "type" => "boolean", "description" => "This parameter maps to +ReadonlyRootfs+ in the Create a container section of the Docker Remote API and the +--read-only+ option to docker run." }, "interactive" => { "type" => "boolean", "description" => "When this parameter is +true+, this allows you to deploy containerized applications that require +stdin+ or a +tty+ to be allocated. This parameter maps to +OpenStdin+ in the Create a container section of the Docker Remote API and the +--interactive+ option to docker run." }, "pseudo_terminal" => { "type" => "boolean", "description" => "When this parameter is true, a TTY is allocated. This parameter maps to +Tty+ in the Create a container section of the Docker Remote API and the +--tty+ option to docker run." }, "start_timeout" => { "type" => "integer", "description" => "Time duration to wait before giving up on containers which have been specified with +depends_on+ for this one." }, "stop_timeout" => { "type" => "integer", "description" => "Time duration to wait before the container is forcefully killed if it doesn't exit normally on its own." }, "links" => { "type" => "array", "items" => { "description" => "The +link+ parameter allows containers to communicate with each other without the need for port mappings. Only supported if the network mode of a task definition is set to +bridge+. The +name:internalName+ construct is analogous to +name:alias+ in Docker links.", "type" => "string" } }, "entry_point" => { "type" => "array", "items" => { "type" => "string", "description" => "The entry point that is passed to the container. This parameter maps to +Entrypoint+ in the Create a container section of the Docker Remote API and the +--entrypoint+ option to docker run." } }, "command" => { "type" => "array", "items" => { "type" => "string", "description" => "This parameter maps to +Cmd+ in the Create a container section of the Docker Remote API and the +COMMAND+ parameter to docker run." } }, "dns_servers" => { "type" => "array", "items" => { "type" => "string", "description" => "A list of DNS servers that are presented to the container. This parameter maps to +Dns+ in the Create a container section of the Docker Remote API and the +--dns+ option to docker run." 
} }, "dns_search_domains" => { "type" => "array", "items" => { "type" => "string", "description" => "A list of DNS search domains that are presented to the container. This parameter maps to +DnsSearch+ in the Create a container section of the Docker Remote API and the +--dns-search+ option to docker run." } }, "linux_parameters" => { "type" => "object", "description" => "Linux-specific options that are applied to the container, such as Linux KernelCapabilities.", "properties" => { "init_process_enabled" => { "type" => "boolean", "description" => "Run an +init+ process inside the container that forwards signals and reaps processes. This parameter maps to the +--init+ option to docker run." }, "shared_memory_size" => { "type" => "integer", "description" => "The value for the size (in MiB) of the +/dev/shm+ volume. This parameter maps to the +--shm-size+ option to docker run. Not valid for Fargate clusters." }, "capabilities" => { "type" => "object", "description" => "The Linux capabilities for the container that are added to or dropped from the default configuration provided by Docker.", "properties" => { "add" => { "type" => "array", "items" => { "type" => "string", "description" => "This parameter maps to +CapAdd+ in the Create a container section of the Docker Remote API and the +--cap-add+ option to docker run. Not valid for Fargate clusters.", "enum" => ["ALL", "AUDIT_CONTROL", "AUDIT_WRITE", "BLOCK_SUSPEND", "CHOWN", "DAC_OVERRIDE", "DAC_READ_SEARCH", "FOWNER", "FSETID", "IPC_LOCK", "IPC_OWNER", "KILL", "LEASE", "LINUX_IMMUTABLE", "MAC_ADMIN", "MAC_OVERRIDE", "MKNOD", "NET_ADMIN", "NET_BIND_SERVICE", "NET_BROADCAST", "NET_RAW", "SETFCAP", "SETGID", "SETPCAP", "SETUID", "SYS_ADMIN", "SYS_BOOT", "SYS_CHROOT", "SYS_MODULE", "SYS_NICE", "SYS_PACCT", "SYS_PTRACE", "SYS_RAWIO", "SYS_RESOURCE", "SYS_TIME", "SYS_TTY_CONFIG", "SYSLOG", "WAKE_ALARM"] } }, "drop" => { "type" => "array", "items" => { "type" => "string", "description" => "This parameter maps to +CapDrop+ in the Create a container section of the Docker Remote API and the +--cap-drop+ option to docker run.", "enum" => ["ALL", "AUDIT_CONTROL", "AUDIT_WRITE", "BLOCK_SUSPEND", "CHOWN", "DAC_OVERRIDE", "DAC_READ_SEARCH", "FOWNER", "FSETID", "IPC_LOCK", "IPC_OWNER", "KILL", "LEASE", "LINUX_IMMUTABLE", "MAC_ADMIN", "MAC_OVERRIDE", "MKNOD", "NET_ADMIN", "NET_BIND_SERVICE", "NET_BROADCAST", "NET_RAW", "SETFCAP", "SETGID", "SETPCAP", "SETUID", "SYS_ADMIN", "SYS_BOOT", "SYS_CHROOT", "SYS_MODULE", "SYS_NICE", "SYS_PACCT", "SYS_PTRACE", "SYS_RAWIO", "SYS_RESOURCE", "SYS_TIME", "SYS_TTY_CONFIG", "SYSLOG", "WAKE_ALARM"] } } } }, "devices" => { "type" => "array", "items" => { "type" => "object", "description" => "Host devices to expose to the container.", "properties" => { "host_path" => { "type" => "string", "description" => "The path for the device on the host container instance." }, "container_path" => { "type" => "string", "description" => "The path inside the container at which to expose the host device." }, "permissions" => { "type" => "array", "items" => { "description" => "The explicit permissions to provide to the container for the device. By default, the container has permissions for +read+, +write+, and +mknod+ for the device.", "type" => "string" } } } } }, "tmpfs" => { "type" => "array", "items" => { "type" => "object", "description" => "A tmpfs device to expost to the container. This parameter maps to the +--tmpfs+ option to docker run. 
Not valid for Fargate clusters.", "properties" => { "container_path" => { "type" => "string", "description" => "The absolute file path where the tmpfs volume is to be mounted." }, "size" => { "type" => "integer", "description" => "The size (in MiB) of the tmpfs volume." }, "mount_options" => { "type" => "array", "items" => { "description" => "tmpfs volume mount options", "type" => "string", "enum" => ["defaults", "ro", "rw", "suid", "nosuid", "dev", "nodev", "exec", "noexec", "sync", "async", "dirsync", "remount", "mand", "nomand", "atime", "noatime", "diratime", "nodiratime", "bind", "rbind", "unbindable", "runbindable", "private", "rprivate", "shared", "rshared", "slave", "rslave", "relatime", "norelatime", "strictatime", "nostrictatime", "mode", "uid", "gid", "nr_inodes", "nr_blocks", "mpol"] } } } } } } }, "docker_labels" => { "type" => "object", "description" => "A key/value map of labels to add to the container. This parameter maps to +Labels+ in the Create a container section of the Docker Remote API and the +--label+ option to docker run." }, "docker_security_options" => { "type" => "array", "items" => { "type" => "string", "description" => "A list of strings to provide custom labels for SELinux and AppArmor multi-level security systems. This field is not valid for containers in tasks using the Fargate launch type. This parameter maps to +SecurityOpt+ in the Create a container section of the Docker Remote API and the +--security-opt+ option to docker run." } }, "health_check" => { "type" => "object", "required" => ["command"], "description" => "The health check command and associated configuration parameters for the container. This parameter maps to +HealthCheck+ in the Create a container section of the Docker Remote API and the +HEALTHCHECK+ parameter of docker run.", "properties" => { "command" => { "type" => "array", "items" => { "type" => "string", "description" => "A string array representing the command that the container runs to determine if it is healthy." } }, "interval" => { "type" => "integer", "description" => "The time period in seconds between each health check execution." }, "timeout" => { "type" => "integer", "description" => "The time period in seconds to wait for a health check to succeed before it is considered a failure." }, "retries" => { "type" => "integer", "description" => "The number of times to retry a failed health check before the container is considered unhealthy." }, "start_period" => { "type" => "integer", "description" => "The optional grace period within which to provide containers time to bootstrap before failed health checks count towards the maximum number of retries." } } }, "environment" => { "type" => "array", "items" => { "type" => "object", "description" => "The environment variables to pass to a container. This parameter maps to +Env+ in the Create a container section of the Docker Remote API and the +--env+ option to docker run.", "properties" => { "name" => { "type" => "string" }, "value" => { "type" => "string" } } } }, "resource_requirements" => { "type" => "array", "items" => { "type" => "object", "description" => "Special requirements for this container. As of this writing, +GPU+ is the only valid option.", "required" => ["type", "value"], "properties" => { "type" => { "type" => "string", "enum" => ["GPU"], "description" => "Special requirements for this container. As of this writing, +GPU+ is the only valid option." 
}, "value" => { "type" => "string", "description" => "The number of physical GPUs the Amazon ECS container agent will reserve for the container." } } } }, "system_controls" => { "type" => "array", "items" => { "type" => "object", "description" => "A list of namespaced kernel parameters to set in the container. This parameter maps to +Sysctls+ in the Create a container section of the Docker Remote API and the +--sysctl+ option to docker run.", "properties" => { "namespace" => { "type" => "string", "description" => "The namespaced kernel parameter for which to set a +value+." }, "value" => { "type" => "string", "description" => "The value for the namespaced kernel parameter specified in +namespace+." } } } }, "ulimits" => { "type" => "array", "items" => { "type" => "object", "description" => "This parameter maps to +Ulimits+ in the Create a container section of the Docker Remote API and the +--ulimit+ option to docker run.", "required" => ["name", "soft_limit", "hard_limit"], "properties" => { "name" => { "type" => "string", "description" => "The ulimit parameter to set.", "enum" => ["core", "cpu", "data", "fsize", "locks", "memlock", "msgqueue", "nice", "nofile", "nproc", "rss", "rtprio", "rttime", "sigpending", "stack"] }, "soft_limit" => { "type" => "integer", "description" => "The soft limit for the ulimit type." }, "hard_limit" => { "type" => "integer", "description" => "The hard limit for the ulimit type." }, } } }, "extra_hosts" => { "type" => "array", "items" => { "type" => "object", "description" => "A list of hostnames and IP address mappings to append to the +/etc/hosts+ file on the container. This parameter maps to ExtraHosts in the +Create+ a container section of the Docker Remote API and the +--add-host+ option to docker run.", "required" => ["hostname", "ip_address"], "properties" => { "hostname" => { "type" => "string" }, "ip_address" => { "type" => "string" } } } }, "secrets" => { "type" => "array", "items" => { "type" => "object", "description" => "See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/specifying-sensitive-data.html", "required" => ["name", "value_from"], "properties" => { "name" => { "type" => "string", "description" => "The value to set as the environment variable on the container." }, "value_from" => { "type" => "string", "description" => "The secret to expose to the container." } } } }, "depends_on" => { "type" => "array", "items" => { "type" => "object", "required" => ["container_name", "condition"], "description" => "The dependencies defined for container startup and shutdown. A container can contain multiple dependencies. When a dependency is defined for container startup, for container shutdown it is reversed.", "properties" => { "container_name" => { "type" => "string" }, "condition" => { "type" => "string", "enum" => ["START", "COMPLETE", "SUCCESS", "HEALTHY"] } } } }, "mount_points" => { "type" => "array", "items" => { "type" => "object", "description" => "The mount points for data volumes in your container. This parameter maps to +Volumes+ in the Create a container section of the Docker Remote API and the +--volume+ option to docker run.", "properties" => { "source_volume" => { "type" => "string", "description" => "The name of the +volume+ to mount, defined under the +volumes+ section of our parent +container_cluster+ (if the volume is not defined, an ephemeral bind host volume will be allocated)." 
}, "container_path" => { "type" => "string", "description" => "The container-side path where this volume must be mounted" }, "read_only" => { "type" => "boolean", "default" => false, "description" => "Mount the volume read-only" } } } }, "volumes_from" => { "type" => "array", "items" => { "type" => "object", "description" => "Data volumes to mount from another container. This parameter maps to +VolumesFrom+ in the Create a container section of the Docker Remote API and the +--volumes-from+ option to docker run.", "properties" => { "source_container" => { "type" => "string", "description" => "The name of another container within the same task definition from which to mount volumes." }, "read_only" => { "type" => "boolean", "default" => false, "description" => "If this value is +true+, the container has read-only access to the volume." } } } }, "repository_credentials" => { "type" => "object", "description" => "The Amazon Resource Name (ARN) of a secret containing the private repository credentials.", "properties" => { "credentials_parameter" => { "type" => "string", # XXX KMS? Secrets Manager? This documentation is vague. "description" => "The Amazon Resource Name (ARN) of a secret containing the private repository credentials." } } }, "port_mappings" => { "type" => "array", "items" => { "description" => "Mappings of ports between the container instance and the host instance. This parameter maps to +PortBindings+ in the Create a container section of the Docker Remote API and the +--publish+ option to docker run.", "type" => "object", "properties" => { "container_port" => { "type" => "integer", "description" => "The port number on the container that is bound to the user-specified or automatically assigned host port." }, "host_port" => { "type" => "integer", "description" => "The port number on the container instance to reserve for your container. This should not be specified for Fargate clusters, nor for ECS clusters deployed into VPCs." }, "protocol" => { "type" => "string", "description" => "The protocol used for the port mapping.", "enum" => ["tcp", "udp"], "default" => "tcp" }, } } }, "log_configuration" => { "type" => "object", "description" => "Where to send container logs. If not specified, Mu will create a CloudWatch Logs output channel. See also: https://docs.aws.amazon.com/sdkforruby/api/Aws/ECS/Types/ContainerDefinition.html#log_configuration-instance_method", "default" => { "log_driver" => "awslogs" }, "required" => ["log_driver"], "properties" => { "log_driver" => { "type" => "string", "description" => "Type of logging facility to use for container logs.", "enum" => ["json-file", "syslog", "journald", "gelf", "fluentd", "awslogs", "splunk"] }, "options" => { "type" => "object", "description" => "Per-driver configuration options. 
See also: https://docs.aws.amazon.com/sdkforruby/api/Aws/ECS/Types/ContainerDefinition.html#log_configuration-instance_method"
                    }
                  }
                },
                "loadbalancers" => {
                  "type" => "array",
                  "description" => "Array of loadbalancers to associate with this container service. See also: https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/ECS/Client.html#create_service-instance_method",
                  "default" => [],
                  "items" => {
                    "description" => "Load Balancers to associate with the container services",
                    "type" => "object",
                    "properties" => {
                      "name" => {
                        "type" => "string",
                        "description" => "Name of the loadbalancer to associate"
                      },
                      "container_port" => {
                        "type" => "integer",
                        "description" => "container port to map to the loadbalancer"
                      }
                    }
                  }
                }
              }
            }
          }
        }
        [toplevel_required, schema]
      end

        # Cloud-specific pre-processing of {MU::Config::BasketofKittens::container_clusters}, bare and unvalidated.
        # @param cluster [Hash]: The resource to process and validate
        # @param configurator [MU::Config]: The overall deployment configurator of which this resource is a member
        # @return [Boolean]: True if validation succeeded, False otherwise
        def self.validateConfig(cluster, configurator)
          ok = true
          start = Time.now

          cluster['size'] = MU::Cloud.resourceClass("AWS", "Server").validateInstanceType(cluster["instance_type"], cluster["region"])
          ok = false if cluster['size'].nil?

          cluster["flavor"] = "EKS" if cluster["flavor"].match(/^Kubernetes$/i)

          if cluster["flavor"] == "ECS" and cluster["kubernetes"] and !MU::Cloud::AWS.isGovCloud?(cluster["region"]) and !cluster["containers"] and MU::Cloud::AWS::ContainerCluster.EKSRegions(cluster['credentials']).include?(cluster['region'])
            cluster["flavor"] = "EKS"
            MU.log "Setting flavor of ContainerCluster '#{cluster['name']}' to EKS ('kubernetes' stanza was specified)", MU::NOTICE
          end

          if cluster["flavor"] == "EKS" and !MU::Cloud::AWS::ContainerCluster.EKSRegions(cluster['credentials']).include?(cluster['region'])
            MU.log "EKS is only available in some regions", MU::ERR, details: MU::Cloud::AWS::ContainerCluster.EKSRegions
            ok = false
          end

          if ["Fargate", "EKS"].include?(cluster["flavor"]) and !cluster["vpc"]
            siblings = configurator.haveLitterMate?(nil, "vpcs", has_multiple: true)
            if siblings.size == 1
              MU.log "ContainerCluster #{cluster['name']} did not declare a VPC. Inserting into sibling VPC #{siblings[0]['name']}.", MU::WARN
              cluster["vpc"] = {
                "name" => siblings[0]['name'],
                "subnet_pref" => "all_private"
              }
            elsif MU::Cloud::AWS.hosted?
and MU::Cloud::AWS.myVPCObj cluster["vpc"] = { "id" => MU.myVPC, "subnet_pref" => "all_private" } else MU.log "ContainerCluster #{cluster['name']} must declare a VPC", MU::ERR ok = false end # Re-insert ourselves with this modification so that our child # resources get this VPC we just shoved in if ok and cluster['vpc'] cluster.delete("#MU_VALIDATED") return configurator.insertKitten(cluster, "container_clusters", overwrite: true) end end if cluster["volumes"] cluster["volumes"].each { |v| if v["type"] == "docker" if cluster["flavor"] == "Fargate" MU.log "ContainerCluster #{cluster['name']}: Docker volumes are not supported in Fargate clusters (volume '#{v['name']}' is not valid)", MU::ERR ok = false end end } end if cluster["flavor"] != "EKS" and cluster["containers"] cluster.delete("kubernetes") created_generic_loggroup = false cluster['containers'].each { |c| if c['log_configuration'] and c['log_configuration']['log_driver'] == "awslogs" and (!c['log_configuration']['options'] or !c['log_configuration']['options']['awslogs-group']) logname = cluster["name"]+"-svclogs" rolename = cluster["name"]+"-logrole" c['log_configuration']['options'] ||= {} c['log_configuration']['options']['awslogs-group'] = logname c['log_configuration']['options']['awslogs-region'] = cluster["region"] c['log_configuration']['options']['awslogs-stream-prefix'] ||= c['name'] if c['mount_points'] cluster['volumes'] ||= [] volnames = cluster['volumes'].map { |v| v['name'] } c['mount_points'].each { |m| if !volnames.include?(m['source_volume']) cluster['volumes'] << { "name" => m['source_volume'], "type" => "host" } end } end if !created_generic_loggroup MU::Config.addDependency(cluster, logname, "log") logdesc = { "name" => logname, "region" => cluster["region"], "cloud" => "AWS" } configurator.insertKitten(logdesc, "logs") if !c['role'] roledesc = { "name" => rolename, "cloud" => "AWS", "can_assume" => [ { "entity_id" => "ecs-tasks.amazonaws.com", "entity_type" => "service" } ], "policies" => [ { "name" => "ECSTaskLogPerms", "permissions" => [ "logs:CreateLogStream", "logs:DescribeLogGroups", "logs:DescribeLogStreams", "logs:PutLogEvents" ], "targets" => [ { "type" => "log", "identifier" => logname } ] } ], "dependencies" => [{ "type" => "log", "name" => logname }] } configurator.insertKitten(roledesc, "roles") MU::Config.addDependency(cluster, rolename, "role") end created_generic_loggroup = true end c['role'] ||= { 'name' => rolename } end } end if cluster['flavor'] == "Fargate" and !cluster['containers'] if !cluster['kubernetes'] and !cluster['kubernetes_resources'] MU.log "Fargate requested without ECS-specific parameters, will build an EKS (Kubernetes) cluster", MU::NOTICE end role = { "name" => cluster["name"]+"pods", "credentials" => cluster["credentials"], "cloud" => "AWS", "can_assume" => [ { "entity_id" => "eks-fargate-pods.amazonaws.com", "entity_type" => "service" } ], "attachable_policies" => [ { "id" => "AmazonEKSFargatePodExecutionRolePolicy" } ] } role["tags"] = cluster["tags"] if !cluster["tags"].nil? role["optional_tags"] = cluster["optional_tags"] if !cluster["optional_tags"].nil? 
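            # Register the Fargate pod execution role as a sibling resource; the
            # cluster's groom phase depends on it existing first.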
            configurator.insertKitten(role, "roles")
            MU::Config.addDependency(cluster, cluster["name"]+"pods", "role", phase: "groom")
            if !MU::Master.kubectl
              MU.log "Since I can't find a kubectl executable, you will have to handle all service account, user, and role bindings manually!", MU::WARN
            end
          end

          if MU::Cloud::AWS.isGovCloud?(cluster["region"]) and cluster["flavor"] == "EKS"
            MU.log "AWS GovCloud does not support #{cluster["flavor"]} yet", MU::ERR
            ok = false
          end

          if ["ECS", "EKS"].include?(cluster["flavor"])
            version = cluster["kubernetes"] ? cluster['kubernetes']['version'] : nil
            std_ami = getStandardImage(cluster["flavor"], cluster['region'], version: version, gpu: cluster['gpu'])
            cluster["host_image"] ||= std_ami
            if cluster["host_image"] != std_ami
              if cluster["flavor"] == "ECS"
                MU.log "You have specified a non-standard AMI for ECS container hosts. This can work, but you will need to install Docker and the ECS Agent yourself, ideally through a Chef recipe. See AWS documentation for details.", MU::WARN, details: "https://docs.aws.amazon.com/AmazonECS/latest/developerguide/manually_update_agent.html"
              elsif cluster["flavor"] == "EKS"
                MU.log "You have specified a non-standard AMI for EKS worker hosts. This can work, but you will need to install Docker and configure Kubernetes yourself, ideally through a Chef recipe. See AWS documentation for details.", MU::WARN, details: "https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html"
              end
            else
              cluster["host_ssh_user"] = "ec2-user"
              cluster.delete("platform")
            end
          end

          if cluster["flavor"] == "Fargate" and !cluster['vpc']
            if MU.myVPC
              cluster["vpc"] = {
                "vpc_id" => MU.myVPC,
                "subnet_pref" => "all_private"
              }
              MU.log "Fargate cluster #{cluster['name']} did not specify a VPC, inserting into private subnets of #{MU.myVPC}", MU::NOTICE
            else
              MU.log "Fargate cluster #{cluster['name']} must specify a VPC", MU::ERR
              ok = false
            end
          end

          fwname = "container_cluster#{cluster['name']}"
          cluster['ingress_rules'] ||= []
          if ["ECS", "EKS"].include?(cluster["flavor"])
            cluster['ingress_rules'] << {
              "sgs" => ["server_pool"+cluster["name"]+"workers"],
              "port" => 443,
              "proto" => "tcp",
              "ingress" => true,
              "comment" => "Allow worker nodes to access API"
            }
            ruleset = configurator.haveLitterMate?(fwname, "firewall_rules")
            if ruleset
              ruleset["rules"].concat(cluster['ingress_rules'])
              ruleset["rules"].uniq!
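              # The worker-node ServerPool for ECS/EKS flavors is declared just below;
              # Fargate flavors don't get one.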
end end if ["ECS", "EKS"].include?(cluster["flavor"]) cluster["max_size"] ||= cluster["instance_count"] cluster["min_size"] ||= cluster["instance_count"] worker_pool = { "name" => cluster["name"]+"workers", "cloud" => "AWS", "skipinitialupdates" => (cluster["flavor"] == "EKS"), "credentials" => cluster["credentials"], "region" => cluster['region'], "min_size" => cluster["min_size"], "max_size" => cluster["max_size"], "wait_for_nodes" => cluster["instance_count"], "ssh_user" => cluster["host_ssh_user"], "role_strip_path" => true, "basis" => { "launch_config" => { "name" => cluster["name"]+"workers", "size" => cluster["instance_type"] } } } if cluster["flavor"] == "EKS" worker_pool["ingress_rules"] = [ "sgs" => [fwname], "port_range" => "1-65535" ] worker_pool["application_attributes"] ||= {} worker_pool["application_attributes"]["skip_recipes"] ||= [] worker_pool["application_attributes"]["skip_recipes"] << "set_local_fw" end if cluster["vpc"] worker_pool["vpc"] = cluster["vpc"].dup worker_pool["vpc"]["subnet_pref"] = cluster["instance_subnet_pref"] worker_pool["vpc"].delete("subnets") end if cluster["host_image"] worker_pool["basis"]["launch_config"]["image_id"] = cluster["host_image"] end if cluster["flavor"] == "EKS" if !MU::Master.kubectl MU.log "Without a kubectl executable, I cannot bind IAM roles to EKS worker nodes", MU::ERR ok = false end worker_pool["canned_iam_policies"] = [ "AmazonEKSWorkerNodePolicy", "AmazonEKS_CNI_Policy", "AmazonEC2ContainerRegistryReadOnly" ] MU::Config.addDependency(worker_pool, cluster["name"], "container_cluster") worker_pool["run_list"] = ["recipe[mu-tools::eks]"] worker_pool["run_list"].concat(cluster["run_list"]) if cluster["run_list"] MU::Config::Server.common_properties.keys.each { |k| if cluster[k] and !worker_pool[k] worker_pool[k] = cluster[k] end } else worker_pool["groom"] = false # don't meddle with ECS workers unnecessarily end configurator.insertKitten(worker_pool, "server_pools") if cluster["flavor"] == "ECS" MU::Config.addDependency(cluster, cluster["name"]+"workers", "server_pool") end end if cluster["flavor"] == "EKS" or (cluster['flavor'] == "Fargate" and !cluster['containers']) role = { "name" => cluster["name"]+"controlplane", "credentials" => cluster["credentials"], "cloud" => "AWS", "can_assume" => [ { "entity_id" => "eks.amazonaws.com", "entity_type" => "service" } ], "attachable_policies" => [ { "id" => "AmazonEKSServicePolicy" }, { "id" => "AmazonEKSClusterPolicy" } ] } role["tags"] = cluster["tags"] if !cluster["tags"].nil? role["optional_tags"] = cluster["optional_tags"] if !cluster["optional_tags"].nil? 
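            # The EKS control plane needs its own service role (AmazonEKSServicePolicy
            # and AmazonEKSClusterPolicy), registered as a sibling with a groom-phase dependency.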
            configurator.insertKitten(role, "roles")
            MU::Config.addDependency(cluster, cluster["name"]+"controlplane", "role", phase: "groom")
          end

          ok
        end

        # Delete a Fargate profile, needed both for cleanup and regroom updates
        # @param profile [String]: The name of the Fargate profile to remove
        # @param cluster [String]: The EKS cluster to which the profile belongs
        # @param region [String]: The region in which the cluster resides
        # @param credentials [String]: The named credential set to use
        def self.purge_fargate_profile(profile, cluster, region, credentials)
          check = begin
            MU::Cloud::AWS.eks(region: region, credentials: credentials).delete_fargate_profile(
              cluster_name: cluster,
              fargate_profile_name: profile
            )
          rescue Aws::EKS::Errors::ResourceNotFoundException
            return
          rescue Aws::EKS::Errors::ResourceInUseException
            sleep 10
            retry
          end

          loop_if = Proc.new {
            check = MU::Cloud::AWS.eks(region: region, credentials: credentials).describe_fargate_profile(
              cluster_name: cluster,
              fargate_profile_name: profile
            )
            check.fargate_profile.status == "DELETING"
          }

          MU.retrier(ignoreme: [Aws::EKS::Errors::ResourceNotFoundException], wait: 30, max: 40, loop_if: loop_if) { |retries, _wait|
            if check.fargate_profile.status != "DELETING"
              break
            elsif retries > 0 and (retries % 3) == 0
              MU.log "Waiting for Fargate EKS profile #{profile} to delete (status #{check.fargate_profile.status})", MU::NOTICE
            end
          }
        end

        private

        def apply_kubernetes_resources
          kube = ERB.new(File.read(MU.myRoot+"/cookbooks/mu-tools/templates/default/kubeconfig-eks.erb"))
          configmap = ERB.new(File.read(MU.myRoot+"/extras/aws-auth-cm.yaml.erb"))
          @endpoint = cloud_desc.endpoint
          @cacert = cloud_desc.certificate_authority.data
          @cluster = @mu_name
          if @config['flavor'] != "Fargate"
            resp = MU::Cloud::AWS.iam(credentials: @config['credentials']).get_role(role_name: @mu_name+"WORKERS")
            @worker_role_arn = resp.role.arn
          end
          kube_conf = @deploy.deploy_dir+"/kubeconfig-#{@config['name']}"
          gitlab_helper = @deploy.deploy_dir+"/gitlab-eks-helper-#{@config['name']}.sh"

          File.open(kube_conf, "w"){ |k|
            k.puts kube.result(binding)
          }
          gitlab = ERB.new(File.read(MU.myRoot+"/extras/gitlab-eks-helper.sh.erb"))
          File.open(gitlab_helper, "w"){ |k|
            k.puts gitlab.result(binding)
          }

          if @config['flavor'] != "Fargate"
            eks_auth = @deploy.deploy_dir+"/eks-auth-cm-#{@config['name']}.yaml"
            File.open(eks_auth, "w"){ |k|
              k.puts configmap.result(binding)
            }
            authmap_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{eks_auth}"}
            MU.log "Configuring Kubernetes <=> IAM mapping for worker nodes", MU::NOTICE, details: authmap_cmd
            MU.retrier(max: 10, wait: 10, loop_if: Proc.new {$?.exitstatus != 0}){
              puts %x{#{authmap_cmd}}
            }
            raise MuError, "Failed to apply #{authmap_cmd}" if $?.exitstatus != 0
          end

          if MU::Master.kubectl
            admin_user_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{MU.myRoot}/extras/admin-user.yaml"}
            admin_role_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{MU.myRoot}/extras/admin-role-binding.yaml"}
            MU.log "Configuring Kubernetes admin-user and role", MU::NOTICE, details: admin_user_cmd+"\n"+admin_role_cmd
            %x{#{admin_user_cmd}}
            %x{#{admin_role_cmd}}

            if @config['kubernetes_resources']
              MU::Master.applyKubernetesResources(
                @config['name'],
                @config['kubernetes_resources'],
                kubeconfig: kube_conf,
                outputdir: @deploy.deploy_dir
              )
            end
          end

          MU.log %Q{How to interact with your EKS cluster\nkubectl --kubeconfig "#{kube_conf}" get all\nkubectl --kubeconfig "#{kube_conf}" create -f some_k8s_deploy.yml\nkubectl --kubeconfig "#{kube_conf}" get nodes}, MU::SUMMARY
        end

        def create_fargate_kubernetes_profile
          fargate_subnets = mySubnets.map { |s| s.cloud_id }
          podrole_arn = @deploy.findLitterMate(name: @config['name']+"pods",
def create_fargate_kubernetes_profile
  fargate_subnets = mySubnets.map { |s| s.cloud_id }
  podrole_arn = @deploy.findLitterMate(name: @config['name']+"pods", type: "roles").arn
  poolnum = 0

  @config['kubernetes_pools'].each { |selectors|
    profname = @mu_name+"-"+poolnum.to_s
    poolnum += 1
    desc = {
      :fargate_profile_name => profname,
      :cluster_name => @mu_name,
      :pod_execution_role_arn => podrole_arn,
      :selectors => selectors,
      :subnets => fargate_subnets.sort,
      :tags => @tags
    }

    begin
      resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_fargate_profile(
        cluster_name: @mu_name,
        fargate_profile_name: profname
      )
      if resp and resp.fargate_profile
        old_desc = MU.structToHash(resp.fargate_profile, stringify_keys: true)
        new_desc = MU.structToHash(desc, stringify_keys: true)
        ["created_at", "status", "fargate_profile_arn"].each { |k| old_desc.delete(k) }
        old_desc["subnets"].sort!
        if !old_desc.eql?(new_desc)
          MU.log "Deleting Fargate profile #{profname} in order to apply changes", MU::WARN, details: desc
          MU::Cloud::AWS::ContainerCluster.purge_fargate_profile(profname, @mu_name, @config['region'], @credentials)
        else
          next
        end
      end
    rescue Aws::EKS::Errors::ResourceNotFoundException
      # This is just fine!
    end

    MU.log "Creating EKS Fargate profile #{profname}", details: desc
    resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).create_fargate_profile(desc)

    begin
      resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_fargate_profile(
        cluster_name: @mu_name,
        fargate_profile_name: profname
      )
      sleep 1 if resp.fargate_profile.status == "CREATING"
    end while resp.fargate_profile.status == "CREATING"
    MU.log "Creation of EKS Fargate profile #{profname} complete"
  }
end

def self.remove_kubernetes_tags(cluster, desc, region: MU.myRegion, credentials: nil, noop: false)
  untag = []
  untag << desc.resources_vpc_config.vpc_id
  subnets = MU::Cloud::AWS.ec2(credentials: credentials, region: region).describe_subnets(
    filters: [ { name: "vpc-id", values: [desc.resources_vpc_config.vpc_id] } ]
  ).subnets

  # subnets
  untag.concat(subnets.map { |s| s.subnet_id })
  rtbs = MU::Cloud::AWS.ec2(credentials: credentials, region: region).describe_route_tables(
    filters: [ { name: "vpc-id", values: [desc.resources_vpc_config.vpc_id] } ]
  ).route_tables
  untag.concat(rtbs.map { |r| r.route_table_id })
  untag.concat(desc.resources_vpc_config.subnet_ids)
  untag.concat(desc.resources_vpc_config.security_group_ids)
  MU.log "Removing Kubernetes tags from VPC resources for #{cluster}", details: untag
  if !noop
    MU::Cloud::AWS.removeTag("kubernetes.io/cluster/#{cluster}", "shared", untag)
    MU::Cloud::AWS.removeTag("kubernetes.io/cluster/elb", cluster, untag)
  end
end
private_class_method :remove_kubernetes_tags
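# The kubernetes.io/cluster/<cluster-name> = shared tag applied below (and
# stripped again by remove_kubernetes_tags above) follows the convention the
# Kubernetes AWS cloud integration uses to discover which VPC resources it may
# use for load balancers and the like; with a hypothetical cluster name it
# looks like:
#
#   kubernetes.io/cluster/MYDEPLOY-DEV-MYCLUSTER = shared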
def apply_kubernetes_tags
  tagme = [@vpc.cloud_id]
  tagme_elb = []
  @vpc.subnets.each { |s|
    tagme << s.cloud_id
    tagme_elb << s.cloud_id if !s.private?
  }
  rtbs = MU::Cloud::AWS.ec2(region: @config['region'], credentials: @config['credentials']).describe_route_tables(
    filters: [ { name: "vpc-id", values: [@vpc.cloud_id] } ]
  ).route_tables
  tagme.concat(rtbs.map { |r| r.route_table_id })
  main_sg = @deploy.findLitterMate(type: "firewall_rules", name: "server_pool#{@config['name']}workers")
  tagme << main_sg.cloud_id if main_sg
  MU.log "Applying kubernetes.io tags to VPC resources", details: tagme
  MU::Cloud::AWS.createTag(tagme, "kubernetes.io/cluster/#{@mu_name}", "shared", credentials: @config['credentials'])
  MU::Cloud::AWS.createTag(tagme_elb, "kubernetes.io/cluster/elb", @mu_name, credentials: @config['credentials'])
end

def manage_ecs_workers
  resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).list_container_instances({
    cluster: @mu_name
  })
  existing = {}
  if resp
    uuids = []
    resp.container_instance_arns.each { |arn|
      uuids << arn.sub(/^.*?:container-instance\//, "")
    }
    if uuids.size > 0
      resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).describe_container_instances({
        cluster: @mu_name,
        container_instances: uuids
      })
      resp.container_instances.each { |i|
        existing[i.ec2_instance_id] = i
      }
    end
  end

  threads = []
  resource_lookup = MU::Cloud::AWS.listInstanceTypes(@config['region'])[@config['region']]
  serverpool = if ['EKS', 'ECS'].include?(@config['flavor'])
    @deploy.findLitterMate(type: "server_pools", name: @config["name"]+"workers")
  end

  serverpool.listNodes.each { |mynode|
    resources = resource_lookup[mynode.cloud_desc.instance_type]
    threads << Thread.new(mynode) { |node|
      ident_doc = nil
      ident_doc_sig = nil
      if !node.windows?
        session = node.getSSHSession(10, 30)
        ident_doc = session.exec!("curl -s http://169.254.169.254/latest/dynamic/instance-identity/document/")
        ident_doc_sig = session.exec!("curl -s http://169.254.169.254/latest/dynamic/instance-identity/signature/")
#      else
#        begin
#          session = node.getWinRMSession(1, 60)
#        rescue StandardError # XXX
#          session = node.getSSHSession(1, 60)
#        end
      end
      MU.log "Identity document for #{node}", MU::DEBUG, details: ident_doc
      MU.log "Identity document signature for #{node}", MU::DEBUG, details: ident_doc_sig
      params = {
        :cluster => @mu_name,
        :instance_identity_document => ident_doc,
        :instance_identity_document_signature => ident_doc_sig,
        :total_resources => [
          {
            :name => "CPU",
            :type => "INTEGER",
            :integer_value => resources["vcpu"].to_i
          },
          {
            :name => "MEMORY",
            :type => "INTEGER",
            :integer_value => (resources["memory"]*1024*1024).to_i
          }
        ]
      }
      if !existing.has_key?(node.cloud_id)
        MU.log "Registering ECS instance #{node} in cluster #{@mu_name}", details: params
      else
        params[:container_instance_arn] = existing[node.cloud_id].container_instance_arn
        MU.log "Updating ECS instance #{node} in cluster #{@mu_name}", MU::NOTICE, details: params
      end
      MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).register_container_instance(params)
    }
  }
  threads.each { |t| t.join }
end
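# For reference, the array that get_ecs_loadbalancers (below) returns is handed
# directly to ECS as the load_balancers parameter of create_service; a single
# mapping looks roughly like this (name, port and ARN are made up for
# illustration):
#
#   {
#     container_name: "MYDEPLOY-DEV-MYCLUSTER-WEB",
#     container_port: 8080,
#     target_group_arn: "arn:aws:elasticloadbalancing:us-east-1:123456789012:targetgroup/web/0123456789abcdef"
#   }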
def get_ecs_loadbalancers(container_name)
  lbs = []

  if @loadbalancers and !@loadbalancers.empty?
    @loadbalancers.each { |lb|
      MU.log "Mapping LB #{lb.mu_name} to service #{container_name}", MU::INFO
      if lb.cloud_desc.type != "classic"
        elb_groups = MU::Cloud::AWS.elb2(region: @config['region'], credentials: @config['credentials']).describe_target_groups({
          load_balancer_arn: lb.cloud_desc.load_balancer_arn
        })
        matching_target_groups = []
        elb_groups.target_groups.each { |tg|
          if tg.port.to_i == lb['container_port'].to_i
            matching_target_groups << {
              arn: tg.target_group_arn,
              name: tg.target_group_name
            }
          end
        }
        if matching_target_groups.length >= 1
          MU.log "Found #{matching_target_groups.length} matching target group(s) on #{lb.mu_name}. Mapping #{container_name} to target group #{matching_target_groups.first[:name]}", MU::INFO
          lbs << {
            container_name: container_name,
            container_port: lb['container_port'],
            target_group_arn: matching_target_groups.first[:arn]
          }
        else
          raise MuError, "No target group on #{lb.mu_name} matches container port #{lb['container_port']} for #{container_name}"
        end
      elsif @config['flavor'] == "Fargate" && lb.cloud_desc.type == "classic"
        raise MuError, "Classic Load Balancers are not supported with Fargate."
      else
        MU.log "Mapping Classic LB #{lb.mu_name} to service #{container_name}", MU::INFO
        lbs << {
          container_name: container_name,
          container_port: lb['container_port'],
          load_balancer_name: lb.mu_name
        }
      end
    }
  end

  lbs
end

def get_ecs_container_definitions(containers)
  role_arn = nil
  lbs = []

  defs = containers.map { |c|
    container_name = @mu_name+"-"+c['name'].upcase
    lbs.concat(get_ecs_loadbalancers(container_name))

    if c["role"] and !role_arn
      found = MU::MommaCat.findStray(
        @config['cloud'],
        "role",
        cloud_id: c["role"]["id"],
        name: c["role"]["name"],
        deploy_id: c["role"]["deploy_id"] || @deploy.deploy_id,
        dummy_ok: false
      )
      if found
        found = found.first
        if found and found.cloudobj
          role_arn = found.cloudobj.arn
        end
      else
        raise MuError, "Unable to find execution role from #{c["role"]}"
      end
    end

    params = {
      name: @mu_name+"-"+c['name'].upcase,
      image: c['image'],
      memory: c['memory'],
      cpu: c['cpu']
    }
    if !@config['vpc']
      c['hostname'] ||= @mu_name+"-"+c['name'].upcase
    end
    [:essential, :hostname, :start_timeout, :stop_timeout, :user, :working_directory,
     :disable_networking, :privileged, :readonly_root_filesystem, :interactive,
     :pseudo_terminal, :links, :entry_point, :command, :dns_servers, :dns_search_domains,
     :docker_security_options, :port_mappings, :repository_credentials, :mount_points,
     :environment, :volumes_from, :secrets, :depends_on, :extra_hosts, :docker_labels,
     :ulimits, :system_controls, :health_check, :resource_requirements].each { |param|
      if c.has_key?(param.to_s)
        params[param] = if !c[param.to_s].nil? and (c[param.to_s].is_a?(Hash) or c[param.to_s].is_a?(Array))
          MU.strToSym(c[param.to_s])
        else
          c[param.to_s]
        end
      end
    }
    if @config['vpc']
      [:hostname, :dns_servers, :dns_search_domains, :links].each { |param|
        if params[param]
          MU.log "Container parameter #{param.to_s} not supported in VPC clusters, ignoring", MU::WARN
          params.delete(param)
        end
      }
    end
    if @config['flavor'] == "Fargate"
      [:privileged, :docker_security_options].each { |param|
        if params[param]
          MU.log "Container parameter #{param.to_s} not supported in Fargate clusters, ignoring", MU::WARN
          params.delete(param)
        end
      }
    end
    if c['log_configuration']
      log_obj = @deploy.findLitterMate(name: c['log_configuration']['options']['awslogs-group'], type: "logs")
      if log_obj
        c['log_configuration']['options']['awslogs-group'] = log_obj.mu_name
      end
      params[:log_configuration] = MU.strToSym(c['log_configuration'])
    end
    params
  }

  [defs, role_arn, lbs]
end
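# A minimal sketch of one container definition hash as emitted by
# get_ecs_container_definitions above and consumed by register_ecs_task below;
# the image, sizing and port values are hypothetical:
#
#   {
#     name: "MYDEPLOY-DEV-MYCLUSTER-WEB",
#     image: "nginx:1.25",
#     cpu: 256,
#     memory: 512,
#     essential: true,
#     port_mappings: [{ container_port: 80 }]
#   }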
def register_ecs_task(container_definitions, service_name, cpu_total = 2, mem_total = 2, role_arn: nil)
  task_params = {
    family: @deploy.deploy_id,
    container_definitions: container_definitions,
    requires_compatibilities: [@config['flavor'] == "ECS" ? "EC2" : "FARGATE"]
  }

  if @config['volumes']
    task_params[:volumes] = []
    @config['volumes'].each { |v|
      vol = { :name => v['name'] }
      if v['type'] == "host"
        vol[:host] = {}
        if v['host_volume_source_path']
          vol[:host][:source_path] = v['host_volume_source_path']
        end
      elsif v['type'] == "docker"
        vol[:docker_volume_configuration] = MU.strToSym(v['docker_volume_configuration'])
      else
        raise MuError, "Invalid volume type '#{v['type']}' specified in ContainerCluster '#{@mu_name}'"
      end
      task_params[:volumes] << vol
    }
  end

  if role_arn
    task_params[:execution_role_arn] = role_arn
    task_params[:task_role_arn] = role_arn
  end
  if @config['flavor'] == "Fargate"
    task_params[:network_mode] = "awsvpc"
    task_params[:cpu] = cpu_total.to_i.to_s
    task_params[:memory] = mem_total.to_i.to_s
  elsif @config['vpc']
    task_params[:network_mode] = "awsvpc"
  end

  MU.log "Registering task definition #{service_name} with #{container_definitions.size} containers"
  # XXX this helpfully keeps revisions, but let's compare anyway and avoid cluttering with identical ones
  resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).register_task_definition(task_params)
  resp.task_definition.task_definition_arn
end

def list_ecs_services
  svc_resp = nil
  MU.retrier([Aws::ECS::Errors::ClusterNotFoundException], wait: 5, max: 10) {
    svc_resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).list_services(
      cluster: arn
    )
  }

  svc_resp.service_arns.map { |s| s.gsub(/.*?:service\/(.*)/, '\1') }
end

def create_update_ecs_service(task_def, service_name, lbs, existing_svcs)
  service_params = {
    :cluster => @mu_name,
    :desired_count => @config['instance_count'], # XXX this makes no sense
    :service_name => service_name,
    :launch_type => @config['flavor'] == "ECS" ? "EC2" : "FARGATE",
    :task_definition => task_def,
    :load_balancers => lbs
  }
  if @config['vpc']
    subnet_ids = []
    all_public = true
    mySubnets.each { |subnet|
      subnet_ids << subnet.cloud_id
      all_public = false if subnet.private?
    }
    service_params[:network_configuration] = {
      :awsvpc_configuration => {
        :subnets => subnet_ids,
        :security_groups => myFirewallRules.map { |fw| fw.cloud_id },
        :assign_public_ip => all_public ? "ENABLED" : "DISABLED"
      }
    }
  end

  if !existing_svcs.include?(service_name)
    MU.log "Creating Service #{service_name}"
    MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).create_service(service_params)
  else
    # UpdateService expects :service rather than :service_name, and does not
    # accept :launch_type, so adjust the params assembled above accordingly.
    service_params[:service] = service_params[:service_name].dup
    service_params.delete(:service_name)
    service_params.delete(:launch_type)
    MU.log "Updating Service #{service_name}", MU::NOTICE, details: service_params
    MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).update_service(service_params)
  end
end

end
end
end
end