I use circleci with orbs: aws/ecr, aws/ecs that uploads the new docker image, and supposed to update ecs service.
My issue is that it uses the first, and not the latest task definition, and cannot seem to find a way to set the deployment to update with the latest task definition.
I use terraform to manage my infrastructure which looks like:
# Get subnets
data "aws_subnets" "subnets" {
filter {
name = "vpc-id"
values = [var.vpc_id]
}
}
# Get security groups
data "aws_security_groups" "security_groups" {
filter {
name = "vpc-id"
values = [var.vpc_id]
}
}
# Create load balancer
resource "aws_lb" "load_balancer" {
name = "${var.app_name}-load-balancer-${var.env}"
subnets = data.aws_subnets.subnets.ids
security_groups = data.aws_security_groups.security_groups.ids
load_balancer_type = "application"
tags = {
Environment = "${var.env}"
}
}
resource "aws_lb_target_group" "blue_target" {
name = "${var.app_name}-blue-target-${var.env}"
protocol = "HTTPS"
port = var.port
target_type = "ip"
vpc_id = var.vpc_id
health_check {
healthy_threshold = 5
interval = 30
matcher = 200
path = "${var.health_check_path}"
protocol = "HTTPS"
timeout = 10
unhealthy_threshold = 2
}
}
resource "aws_lb_target_group" "green_target" {
name = "${var.app_name}-green-target-${var.env}"
protocol = "HTTPS"
port = var.port
target_type = "ip"
vpc_id = var.vpc_id
health_check {
healthy_threshold = 5
interval = 30
matcher = 200
path = "${var.health_check_path}"
protocol = "HTTPS"
timeout = 10
unhealthy_threshold = 2
}
}
data "aws_acm_certificate" "cert" {
domain = var.domain
statuses = ["ISSUED"]
most_recent = true
}
resource "aws_lb_listener" "listener" {
load_balancer_arn = aws_lb.load_balancer.arn
port = var.port
protocol = "HTTPS"
ssl_policy = "ELBSecurityPolicy-2016-08"
certificate_arn = data.aws_acm_certificate.cert.arn
default_action {
type = "forward"
target_group_arn = aws_lb_target_group.blue_target.arn
}
}
# ECS
resource "aws_ecs_cluster" "cluster" {
name = "${var.app_name}-cluster-${var.env}"
}
data "aws_ecr_repository" "ecr_repository" {
name = var.image_repo_name
}
resource "aws_iam_role" "ecs_task_role" {
name = "EcsTaskRole"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Sid = ""
Principal = {
Service = "ecs-tasks.amazonaws.com"
}
},
]
})
}
resource "aws_iam_policy" "secrets_manager_read_policy" {
name = "SecretsManagerRead"
description = "Read only access to secrets manager"
policy = jsonencode({
Version = "2012-10-17",
Statement = [
{
Sid = "",
Effect = "Allow",
Action = [
"secretsmanager:GetRandomPassword",
"secretsmanager:GetResourcePolicy",
"secretsmanager:GetSecretValue",
"secretsmanager:DescribeSecret",
"secretsmanager:ListSecretVersionIds",
"secretsmanager:ListSecrets"
],
Resource = "*"
}
]
})
}
resource "aws_iam_role_policy_attachment" "attach_secrets_manager_read_to_task_role" {
role = aws_iam_role.ecs_task_role.name
policy_arn = aws_iam_policy.secrets_manager_read_policy.arn
}
resource "aws_iam_role_policy_attachment" "attach_s3_read_to_task_role" {
role = aws_iam_role.ecs_task_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
}
resource "aws_iam_role_policy_attachment" "attach_ses_to_task_role" {
role = aws_iam_role.ecs_task_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonSESFullAccess"
}
resource "aws_iam_role" "ecs_exec_role" {
name = "EcsExecRole"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Sid = ""
Principal = {
Service = "ecs-tasks.amazonaws.com"
}
},
]
})
}
resource "aws_iam_role_policy_attachment" "attach_ecs_task_exec_to_exec_role" {
role = aws_iam_role.ecs_exec_role.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}
resource "aws_iam_role_policy_attachment" "attach_fault_injection_simulator_to_exec_role" {
role = aws_iam_role.ecs_exec_role.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSFaultInjectionSimulatorECSAccess"
}
resource "aws_ecs_task_definition" "task_def" {
family = "${var.app_name}-task-def-${var.env}"
network_mode = "awsvpc"
task_role_arn = aws_iam_role.ecs_task_role.arn
execution_role_arn = aws_iam_role.ecs_exec_role.arn
requires_compatibilities = ["FARGATE"]
cpu = "256"
memory = "512"
runtime_platform {
cpu_architecture = "X86_64"
operating_system_family = "LINUX"
}
container_definitions = jsonencode([
{
name = "${var.app_name}-container-${var.env}"
image = "${data.aws_ecr_repository.ecr_repository.repository_url}:latest"
cpu = 0
essential = true
portMappings = [
{
containerPort = var.port
hostPort = var.port
},
]
environment = [
{
name = "PORT",
value = tostring("${var.port}")
},
{
name = "NODE_ENV",
value = var.env
}
]
logConfiguration = {
logDriver = "awslogs"
options = {
"awslogs-create-group" = "true"
"awslogs-group" = "${var.app_name}-task-def-${var.env}"
"awslogs-region" = "${var.region}"
"awslogs-stream-prefix" = "ecs"
}
}
},
])
}
resource "aws_ecs_service" "service" {
lifecycle {
ignore_changes = [
task_definition,
load_balancer,
]
}
cluster = aws_ecs_cluster.cluster.arn
name = "${var.app_name}-service-${var.env}"
task_definition = aws_ecs_task_definition.task_def.arn
load_balancer {
target_group_arn = aws_lb_target_group.blue_target.arn
container_name = "${var.app_name}-container-${var.env}"
container_port = var.port
}
capacity_provider_strategy {
capacity_provider = "FARGATE"
base = 0
weight = 1
}
scheduling_strategy = "REPLICA"
deployment_controller {
type = "CODE_DEPLOY"
}
platform_version = "1.4.0"
network_configuration {
assign_public_ip = true
subnets = data.aws_subnets.subnets.ids
security_groups = data.aws_security_groups.security_groups.ids
}
desired_count = 1
}
# DEPLOYMENT
resource "aws_codedeploy_app" "codedeploy_app" {
name = "${var.app_name}-application-${var.env}"
compute_platform = "ECS"
}
resource "aws_iam_role" "codedeploy_role" {
name = "CodedeployRole"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Sid = ""
Principal = {
Service = "codedeploy.amazonaws.com"
}
},
]
})
}
resource "aws_iam_role_policy_attachment" "attach_codedeploy_role" {
role = aws_iam_role.codedeploy_role.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSCodeDeployRole"
}
resource "aws_iam_role_policy_attachment" "attach_codedeploy_role_for_ecs" {
role = aws_iam_role.codedeploy_role.name
policy_arn = "arn:aws:iam::aws:policy/AWSCodeDeployRoleForECS"
}
resource "aws_codedeploy_deployment_group" "deployment_group" {
app_name = aws_codedeploy_app.codedeploy_app.name
deployment_config_name = "CodeDeployDefault.ECSAllAtOnce"
auto_rollback_configuration {
enabled = true
events = ["DEPLOYMENT_FAILURE"]
}
blue_green_deployment_config {
deployment_ready_option {
action_on_timeout = "CONTINUE_DEPLOYMENT"
wait_time_in_minutes = 0
}
terminate_blue_instances_on_deployment_success {
action = "TERMINATE"
termination_wait_time_in_minutes = 5
}
}
deployment_group_name = "${var.app_name}-deployment-group-${var.env}"
deployment_style {
deployment_option = "WITH_TRAFFIC_CONTROL"
deployment_type = "BLUE_GREEN"
}
load_balancer_info {
target_group_pair_info {
prod_traffic_route {
listener_arns = [aws_lb_listener.listener.arn]
}
target_group {
name = aws_lb_target_group.blue_target.name
}
target_group {
name = aws_lb_target_group.green_target.name
}
}
}
service_role_arn = aws_iam_role.codedeploy_role.arn
ecs_service {
service_name = aws_ecs_service.service.name
cluster_name = aws_ecs_cluster.cluster.name
}
}
resource "aws_appautoscaling_target" "scalable_target" {
service_namespace = "ecs"
resource_id = "service/${aws_ecs_cluster.cluster.name}/${aws_ecs_service.service.name}"
scalable_dimension = "ecs:service:DesiredCount"
min_capacity = 1
max_capacity = 5
}
resource "aws_appautoscaling_policy" "cpu_scaling_policy" {
name = "${var.app_name}-cpu-scaling-policy-${var.env}"
service_namespace = "ecs"
resource_id = "service/${aws_ecs_cluster.cluster.name}/${aws_ecs_service.service.name}"
scalable_dimension = "ecs:service:DesiredCount"
policy_type = "TargetTrackingScaling"
target_tracking_scaling_policy_configuration {
target_value = 70
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageCPUUtilization"
}
scale_out_cooldown = 300
scale_in_cooldown = 300
disable_scale_in = false
}
}
resource "aws_appautoscaling_policy" "memory_scaling_policy" {
name = "${var.app_name}-memory-scaling-policy-${var.env}"
service_namespace = "ecs"
resource_id = "service/${aws_ecs_cluster.cluster.name}/${aws_ecs_service.service.name}"
scalable_dimension = "ecs:service:DesiredCount"
policy_type = "TargetTrackingScaling"
target_tracking_scaling_policy_configuration {
target_value = 70
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageMemoryUtilization"
}
scale_out_cooldown = 300
scale_in_cooldown = 300
disable_scale_in = false
}
}
And my circleci config (the relevant parts):
deploy:
executor: aws-ecr/default
working_directory: ~/code
parameters:
env:
type: string
steps:
- attach_workspace:
at: ~/
- aws-ecr/build-and-push-image:
# attach-workspace: true
# create-repo: true
repo: "test-<< parameters.env >>"
tag: "latest,${CIRCLE_BUILD_NUM}"
- aws-ecs/update-service: # orb built-in job
family: "test-task-def-<< parameters.env >>"
service-name: "test-service-<< parameters.env >>"
cluster: "test-cluster-<< parameters.env >>"
container-image-name-updates: "container=test-container-<< parameters.env >>,tag=${CIRCLE_BUILD_NUM}"
deployment-controller: "CODE_DEPLOY"
codedeploy-application-name: "test-application-<< parameters.env >>"
codedeploy-deployment-group-name: "test-deployment-group-<< parameters.env >>"
codedeploy-load-balanced-container-name: "test-container-<< parameters.env >>"
codedeploy-load-balanced-container-port: 3000
Can anyone spot why my service isn't using the latest task definition, and the deployments are using the old task definition for the service?
Some additional info: The deployment created in circleci uses the latest task definition, and it's visible in the deployment revision as well (appspec).
I've made a mistake in the first creation of the task definiton, I used container_definitions.image as an ecr image's id (which is sha id), and it cannot pull images.
The issue is that the ecs service is using that task definition, constantly trying to create tasks, and failing while codedeploy deployment - which is somewhat unexpected, as it should use the new task definition to create new tasks.
So I have a service with wrongly configured task definition, and when I try to fix it, the deployment cannot go through with the fix, because it's using the wrong task definition to create new tasks, and the deployment is stuck.