1

I am trying to do a blue/green deployment on ECS. When I deploy my service to the ECS cluster manually, it runs fine and passes all the health checks. But whenever I do a blue/green deployment of the same service on ECS, it gets stuck in the Install phase until it times out.

After Timeout I get this error "The deployment timed out while waiting for the replacement task set to become healthy. This time out period is 60 minutes." I am not sure what to do now.

Stuck at install phase

I have configured everything and tested the load balancer, target groups, and ECR; all of them seem to work fine when I manually deploy the service and test it. Please find my Terraform code below and help me out with this. Let me know if you need further details.

ECS Cluster

# ECS cluster named "Production-Fargate-Cluster".
# NOTE(review): the service and the CodeDeploy deployment group read the
# cluster name from remote state outputs — confirm they resolve to this cluster.
resource "aws_ecs_cluster" "production-fargate-cluster" {
  name = "Production-Fargate-Cluster"
}

# Application Load Balancer (internet-facing). Both HTTP listeners defined in
# this module attach to it.
resource "aws_alb" "ecs_cluster_alb" {
  name     = var.ecs_cluster_name
  internal = false # not internal => reachable from outside the VPC

  # Placed in the public subnets exported by the infrastructure state.
  subnets         = data.terraform_remote_state.infrastructure.outputs.two_public_subnets
  security_groups = [aws_security_group.ecs_alb_security_group.id]

  tags = {
    Name = "${var.ecs_cluster_name} - Application Load Balancer"
  }
}

# First ("blue") target group. The first listener's default action forwards
# to this group.
resource "aws_alb_target_group" "ecs_default_target_group" {
  name        = "${var.ecs_cluster_name}-BlueTG"
  vpc_id      = data.terraform_remote_state.infrastructure.outputs.vpc_id
  target_type = "ip" # targets are registered by IP address
  protocol    = "HTTP"
  port        = var.alb_target_group_port # port 80

  # A target is healthy after 3 consecutive passing checks, 30 seconds apart,
  # against the actuator health path; 2 consecutive failures mark it unhealthy.
  health_check {
    enabled             = true
    path                = "/actuator/health"
    interval            = 30
    healthy_threshold   = 3
    unhealthy_threshold = 2
  }

  tags = {
    Name = "Blue-TG"
  }
}

# First listener on the ALB: plain HTTP, forwarding to the first (blue)
# target group by default.
resource "aws_alb_listener" "ecs_alb_http_listener" {
  load_balancer_arn = aws_alb.ecs_cluster_alb.arn
  protocol          = "HTTP"
  port              = var.first_load_balancer_listener_port # 80 port

  default_action {
    target_group_arn = aws_alb_target_group.ecs_default_target_group.arn
    type             = "forward"
  }

  # Terraform ignores drift on the default action.
  # NOTE(review): presumably because CodeDeploy repoints this listener during
  # blue/green traffic shifting — confirm.
  lifecycle {
    ignore_changes = [default_action]
  }
}



# Second listener on the ALB: plain HTTP on port 8080, forwarding to the
# second (green) target group by default.
# NOTE(review): the deployment group shown in this file declares no
# test_traffic_route — confirm whether this listener is meant to be one.
resource "aws_alb_listener" "ecs_alb_http_listener_second" {
  load_balancer_arn = aws_alb.ecs_cluster_alb.arn
  protocol          = "HTTP"
  port              = 8080

  default_action {
    target_group_arn = aws_alb_target_group.ecs_default_target_group_second.arn
    type             = "forward"
  }

  # Terraform ignores drift on the default action.
  lifecycle {
    ignore_changes = [default_action]
  }
}

# Second ("green") target group. The second listener's default action
# forwards to this group.
resource "aws_alb_target_group" "ecs_default_target_group_second" {
  name        = "${var.ecs_cluster_name}-GreenTG"
  port        = 8080
  protocol    = "HTTP"
  vpc_id      = data.terraform_remote_state.infrastructure.outputs.vpc_id
  target_type = "ip" # targets are registered by IP address

  # Same health check as the blue group: 3 consecutive passes (30 s apart)
  # mark a target healthy, 2 consecutive failures mark it unhealthy.
  health_check {
    enabled             = true
    path                = "/actuator/health"
    interval            = 30
    healthy_threshold   = 3
    unhealthy_threshold = 2
  }

  tags = {
    # Was "Blue-TG" (copied from the first target group); this is the green
    # group, so the Name tag now matches the resource's "-GreenTG" name.
    Name = "Green-TG"
  }
}

Fargate ECS Service

# Fargate service whose deployments are driven by CodeDeploy.
resource "aws_ecs_service" "ecs_service" {
  name            = var.ecs_service_name
  cluster         = data.terraform_remote_state.platform.outputs.ecs_cluster_name
  task_definition = aws_ecs_task_definition.task_definition_for_application.arn
  launch_type     = "FARGATE"
  desired_count   = 2

  # Hand deployments over to CodeDeploy instead of the ECS rolling updater.
  deployment_controller {
    type = "CODE_DEPLOY"
  }

  network_configuration {
    # With a load balancer and a NAT gateway in place the tasks belong in
    # private subnets. They are in public subnets here only as an experiment;
    # prefer private subnets for real deployments.
    subnets          = data.terraform_remote_state.platform.outputs.ecs_public_subnets
    security_groups  = [aws_security_group.app_security_group.id]
    assign_public_ip = true
  }

  # Initial registration of the container with the load balancer.
  load_balancer {
    target_group_arn = data.terraform_remote_state.platform.outputs.aws_alb_target_group_arn[0] # target group with port 80 is given here
    container_name   = var.task_definition_name
    container_port   = var.docker_container_port
  }

  # Terraform ignores drift on these three attributes.
  # NOTE(review): presumably because CodeDeploy changes them on every
  # deployment — confirm.
  lifecycle {
    ignore_changes = [load_balancer, task_definition, desired_count]
  }
}

# Task definition for the application: Fargate-compatible, awsvpc networking,
# container definitions rendered from a template.
resource "aws_ecs_task_definition" "task_definition_for_application" {
  family                   = var.task_definition_name
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = var.cpu
  memory                   = var.memory
  container_definitions    = data.template_file.ecs_task_definition_template.rendered

  # NOTE(review): execution_role_arn uses fargate_iam_role (the role that gets
  # the ecs-task-execution-role policy attached), while task_role_arn uses a
  # role literally named ecs_task_execution_role. Confirm the two are not
  # swapped and that each role carries the permissions it needs.
  execution_role_arn = aws_iam_role.fargate_iam_role.arn
  task_role_arn      = aws_iam_role.ecs_task_execution_role.arn
}

# IAM role used as the task definition's execution role. Its trust policy
# comes from the ecs-task-assume-role policy document.
resource "aws_iam_role" "fargate_iam_role" {
  name               = "fargate_iam_role"
  assume_role_policy = data.aws_iam_policy_document.ecs-task-assume-role.json
}

# Attaches the policy looked up as "ecs-task-execution-role" to fargate_iam_role.
resource "aws_iam_role_policy_attachment" "fargate_iam_role_policy" {
  policy_arn = data.aws_iam_policy.ecs-task-execution-role.arn
  role       = aws_iam_role.fargate_iam_role.name
}

# Security group attached to the service's tasks.
resource "aws_security_group" "app_security_group" {
  name        = "${var.ecs_service_name}-SG"
  description = "Security group for springbootapp to communicate in and out"
  vpc_id      = data.terraform_remote_state.platform.outputs.vpc_id

  # Inbound: only the container port, and only from inside the VPC.
  # The previous rule used from_port = 80 / to_port = 8080, which opens the
  # entire 80-8080 port RANGE rather than "ports 80 and 8080". Traffic reaches
  # the tasks on the container port the service registers with the load
  # balancer, so that single port is all that has to be reachable.
  ingress {
    from_port   = var.docker_container_port
    to_port     = var.docker_container_port
    protocol    = "tcp"
    cidr_blocks = [data.terraform_remote_state.platform.outputs.vpc_cidr_block]
  }

  # Outbound: unrestricted (all protocols, all ports, any destination).
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "${var.ecs_service_name}-SG"
  }
}

# CloudWatch log group "/ecs/sun-api" — the same group name the task
# definition's awslogs configuration writes to.
resource "aws_cloudwatch_log_group" "application_log_group" {
  name = "/ecs/sun-api"
}

Code Pipeline

# CodePipeline for the blue/green deployment: two source actions (GitHub
# repository and ECR image) followed by a CodeDeployToECS deploy action.
resource "aws_codepipeline" "codepipeline_for_blue_green_deployment" {
  name     = var.pipeline_name
  role_arn = aws_iam_role.codepipeline_roles.arn

  artifact_store {
    type     = var.artifact_store_type
    location = var.bucket_for_codepipeline
  }

  stage {
    name = "github_Source"

    # Source 1: the repository; produces "SourceArtifact", from which the
    # deploy action reads taskdef.json and appspec.yaml.
    action {
      name             = "github_Source"
      category         = "Source"
      owner            = var.source_stage_owner
      provider         = var.source_stage_provider
      version          = "1"
      output_artifacts = ["SourceArtifact"]

      configuration = {
        Owner                = var.git_hub_owner
        Repo                 = var.repo_name
        Branch               = var.branch_name
        OAuthToken           = var.github_token
        PollForSourceChanges = true
      }
    }

    # Source 2: the "latest" tag of the ECR repository; produces "MyImage".
    action {
      name             = "Image"
      category         = "Source"
      owner            = "AWS"
      provider         = "ECR"
      version          = "1"
      run_order        = 1
      output_artifacts = ["MyImage"]

      configuration = {
        RepositoryName = "umar-tahir-terraform-repo"
        ImageTag       = "latest"
      }
    }
  }

  stage {
    name = "Deploy"

    action {
      name            = "Deploy"
      category        = "Deploy"
      owner           = "AWS"
      provider        = "CodeDeployToECS"
      version         = "1"
      input_artifacts = ["SourceArtifact", "MyImage"]

      configuration = {
        ApplicationName     = aws_codedeploy_app.application_deploy.name
        DeploymentGroupName = aws_codedeploy_deployment_group.code_deployment_group.deployment_group_name

        # Both templates are read from the repository artifact.
        TaskDefinitionTemplateArtifact = "SourceArtifact"
        TaskDefinitionTemplatePath     = "taskdef.json"
        AppSpecTemplateArtifact        = "SourceArtifact"
        AppSpecTemplatePath            = "appspec.yaml"

        # "IMAGE1_NAME" is the placeholder name; the task definition template
        # refers to it as <IMAGE1_NAME>.
        Image1ArtifactName  = "MyImage"
        Image1ContainerName = "IMAGE1_NAME"
      }
    }
  }
}

Code Deploy

# CodeDeploy application that owns the blue/green deployment group.
resource "aws_codedeploy_app" "application_deploy" {
  name             = var.aws_codedeploy_app_name
  compute_platform = var.compute_platform
}

# CodeDeploy deployment group: ties the application to one ECS service and
# to the listener / target-group pair used for traffic shifting.
resource "aws_codedeploy_deployment_group" "code_deployment_group" {
  app_name               = aws_codedeploy_app.application_deploy.name
  deployment_group_name  = var.deployment_group_name
  deployment_config_name = var.deployment_config_name
  service_role_arn       = aws_iam_role.codedeploy_role_blue_green.arn

  deployment_style {
    deployment_type   = var.deployment_type
    deployment_option = var.deployment_option
  }

  # The ECS service this group deploys to.
  # NOTE(review): service_name is hard-coded — confirm it equals the name the
  # aws_ecs_service resource is created with.
  ecs_service {
    cluster_name = data.terraform_remote_state.aws_modules_state.outputs.ecs_cluster_name
    service_name = "generalapplication"
  }

  blue_green_deployment_config {
    deployment_ready_option {
      action_on_timeout = var.action_on_timeout
    }

    terminate_blue_instances_on_deployment_success {
      action = var.terminate_blue_instances_on_deployment_success_action
    }
  }

  load_balancer_info {
    target_group_pair_info {
      # NOTE(review): the output is called "listener_arns" and is wrapped in
      # a list here — confirm it is a single ARN string, not already a list.
      prod_traffic_route {
        listener_arns = [data.terraform_remote_state.aws_modules_state.outputs.listener_arns]
      }

      target_group {
        name = data.terraform_remote_state.aws_modules_state.outputs.green_target_group_name
      }

      target_group {
        name = data.terraform_remote_state.aws_modules_state.outputs.blue_target_group_name
      }
    }
  }

  # Roll back automatically when a deployment fails.
  auto_rollback_configuration {
    enabled = true
    events  = ["DEPLOYMENT_FAILURE"]
  }
}

appSpec.yml

# CodeDeploy AppSpec for an ECS deployment: describes the single target
# service and which container/port receives load-balancer traffic.
version: 0.0
Resources:
  - TargetService:
      Type: AWS::ECS::Service
      Properties:
        # Placeholder; replaced with the registered task definition at deploy time.
        TaskDefinition: <TASK_DEFINITION>
        LoadBalancerInfo:
          # Same container name and container port as in the task definition.
          ContainerName: "springboottaskdefinition"
          ContainerPort: 8080
        PlatformVersion: "LATEST"

task def

{
  "family": "springboottaskdefinition",
  "networkMode": "awsvpc",
  "requiresCompatibilities": [
    "FARGATE"
  ],
  "cpu": "512",
  "memory": "1024",
  "taskRoleArn": "arn-xxxx",
  "executionRoleArn": "arn-xxxx",
  "containerDefinitions": [
    {
      "name": "springboottaskdefinition",
      "image": "<IMAGE1_NAME>",
      "essential": true,
      "portMappings": [
        {
          "containerPort": 8080,
          "hostPort": 8080,
          "protocol": "tcp"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/sun-api",
          "awslogs-region": "us-east-1",
          "awslogs-stream-prefix": "springboottaskdefinition-LogGroup-stream"
        }
      }
    }
  ]
}
Umar Tahir
  • 585
  • 6
  • 21
  • 2
    Your replacement task never seems to get healthy. Can you please check the ECS Service Events (ECS Console under Service being updated) when the deployment is going on to get some clue of the errors if any faced during new task creation. – shariqmaws Apr 30 '20 at 17:51
  • Thanks, I solved the problem. There was a problem in my task definition roles. – Umar Tahir May 04 '20 at 07:34
  • For future reference, to anyone seeing this question: if you are getting this problem, it could have either of the following two causes: (1) issues in the task role, or (2) your image is not being pulled from ECR. – Umar Tahir May 04 '20 at 07:38

0 Answers0