0

I am working on observability. I have an on-premises Windows Server 2016 machine on which I installed a web app that produces logs. I have chosen CloudWatch to monitor all the logs and metrics that the app produces, and I installed the CloudWatch agent on that server.

Here is the agent's configuration file, agent-cloudwatch-config.conf:

{
  "agent": {
    "metrics_collection_interval": 5,
    "logfile": "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log",
    "region": "eu-central-1",
    "debug": true
  },
  "logs": {
    "logs_collected": {
      "files": {
        "collect_list": [
          {
            "file_path": "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log",
            "log_group_name": "amazon-cloudwatch-agent-group-log.log",
            "log_stream_name": "amazon-cloudwatch-agent-stream-log.log",
            "timezone": "UTC"
          },
          {
            "file_path": "C:\\Users\\michael.ranivo\\Docuements\\Monitoring\\Middleware\\questions.txt",
            "log_group_name": "test-middleware-group-logs",
            "log_stream_name": "test-middleware-stream-logs",
            "timezone":"Local"
          }
        ]
      }
    },
    "force_flush_interval" : 5
  },
  "metrics": {
    "metrics_collected": {
      "namespace": "test-middleware-metrics",
      "statsd": {},
      "Processor": {
        "measurement": [
          {"name": "% Idle Time", "rename": "CPU_IDLE", "unit": "Percent"},
          "% Interrupt Time",
          "% User Time",
          "% Processor Time"
        ],
        "resources": [
          "*"
        ],
        "append_dimensions": {
          "d1": "win_foo",
          "d2": "win_bar"
        }
      },
      "LogicalDisk": {
        "measurement": [
          {"name": "% Idle Time", "unit": "Percent"},
          {"name": "% Disk Read Time", "rename": "DISK_READ"},
          "% Disk Write Time"
        ],
        "resources": [
          "*"
        ]
      },
      "Memory": {
        "metrics_collection_interval": 5,
        "measurement": [
          "Available Bytes",
          "Cache Faults/sec",
          "Page Faults/sec",
          "Pages/sec"
        ],
        "append_dimensions": {
          "d3": "win_bo"
        }
      },
      "Network Interface": {
        "metrics_collection_interval": 5,
        "measurement": [
          "Bytes Received/sec",
          "Bytes Sent/sec",
          "Packets Received/sec",
          "Packets Sent/sec"
        ],
        "resources": [
          "*"
        ],
        "append_dimensions": {
          "d3": "win_bo"
        }
      },
      "System": {
        "measurement": [
          "Context Switches/sec",
          "System Calls/sec",
          "Processor Queue Length"
        ],
        "append_dimensions": {
          "d1": "win_foo",
          "d2": "win_bar"
        }
      }
    },
    "append_dimensions": {
      "ImageId": "${aws:ImageId}",
      "InstanceId": "${aws:InstanceId}",
      "InstanceType": "${aws:InstanceType}",
      "AutoScalingGroupName": "${aws:AutoScalingGroupName}"
    },
    "aggregation_dimensions" : [["ImageId"], ["InstanceId", "InstanceType"], ["d1"],[]]
  }
}

I launch the agent with this command:

& "C:\Program Files\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent-ctl.ps1" -a fetch-config -m onPremise -s -c file:"C:\\ProgramData\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent.json"

The agent starts, but when I look at the agent’s log file, amazon-cloudwatch-agent.log, I see this:

2023/01/19 16:56:29 I! Config has been translated into TOML C:\ProgramData\Amazon\AmazonCloudWatchAgent\\amazon-cloudwatch-agent.toml 
2023/01/19 16:56:29 D! toml config [agent]
  collection_jitter = "0s"
  debug = true
  flush_interval = "1s"
  flush_jitter = "0s"
  hostname = ""
  interval = "5s"
  logfile = "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log"
  logtarget = "lumberjack"
  metric_batch_size = 1000
  metric_buffer_limit = 10000
  omit_hostname = false
  precision = ""
  quiet = false
  round_interval = false

[inputs]

  [[inputs.logfile]]
    destination = "cloudwatchlogs"
    file_state_folder = "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\state"

    [[inputs.logfile.file_config]]
      file_path = "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log"
      from_beginning = true
      log_group_name = "server-perso-amazon-cloudwatch-agent-group-log.log"
      log_stream_name = "server-perso-amazon-cloudwatch-agent-stream-log.log"
      pipe = false
      retention_in_days = -1
      timezone = "UTC"

    [[inputs.logfile.file_config]]
      file_path = "C:\\Users\\leka\\Documents\\tests.txt"
      from_beginning = true
      log_group_name = "server-perso-test-middleware-group-logs"
      log_stream_name = "server-perso-test-middleware-stream-logs"
      pipe = false
      retention_in_days = -1
      timezone = "LOCAL"
    [inputs.logfile.tags]
      metricPath = "logs"

  [[inputs.statsd]]
    interval = "10s"
    parse_data_dog_tags = true
    service_address = ":8125"
    [inputs.statsd.tags]
      "aws:AggregationInterval" = "60s"
      metricPath = "metrics"

  [[inputs.win_perf_counters]]
    DisableReplacer = true

    [[inputs.win_perf_counters.object]]
      Counters = ["% Idle Time", "% Disk Read Time", "% Disk Write Time"]
      Instances = ["*"]
      Measurement = "LogicalDisk"
      ObjectName = "LogicalDisk"
      WarnOnMissing = true
    [inputs.win_perf_counters.tags]
      "aws:StorageResolution" = "true"
      metricPath = "metrics"

  [[inputs.win_perf_counters]]
    DisableReplacer = true
    interval = "5s"

    [[inputs.win_perf_counters.object]]
      Counters = ["Available Bytes", "Cache Faults/sec", "Page Faults/sec", "Pages/sec"]
      Instances = ["------"]
      Measurement = "Memory"
      ObjectName = "Memory"
      WarnOnMissing = true

    [[inputs.win_perf_counters.object]]
      Counters = ["Bytes Received/sec", "Bytes Sent/sec", "Packets Received/sec", "Packets Sent/sec"]
      Instances = ["*"]
      Measurement = "Network Interface"
      ObjectName = "Network Interface"
      WarnOnMissing = true
    [inputs.win_perf_counters.tags]
      "aws:StorageResolution" = "true"
      d3 = "win_bo"
      metricPath = "metrics"

  [[inputs.win_perf_counters]]
    DisableReplacer = true

    [[inputs.win_perf_counters.object]]
      Counters = ["% Idle Time", "% Interrupt Time", "% User Time", "% Processor Time"]
      Instances = ["*"]
      Measurement = "Processor"
      ObjectName = "Processor"
      WarnOnMissing = true

    [[inputs.win_perf_counters.object]]
      Counters = ["Context Switches/sec", "System Calls/sec", "Processor Queue Length"]
      Instances = ["------"]
      Measurement = "System"
      ObjectName = "System"
      WarnOnMissing = true
    [inputs.win_perf_counters.tags]
      "aws:StorageResolution" = "true"
      d1 = "win_foo"
      d2 = "win_bar"
      metricPath = "metrics"

[outputs]

  [[outputs.cloudwatch]]
    force_flush_interval = "60s"
    namespace = "server-perso-test-middleware-metrics"
    profile = "default"
    region = "eu-central-1"
    rollup_dimensions = [["ImageId"], ["InstanceId", "InstanceType"], ["d1"], []]
    shared_credential_file = "C:\\Users\\leka\\.aws\\credentials"
    tagexclude = ["host", "metricPath"]

    [[outputs.cloudwatch.metric_decoration]]
      category = "LogicalDisk"
      name = "% Idle Time"
      unit = "Percent"

    [[outputs.cloudwatch.metric_decoration]]
      category = "LogicalDisk"
      name = "% Disk Read Time"
      rename = "DISK_READ"

    [[outputs.cloudwatch.metric_decoration]]
      category = "Processor"
      name = "% Idle Time"
      rename = "CPU_IDLE"
      unit = "Percent"
    [outputs.cloudwatch.tagpass]
      metricPath = ["metrics"]

  [[outputs.cloudwatchlogs]]
    force_flush_interval = "5s"
    log_stream_name = "wind"
    profile = "default"
    region = "eu-central-1"
    shared_credential_file = "C:\\Users\\leka\\.aws\\credentials"
    tagexclude = ["metricPath"]
    [outputs.cloudwatchlogs.tagpass]
      metricPath = ["logs"]

[processors]

  [[processors.ec2tagger]]
    ec2_instance_tag_keys = ["aws:autoscaling:groupName"]
    ec2_metadata_tags = ["ImageId", "InstanceId", "InstanceType"]
    profile = "default"
    refresh_interval_seconds = "0s"
    shared_credential_file = "C:\\Users\\leka\\.aws\\credentials"
    [processors.ec2tagger.tagpass]
      metricPath = ["metrics"]
2023-01-19T15:56:29Z I! Starting AmazonCloudWatchAgent 1.247357.0
2023-01-19T15:56:29Z I! AWS SDK log level not set
2023-01-19T15:56:29Z I! Loaded inputs: logfile statsd win_perf_counters (3x)
2023-01-19T15:56:29Z I! Loaded aggregators: 
2023-01-19T15:56:29Z I! Loaded processors: ec2tagger
2023-01-19T15:56:29Z I! Loaded outputs: cloudwatch cloudwatchlogs
2023-01-19T15:56:29Z I! Tags enabled: host=wind
2023-01-19T15:56:29Z I! [agent] Config: Interval:5s, Quiet:false, Hostname:"wind", Flush Interval:1s
2023-01-19T15:56:29Z D! [agent] Initializing plugins
2023-01-19T15:56:29Z I! [processors.ec2tagger] ec2tagger: Check EC2 Metadata.
2023-01-19T15:56:29Z D! Successfully created credential sessions
2023-01-19T15:56:29Z I! [logagent] starting
2023-01-19T15:56:29Z I! [logagent] found plugin cloudwatchlogs is a log backend
2023-01-19T15:56:29Z I! [logagent] found plugin logfile is a log collection
2023-01-19T15:56:30Z D! [logagent] open file count, 0
2023-01-19T15:56:31Z D! [logagent] open file count, 0
2023-01-19T15:56:32Z D! [logagent] open file count, 0
2023-01-19T15:56:33Z D! [logagent] open file count, 0
2023-01-19T15:56:34Z D! [logagent] open file count, 0
2023-01-19T15:56:35Z D! [logagent] open file count, 0
2023-01-19T15:56:36Z D! [logagent] open file count, 0
2023-01-19T15:56:37Z D! [logagent] open file count, 0
2023-01-19T15:56:38Z D! [logagent] open file count, 0
2023-01-19T15:56:39Z D! [logagent] open file count, 0
2023-01-19T15:56:40Z D! [logagent] open file count, 0
2023-01-19T15:56:41Z D! [logagent] open file count, 0
2023-01-19T15:56:42Z D! [logagent] open file count, 0
2023-01-19T15:56:43Z D! [logagent] open file count, 0
2023-01-19T15:56:44Z D! [logagent] open file count, 0
2023-01-19T15:56:45Z D! [logagent] open file count, 0
2023-01-19T15:56:46Z D! [logagent] open file count, 0
2023-01-19T15:56:47Z D! [logagent] open file count, 0
2023-01-19T15:56:48Z D! [logagent] open file count, 0
2023-01-19T15:56:49Z D! [logagent] open file count, 0
2023-01-19T15:56:50Z D! [logagent] open file count, 0
2023-01-19T15:56:51Z D! [logagent] open file count, 0
2023-01-19T15:56:52Z D! [logagent] open file count, 0
2023-01-19T15:56:54Z D! [logagent] open file count, 0
2023-01-19T15:56:54Z D! [logagent] open file count, 0
2023-01-19T15:56:55Z D! [logagent] open file count, 0
2023-01-19T15:56:56Z D! [logagent] open file count, 0
2023-01-19T15:56:57Z D! [logagent] open file count, 0
2023-01-19T15:56:58Z D! [logagent] open file count, 0
2023-01-19T15:56:59Z D! [logagent] open file count, 0
2023-01-19T15:56:59Z I! CWAGENT_LOG_LEVEL is set to "DEBUG"
2023-01-19T15:57:00Z D! [logagent] open file count, 0
2023-01-19T15:57:01Z D! [logagent] open file count, 0
2023-01-19T15:57:02Z D! [logagent] open file count, 0
2023-01-19T15:57:03Z D! [logagent] open file count, 0
2023-01-19T15:57:04Z D! [logagent] open file count, 0
2023-01-19T15:57:05Z D! [logagent] open file count, 0
2023-01-19T15:57:06Z D! [logagent] open file count, 0
2023-01-19T15:57:07Z D! [logagent] open file count, 0
2023-01-19T15:59:18Z E! Failed to get credential from session: NoCredentialProviders: no valid providers in chain
caused by: EnvAccessKeyNotFound: failed to find credentials in the environment.
SharedCredsLoad: failed to load profile, .
EC2RoleRequestError: no EC2 instance role found
caused by: RequestError: send request failed
caused by: Get "http://169.254.169.254/latest/meta-data/iam/security-credentials/": dial tcp 169.254.169.254:80: connectex: Une tentative de connexion a échoué car le parti connecté n’a pas répondu convenablement au-delà d’une certaine durée ou une connexion établie a échoué car l’hôte de connexion n’a pas répondu.
2023-01-19T15:59:18Z D! [logagent] open file count, 0
2023-01-19T15:59:19Z D! [logagent] open file count, 0
2023-01-19T15:59:20Z D! [logagent] open file count, 0
2023-01-19T15:59:21Z D! [logagent] open file count, 0
2023-01-19T15:59:22Z D! [logagent] open file count, 0
2023-01-19T15:59:23Z D! [logagent] open file count, 0
2023-01-19T15:59:24Z D! [logagent] open file count, 0
2023-01-19T15:59:25Z E! [processors.ec2tagger] ec2tagger: Unable to retrieve EC2 Metadata. This plugin must only be used on an EC2 instance.
2023-01-19T15:59:25Z E! [telegraf] Error running agent: could not initialize processor processors.ec2tagger: EC2MetadataRequestError: failed to get EC2 instance identity document
caused by: RequestError: send request failed
caused by: Get "http://169.254.169.254/latest/dynamic/instance-identity/document": context deadline exceeded (Client.Timeout exceeded while awaiting headers)
2023/01/19 16:59:25 E! Error when starting Agent, Error is exit status 1 

The metrics part of the agent configuration file is copied from the AWS documentation for setting up the CloudWatch agent on an on-premises Windows server. When I remove this metrics part, the agent does send the logs to CloudWatch.

Thanks for reading.

I'm looking for help resolving this issue.

0 Answers0