I am working on observability. I have an on-premises Windows Server 2016 machine on which I installed a web app that produces logs. I have chosen CloudWatch to monitor all the logs and metrics that the app produces, so I installed the CloudWatch agent on that server.
Here is the agent's configuration file, agent-cloudwatch-config.conf:
{
"agent": {
"metrics_collection_interval": 5,
"logfile": "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log",
"region": "eu-central-1",
"debug": true
},
"logs": {
"logs_collected": {
"files": {
"collect_list": [
{
"file_path": "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log",
"log_group_name": "amazon-cloudwatch-agent-group-log.log",
"log_stream_name": "amazon-cloudwatch-agent-stream-log.log",
"timezone": "UTC"
},
{
"file_path": "C:\\Users\\michael.ranivo\\Docuements\\Monitoring\\Middleware\\questions.txt",
"log_group_name": "test-middleware-group-logs",
"log_stream_name": "test-middleware-stream-logs",
"timezone":"Local"
}
]
}
},
"force_flush_interval" : 5
},
"metrics": {
"metrics_collected": {
"namespace": "test-middleware-metrics",
"statsd": {},
"Processor": {
"measurement": [
{"name": "% Idle Time", "rename": "CPU_IDLE", "unit": "Percent"},
"% Interrupt Time",
"% User Time",
"% Processor Time"
],
"resources": [
"*"
],
"append_dimensions": {
"d1": "win_foo",
"d2": "win_bar"
}
},
"LogicalDisk": {
"measurement": [
{"name": "% Idle Time", "unit": "Percent"},
{"name": "% Disk Read Time", "rename": "DISK_READ"},
"% Disk Write Time"
],
"resources": [
"*"
]
},
"Memory": {
"metrics_collection_interval": 5,
"measurement": [
"Available Bytes",
"Cache Faults/sec",
"Page Faults/sec",
"Pages/sec"
],
"append_dimensions": {
"d3": "win_bo"
}
},
"Network Interface": {
"metrics_collection_interval": 5,
"measurement": [
"Bytes Received/sec",
"Bytes Sent/sec",
"Packets Received/sec",
"Packets Sent/sec"
],
"resources": [
"*"
],
"append_dimensions": {
"d3": "win_bo"
}
},
"System": {
"measurement": [
"Context Switches/sec",
"System Calls/sec",
"Processor Queue Length"
],
"append_dimensions": {
"d1": "win_foo",
"d2": "win_bar"
}
}
},
"append_dimensions": {
"ImageId": "${aws:ImageId}",
"InstanceId": "${aws:InstanceId}",
"InstanceType": "${aws:InstanceType}",
"AutoScalingGroupName": "${aws:AutoScalingGroupName}"
},
"aggregation_dimensions" : [["ImageId"], ["InstanceId", "InstanceType"], ["d1"],[]]
}
}
I launch the agent with this command:
& "C:\Program Files\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent-ctl.ps1" -a fetch-config -m onPremise -s -c file:"C:\\ProgramData\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent.json"
The agent starts, but when I look at the agent's log file, amazon-cloudwatch-agent.log, I see this:
2023/01/19 16:56:29 I! Config has been translated into TOML C:\ProgramData\Amazon\AmazonCloudWatchAgent\\amazon-cloudwatch-agent.toml
2023/01/19 16:56:29 D! toml config [agent]
collection_jitter = "0s"
debug = true
flush_interval = "1s"
flush_jitter = "0s"
hostname = ""
interval = "5s"
logfile = "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log"
logtarget = "lumberjack"
metric_batch_size = 1000
metric_buffer_limit = 10000
omit_hostname = false
precision = ""
quiet = false
round_interval = false
[inputs]
[[inputs.logfile]]
destination = "cloudwatchlogs"
file_state_folder = "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\state"
[[inputs.logfile.file_config]]
file_path = "C:\\ProgramData\\Amazon\\AmazonCloudWatchAgent\\Logs\\amazon-cloudwatch-agent.log"
from_beginning = true
log_group_name = "server-perso-amazon-cloudwatch-agent-group-log.log"
log_stream_name = "server-perso-amazon-cloudwatch-agent-stream-log.log"
pipe = false
retention_in_days = -1
timezone = "UTC"
[[inputs.logfile.file_config]]
file_path = "C:\\Users\\leka\\Documents\\tests.txt"
from_beginning = true
log_group_name = "server-perso-test-middleware-group-logs"
log_stream_name = "server-perso-test-middleware-stream-logs"
pipe = false
retention_in_days = -1
timezone = "LOCAL"
[inputs.logfile.tags]
metricPath = "logs"
[[inputs.statsd]]
interval = "10s"
parse_data_dog_tags = true
service_address = ":8125"
[inputs.statsd.tags]
"aws:AggregationInterval" = "60s"
metricPath = "metrics"
[[inputs.win_perf_counters]]
DisableReplacer = true
[[inputs.win_perf_counters.object]]
Counters = ["% Idle Time", "% Disk Read Time", "% Disk Write Time"]
Instances = ["*"]
Measurement = "LogicalDisk"
ObjectName = "LogicalDisk"
WarnOnMissing = true
[inputs.win_perf_counters.tags]
"aws:StorageResolution" = "true"
metricPath = "metrics"
[[inputs.win_perf_counters]]
DisableReplacer = true
interval = "5s"
[[inputs.win_perf_counters.object]]
Counters = ["Available Bytes", "Cache Faults/sec", "Page Faults/sec", "Pages/sec"]
Instances = ["------"]
Measurement = "Memory"
ObjectName = "Memory"
WarnOnMissing = true
[[inputs.win_perf_counters.object]]
Counters = ["Bytes Received/sec", "Bytes Sent/sec", "Packets Received/sec", "Packets Sent/sec"]
Instances = ["*"]
Measurement = "Network Interface"
ObjectName = "Network Interface"
WarnOnMissing = true
[inputs.win_perf_counters.tags]
"aws:StorageResolution" = "true"
d3 = "win_bo"
metricPath = "metrics"
[[inputs.win_perf_counters]]
DisableReplacer = true
[[inputs.win_perf_counters.object]]
Counters = ["% Idle Time", "% Interrupt Time", "% User Time", "% Processor Time"]
Instances = ["*"]
Measurement = "Processor"
ObjectName = "Processor"
WarnOnMissing = true
[[inputs.win_perf_counters.object]]
Counters = ["Context Switches/sec", "System Calls/sec", "Processor Queue Length"]
Instances = ["------"]
Measurement = "System"
ObjectName = "System"
WarnOnMissing = true
[inputs.win_perf_counters.tags]
"aws:StorageResolution" = "true"
d1 = "win_foo"
d2 = "win_bar"
metricPath = "metrics"
[outputs]
[[outputs.cloudwatch]]
force_flush_interval = "60s"
namespace = "server-perso-test-middleware-metrics"
profile = "default"
region = "eu-central-1"
rollup_dimensions = [["ImageId"], ["InstanceId", "InstanceType"], ["d1"], []]
shared_credential_file = "C:\\Users\\leka\\.aws\\credentials"
tagexclude = ["host", "metricPath"]
[[outputs.cloudwatch.metric_decoration]]
category = "LogicalDisk"
name = "% Idle Time"
unit = "Percent"
[[outputs.cloudwatch.metric_decoration]]
category = "LogicalDisk"
name = "% Disk Read Time"
rename = "DISK_READ"
[[outputs.cloudwatch.metric_decoration]]
category = "Processor"
name = "% Idle Time"
rename = "CPU_IDLE"
unit = "Percent"
[outputs.cloudwatch.tagpass]
metricPath = ["metrics"]
[[outputs.cloudwatchlogs]]
force_flush_interval = "5s"
log_stream_name = "wind"
profile = "default"
region = "eu-central-1"
shared_credential_file = "C:\\Users\\leka\\.aws\\credentials"
tagexclude = ["metricPath"]
[outputs.cloudwatchlogs.tagpass]
metricPath = ["logs"]
[processors]
[[processors.ec2tagger]]
ec2_instance_tag_keys = ["aws:autoscaling:groupName"]
ec2_metadata_tags = ["ImageId", "InstanceId", "InstanceType"]
profile = "default"
refresh_interval_seconds = "0s"
shared_credential_file = "C:\\Users\\leka\\.aws\\credentials"
[processors.ec2tagger.tagpass]
metricPath = ["metrics"]
2023-01-19T15:56:29Z I! Starting AmazonCloudWatchAgent 1.247357.0
2023-01-19T15:56:29Z I! AWS SDK log level not set
2023-01-19T15:56:29Z I! Loaded inputs: logfile statsd win_perf_counters (3x)
2023-01-19T15:56:29Z I! Loaded aggregators:
2023-01-19T15:56:29Z I! Loaded processors: ec2tagger
2023-01-19T15:56:29Z I! Loaded outputs: cloudwatch cloudwatchlogs
2023-01-19T15:56:29Z I! Tags enabled: host=wind
2023-01-19T15:56:29Z I! [agent] Config: Interval:5s, Quiet:false, Hostname:"wind", Flush Interval:1s
2023-01-19T15:56:29Z D! [agent] Initializing plugins
2023-01-19T15:56:29Z I! [processors.ec2tagger] ec2tagger: Check EC2 Metadata.
2023-01-19T15:56:29Z D! Successfully created credential sessions
2023-01-19T15:56:29Z I! [logagent] starting
2023-01-19T15:56:29Z I! [logagent] found plugin cloudwatchlogs is a log backend
2023-01-19T15:56:29Z I! [logagent] found plugin logfile is a log collection
2023-01-19T15:56:30Z D! [logagent] open file count, 0
2023-01-19T15:56:31Z D! [logagent] open file count, 0
2023-01-19T15:56:32Z D! [logagent] open file count, 0
2023-01-19T15:56:33Z D! [logagent] open file count, 0
2023-01-19T15:56:34Z D! [logagent] open file count, 0
2023-01-19T15:56:35Z D! [logagent] open file count, 0
2023-01-19T15:56:36Z D! [logagent] open file count, 0
2023-01-19T15:56:37Z D! [logagent] open file count, 0
2023-01-19T15:56:38Z D! [logagent] open file count, 0
2023-01-19T15:56:39Z D! [logagent] open file count, 0
2023-01-19T15:56:40Z D! [logagent] open file count, 0
2023-01-19T15:56:41Z D! [logagent] open file count, 0
2023-01-19T15:56:42Z D! [logagent] open file count, 0
2023-01-19T15:56:43Z D! [logagent] open file count, 0
2023-01-19T15:56:44Z D! [logagent] open file count, 0
2023-01-19T15:56:45Z D! [logagent] open file count, 0
2023-01-19T15:56:46Z D! [logagent] open file count, 0
2023-01-19T15:56:47Z D! [logagent] open file count, 0
2023-01-19T15:56:48Z D! [logagent] open file count, 0
2023-01-19T15:56:49Z D! [logagent] open file count, 0
2023-01-19T15:56:50Z D! [logagent] open file count, 0
2023-01-19T15:56:51Z D! [logagent] open file count, 0
2023-01-19T15:56:52Z D! [logagent] open file count, 0
2023-01-19T15:56:54Z D! [logagent] open file count, 0
2023-01-19T15:56:54Z D! [logagent] open file count, 0
2023-01-19T15:56:55Z D! [logagent] open file count, 0
2023-01-19T15:56:56Z D! [logagent] open file count, 0
2023-01-19T15:56:57Z D! [logagent] open file count, 0
2023-01-19T15:56:58Z D! [logagent] open file count, 0
2023-01-19T15:56:59Z D! [logagent] open file count, 0
2023-01-19T15:56:59Z I! CWAGENT_LOG_LEVEL is set to "DEBUG"
2023-01-19T15:57:00Z D! [logagent] open file count, 0
2023-01-19T15:57:01Z D! [logagent] open file count, 0
2023-01-19T15:57:02Z D! [logagent] open file count, 0
2023-01-19T15:57:03Z D! [logagent] open file count, 0
2023-01-19T15:57:04Z D! [logagent] open file count, 0
2023-01-19T15:57:05Z D! [logagent] open file count, 0
2023-01-19T15:57:06Z D! [logagent] open file count, 0
2023-01-19T15:57:07Z D! [logagent] open file count, 0
2023-01-19T15:59:18Z E! Failed to get credential from session: NoCredentialProviders: no valid providers in chain
caused by: EnvAccessKeyNotFound: failed to find credentials in the environment.
SharedCredsLoad: failed to load profile, .
EC2RoleRequestError: no EC2 instance role found
caused by: RequestError: send request failed
caused by: Get "http://169.254.169.254/latest/meta-data/iam/security-credentials/": dial tcp 169.254.169.254:80: connectex: Une tentative de connexion a échoué car le parti connecté n’a pas répondu convenablement au-delà d’une certaine durée ou une connexion établie a échoué car l’hôte de connexion n’a pas répondu.
2023-01-19T15:59:18Z D! [logagent] open file count, 0
2023-01-19T15:59:19Z D! [logagent] open file count, 0
2023-01-19T15:59:20Z D! [logagent] open file count, 0
2023-01-19T15:59:21Z D! [logagent] open file count, 0
2023-01-19T15:59:22Z D! [logagent] open file count, 0
2023-01-19T15:59:23Z D! [logagent] open file count, 0
2023-01-19T15:59:24Z D! [logagent] open file count, 0
2023-01-19T15:59:25Z E! [processors.ec2tagger] ec2tagger: Unable to retrieve EC2 Metadata. This plugin must only be used on an EC2 instance.
2023-01-19T15:59:25Z E! [telegraf] Error running agent: could not initialize processor processors.ec2tagger: EC2MetadataRequestError: failed to get EC2 instance identity document
caused by: RequestError: send request failed
caused by: Get "http://169.254.169.254/latest/dynamic/instance-identity/document": context deadline exceeded (Client.Timeout exceeded while awaiting headers)
2023/01/19 16:59:25 E! Error when starting Agent, Error is exit status 1
The metrics section of the agent configuration file is copied from the AWS documentation for setting up the CloudWatch agent on an on-premises Windows server. When I remove this metrics section, the agent does send the logs to CloudWatch.
Thanks for reading.
I'm looking for help resolving this issue.