The AWS Application Load Balancer saves log files into Amazon S3.
Amazon Athena can then be used to query the files saved in S3. The important part is knowing the file format.
See this excellent article: Athena & ALB Log Analysis
They use this query to create the table:
CREATE EXTERNAL TABLE IF NOT EXISTS logs.web_alb (
type string,
time string,
elb string,
client_ip string,
client_port string,
target string,
request_processing_time int,
target_processing_time int,
response_processing_time int,
elb_status_code int,
target_status_code string,
received_bytes int,
sent_bytes int,
request_verb string,
request_url string,
request_proto string,
user_agent string,
ssl_cipher string,
ssl_protocol string,
target_group_arn string,
trace_id string
)
PARTITIONED BY(year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1',
'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*) ([-0-9]*) ([-0-9]*) ([-0-9]*) ([-0-9]*) ([^ ]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) ([^ ]*)\" \"([^\"]*)\" ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)'
) LOCATION 's3://{{BUCKET}}/AWSLogs/{{ACCOUNT}}/elasticloadbalancing/us-east-1/';