Over the last few days, I have been trying to understand the Istio retry policy. I found a field named "retry-on", whose default value is below (I use Istio version 1.14.3).
RetryOn: "connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes"
I want to know which cases are included in "connect-failure". The documentation explains it like this:
connect-failure
- Envoy will attempt a retry if a request is failed because of a connection failure to the upstream server (connect timeout, etc.). (Included in 5xx)
- NOTE: A connection failure/timeout is at the TCP level, not the request level. This does not include upstream request timeouts specified via x-envoy-upstream-rq-timeout-ms or via route configuration or via virtual host retry policy.
So, I think it will retry if any error occurs at the TCP (transport) level. I tried to prove this by creating two pods in a Kubernetes cluster. The first is Nginx, which forwards every HTTP request to the second. The second is a Node.js TCP server that breaks the TCP connection whenever it receives an HTTP request with the "/error" path. Both are shown below.
Nginx
user nginx;
error_log /var/log/nginx/error.log warn;
events {
worker_connections 65535;
use epoll;
multi_accept on;
}
http {
log_format main escape=json
'{'
'"clientIP":"$remote_addr",'
'"time-local":"$time_local",'
'"server-port":"$server_port",'
'"message":"$request",'
'"statusCode":"$status",'
'"dataLength":"$body_bytes_sent",'
'"referer":"$http_referer",'
'"userAgent":"$http_user_agent",'
'"xForwardedFor":"$http_x_forwarded_for",'
'"upstream-response-time":"$upstream_response_time",'
'"correlation-id":"$http_x_correlation_id",'
'"user-tier":"$http_x_neo_user_tier",'
'"session-id":"$http_x_session_id"'
'}';
access_log /var/log/nginx/access.log main;
client_max_body_size 100m;
client_header_timeout 5m; # default 60s
client_body_timeout 5m; # default 60s
send_timeout 5m; # default 60s
proxy_connect_timeout 5m;
proxy_send_timeout 5m;
proxy_read_timeout 5m;
server {
listen 8080;
location / {
proxy_pass http://ice-node-service.neo-platform.svc.cluster.local:8080;
}
}
}
NodeJS
var net = require('net');
var server = net.createServer();
server.listen(8080, '127.0.0.1');
server.addListener('close', () => {
console.log('close');
})
server.addListener('connection', socket => {
console.log('connect');
socket.addListener('data', data => {
try {
const [method, path] = data.toString().split("\n")[0].split(" ")
console.log(method, path);
if (path === "/error") {
socket.destroy(new Error("force error"))
} else {
socket.write(respond())
socket.end()
}
} catch (e) {
console.log(e);
}
})
})
server.addListener('error', err => {
console.log('error', err);
})
server.addListener('listening', () => {
console.log('listening');
})
function respond() {
const body = `<html><body>Hello</body></html>`
return `HTTP/1.1 200 OK
Date: ${new Date().toGMTString()}
Server: Apache
Last-Modified: Tue, 01 Dec 2009 20:18:22 GMT
ETag: "51142bc1-7449-479b075b2891b"
Accept-Ranges: bytes
Content-Length: ${body.length + 2}
Content-Type: text/html
${body}\r\n`
}
So, I sent a request through Nginx to the Node.js server on the "/error" path. I expected Istio to resend the request when the TCP connection was broken, but it was not retried. I want to know why.