I currently have a problem with varnish 7.0.3 on ubuntu 22.04.
I'm using varnish to serve Magento frontend. Right now I have a problem with the varnish Transient storage. It keeps increasing and never goes down. It ended up consuming all the memory in my server, then swap, until the OS killed the process.
I then added a limit to the Varnish Transient memory.
The varnish.service now looks like this:
[Service]
Type=forking
KillMode=process
Environment="MALLOC_CONF=thp:never,narenas:2"
# Maximum number of open files (for ulimit -n)
LimitNOFILE=131072
# Locked shared memory - should suffice to lock the shared memory log
# (varnishd -l argument)
# Default log size is 80MB vsl + 1M vsm + header -> 82MB
# unit is bytes
LimitMEMLOCK=85983232
# Enable this to avoid "fork failed" on reload.
TasksMax=infinity
# Maximum size of the corefile.
LimitCORE=infinity
ExecStart=/usr/sbin/varnishd \
-a :80 \
-p feature=+http2 \
-p feature=+esi_ignore_other_elements \
-p feature=+esi_disable_xml_check \
-p feature=+esi_ignore_https \
-p http_req_size=256k \
-p http_resp_hdr_len=128k \
-p http_resp_size=512k \
-p workspace_backend=512k \
-p workspace_client=256k \
-f /etc/varnish/default.vcl \
-s Cache=malloc,2g \
-s Transient=malloc,512m
ExecReload=/usr/sbin/varnishreload
[Install]
WantedBy=multi-user.target
My vcl looks like this:
vcl 4.0;
import std;
# The minimal Varnish version is 6.0
# For SSL offloading, pass the following header in your proxy server or load balancer: 'X-Forwarded-Proto: https'
backend default {
.host = "xxx.xx.xx.xx";
.port = "80";
.first_byte_timeout = 30s;
.probe = {
.url = "/health_check.php";
.timeout = 2s;
.interval = 2m;
.window = 10;
.threshold = 5;
}
}
acl purge {
"xx.xxx.xx.xx";
}
sub vcl_recv {
if (req.restarts > 0) {
set req.hash_always_miss = true;
}
if (req.method == "PURGE") {
if (client.ip !~ purge) {
return (synth(405, "Method not allowed"));
}
# To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
# has been added to the response in your backend server config. This is used, for example, by the
# capistrano-magento2 gem for purging old content from varnish during it's deploy routine.
if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
}
if (req.http.X-Magento-Tags-Pattern) {
ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
}
if (req.http.X-Pool) {
ban("obj.http.X-Pool ~ " + req.http.X-Pool);
}
return (synth(200, "Purged"));
}
if (req.method != "GET" &&
req.method != "HEAD" &&
req.method != "PUT" &&
req.method != "POST" &&
req.method != "TRACE" &&
req.method != "OPTIONS" &&
req.method != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
# We only deal with GET and HEAD by default
if (req.method != "GET" && req.method != "HEAD") {
return (pass);
}
# Bypass customer, shopping cart, checkout
if (req.url ~ "/customer" || req.url ~ "/checkout") {
return (pass);
}
# Bypass health check requests
if (req.url ~ "^/(pub/)?(health_check.php)$") {
return (pass);
}
# Set initial grace period usage status
set req.http.grace = "none";
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://", "");
# collect all cookies
std.collect(req.http.Cookie);
# Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
unset req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unknown algorithm
unset req.http.Accept-Encoding;
}
}
# Remove all marketing get parameters to minimize the cache objects
if (req.url ~ "(\?|&)(gclid|cx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=") {
set req.url = regsuball(req.url, "(gclid|cx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=[-_A-z0-9+()%.]+&?", "");
set req.url = regsub(req.url, "[?|&]+$", "");
}
# Static files caching
if (req.url ~ "^/(pub/)?(media|static)/") {
# Static files should not be cached by default
return (pass);
# But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
#unset req.http.Https;
#unset req.http.X-Forwarded-Proto;
#unset req.http.Cookie;
}
# Bypass authenticated GraphQL requests without a X-Magento-Cache-Id
if (req.url ~ "/graphql" && !req.http.X-Magento-Cache-Id && req.http.Authorization ~ "^Bearer") {
return (pass);
}
return (hash);
}
sub vcl_hash {
if ((req.url !~ "/graphql" || !req.http.X-Magento-Cache-Id) && req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}
# To make sure http users don't see ssl warning
if (req.http.X-Forwarded-Proto) {
hash_data(req.http.X-Forwarded-Proto);
}
if (req.url ~ "/graphql") {
call process_graphql_headers;
}
}
sub process_graphql_headers {
if (req.http.X-Magento-Cache-Id) {
hash_data(req.http.X-Magento-Cache-Id);
# When the frontend stops sending the auth token, make sure users stop getting results cached for logged-in users
if (req.http.Authorization ~ "^Bearer") {
hash_data("Authorized");
}
}
if (req.http.Store) {
hash_data(req.http.Store);
}
if (req.http.Content-Currency) {
hash_data(req.http.Content-Currency);
}
}
sub vcl_backend_response {
set beresp.grace = 3d;
if (beresp.http.content-type ~ "text") {
set beresp.do_esi = true;
}
if (bereq.url ~ "\.js$" || beresp.http.content-type ~ "text") {
set beresp.do_gzip = true;
}
if (beresp.http.X-Magento-Debug) {
set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
}
# cache only successfully responses and 404s that are not marked as private
if (beresp.status != 200 &&
beresp.status != 404 &&
beresp.http.Cache-Control ~ "private") {
set beresp.uncacheable = true;
set beresp.ttl = 86400s;
return (deliver);
}
# validate if we need to cache it and prevent from setting cookie
if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
unset beresp.http.set-cookie;
}
# If page is not cacheable then bypass varnish for 2 minutes as Hit-For-Pass
if (beresp.ttl <= 0s ||
beresp.http.Surrogate-control ~ "no-store" ||
(!beresp.http.Surrogate-Control &&
beresp.http.Cache-Control ~ "no-cache|no-store") ||
beresp.http.Vary == "*") {
# Mark as Hit-For-Pass for the next 2 minutes
set beresp.ttl = 120s;
set beresp.uncacheable = true;
}
# If the cache key in the Magento response doesn't match the one that was sent in the request, don't cache under the request's key
if (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id) {
set beresp.ttl = 0s;
set beresp.uncacheable = true;
}
return (deliver);
}
sub vcl_deliver {
if (resp.http.x-varnish ~ " ") {
set resp.http.X-Magento-Cache-Debug = "HIT";
set resp.http.Grace = req.http.grace;
} else {
set resp.http.X-Magento-Cache-Debug = "MISS";
}
# Not letting browser to cache non-static files.
if (resp.http.Cache-Control !~ "private" && req.url !~ "^/(pub/)?(media|static|repository)/") {
set resp.http.Pragma = "no-cache";
set resp.http.Expires = "-1";
set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, max-age=0";
}
if (!resp.http.X-Magento-Debug) {
unset resp.http.Age;
}
unset resp.http.X-Magento-Debug;
unset resp.http.X-Magento-Tags;
unset resp.http.X-Powered-By;
unset resp.http.Server;
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.Link;
}
sub vcl_hit {
if (obj.ttl >= 0s) {
# Hit within TTL period
return (deliver);
}
if (std.healthy(req.backend_hint)) {
if (obj.ttl + 300s > 0s) {
# Hit after TTL expiration, but within grace period
set req.http.grace = "normal (healthy server)";
return (deliver);
} else {
# Hit after TTL and grace expiration
return (restart);
}
} else {
# server is not healthy, retrieve from cache
set req.http.grace = "unlimited (unhealthy server)";
return (deliver);
}
}
My current storage usage acording to varnishstat:
SMA.Cache.c_req 560519 3.00 5.09 6.84 6.67 6.67
SMA.Cache.c_bytes 5.41G 26.28K 51.53K 68.42K 66.72K 66.72KSMA.Cache.c_freed 5.11G 15.97K 48.68K 52.50K 74.28K 74.28KSMA.Cache.g_alloc 65665 2.00 . 65647.17 65661.34 65661.34
SMA.Cache.g_bytes 307.05M 10.30K . 306.92M 307.09M 307.09MSMA.Cache.g_space 1.70G -10.30K . 1.70G 1.70G 1.70GSMA.Transient.c_req 909978 14.98 8.26 8.48 11.18 11.18
SMA.Transient.c_fail 3579 0.00 0.03 0.56 1.64 1.64
SMA.Transient.c_bytes 4.32G 110.74K 41.16K 57.47K 68.57K 68.57KSMA.Transient.c_freed 3.82G 104.61K 36.40K 54.04K 70.60K 70.60KSMA.Transient.g_alloc 94503 1.00 . 94495.50 94490.31 94490.31
SMA.Transient.g_bytes 511.89M 6.13K . 511.84M 511.81M 511.81M
SMA.Transient.g_space 116.70K -6.13K . 161.41K 191.47K 191.47K
My process is now consuming 1.3g according to "top" command. I don't know where the overhead in memory is coming from.
Another problem is that I don't see the transient memory every going down, and it reaches the limit of the defined total transient memory, according to the varnishstat. If I put a bigger limit it reaches it in the space of a day and never goes down. Once it reaches the limit I start having 503 errors on the pages are uncachable.
Also the total memory of the varnish process ends up being bigger that the limits I put on it like I describe above, sometimes consuming the server memory, then causing the OS to kill the process.
If I take out the transient memory limit it as the same behaviour. It just consumes all the server memory and ends up using swap, until it reaches its limit.
At that point the OS kills the varnish process.
Here is an example of a request that goes into the transient malloc:
* << Request >> 1738503
- Begin req 1738414 rxreq
- Timestamp Start: 1692807381.618637 0.000000 0.000000
- Timestamp Req: 1692807381.618637 0.000000 0.000000
- VCL_use boot
- ReqStart xxx.xx.xx.xxx 61744 a0
- ReqMethod GET
- ReqURL /pt/amasty_customform/form/sessiondata/?form_id=117&product_id=0
- ReqProtocol HTTP/1.1
- ReqHeader X-Forwarded-For: xxx.xx.xx.xxx
- ReqHeader X-Forwarded-Proto: https
- ReqHeader X-Forwarded-Port: 443
- ReqHeader Host: example.com
- ReqHeader X-Amzn-Trace-Id: Root=1-64e630d5-44639bc57601a91f12db85e0
- ReqHeader sec-ch-ua: "Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"
- ReqHeader accept: application/json, text/javascript, */*; q=0.01
- ReqHeader x-requested-with: XMLHttpRequest
- ReqHeader sec-ch-ua-mobile: ?0
- ReqHeader user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36
- ReqHeader sec-ch-ua-platform: "Windows"
- ReqHeader sec-fetch-site: same-origin
- ReqHeader sec-fetch-mode: cors
- ReqHeader sec-fetch-dest: empty
- ReqHeader referer: https://example.com/pt/catalogsearch/result/?q=luis
- ReqHeader accept-encoding: gzip, deflate, br
- ReqHeader accept-language: pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7
- ReqHeader cookie: _fw_crm_v=a04c9d30-7217-4f38-8899-a4a9369be22f; _fbp=fb.1.1661177844022.1077852103; _tt_enable_cookie=1; _ttp=c8dbd677-372a-4e20-b19f-199a83c67d08; _hjSessionUser_1906806=eyJpZCI6Ijk4MjQxYzk2LTVhMzItNTZmMy1hNGQ0LWFkNDk3MDE2ZjQ1NSIsImNyZWF0ZWQiOjE
- ReqUnset X-Forwarded-For: xxx.xx.xx.xxx
- ReqHeader X-Forwarded-For: xxx.xx.xx.xxx, xxx.xx.xx.xxx
- VCL_call RECV
- ReqHeader grace: none
- ReqURL /pt/amasty_customform/form/sessiondata/?form_id=117&product_id=0
- ReqUnset accept-encoding: gzip, deflate, br
- ReqHeader Accept-Encoding: gzip
- VCL_return hash
- VCL_call HASH
- VCL_return lookup
- HitMiss 1738502 119.935997
- VCL_call MISS
- VCL_return fetch
- Link bereq 1738504 fetch
- Timestamp Fetch: 1692807381.675756 0.057118 0.057118
- RespProtocol HTTP/1.1
- RespStatus 200
- RespReason OK
- RespHeader Server: nginx
- RespHeader Date: Wed, 23 Aug 2023 16:16:21 GMT
- RespHeader Content-Type: application/json
- RespHeader Set-Cookie: PHPSESSID=8g2ec2gn6n2n9sqk5h6oqpie4q; expires=Wed, 23-Aug-2023 17:16:21 GMT; Max-Age=3600; path=/; domain=example.com; secure; HttpOnly; SameSite=Lax
- RespHeader Set-Cookie: form_key=iPsTGv3W97OSSV5l; expires=Wed, 23-Aug-2023 17:16:21 GMT; Max-Age=3600; path=/; domain=example.com; secure; SameSite=Lax
- RespHeader Pragma: no-cache
- RespHeader Cache-Control: max-age=0, must-revalidate, no-cache, no-store
- RespHeader Expires: Tue, 23 Aug 2022 16:16:21 GMT
- RespHeader Content-Security-Policy: font-src *.gstatic.com data: *.fontawesome.com *.twitter.com *.typekit.net *.twimg.com *.trustedshops.com *.googleapis.com *.amazonaws.com *.klarnacdn.net data: 'self' 'unsafe-inline'; form-actio
- RespHeader X-Content-Type-Options: nosniff
- RespHeader X-XSS-Protection: 1; mode=block
- RespHeader X-Frame-Options: SAMEORIGIN
- RespHeader Content-Encoding: gzip
- RespHeader Vary: Accept-Encoding
- RespHeader X-Varnish: 1738503
- RespHeader Age: 0
- RespHeader Via: 1.1 varnish (Varnish/7.0)
- RespHeader Accept-Ranges: bytes
- VCL_call DELIVER
- RespHeader X-Magento-Cache-Debug: MISS
- RespUnset Pragma: no-cache
- RespHeader Pragma: no-cache
- RespUnset Expires: Tue, 23 Aug 2022 16:16:21 GMT
- RespHeader Expires: -1
- RespUnset Cache-Control: max-age=0, must-revalidate, no-cache, no-store
- RespHeader Cache-Control: no-store, no-cache, must-revalidate, max-age=0
- RespUnset Age: 0
- RespUnset Server: nginx
- RespUnset X-Varnish: 1738503
- RespUnset Via: 1.1 varnish (Varnish/7.0)
- VCL_return deliver
- Timestamp Process: 1692807381.675787 0.057150 0.000031
- Filters
- RespHeader Content-Length: 46
- RespHeader Connection: keep-alive
- Timestamp Resp: 1692807381.675807 0.057169 0.000019
- ReqAcct 2813 0 2813 6040 46 6086
- End
** << BeReq >> 1738504
-- Begin bereq 1738503 fetch
-- VCL_use boot
-- Timestamp Start: 1692807381.618703 0.000000 0.000000
-- BereqMethod GET
-- BereqURL /pt/amasty_customform/form/sessiondata/?form_id=117&product_id=0
-- BereqProtocol HTTP/1.1
-- BereqHeader X-Forwarded-Proto: https
-- BereqHeader X-Forwarded-Port: 443
-- BereqHeader Host: example.com
-- BereqHeader X-Amzn-Trace-Id: Root=1-64e630d5-44639bc57601a91f12db85e0
-- BereqHeader sec-ch-ua: "Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"
-- BereqHeader accept: application/json, text/javascript, */*; q=0.01
-- BereqHeader x-requested-with: XMLHttpRequest
-- BereqHeader sec-ch-ua-mobile: ?0
-- BereqHeader user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36
-- BereqHeader sec-ch-ua-platform: "Windows"
-- BereqHeader sec-fetch-site: same-origin
-- BereqHeader sec-fetch-mode: cors
-- BereqHeader sec-fetch-dest: empty
-- BereqHeader referer: https://example.com/pt/catalogsearch/result/?q=luis
-- BereqHeader accept-language: pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7
-- BereqHeader cookie: _fw_crm_v=a04c9d30-7217-4f38-8899-a4a9369be22f; _fbp=fb.1.1661177844022.1077852103; _tt_enable_cookie=1; _ttp=c8dbd677-372a-4e20-b19f-199a83c67d08; _hjSessionUser_1906806=eyJpZCI6Ijk4MjQxYzk2LTVhMzItNTZmMy1hNGQ0LWFkNDk3MDE2ZjQ1NSIsImNyZWF0ZWQiOjE
-- BereqHeader X-Forwarded-For: xxx.xx.xx.xxx, xxx.xx.xx.xxx
-- BereqHeader grace: none
-- BereqHeader Accept-Encoding: gzip
-- BereqHeader X-Varnish: 1738504
-- VCL_call BACKEND_FETCH
-- VCL_return fetch
-- Timestamp Fetch: 1692807381.618718 0.000015 0.000015
-- Timestamp Connected: 1692807381.619308 0.000604 0.000589
-- BackendOpen 33 default xxx.xx.xx.xxx 80 xxx.xx.xx.xxx 55692 connect
-- Timestamp Bereq: 1692807381.619323 0.000619 0.000015
-- Timestamp Beresp: 1692807381.675691 0.056987 0.056367
-- BerespProtocol HTTP/1.1
-- BerespStatus 200
-- BerespReason OK
-- BerespHeader Server: nginx
-- BerespHeader Date: Wed, 23 Aug 2023 16:16:21 GMT
-- BerespHeader Content-Type: application/json
-- BerespHeader Transfer-Encoding: chunked
-- BerespHeader Connection: keep-alive
-- BerespHeader Set-Cookie: PHPSESSID=8g2ec2gn6n2n9sqk5h6oqpie4q; expires=Wed, 23-Aug-2023 17:16:21 GMT; Max-Age=3600; path=/; domain=example.com; secure; HttpOnly; SameSite=Lax
-- BerespHeader Set-Cookie: form_key=iPsTGv3W97OSSV5l; expires=Wed, 23-Aug-2023 17:16:21 GMT; Max-Age=3600; path=/; domain=example.com; secure; SameSite=Lax
-- BerespHeader Pragma: no-cache
-- BerespHeader Cache-Control: max-age=0, must-revalidate, no-cache, no-store
-- BerespHeader Expires: Tue, 23 Aug 2022 16:16:21 GMT
-- BerespHeader Content-Security-Policy: font-src *.gstatic.com data: *.fontawesome.com *.twitter.com *.typekit.net *.twimg.com *.trustedshops.com *.googleapis.com *.amazonaws.com *.klarnacdn.net data: 'self' 'unsafe-inline'; form-actio
-- BerespHeader X-Content-Type-Options: nosniff
-- BerespHeader X-XSS-Protection: 1; mode=block
-- BerespHeader X-Frame-Options: SAMEORIGIN
-- BerespHeader Content-Encoding: gzip
-- BerespHeader Vary: Accept-Encoding
-- TTL RFC 0 10 0 1692807382 1692807382 1692807381 1661271381 0 cacheable
-- VCL_call BACKEND_RESPONSE
-- TTL VCL 0 259200 0 1692807382 cacheable
-- TTL VCL 120 259200 0 1692807382 cacheable
-- TTL VCL 120 259200 0 1692807382 uncacheable
-- VCL_return deliver
-- Timestamp Process: 1692807381.675716 0.057012 0.000025
-- Filters testgunzip
-- Storage malloc Transient
-- Fetch_Body 2 chunked stream
-- Gzip u F - 46 31 80 80 304
-- BackendClose 33 default recycle
-- Timestamp BerespBody: 1692807381.675759 0.057056 0.000043
-- Length 46
-- BereqAcct 2848 0 2848 6039 46 6085
-- End
Don't know if there is something wrong with the configurations above or it is a problem with the libjemalloc2 version "libjemalloc2/jammy,now 5.2.1-4ubuntu1" and need somehow to upgrade the jemalloc2 to 5.3.0.
If so how can I upgrade to that version, since everything was installed by the apt package manager?
Edited:
I increased the transient memory to 1GB now. The transient memory is still increasing but I noticed 1 detail:
SMA.Cache.c_req 244078 0.00 5.72 2.70 3.83 3.83
SMA.Cache.c_bytes 2.27G 0.00 55.74K 27.39K 35.41K 35.41K
SMA.Cache.c_freed 1.82G 0.00 44.69K 7.80K 13.66K 13.66KSMA.Cache.g_alloc 73357 0.00 . 73337.70 73282.31 73282.31
SMA.Cache.g_bytes 460.15M 0.00 . 459.98M 459.52M 459.52M
SMA.Cache.g_space 1.25G 0.00 . 1.25G 1.25G 1.25G
SMA.Transient.c_req 384452 2.99 9.01 3.27 3.89 3.89
SMA.Transient.c_bytes 1.91G 21.93K 47.01K 23.70K 28.57K 28.57K
SMA.Transient.c_freed 1.66G 15.99K 40.90K 18.81K 23.93K 23.93K
SMA.Transient.g_alloc 44521 1.00 . 44513.66 44497.95 44497.95
SMA.Transient.g_bytes 254.50M 5.93K . 254.46M 254.36M 254.36M
SMA.Transient.g_space 769.50M -5.93K . 769.54M 769.64M 769.64M
If you look at SMA.Transient.c_bytes 21.93K is comming in (at a specific time), so I think it should release the same amound of data (not sure about this) but SMA.Transient.c_freed is just releasing 15.99K. That is why Transient memory keeps increasing.
Can someone tell me why this could be happening?
Thank you.