I have a server application that hangs randomly after a few hours of running. It uses a pool of I/O threads to handle accept, read, and write operations asynchronously. At random points, no new connections are accepted and no data is read. I suspected that the io_context was running out of work, but the problem persists even after adding a work_guard. When the monitoring code attempts to restart the listener, one of the I/O threads is stuck at the location shown in the backtrace below. The only timer I use is the idle-connection close timer. There is one timer per client connection, and it gets updated after each async_read so that the connection can be closed correctly if no bytes are arriving. Anyway, going back to where the worker thread is hanging:
(gdb) thread 25
[Switching to thread 25 (Thread 0x7f834dab1640 (LWP 1625296))]
#0 0x0000565123c2d25a in std::chrono::operator< <std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (__lhs=..., __rhs=...) at /usr/include/c++/10/chrono:990
990 /usr/include/c++/10/chrono: No such file or directory.
(gdb) bt
#0 0x0000565123c2d25a in std::chrono::operator< <std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (__lhs=..., __rhs=...) at /usr/include/c++/10/chrono:990
#1 0x0000565123c33dfd in boost::asio::detail::chrono_time_traits<std::chrono::_V2::steady_clock, boost::asio::wait_traits<std::chrono::_V2::steady_clock> >::less_than (t1=..., t2=...)
at /extern/boost/linux/include/boost/asio/detail/chrono_time_traits.hpp:120
#2 0x0000565123c3fcb7 in boost::asio::detail::timer_queue<boost::asio::detail::chrono_time_traits<std::chrono::_V2::steady_clock, boost::asio::wait_traits<std::chrono::_V2::steady_clock> > >::get_ready_timers (this=0x56512482d6d8, ops=...) at /extern/boost/linux/include/boost/asio/detail/timer_queue.hpp:151
#3 0x0000565123c21d02 in boost::asio::detail::timer_queue_set::get_ready_timers (this=0x56512482ef48, ops=...)
at /extern/boost/linux/include/boost/asio/detail/impl/timer_queue_set.ipp:88
#4 0x0000565123c22e8f in boost::asio::detail::epoll_reactor::run (this=0x56512482eed0, usec=-1, ops=...)
at /extern/boost/linux/include/boost/asio/detail/impl/epoll_reactor.ipp:554
#5 0x0000565123c245d6 in boost::asio::detail::scheduler::do_run_one (this=0x56512482ed80, lock=..., this_thread=..., ec=...)
at /extern/boost/linux/include/boost/asio/detail/impl/scheduler.ipp:465
#6 0x0000565123c23fa8 in boost::asio::detail::scheduler::run (this=0x56512482ed80, ec=...)
at /extern/boost/linux/include/boost/asio/detail/impl/scheduler.ipp:204
#7 0x0000565123c59d70 in boost::asio::io_context::run (this=0x7ffdb18c5010)
at /extern/boost/linux/include/boost/asio/impl/io_context.ipp:63
#8 0x0000565123c6e57d in boost::_mfi::mf0<unsigned long, boost::asio::io_context>::operator() (this=0x565124842968, p=0x7ffdb18c5010)
at /extern/boost/linux/include/boost/bind/mem_fn_template.hpp:49
#9 0x0000565123c6e4b2 in boost::_bi::list1<boost::_bi::value<boost::asio::io_context*> >::operator()<unsigned long, boost::_mfi::mf0<unsigned long, boost::asio::io_context>, boost::_bi::list0> (
this=0x565124842978, f=..., a=...) at /extern/boost/linux/include/boost/bind/bind.hpp:249
#10 0x0000565123c6e286 in boost::_bi::bind_t<unsigned long, boost::_mfi::mf0<unsigned long, boost::asio::io_context>, boost::_bi::list1<boost::_bi::value<boost::asio::io_context*> > >::operator() (
this=0x565124842968) at /extern/boost/linux/include/boost/bind/bind.hpp:1294
#11 0x0000565123c6d49e in boost::detail::thread_data<boost::_bi::bind_t<unsigned long, boost::_mfi::mf0<unsigned long, boost::asio::io_context>, boost::_bi::list1<boost::_bi::value<boost::asio::io_context*> > > >::run (this=0x565124842830) at /extern/boost/linux/include/boost/thread/detail/thread.hpp:120
#12 0x0000565123c7931b in thread_proxy ()
#13 0x00007f835dd3eb43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#14 0x00007f835ddd0a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81