I have an ordinary server application that crashes with SIGSEGV during startup. The code involved in the crash uses pthread_cond_wait, pthread_setcancelstate, pthread_cond_timedwait, pthread_cleanup_push and pthread_cleanup_pop. A minimal program that reproduces the crash:
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
/* Mutex initialised and locked by thr(); it is the mutex handed to
   pthread_cond_wait() together with `cond`.  */
pthread_mutex_t mutex;
/* Condition variable initialised by thr() and waited on there.  */
pthread_cond_t cond;
/*
 * CHECK_RETURN_VAL_OR_FAIL(ret, str)
 *
 * If RET is non-zero, print "<str> failed: <strerror(ret)>", overwrite
 * RET with 1 and jump to a label named `out`, which the enclosing
 * function must provide.  RET must be a modifiable lvalue.
 *
 * Written as `do { ... } while (0)` so the macro is a single standard C
 * statement (no GNU statement-expression extension) and stays safe in
 * un-braced if/else bodies.
 *
 * Because of the `goto`, do not use this macro between
 * pthread_cleanup_push() and pthread_cleanup_pop() when `out` lies
 * outside that pair: POSIX leaves the effect of leaving such a block
 * with goto undefined.
 */
#define CHECK_RETURN_VAL_OR_FAIL(ret, str) \
  do \
    { \
      if ((ret) != 0) \
        { \
          printf ("%s failed: %s\n", (str), strerror (ret)); \
          (ret) = 1; \
          goto out; \
        } \
    } \
  while (0)
/*
 * Cleanup handler intended for pthread_cleanup_push().
 *
 * ARG must point to a pthread_mutex_t that the calling thread currently
 * holds; the handler unlocks it.  The "Mutex unlocked" message is only
 * printed when pthread_mutex_unlock() actually succeeded; otherwise the
 * error is reported instead of being silently ignored.
 */
void
clean (void *arg)
{
  pthread_mutex_t *held = arg;
  int err;

  puts ("clean: Unlocking mutex...");
  err = pthread_mutex_unlock (held);
  if (err != 0)
    {
      printf ("clean: pthread_mutex_unlock failed: %s\n", strerror (err));
      return;
    }
  puts ("clean: Mutex unlocked...");
}
/*
 * Initialise the global `mutex` (with the PTHREAD_PRIO_INHERIT protocol)
 * and the global `cond`.
 *
 * The mutex attribute object is destroyed again once the mutex has been
 * created, whether or not initialisation succeeded.
 *
 * Returns 0 on success.  On failure prints "<call> failed: <reason>" for
 * the first call that failed and returns 1.
 */
static int
thr_init_sync (void)
{
  pthread_mutexattr_t mutexAttr;
  const char *step = "pthread_mutexattr_init";
  int ret = pthread_mutexattr_init (&mutexAttr);

  if (ret == 0)
    {
      step = "pthread_mutexattr_setprotocol";
      ret = pthread_mutexattr_setprotocol (&mutexAttr, PTHREAD_PRIO_INHERIT);
      if (ret == 0)
        {
          step = "pthread_mutex_init";
          ret = pthread_mutex_init (&mutex, &mutexAttr);
        }
      /* The attribute object is no longer needed after pthread_mutex_init.  */
      pthread_mutexattr_destroy (&mutexAttr);
    }

  if (ret == 0)
    {
      step = "pthread_cond_init";
      ret = pthread_cond_init (&cond, NULL);
    }

  if (ret != 0)
    {
      printf ("%s failed: %s\n", step, strerror (ret));
      return 1;
    }
  return 0;
}

/*
 * Thread entry point.
 *
 * Sets up the global mutex and condition variable, locks the mutex and
 * then waits on the condition variable until pthread_cond_wait() reports
 * an error or the thread is cancelled.  Successful wake-ups simply wait
 * again.
 *
 * ARG is unused.
 *
 * Returns (void *) 1 if any pthread call failed.  There is no success
 * return: without an error the thread only ends by cancellation.
 */
void *
thr (void *arg)
{
  int ret;

  (void) arg;

  if (thr_init_sync () != 0)
    return (void *) (intptr_t) 1;

  puts ("th: Init done, entering wait...");

  /* Lock before registering the cleanup handler so that clean() can
     never be asked to unlock a mutex this thread does not hold.  */
  ret = pthread_mutex_lock (&mutex);
  if (ret != 0)
    {
      printf ("%s failed: %s\n", "pthread_mutex_lock", strerror (ret));
      return (void *) (intptr_t) 1;
    }

  /* Nothing between push and pop may leave the block with goto/return:
     POSIX leaves that undefined.  The loop therefore ends normally and
     the error is reported after the pop.  */
  pthread_cleanup_push (clean, (void *) &mutex);
  do
    {
      ret = pthread_cond_wait (&cond, &mutex);
    }
  while (ret == 0);
  /* Non-zero argument: run clean() now, which releases the mutex.  */
  pthread_cleanup_pop (1);

  printf ("%s failed: %s\n", "pthread_cond_wait", strerror (ret));
  return (void *) (intptr_t) 1;
}
/*
 * Start the worker thread, give it two seconds to reach its wait, cancel
 * it and join it.
 *
 * The thread's exit value is collected through pthread_join().  The run
 * counts as successful only when that value is PTHREAD_CANCELED; any
 * other value means the thread returned on its own, which thr() only
 * does after an error.
 *
 * Returns 0 on success and 1 if a pthread call failed or the thread was
 * not ended by the cancellation request.
 */
int
main (void)
{
  pthread_t thread;
  int ret = 0;
  void *thr_ret = NULL;

  /* thr() does not use its argument, so none is passed.  */
  ret = pthread_create (&thread, NULL, thr, NULL);
  CHECK_RETURN_VAL_OR_FAIL (ret, "pthread_create");
  puts ("main: Thread created, waiting a bit...");
  sleep (2);
  puts ("main: Cancelling thread...");
  ret = pthread_cancel (thread);
  CHECK_RETURN_VAL_OR_FAIL (ret, "pthread_cancel");
  puts ("main: Joining th...");
  ret = pthread_join (thread, &thr_ret);
  CHECK_RETURN_VAL_OR_FAIL (ret, "pthread_join");
  if (thr_ret != PTHREAD_CANCELED)
    {
      puts ("main: Thread was not cancelled, it returned on its own");
      return 1;
    }
  puts ("main: Joined thread, done!");
out:
  return ret;
}
I ran the program under gdb, which gave me this backtrace:
(gdb) bt
#0 0xf7318387 in ?? () from /lib/libgcc_s.so.1
#1 0xf7318820 in _Unwind_Resume () from /lib/libgcc_s.so.1
#2 0xf7dfbb52 in _Unwind_Resume () from /lib/libpthread.so.0
#3 0xf7df6d95 in __condvar_w_cleanup () from /lib/libpthread.so.0
#4 0x08048b01 in thr ()
#5 0xf7df2338 in start_thread () from /lib/libpthread.so.0
#6 0xf7d30aee in clone () from /lib/libc.so.6
I also captured `strace -f` output for the process and its children; it shows that the SIGSEGV is raised in the child thread:
[pid 18192] open("./tls/i686/sse2/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./tls/i686/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./tls/sse2/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./tls/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./i686/sse2/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./i686/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./sse2/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("./libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid 18192] open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
[pid 18192] fstat64(3, {st_mode=S_IFREG|0644, st_size=94009, ...}) = 0
[pid 18192] mmap2(NULL, 94009, PROT_READ, MAP_PRIVATE, 3, 0) = 0xf75de000
[pid 18192] close(3) = 0
[pid 18192] open("/lib/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = 3
[pid 18192] read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\240%\0\0004\0\0\0"..., 512) = 512
[pid 18192] fstat64(3, {st_mode=S_IFREG|0755, st_size=116284, ...}) = 0
[pid 18192] mmap2(NULL, 119400, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xf6ab2000
[pid 18192] mmap2(0xf6ace000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b000) = 0xf6ace000
[pid 18192] close(3) = 0
[pid 18192] mprotect(0xf6ace000, 4096, PROT_READ) = 0
[pid 18192] munmap(0xf75de000, 94009) = 0
[pid 18192] tgkill(18192, 18193, SIGRTMIN) = 0
[pid 18192] futex(0xf7310ba8, FUTEX_WAIT, 18193, NULL <unfinished ...>
[pid 18193] <... futex resumed> ) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
[pid 18193] --- SIGRTMIN {si_signo=SIGRTMIN, si_code=SI_TKILL, si_pid=18192, si_uid=3535917} ---
[pid 18193] futex(0xf6acf10c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
[pid 18193] futex(0x804b0a4, FUTEX_WAKE_PRIVATE, 2147483647) = 0
[pid 18193] --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} ---
[pid 18193] +++ killed by SIGSEGV (core dumped) +++
+++ killed by SIGSEGV (core dumped) +++
Operating System : SUSE 12.4
Kernel : 4.12.14-95.83-default
gcc : 4.8.5
glibc : 2.22
I'm still not able to understand why this application is hitting SIGSEGV.
Compilation :
gcc sapl.c -m32 -pthread
The crash does not occur when the program is built without -m32:
gcc sapl.c -pthread
Any ideas ?
Edit 1: The original code was different and used more threads, and the strace output above corresponded to that version. When user sonicwave asked for a minimal reproducible example, I replaced the code and the gdb backtrace but forgot to update the strace output, which is why there was an anomaly.
Also, in this code, for reproducing this issue, only one thread is enough.
Again, this issue occurs only with the gcc and glibc versions specified above. If I use an older compiler such as 4.3.7, the issue does not reproduce.