For reasons I won't go into, I need to run a variant of 'top -m io -d 2 10' within a subprocess from a Python thread on FreeBSD 8.1. The trouble is, it seems that sometimes SIGTTOU gets produced (under certain code-dependent conditions that I haven't yet deciphered), halting top and the thread entirely. Other times, it seems that SIGTTOU is not produced, but top or the thread get stuck anyway.
The output from top should produce two sets of IO stats for the top 10 processes on the system, where the first set is "absolute" numbers and the second set is the incremental difference of the stats since the last set, one second earlier. Running this command on the terminal or within a shell script, whether redirecting the output or not, works fine.
When the problem occurs, it seems that 'top' writes the first set of outputs, but then hangs/receives SIGTTOU before it can output the second set. In the sample code below, only one set of process stats is written to the output file.
I discovered the SIGTTOU signal running the python script under 'truss', but it seems that interactions between 'truss' and 'top' themselves may be a confounding matter, since simply running truss top -d 2
produces the signal and hangs, as below:
...
ioctl(1,TIOCGETA,0xffffe460) = 0 (0x0)
ioctl(1,TIOCGETA,0xc6b138) = 0 (0x0)
ioctl(1,TIOCGETA,0xffffe410) = 0 (0x0)
ioctl(1,TIOCGWINSZ,0xffffe460) = 0 (0x0)
ioctl(1,TIOCGWINSZ,0xffffe930) = 0 (0x0)
ioctl(1,TIOCGETA,0x50e560) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGINT|SIGQUIT|SIGTSTP,0x0) = 0 (0x0)
ioctl(1,TIOCGETA,0x50e560) = 0 (0x0)
SIGNAL 22 (SIGTTOU)
Here's a sample Python script that reproduces the hang and/or SIGTTOU:
import subprocess
from threading import Thread
def run():
with open("top.log", "wb") as f:
subprocess.Popen(("/usr/bin/top", "-m", "io", "-d", "2", "10"), stdout=f, stderr=f, stdin=subprocess.PIPE).communicate()
if __name__ == "__main__":
th = Thread(target=run)
print "Starting"
th.start()
th.join()
On my last run through, this sample program did not produce SIGTTOU, but top did hang. Truss shows:
....
open("/usr/local/lib/python2.7/lib-tk/_heapq.pyc",O_RDONLY,0666) ERR#2 'No such file or directory'
stat("/usr/local/lib/python2.7/lib-dynload/_heapq",0x7fffffffa500) ERR#2 'No such file or directory'
open("/usr/local/lib/python2.7/lib-dynload/_heapq.so",O_RDONLY,0666) = 5 (0x5)
fstat(5,{ mode=-rwxr-xr-x ,inode=238187,size=22293,blksize=16384 }) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
open("/usr/local/lib/python2.7/lib-dynload/_heapq.so",O_RDONLY,057) = 6 (0x6)
fstat(6,{ mode=-rwxr-xr-x ,inode=238187,size=22293,blksize=16384 }) = 0 (0x0)
pread(0x6,0x80074c2e0,0x1000,0x0,0xffff800800653120,0x8080808080808080) = 4096 (0x1000)
mmap(0x0,1069056,PROT_NONE,MAP_PRIVATE|MAP_ANON|MAP_NOCORE,-1,0x0) = 34389442560 (0x801c54000)
mmap(0x801c54000,12288,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE,6,0x0) = 34389442560 (0x801c54000)
mmap(0x801d56000,12288,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED,6,0x2000) = 34390499328 (0x801d56000)
mmap(0x0,36864,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 34366377984 (0x800655000)
close(6) = 0 (0x0)
mmap(0x0,832,PROT_READ|PROT_WRITE,MAP_ANON,-1,0x0) = 34366414848 (0x80065e000)
munmap(0x80065e000,832) = 0 (0x0)
sigprocmask(SIG_SETMASK,0x0,0x0) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
sigprocmask(SIG_SETMASK,0x0,0x0) = 0 (0x0)
close(5) = 0 (0x0)
close(4) = 0 (0x0)
close(3) = 0 (0x0)
close(2) = 0 (0x0)
fstat(1,{ mode=crw------- ,inode=102,size=0,blksize=4096 }) = 0 (0x0)
ioctl(1,TIOCGETA,0xffffe400) = 0 (0x0)
Starting
write(1,"Starting\n",9) = 9 (0x9)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
_umtx_op(0x7fffffffe1d8,0x3,0x1,0x0,0x0,0x0) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGABRT|SIGEMT|SIGKILL|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2) = 0 (0x0)
sigprocmask(SIG_SETMASK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGABRT|SIGEMT|SIGKILL|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2) = 0 (0x0)
sigprocmask(SIG_SETMASK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
mmap(0x7fffffbde000,135168,PROT_READ|PROT_WRITE,MAP_STACK,-1,0x0) = 140737484021760 (0x7fffffbde000)
mprotect(0x7fffffbde000,4096,PROT_NONE) = 0 (0x0)
thr_new(0x7fffffffe220,0x68,0x800a9f4c0,0x186fc,0xffffffff,0x0) = 0 (0x0)
sigprocmask(SIG_SETMASK,0x0,0x0) = 0 (0x0)
mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 34390511616 (0x801d59000)
mmap(0x801f59000,684032,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 34392608768 (0x801f59000)
munmap(0x801d59000,684032) = 0 (0x0)
_umtx_op(0x8010127f8,0x10,0x1,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0xf,0x0,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0x10,0x1,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0x10,0x1,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0x10,0x1,0x0,0x0,0x8080808080808080) = 0 (0x0)
open("top.log",O_WRONLY|O_CREAT|O_TRUNC,0666) = 2 (0x2)
fstat(2,{ mode=-rw-r--r-- ,inode=70860,size=0,blksize=16384 }) = 0 (0x0)
pipe(0x7fffffbfd910) = 0 (0x0)
pipe(0x7fffffbfd870) = 0 (0x0)
fcntl(6,F_GETFD,) = 0 (0x0)
fcntl(6,F_SETFD,FD_CLOEXEC) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGABRT|SIGEMT|SIGKILL|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2) = 0 (0x0)
fork() = 21503 (0x53ff)
sigprocmask(SIG_SETMASK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
close(6) = 0 (0x0)
close(3) = 0 (0x0)
read(5,0x801e31024,1048576) = 0 (0x0)
close(5) = 0 (0x0)
fcntl(4,F_GETFL,) = 2 (0x2)
fstat(4,{ mode=p--------- ,inode=0,size=0,blksize=4096 }) = 0 (0x0)
close(4) = 0 (0x0)
I've looked into SIGTTOU and found references to the TOSTOP termios flag, and I've fiddled with it in the main thread, in the child thread, and in the environment invoking Python, all to no avail. It's been an educational process, but I'm not there yet.
I've run tests to make sure that the top process is created in and appears to stay in the process group of the Python process (based on the SIGTTOU documentation, if it weren't, this would be the reason for SIGTTOU), and that seems fine: the PGRP ends up being the same as the Python PID/PGRP.
I've tried running 'top' with subprocess.check_output and with .Popen() using shell=True, shell=False, and redirecting std{out,err,in} all over the place, none of which seems to change this end result. I've tried running 'top' using a '/bin/sh -c' command executed through subprocess, also to no avail.
Without doing something semi-weird like running 'top' within a shell script which my Python thread invokes, or resorting to os.fork() instead of using threading, how can I get around this issue, and what's the root cause?