[JDEV] 1.4.2CVS _mio_main race and patch
Tom Riddle
ftr at almeric.com
Thu Jul 18 10:36:04 CDT 2002
The following test program causes the _mio_main thread to sleep forever.
It does this by causing _mio_main to call pth_write() on
mio__data->zzz[1] twice without an intervening call to pth_read(). This
causes the thread to sleep because pth_write() checks the fd for
writeability using select() before writing to it. If the fd is not
writeable it sleeps the thread until it is writeable. mio__data->zzz is
a pipe, and the select semantics for a pipe are that the pipe is writable
only if it is empty (see pipe_poll() in /usr/src/linux-2.4/fs/pipe.c).
See the attached annotated strace.
One solution is to make the pipe nonblocking. This causes pth_write to
skip the select() call and just write to the fd. The attached patch
does this. Is this the best solution? One question that arises from
looking at the strace listing is why _mio_main doesn't get scheduled
sooner - perhaps removing the pth_yield() would help?
I am running jabberd 1.4.2CVS under rh7.2, linux 2.4.9 and pth-1.4.1.
I am not subscribed to this list so please reply to me directly.
I can provide full strace listing if necessary.
Thanks,
Tom
--
Tom Riddle
Oracom, Inc.
http://www.almeric.com
Tel. +1 978.557.5710x305
Fax +1 978.557.5716
-------------TEST PROGRAM---------
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Reproducer for the jabberd _mio_main stall described in this report.
 *
 * Opens two TCP connections to the server back to back, sends the opening
 * <stream:stream> header on each, then reads one reply from each. The
 * order (connect, connect, write, write, read, read) is deliberate: it is
 * what makes the server handle the second connection before it has drained
 * its internal wake-up pipe. The return value of every step is printed.
 *
 * Returns 0 once all steps have run, or 1 if the sockets or the target
 * address could not be set up.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    const char *ipAddr = "192.168.0.96";
    int port = 5222;
    /*
     * Adjacent string literals are concatenated by the compiler; a literal
     * may not span source lines. The pieces are joined by single spaces,
     * giving the 117-byte header seen in the server-side strace.
     */
    const char *openStream =
        "<stream:stream"
        " to='xpress-install3.oracom.com' xmlns='jabber:client'"
        " xmlns:stream='http://etherx.jabber.org/streams'>";
    char buf[1024];
    struct sockaddr_in addr;

    int sd1 = socket(AF_INET, SOCK_STREAM, 0);
    int sd2 = socket(AF_INET, SOCK_STREAM, 0);
    if (sd1 < 0 || sd2 < 0) {
        perror("socket");
        if (sd1 >= 0) {
            close(sd1);
        }
        if (sd2 >= 0) {
            close(sd2);
        }
        return 1;
    }

    /* Zero the whole structure so padding/unused fields are defined. */
    memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    addr.sin_port = htons((unsigned short)port);
    if (inet_pton(AF_INET, ipAddr, &addr.sin_addr) != 1) {
        fprintf(stderr, "invalid address: %s\n", ipAddr);
        close(sd1);
        close(sd2);
        return 1;
    }

    printf("connect1: %d\n",
           connect(sd1, (struct sockaddr *)&addr, sizeof addr));
    printf("connect2: %d\n",
           connect(sd2, (struct sockaddr *)&addr, sizeof addr));

    /* write()/read() return ssize_t; cast so the format specifier matches. */
    printf("write1: %ld\n",
           (long)write(sd1, openStream, strlen(openStream)));
    printf("write2: %ld\n",
           (long)write(sd2, openStream, strlen(openStream)));

    printf("read1: %ld\n",
           (long)read(sd1, buf, sizeof buf));
    printf("read2: %ld\n",
           (long)read(sd2, buf, sizeof buf));

    close(sd1);
    close(sd2);
    return 0;
}
--------------PATCH---------
--- jabber-1.4.2/jabberd/mio.c Fri Feb 8 02:39:27 2002
+++ jabber-1.4.2-new/jabberd/mio.c Thu Jul 18 10:23:51 2002
@@ -843,6 +843,9 @@
mio__data->k = karma_new(p);
pipe(mio__data->zzz);
+ /* set write side of pipe nonblocking to avoid sleeping in pth_write */
+ pth_fdmode(mio__data->zzz[1],PTH_FDMODE_NONBLOCK);
+
/* start main accept/read/write thread */
attr = pth_attr_new();
pth_attr_set(attr,PTH_ATTR_JOINABLE,FALSE);
-------------STRACE----------
jabberd starts up and goes idle:
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 992463}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 992598}, NULL) = 0
rt_sigpending([]) = 0
read(4, 0x80d9e68, 128) = -1 EAGAIN (Resource
temporarily unavailable)
...
handles first connection:
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 993429}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
fcntl64(9, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
accept(9, {sin_family=AF_INET, sin_port=htons(43454),
sin_addr=inet_addr("192.168.0.52")}}, [16]) = 16
fcntl64(9, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(16, F_GETFL) = 0x2 (flags O_RDWR)
fcntl64(16, F_SETFL, O_RDWR|O_NONBLOCK) = 0
time(NULL) = 1026982108
fcntl64(16, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(16, F_SETFL, O_RDWR|O_NONBLOCK) = 0
fd 6 is mio__data->zzz[1], here we are in pth_write
fcntl64(6, F_GETFL) = 0x1 (flags O_WRONLY)
fcntl64(6, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
select(7, NULL, [6], NULL, {0, 0}) = 1 (out [6], left {0, 0})
write(6, " ", 1) = 1
fcntl64(6, F_GETFL) = 0x801 (flags O_WRONLY|O_NONBLOCK)
fcntl64(6, F_SETFL, O_WRONLY) = 0
pth_write is successful, so far so good
time(NULL) = 1026982108
select(17, [3 9 12 13 14 15 16], [], NULL, {0, 0}) = 3 (in [3 9 16],
left {0, 0})
fcntl64(3, F_GETFL) = 0 (flags O_RDONLY)
_mio_main wakes up...
select(4, [3], NULL, NULL, {0, 0}) = 1 (in [3], left {0, 0})
read(3, " ", 8192) = 1
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 994735}, NULL) = 0
rt_sigpending([]) = 0
read(4, 0x80d9e68, 128) = -1 EAGAIN (Resource
temporarily unavailable)
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 995090}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
fcntl64(16, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
read(16, "<stream:stream to=\'xpress-install3.oracom.com\'
xmlns=\'jabber:client\'
xmlns:stream=\'http://etherx.jabber.org/streams\'>", 5000) = 117
brk(0x8149000) = 0x8149000
brk(0x814b000) = 0x814b000
time(NULL) = 1026982108
pth_write on mio__data->zzz[1] again...
fcntl64(6, F_GETFL) = 0x1 (flags O_WRONLY)
fcntl64(6, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
select(7, NULL, [6], NULL, {0, 0}) = 1 (out [6], left {0, 0})
write(6, " ", 1) = 1
fcntl64(6, F_GETFL) = 0x801 (flags O_WRONLY|O_NONBLOCK)
fcntl64(6, F_SETFL, O_WRONLY) = 0
pth_write successful again
fcntl64(16, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
write(16, "<?xml version=\'1.0\'?><stream:stream
xmlns:stream=\'http://etherx.jabber.org/streams\' id=\'3D3680DC\'
xmlns=\'jabber:client\' from=\'xpress-install3.oracom.com\'>", 154) = 154
fcntl64(16, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
we go idle again...
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 996331}, NULL) = 0
rt_sigpending([]) = 0
read(4, 0x80d9e68, 128) = -1 EAGAIN (Resource
temporarily unavailable)
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 996696}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
...
handle second connection
fcntl64(9, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
accept(9, {sin_family=AF_INET, sin_port=htons(43455),
sin_addr=inet_addr("192.168.0.52")}}, [16]) = 17
fcntl64(9, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(17, F_GETFL) = 0x2 (flags O_RDWR)
fcntl64(17, F_SETFL, O_RDWR|O_NONBLOCK) = 0
time(NULL) = 1026982108
fcntl64(17, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(17, F_SETFL, O_RDWR|O_NONBLOCK) = 0
since we have not read fd 3 pth_write blocks
fcntl64(6, F_GETFL) = 0x1 (flags O_WRONLY)
fcntl64(6, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
select(7, NULL, [6], NULL, {0, 0}) = 0 (Timeout)
_mio_main now blocked on fd 6 becoming writeable
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 999364}, NULL) = 0
rt_sigpending([]) = 0
read(4, 0x80d9e68, 128) = -1 EAGAIN (Resource
temporarily unavailable)
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(7, [4], [6], [], {0, 269263}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982109, 268550}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
gettimeofday({1026982109, 268649}, NULL) = 0
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982109, 268737}, NULL) = 0
rt_sigpending([]) = 0
read(4, 0x80d9e68, 128) = -1 EAGAIN (Resource
temporarily unavailable)
more of the same
More information about the JDev
mailing list