[JDEV] 1.4.2CVS _mio_main race and patch

Tom Riddle ftr at almeric.com
Thu Jul 18 10:36:04 CDT 2002


The following test program causes the _mio_main thread to sleep forever.
It does this by causing _mio_main to call pth_write() on 
mio__data->zzz[1] twice without an intervening call to pth_read().  This 
causes the thread to sleep because pth_write() checks the fd for 
writeability using select() before writing to it.  If the fd is not 
writeable it sleeps the thread until it is writeable.  mio__data->zzz is 
a pipe and the select semantics for a pipe are that the pipe is writable 
only if it is empty (see pipe_poll() in /usr/src/linux-2.4/fs/pipe.c). 
See the attached annotated strace.

One solution is to make the pipe nonblocking.  This causes pth_write to 
skip the select() call and just write to the fd.  The attached patch 
does this.  Is this the best solution ?  One question that arises from 
looking at the strace listing is why doesn't _mio_main get scheduled 
sooner - perhaps removing the pth_yield() would help ?

I am running jabberd 1.4.2CVS under rh7.2, linux2.4.9 and pth-1.4.1.
I am not subscribed to this list so please reply to me directly.
I can provide full strace listing if necessary.

Thanks,
Tom

-- 

Tom Riddle

Oracom, Inc.
http://www.almeric.com

Tel. +1 978.557.5710x305
Fax  +1 978.557.5716

-------------TEST PROGRAM---------

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/*
 * Reproducer for the jabberd _mio_main stall described above.
 *
 * Opens two TCP connections to the server, sends the opening
 * <stream:stream> header on each, then reads one reply from each,
 * printing the raw return value of every connect/write/read call.
 * The ordering (both connects, both writes, both reads) is what
 * triggers the problem and must not be changed.
 *
 * Returns 0 once the whole sequence has run, or 1 if a socket could
 * not be created or the server address could not be parsed.
 */
int main(int argc, char **argv)
{
         const char *ipAddr = "192.168.0.96";
         int port = 5222;
         /* Adjacent literals are concatenated by the compiler; this keeps
          * the header on one logical line (117 bytes, single spaces). */
         const char *openStream =
                 "<stream:stream "
                 "to='xpress-install3.oracom.com' xmlns='jabber:client' "
                 "xmlns:stream='http://etherx.jabber.org/streams'>";
         char buf[1024];
         struct sockaddr_in addr;
         int sd1;
         int sd2;

         (void) argc;
         (void) argv;

         sd1 = socket(AF_INET, SOCK_STREAM, 0);
         if (sd1 < 0) {
                 perror("socket");
                 return 1;
         }
         sd2 = socket(AF_INET, SOCK_STREAM, 0);
         if (sd2 < 0) {
                 perror("socket");
                 close(sd1);
                 return 1;
         }

         /* Zero the whole structure so sin_zero/padding are defined. */
         memset(&addr, 0, sizeof(addr));
         addr.sin_family = AF_INET;
         addr.sin_port = htons(port);
         if (inet_aton(ipAddr, &addr.sin_addr) == 0) {
                 fprintf(stderr, "invalid address: %s\n", ipAddr);
                 close(sd1);
                 close(sd2);
                 return 1;
         }

         printf("connect1: %d\n",
                 connect(sd1,(struct sockaddr *) &addr,sizeof(addr)));
         printf("connect2: %d\n",
                 connect(sd2,(struct sockaddr *) &addr,sizeof(addr)));

         /* write()/read() return ssize_t; cast so the format matches. */
         printf("write1: %ld\n",
                 (long) write(sd1,openStream,strlen(openStream)));
         printf("write2: %ld\n",
                 (long) write(sd2,openStream,strlen(openStream)));

         printf("read1: %ld\n",
                 (long) read(sd1,buf,sizeof(buf)));
         printf("read2: %ld\n",
                 (long) read(sd2,buf,sizeof(buf)));

         close(sd1);
         close(sd2);

         return 0;
}


--------------PATCH---------

--- jabber-1.4.2/jabberd/mio.c  Fri Feb  8 02:39:27 2002
+++ jabber-1.4.2-new/jabberd/mio.c      Thu Jul 18 10:23:51 2002
@@ -843,6 +843,9 @@
          mio__data->k = karma_new(p);
          pipe(mio__data->zzz);

+               /* set write side of pipe nonblocking to avoid sleeping in pth_write */
+               pth_fdmode(mio__data->zzz[1],PTH_FDMODE_NONBLOCK);
+
          /* start main accept/read/write thread */
          attr = pth_attr_new();
          pth_attr_set(attr,PTH_ATTR_JOINABLE,FALSE);

-------------STRACE----------


jabberd starts up and goes idle:

rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0})          = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 992463}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 992598}, NULL) = 0
rt_sigpending([])                       = 0
read(4, 0x80d9e68, 128)                 = -1 EAGAIN (Resource 
temporarily unavailable)

...

handles first connection:

rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0})          = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 993429}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
fcntl64(9, F_GETFL)                     = 0x802 (flags O_RDWR|O_NONBLOCK)
accept(9, {sin_family=AF_INET, sin_port=htons(43454), 
sin_addr=inet_addr("192.168.0.52")}}, [16]) = 16
fcntl64(9, F_GETFL)                     = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(16, F_GETFL)                    = 0x2 (flags O_RDWR)
fcntl64(16, F_SETFL, O_RDWR|O_NONBLOCK) = 0
time(NULL)                              = 1026982108
fcntl64(16, F_GETFL)                    = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(16, F_SETFL, O_RDWR|O_NONBLOCK) = 0

fd 6 is mio__data->zzz[1], here we are in pth_write

fcntl64(6, F_GETFL)                     = 0x1 (flags O_WRONLY)
fcntl64(6, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
select(7, NULL, [6], NULL, {0, 0})      = 1 (out [6], left {0, 0})
write(6, " ", 1)                        = 1
fcntl64(6, F_GETFL)                     = 0x801 (flags O_WRONLY|O_NONBLOCK)
fcntl64(6, F_SETFL, O_WRONLY)           = 0

pth_write is successful, so far so good

time(NULL)                              = 1026982108
select(17, [3 9 12 13 14 15 16], [], NULL, {0, 0}) = 3 (in [3 9 16], 
left {0, 0})
fcntl64(3, F_GETFL)                     = 0 (flags O_RDONLY)


_mio_main wakes up...

select(4, [3], NULL, NULL, {0, 0})      = 1 (in [3], left {0, 0})
read(3, " ", 8192)                      = 1
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 994735}, NULL) = 0
rt_sigpending([])                       = 0
read(4, 0x80d9e68, 128)                 = -1 EAGAIN (Resource 
temporarily unavailable)
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0})          = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 995090}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
fcntl64(16, F_GETFL)                    = 0x802 (flags O_RDWR|O_NONBLOCK)
read(16, "<stream:stream to=\'xpress-install3.oracom.com\' 
xmlns=\'jabber:client\' 
xmlns:stream=\'http://etherx.jabber.org/streams\'>", 5000) = 117
brk(0x8149000)                          = 0x8149000
brk(0x814b000)                          = 0x814b000
time(NULL)                              = 1026982108

pth_write on mio__data->zzz[1] again...

fcntl64(6, F_GETFL)                     = 0x1 (flags O_WRONLY)
fcntl64(6, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
select(7, NULL, [6], NULL, {0, 0})      = 1 (out [6], left {0, 0})
write(6, " ", 1)                        = 1
fcntl64(6, F_GETFL)                     = 0x801 (flags O_WRONLY|O_NONBLOCK)
fcntl64(6, F_SETFL, O_WRONLY)           = 0

pth_write successful again

fcntl64(16, F_GETFL)                    = 0x802 (flags O_RDWR|O_NONBLOCK)
write(16, "<?xml version=\'1.0\'?><stream:stream 
xmlns:stream=\'http://etherx.jabber.org/streams\' id=\'3D3680DC\' 
xmlns=\'jabber:client\' from=\'xpress-install3.oracom.com\'>", 154) = 154
fcntl64(16, F_GETFL)                    = 0x802 (flags O_RDWR|O_NONBLOCK)


we go idle again...

sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 996331}, NULL) = 0
rt_sigpending([])                       = 0
read(4, 0x80d9e68, 128)                 = -1 EAGAIN (Resource 
temporarily unavailable)
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(5, [4], [], [], {0, 0})          = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982108, 996696}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0

...

handle second connection

fcntl64(9, F_GETFL)                     = 0x802 (flags O_RDWR|O_NONBLOCK)
accept(9, {sin_family=AF_INET, sin_port=htons(43455), 
sin_addr=inet_addr("192.168.0.52")}}, [16]) = 17
fcntl64(9, F_GETFL)                     = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(17, F_GETFL)                    = 0x2 (flags O_RDWR)
fcntl64(17, F_SETFL, O_RDWR|O_NONBLOCK) = 0
time(NULL)                              = 1026982108
fcntl64(17, F_GETFL)                    = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(17, F_SETFL, O_RDWR|O_NONBLOCK) = 0

since we have not read fd 3 pth_write blocks

fcntl64(6, F_GETFL)                     = 0x1 (flags O_WRONLY)
fcntl64(6, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
select(7, NULL, [6], NULL, {0, 0})      = 0 (Timeout)

_mio_main now blocked on fd 6 becoming writeable
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982108, 999364}, NULL) = 0
rt_sigpending([])                       = 0
read(4, 0x80d9e68, 128)                 = -1 EAGAIN (Resource 
temporarily unavailable)
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP], 8) = 0
select(7, [4], [6], [], {0, 269263})    = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
gettimeofday({1026982109, 268550}, NULL) = 0
sigprocmask(SIG_SETMASK, [], ~[KILL STOP]) = 0
gettimeofday({1026982109, 268649}, NULL) = 0
sigprocmask(SIG_SETMASK, ~[KILL STOP], []) = 0
gettimeofday({1026982109, 268737}, NULL) = 0
rt_sigpending([])                       = 0
read(4, 0x80d9e68, 128)                 = -1 EAGAIN (Resource 
temporarily unavailable)

more of the same






More information about the JDev mailing list