I have somewhat of a complex issue, hoping someone here may have some insight. I have a beowulf cluster of systems for a scientific application we run. This cluster consists of 32 diskless slaves running fc4 w/ a monolithic kernel, and a master node running FC4 with a custom kernel. The master issues rsh commands to the slaves to grab a chunk of a job and process it, then return it. My issue is this -- we have an old RH9 cluster that is similar in design, and the rsh commands (measured using `time rsh node2 uname -a`) takes around 0.050s to complete. On the FC4 system, we are running around 0.650-1.35s to complete the same command. I've traced the delay to xinetd or in.rshd, but am at a loss going any further. I've run some straces and I can see the delay occur. I've pasted the straces below for reference. Does anyone have any idea why this delay is happening? These systems are all wired up over gig-e (0.1-0.2ms pings round trip) and running dual 3.4ghz Xeons w/ 2mb cache and 1gb mem in each slave, 4gb mem in the master. There is a lot of processing power here, so I can't see a reason for the delay. The PAM, rhosts, hosts.equiv, etc are all identical among the nodes (and the clusters). Here's the strace of the rsh command to a node: ============================ start strace ========================== [14:51:42 - root@xxxx:/etc] strace -T -tt rsh node2 uname -a 16:18:55.277044 execve("/usr/bin/rsh", ["rsh", "node2", "uname", "-a"], [/* 20 vars */]) = 0 <0.000128> 16:18:55.277525 brk(0) = 0x80003000 <0.000034> 16:18:55.277668 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f67000> 16:18:55.277825 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000034> 16:18:55.277984 open("/etc/ld.so.cache", O_RDONLY) = 3 <0.000037> 16:18:55.278109 fstat64(3, {st_mode=S_IFREG|0644, st_size=60812, ...}) = 0 <0.000031> 16:18:55.278276 old_mmap(NULL, 60812, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f58000 <0.000044> 16:18:55.278415 close(3) = 0 <0.000040> 16:18:55.278546 open("/lib/libcrypt.so.1", O_RDONLY) = 3 <0.000048> 16:18:55.278680 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\7H"..., 512) = 512 <0.000042> 16:18:55.278821 fstat64(3, {st_mode=S_IFREG|0755, st_size=27660, ...}) = 0 <0.000033> 16:18:55.278988 old_mmap(NULL, 184604, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7f2a000> 16:18:55.279131 old_mmap(0xb7f2f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, > 16:18:55.279281 old_mmap(0xb7f31000, 155932, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -> 16:18:55.279423 close(3) = 0 <0.000030> 16:18:55.279538 open("/lib/libutil.so.1", O_RDONLY) = 3 <0.000040> 16:18:55.279662 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\260|\310"..., 512) = 512 <0.0000> 16:18:55.279793 fstat64(3, {st_mode=S_IFREG|0755, st_size=15080, ...}) = 0 <0.000030> 16:18:55.279937 old_mmap(NULL, 12432, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7f26000 > 16:18:55.280067 old_mmap(0xb7f28000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, > 16:18:55.280222 close(3) = 0 <0.000030> 16:18:55.280334 open("/lib/libc.so.6", O_RDONLY) = 3 <0.000035> 16:18:55.280448 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\n_B\000"..., 512) = 512 <0.00003> 16:18:55.280579 fstat64(3, {st_mode=S_IFREG|0755, st_size=1489572, ...}) = 0 <0.000030> 16:18:55.280725 old_mmap(NULL, 1219548, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7dfc00> 16:18:55.280854 old_mmap(0xb7f20000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3,> 16:18:55.280994 old_mmap(0xb7f24000, 7132, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1,> 16:18:55.281128 close(3) = 0 <0.000031> 16:18:55.281251 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7dfb000> 16:18:55.281391 set_thread_area({entry_number:-1 -> 6, base_addr:0xb7dfb9e0, limit:1048575, seg_32bit:1, > 16:18:55.281665 mprotect(0xb7f20000, 8192, PROT_READ) = 0 <0.000035> 16:18:55.281780 mprotect(0xb7f28000, 4096, PROT_READ) = 0 <0.000034> 16:18:55.281892 mprotect(0xb7f2f000, 4096, PROT_READ) = 0 <0.000033> 16:18:55.282008 mprotect(0x409000, 4096, PROT_READ) = 0 <0.000034> 16:18:55.282115 munmap(0xb7f58000, 60812) = 0 <0.000037> 16:18:55.282278 getuid32() = 0 <0.000031> 16:18:55.282424 brk(0) = 0x80003000 <0.000030> 16:18:55.282526 brk(0x80024000) = 0x80024000 <0.000035> 16:18:55.282652 socket(PF_FILE, SOCK_STREAM, 0) = 3 <0.000040> 16:18:55.282764 fcntl64(3, F_GETFL) = 0x2 (flags O_RDWR) <0.000024> 16:18:55.282857 fcntl64(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 <0.000024> 16:18:55.282952 connect(3, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such fi> 16:18:55.283097 close(3) = 0 <0.000027> 16:18:55.283195 socket(PF_FILE, SOCK_STREAM, 0) = 3 <0.000037> 16:18:55.283322 fcntl64(3, F_GETFL) = 0x2 (flags O_RDWR) <0.000033> 16:18:55.283438 fcntl64(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 <0.000031> 16:18:55.283551 connect(3, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such fi> 16:18:55.283706 close(3) = 0 <0.000033> 16:18:55.283823 open("/etc/nsswitch.conf", O_RDONLY) = 3 <0.000035> 16:18:55.283948 fstat64(3, {st_mode=S_IFREG|0644, st_size=1686, ...}) = 0 <0.000030> 16:18:55.284094 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.284213 read(3, "#\n# /etc/nsswitch.conf\n#\n# An ex"..., 4096) = 1686 <0.000046> 16:18:55.284374 read(3, "", 4096) = 0 <0.000031> 16:18:55.284479 close(3) = 0 <0.000031> 16:18:55.284586 munmap(0xb7f66000, 4096) = 0 <0.000035> 16:18:55.284715 open("/etc/ld.so.cache", O_RDONLY) = 3 <0.000034> 16:18:55.284830 fstat64(3, {st_mode=S_IFREG|0644, st_size=60812, ...}) = 0 <0.000030> 16:18:55.284974 old_mmap(NULL, 60812, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f58000 <0.000033> 16:18:55.285113 close(3) = 0 <0.000033> 16:18:55.285238 open("/lib/libnss_files.so.2", O_RDONLY) = 3 <0.000042> 16:18:55.285365 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0 \33\0\000"..., 512) = 512 <0.000> 16:18:55.285518 fstat64(3, {st_mode=S_IFREG|0755, st_size=46552, ...}) = 0 <0.000031> 16:18:55.285669 old_mmap(NULL, 41616, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7df0000 > 16:18:55.285800 old_mmap(0xb7df9000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, > 16:18:55.285949 close(3) = 0 <0.000031> 16:18:55.286073 mprotect(0xb7df9000, 4096, PROT_READ) = 0 <0.000036> 16:18:55.286220 munmap(0xb7f58000, 60812) = 0 <0.000036> 16:18:55.286367 open("/etc/passwd", O_RDONLY) = 3 <0.000034> 16:18:55.286486 fcntl64(3, F_GETFD) = 0 <0.000031> 16:18:55.286590 fcntl64(3, F_SETFD, FD_CLOEXEC) = 0 <0.000031> 16:18:55.286696 fstat64(3, {st_mode=S_IFREG|0644, st_size=2107, ...}) = 0 <0.000031> 16:18:55.286840 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.286949 read(3, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 2107 <0.000045> 16:18:55.287095 close(3) = 0 <0.000031> 16:18:55.287198 munmap(0xb7f66000, 4096) = 0 <0.000037> 16:18:55.287319 open("/etc/services", O_RDONLY) = 3 <0.000027> 16:18:55.287424 fcntl64(3, F_GETFD) = 0 <0.000024> 16:18:55.287513 fcntl64(3, F_SETFD, FD_CLOEXEC) = 0 <0.000023> 16:18:55.287601 fstat64(3, {st_mode=S_IFREG|0644, st_size=20385, ...}) = 0 <0.000023> 16:18:55.287730 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.287823 read(3, "# /etc/services:\n# $Id: services"..., 4096) = 4096 <0.000037> 16:18:55.287991 read(3, "otocol\nntp\t\t123/tcp\nntp\t\t123/udp"..., 4096) = 4096 <0.000028> 16:18:55.288156 read(3, "67/udp\nrsync\t\t873/tcp\t\t\t\t# rsync"..., 4096) = 4096 <0.000027> 16:18:55.288269 close(3) = 0 <0.000023> 16:18:55.288358 munmap(0xb7f66000, 4096) = 0 <0.000027> 16:18:55.288456 getpid() = 10358 <0.000023> 16:18:55.288577 gettimeofday({1130977135, 288621}, NULL) = 0 <0.000023> 16:18:55.288674 open("/etc/resolv.conf", O_RDONLY) = 3 <0.000027> 16:18:55.288776 fstat64(3, {st_mode=S_IFREG|0644, st_size=68, ...}) = 0 <0.000023> 16:18:55.288905 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.288999 read(3, "search accentopto.com\nnameserver"..., 4096) = 68 <0.000036> 16:18:55.289151 read(3, "", 4096) = 0 <0.000031> 16:18:55.289261 close(3) = 0 <0.000031> 16:18:55.289367 munmap(0xb7f66000, 4096) = 0 <0.000034> 16:18:55.289488 socket(PF_FILE, SOCK_STREAM, 0) = 3 <0.000034> 16:18:55.289596 fcntl64(3, F_GETFL) = 0x2 (flags O_RDWR) <0.000031> 16:18:55.289702 fcntl64(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 <0.000032> 16:18:55.289811 connect(3, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such fi> 16:18:55.289965 close(3) = 0 <0.000033> 16:18:55.290076 socket(PF_FILE, SOCK_STREAM, 0) = 3 <0.000036> 16:18:55.290185 fcntl64(3, F_GETFL) = 0x2 (flags O_RDWR) <0.000031> 16:18:55.290290 fcntl64(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 <0.000031> 16:18:55.290399 connect(3, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such fi> 16:18:55.290555 close(3) = 0 <0.000037> 16:18:55.290670 open("/etc/host.conf", O_RDONLY) = 3 <0.000034> 16:18:55.290786 fstat64(3, {st_mode=S_IFREG|0644, st_size=17, ...}) = 0 <0.000031> 16:18:55.290930 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.291038 read(3, "order hosts,bind\n", 4096) = 17 <0.000039> 16:18:55.291163 read(3, "", 4096) = 0 <0.000031> 16:18:55.291267 close(3) = 0 <0.000030> 16:18:55.291370 munmap(0xb7f66000, 4096) = 0 <0.000035> 16:18:55.291480 open("/etc/hosts", O_RDONLY) = 3 <0.000037> 16:18:55.291596 fcntl64(3, F_GETFD) = 0 <0.000031> 16:18:55.291699 fcntl64(3, F_SETFD, FD_CLOEXEC) = 0 <0.000031> 16:18:55.291809 fstat64(3, {st_mode=S_IFREG|0644, st_size=1819, ...}) = 0 <0.000030> 16:18:55.291953 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.292061 read(3, "# Do not remove the following li"..., 4096) = 1819 <0.000053> 16:18:55.292227 close(3) = 0 <0.000031> 16:18:55.292332 munmap(0xb7f66000, 4096) = 0 <0.000034> 16:18:55.292447 open("/etc/hosts", O_RDONLY) = 3 <0.000033> 16:18:55.292559 fcntl64(3, F_GETFD) = 0 <0.000031> 16:18:55.292663 fcntl64(3, F_SETFD, FD_CLOEXEC) = 0 <0.000031> 16:18:55.292766 fstat64(3, {st_mode=S_IFREG|0644, st_size=1819, ...}) = 0 <0.000030> 16:18:55.292910 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f66000 <0> 16:18:55.293018 read(3, "# Do not remove the following li"..., 4096) = 1819 <0.000035> 16:18:55.293142 close(3) = 0 <0.000031> 16:18:55.293249 munmap(0xb7f66000, 4096) = 0 <0.000034> 16:18:55.293365 rt_sigprocmask(SIG_BLOCK, [URG], [], 8) = 0 <0.000032> 16:18:55.293492 socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3 <0.000038> 16:18:55.293609 bind(3, {sa_family=AF_INET, sin_port=htons(1023), sin_addr=inet_addr("0.0.0.0")}, 16) = -> 16:18:55.293738 bind(3, {sa_family=AF_INET, sin_port=htons(1022), sin_addr=inet_addr("0.0.0.0")}, 16) = 0> 16:18:55.293855 fcntl64(3, F_SETOWN, 10358) = 0 <0.000031> 16:18:55.293963 connect(3, {sa_family=AF_INET, sin_port=htons(514), sin_addr=inet_addr("192.168.101.2")},> 16:18:55.294279 socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 4 <0.000036> 16:18:55.294399 bind(4, {sa_family=AF_INET, sin_port=htons(1021), sin_addr=inet_addr("0.0.0.0")}, 16) = 0> 16:18:55.294522 listen(4, 1) = 0 <0.000040> 16:18:55.294636 write(3, "1021\0", 5) = 5 <0.000037> 16:18:55.294754 poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN, revents=POLLIN}], 2, -1) = 1 <0.002044> 16:18:55.296918 accept(4, {sa_family=AF_INET, sin_port=htons(1023), sin_addr=inet_addr("192.168.101.2")},> 16:18:55.297084 close(4) = 0 <0.000038> 16:18:55.297196 writev(3, [{"root\0", 5}, {"root\0", 5}, {"uname -a\0", 9}], 3) = 19 <0.000038> 16:18:55.297332 read(3, "\0", 1) = 1 <0.632181> 16:18:55.929633 rt_sigprocmask(SIG_SETMASK, [], [URG], 8) = 0 <0.000039> 16:18:55.929763 setuid32(0) = 0 <0.000039> 16:18:55.929866 rt_sigprocmask(SIG_BLOCK, [INT QUIT TERM], [], 8) = 0 <0.000047> 16:18:55.929991 rt_sigaction(SIGINT, {SIG_IGN}, {SIG_DFL}, 8) = 0 <0.000044> 16:18:55.930116 rt_sigaction(SIGINT, {0x80000d44, [INT], SA_RESTART}, {SIG_IGN}, 8) = 0 <0.000051> 16:18:55.930253 rt_sigaction(SIGQUIT, {SIG_IGN}, {SIG_DFL}, 8) = 0 <0.000046> 16:18:55.930377 rt_sigaction(SIGQUIT, {0x80000d44, [QUIT], SA_RESTART}, {SIG_IGN}, 8) = 0 <0.000042> 16:18:55.930499 rt_sigaction(SIGTERM, {SIG_IGN}, {SIG_DFL}, 8) = 0 <0.000049> 16:18:55.930627 rt_sigaction(SIGTERM, {0x80000d44, [TERM], SA_RESTART}, {SIG_IGN}, 8) = 0 <0.000045> 16:18:55.930758 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=> 16:18:55.931071 ioctl(5, FIONBIO, [1]) = 0 <0.000037> 16:18:55.931190 ioctl(3, FIONBIO, [1]) = 0 <0.000037> 16:18:55.931304 rt_sigprocmask(SIG_SETMASK, [], [INT QUIT TERM], 8) = 0 <0.000038> 16:18:55.931429 select(6, [3 5], NULL, NULL, NULL) = 2 (in [3 5]) <0.012086> 16:18:55.943612 read(5, "", 8192) = 0 <0.000043> 16:18:55.943722 read(3, "Linux node2.pcm3.cluster 2.6.13."..., 8192) = 95 <0.000039> 16:18:55.943842 write(1, "Linux node2.pcm3.cluster 2.6.13."..., 95Linux node2.pcm3.cluster 2.6.13.2 #1 SMx ) = 95 <0.000038> 16:18:55.943930 select(6, [3], NULL, NULL, NULL) = 1 (in [3]) <0.000008> 16:18:55.943977 read(3, "", 8192) = 0 <0.000007> 16:18:55.944018 kill(10359, SIGKILL) = 0 <0.000008> 16:18:55.944096 exit_group(0) = ? ==================================== end strace ================================= Here you can clearly see the delay happen: <cut and paste section of interest from above> 16:18:55.297196 writev(3, [{"root\0", 5}, {"root\0", 5}, {"uname -a\0", 9}], 3) = 19 <0.000038> 16:18:55.297332 read(3, "\0", 1) = 1 <0.632181> 16:18:55.929633 rt_sigprocmask(SIG_SETMASK, [], [URG], 8) = 0 <0.000039> 16:18:55.929763 setuid32(0) = 0 <0.000039> <end cut and paste> It looks like it takes .63s to write the data to the socket and get the response, which I find hard to fathom (especially since anything outside of xinetd's realm appears to be really fast over the network). Thoughts? -Tim