[scyld-users] Problem running Charmm on Scyld cluster
Andre Kerstens
akerstens at utep.edu
Tue Mar 14 16:29:40 PST 2006
Hello all,
We have recently bought a Penguin cluster with Scyld release 29cz
(29cz-5u0001 200506091805) on it and are trying to get a statically
compiled version of Charmm to run on the nodes. The problem is that
Charmm runs fine on the master node, but segfaults as soon as it is
migrated to a compute node. From the strace below you can see that the
segfault happens after the library /lib64/ld-linux-x86-64.so.2 cannot be
found, even though it exists on the master node and is listed for export
to the nodes in /etc/beowulf/config.
[akerstens at cluster 3ptb_1000110]$ bpsh 1 ./strace ./charmm64
execve("./charmm64", ["./charmm64"], [/* 22 vars */]) = 0
uname({sys="Linux", node=".1", ...}) = 0
brk(0) = 0x17a6dcc0
brk(0x17a8ecc0) = 0x17a8ecc0
brk(0x17a8f000) = 0x17a8f000
times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 180579094
times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 180579094
times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 180579094
access("charmm.inp", F_OK) = 0
open("charmm.inp", O_RDWR) = 3
fstat(3, {st_mode=S_IFREG|0775, st_size=23616, ...}) = 0
access("charmm.out", F_OK) = -1 ENOENT (No such file or
directory)
open("charmm.out", O_RDWR|O_CREAT|O_TRUNC, 0666) = 4
fstat(4, {st_mode=S_IFREG|0664, st_size=0, ...}) = 0
open("/etc/localtime", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=877, ...}) = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=877, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2a95556000
read(5, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0"...,
4096) = 877
close(5) = 0
munmap(0x2a95556000, 4096) = 0
readlink("/proc/self/fd/0", "socket:[5816]", 511) = 13
ioctl(0, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fbfffee90) = -1 EINVAL
(Invalid argument)
getuid() = 500
socket(PF_UNIX, SOCK_STREAM, 0) = 5
fcntl(5, F_GETFL) = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1
ENOENT (No such file or directory)
close(5) = 0
socket(PF_UNIX, SOCK_STREAM, 0) = 5
fcntl(5, F_GETFL) = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1
ENOENT (No such file or directory)
close(5) = 0
open("/etc/nsswitch.conf", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=175, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2a95556000
read(5, "# Generated by node_up for Scyld"..., 4096) = 175
read(5, "", 4096) = 0
close(5) = 0
munmap(0x2a95556000, 4096) = 0
open("/etc/ld.so.cache", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=144816, ...}) = 0
mmap(NULL, 144816, PROT_READ, MAP_PRIVATE, 5, 0) = 0x2a95556000
close(5) = 0
open("/lib64/libnss_beo.so.2", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20(\0\0"...,
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=40535, ...}) = 0
mmap(NULL, 1079320, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5,
0) = 0x2a9557a000
madvise(0x2a9557a000, 1079320, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a95581000, 1050648, PROT_NONE) = 0
mmap(0x2a95681000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x7000) = 0x2a95681000
close(5) = 0
open("/lib64/libc.so.6", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\327"...,
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=1567579, ...}) = 0
mmap(NULL, 2377064, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5,
0) = 0x2a95682000
madvise(0x2a95682000, 2377064, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a957bd000, 1086824, PROT_NONE) = 0
mmap(0x2a958bd000, 20480, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x13b000) = 0x2a958bd000
mmap(0x2a958c2000, 17768, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958c2000
close(5) = 0
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file
or directory)
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file
or directory)
stat("/lib64", {st_mode=S_IFDIR|0755, st_size=440, ...}) = 0
open("/usr/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such
file or directory)
stat("/usr/lib64", {st_mode=S_IFDIR|0755, st_size=460, ...}) = 0
munmap(0x2a95556000, 144816) = 0
munmap(0x2a9557a000, 1079320) = 0
munmap(0x2a95682000, 2377064) = 0
open("/etc/ld.so.cache", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=144816, ...}) = 0
mmap(NULL, 144816, PROT_READ, MAP_PRIVATE, 5, 0) = 0x2a95556000
close(5) = 0
open("/lib64/libnss_bproc.so.2", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0%\0\0\0"...,
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=30705, ...}) = 0
mmap(NULL, 1070784, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5,
0) = 0x2a9557a000
madvise(0x2a9557a000, 1070784, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a95580000, 1046208, PROT_NONE) = 0
mmap(0x2a9567f000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x5000) = 0x2a9567f000
close(5) = 0
open("/lib64/libc.so.6", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\327"...,
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=1567579, ...}) = 0
mmap(NULL, 2377064, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5,
0) = 0x2a95680000
madvise(0x2a95680000, 2377064, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a957bb000, 1086824, PROT_NONE) = 0
mmap(0x2a958bb000, 20480, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x13b000) = 0x2a958bb000
mmap(0x2a958c0000, 17768, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958c0000
close(5) = 0
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file
or directory)
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file
or directory)
open("/usr/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such
file or directory)
munmap(0x2a95556000, 144816) = 0
munmap(0x2a9557a000, 1070784) = 0
munmap(0x2a95680000, 2377064) = 0
uname({sys="Linux", node=".1", ...}) = 0
getpid() = 3646
open("/etc/resolv.conf", O_RDONLY) = -1 ENOENT (No such file or
directory)
uname({sys="Linux", node=".1", ...}) = 0
stat("/etc/resolv.conf", 0x7fbfffedc0) = -1 ENOENT (No such file or
directory)
socket(PF_UNIX, SOCK_STREAM, 0) = 5
fcntl(5, F_GETFL) = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1
ENOENT (No such file or directory)
close(5) = 0
socket(PF_UNIX, SOCK_STREAM, 0) = 5
fcntl(5, F_GETFL) = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1
ENOENT (No such file or directory)
close(5) = 0
open("/etc/ld.so.cache", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=144816, ...}) = 0
mmap(NULL, 144816, PROT_READ, MAP_PRIVATE, 5, 0) = 0x2a95556000
close(5) = 0
open("/lib64/libnss_files.so.2", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200%\0\0"...,
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=57649, ...}) = 0
mmap(NULL, 1096200, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5,
0) = 0x2a9557a000
madvise(0x2a9557a000, 1096200, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a95586000, 1047048, PROT_NONE) = 0
mmap(0x2a95685000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0xb000) = 0x2a95685000
close(5) = 0
open("/lib64/libc.so.6", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\327"...,
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=1567579, ...}) = 0
mmap(NULL, 2377064, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5,
0) = 0x2a95686000
madvise(0x2a95686000, 2377064, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a957c1000, 1086824, PROT_NONE) = 0
mmap(0x2a958c1000, 20480, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x13b000) = 0x2a958c1000
mmap(0x2a958c6000, 17768, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958c6000
close(5) = 0
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file
or directory)
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file
or directory)
open("/usr/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such
file or directory)
munmap(0x2a95556000, 144816) = 0
munmap(0x2a9557a000, 1096200) = 0
munmap(0x2a95686000, 2377064) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
Master node:
[akerstens at sacagawea 3ptb_1000110]$ ll /lib64/ld-linux-x86-64.so.2
lrwxrwxrwx 1 root root 11 Sep 29 21:18
/lib64/ld-linux-x86-64.so.2 -> ld-2.3.2.so
[akerstens at sacagawea 3ptb_1000110]$ ll /lib64/ld-2.3.2.so
-rwxr-xr-x 1 root root 100772 May 13 2005 /lib64/ld-2.3.2.so
Since the Charmm binary is statically linked and should not itself need
the dynamic loader, it seems that bpsh is looking for this library and
somehow cannot find it on the compute nodes.
Has anybody had this problem before, and does anyone know what is going
on? Any help is appreciated, as I have not received any pointers from
Penguin support in the 2 weeks since I asked.
Thanks
Andre Kerstens
--
--------------------------------------------------------------
Andre Kerstens
The University of Texas at El Paso
College of Engineering
The best way to predict the future is to invent it.
--- Alan Kay
---------------------------------------------------------------
More information about the Scyld-users mailing list