Beowulf vs Dual
Mark Hahn
hahn at coffee.psychology.mcmaster.ca
Sat Apr 21 00:15:18 PDT 2001
> There is a significant difference for many applications. A dual has
> no "latency" in communication, while a beowulf pair would have to
> deal with typical ethernet latency.
a dual very definitely has measurable, even *significant* latency.
I just hacked up the attached program, which does a very simple-minded
ping-pong latency test between two CPUs. on my (admittedly cheesy)
dual celeron/366, it takes around 300 clock ticks (.82 us) for one
thread to notice that the other has changed a flag, and respond.
(that's a ping-pong round trip, so the one-way latency is half that.) sure, this is a hack,
and my code sucks, but that's still nontrivial.
note that there are cluster interconnects that claim latencies
in the 2-4 us range. a simple ping-pong with small UDP packets
over cheap-o 100bT shows around 120 us latency.
anyway, it's a mistake to think of IPC on SMP being so low-latency
that you can ignore it.
regards, mark hahn
-------------- next part --------------
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <myio.H>
// 64-bit unsigned integer, wide enough to hold the full time-stamp counter.
typedef unsigned long long u64;

// Time-stamp-counter calibration results; both are filled in by calibrate().
double ticksPerSecond, secondsPerTick;

// Read the x86 time-stamp counter and return it as a 64-bit tick count.
//
// RDTSC delivers the counter in two 32-bit halves: the low half in EAX and
// the high half in EDX. The halves are requested as two separate outputs
// ("=a" and "=d") and combined by hand. The single "=A" constraint only
// describes the EDX:EAX pair on 32-bit x86; on x86-64 it lets the compiler
// pick just one of the two registers, which silently drops half the counter.
static inline u64 rdtsc(void) {
    unsigned int lo, hi;
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ((u64)hi << 32) | lo;
}
// Return the current wall-clock time, as reported by gettimeofday(), in
// seconds with the microsecond field folded in as the fractional part.
double second() {
    struct timeval now;
    double whole_seconds;
    double fraction;

    gettimeofday(&now, 0);
    whole_seconds = now.tv_sec;
    fraction = 1e-6 * now.tv_usec;
    return whole_seconds + fraction;
}
// Sleep for roughly `us` microseconds.
//
// select() is called with no descriptor sets at all, so the only thing it
// can wait for is the timeout. Its return value is deliberately ignored:
// this is a best-effort delay, and the caller measures the real elapsed
// time itself.
void selectsleep(unsigned us) {
    struct timeval tv;
    // Split into whole seconds and a sub-second remainder so tv_usec always
    // stays below 1000000; a larger tv_usec is not a valid timeout on every
    // system and may make select() fail instead of sleeping.
    tv.tv_sec = us / 1000000;
    tv.tv_usec = us % 1000000;
    select(0,0,0,0,&tv);
}
void calibrate() {
double sumx = 0;
double sumy = 0;
double sumxx = 0;
double sumxy = 0;
double slope;
// least squares linear regression of ticks onto real time
// as returned by gettimeofday.
const unsigned n = 30;
unsigned i;
for (i=0; i<n; i++) {
double breal,real,ticks;
u64 bticks,aticks;
breal = second();
bticks = rdtsc();
selectsleep((unsigned)(10000 + drand48() * 100000));
aticks = rdtsc();
real = second() - breal;
ticks = aticks - bticks;
sumx += real;
sumxx += real * real;
sumxy += real * ticks;
sumy += ticks;
}
slope = ((sumxy - (sumx*sumy) / n) /
(sumxx - (sumx*sumx) / n));
ticksPerSecond = slope;
secondsPerTick = 1.0 / slope;
printf("%3.3f MHz\n",ticksPerSecond*1e-6);
}
// Measure the round-trip ("ping-pong") time between two processes that
// signal each other through a single word of shared memory.
//
// After calibrating the tick rate, the program maps one anonymous
// MAP_SHARED page and forks. The child loops forever: it stores 1 in the
// shared word and spins until the word is no longer 1. The parent does the
// mirror image 1000 times: it stores 0, spins until the word is no longer 0,
// and times each exchange with rdtsc(). The smallest time seen is printed in
// ticks and in microseconds, and the child is killed.
int main() {
    calibrate();

    volatile unsigned *p;
    p = (volatile unsigned *) mmap(0,
                                   4096,
                                   PROT_READ|PROT_WRITE,
                                   MAP_SHARED|MAP_ANONYMOUS,
                                   -1, 0);
    if (p == MAP_FAILED)
        cerr << "mmap failed" << perr << fatal;

    pid_t pid = fork();
    switch(pid) {
    case -1:
        // NOTE(review): there is no break here, so this relies on `fatal`
        // (from myio.H, not visible in this file) ending the process;
        // otherwise control would fall into the child loop -- confirm.
        cerr << "fork failed" << perr << fatal;
    case 0:
        // Child: answer every 0 written by the parent with a 1, forever.
        // It only stops when the parent sends SIGKILL.
        while (1) {
            *p = 1;
            while (*p == 1);
        }
        break;
    default:
        break;
    }

    const unsigned times = 1000;
    u64 min = -1;   // all bits set: the largest u64, so any sample is smaller
    for (unsigned i=0; i<times; i++) {
        u64 before = rdtsc();
        *p = 0;             // ping
        while (*p == 0);    // spin until the child's pong arrives
        u64 ticks = rdtsc() - before;
        if (ticks < min) min = ticks;
    }
    kill(pid,SIGKILL);

    // Message fixed: "minumum" was misspelled and the opening parenthesis
    // was never closed.
    cout << "minimum of " << min << " ticks ("
         << 1e6 * min * secondsPerTick << " microseconds)\n";
    return 0;
}
More information about the Beowulf
mailing list