[Beowulf] MPICH2 + PVFS2 + Help needed urgently.
Michael Gauckler
maillists at gauckler.ch
Wed Jun 1 13:33:39 PDT 2005
Dear Lists,
I am having problems with the performance of MPICH2 and PVFS2.
The program attached below should write 136 MB chunks of data to a
2.7 GB file on a PVFS2 mount.
Unfortunately, the performance is so poor that my program never
finishes. PVFS2 performance is not great, but at 122 MB/s (see the
mpi-io-test numbers below) a 136 MB chunk should finish quickly.
If someone could run this test on their machine and give me an estimate
of the runtime, or hints about where the problem might be, I would be
more than happy! I need to locate the problem: my code, MPICH2, ROMIO,
or PVFS2.
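To narrow down where the time goes, one option (only a sketch on my part,
using standard MPI calls; the helper name timed_write_all is made up) is
to wrap each collective write in MPI_Barrier/MPI_Wtime and print the
per-iteration bandwidth. Such a helper could be called in place of the
bare MPI_File_write_all in the write loop of mpicube.cpp further down;
if every iteration is equally slow, the subarray/collective path is
suspect, while a single stalled iteration points elsewhere.

#include <mpi.h>
#include <iostream>

// Hypothetical helper (not part of mpicube.cpp): time one collective write
// and report the bandwidth this rank saw. 'count' is the number of doubles.
void timed_write_all(MPI_File fh, double *buf, int count, int rank)
{
    MPI_Barrier(MPI_COMM_WORLD);   // line the ranks up so timings are comparable
    double t0 = MPI_Wtime();
    int err = MPI_File_write_all(fh, buf, count, MPI_DOUBLE, MPI_STATUS_IGNORE);
    double t1 = MPI_Wtime();

    if (err != MPI_SUCCESS)
        std::cerr << "rank " << rank << ": write failed" << std::endl;

    double mb = (double)count * sizeof(double) / (1024.0 * 1024.0);
    std::cout << "rank " << rank << ": " << mb << " MB in " << (t1 - t0)
              << " s = " << mb / (t1 - t0) << " MB/s" << std::endl;
}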
Sincerely yours,
Michael
___
System configuration
40 dual Xeon 3.0 GHz nodes, all acting as PVFS2 data servers. Gigabit Ethernet.
Software RAID on 2 SCSI disks.
Debian Sarge: Linux 2.6.8-2-686-smp #1 SMP Mon Jan 24 02:32:52 EST 2005 i686 GNU/Linux
___
Performance of PVFS2:
mpdrun -np 2 ./mpi-io-test
# Using mpi-io calls.
nr_procs = 2, nr_iter = 1, blk_sz = 16777216
# total_size = 33554432
# Write: min_t = 0.045768, max_t = 0.274489, mean_t = 0.160128, var_t = 0.026157
# Read:  min_t = 0.023897, max_t = 0.038090, mean_t = 0.030993, var_t = 0.000101
Write bandwidth = 122.243300 Mbytes/sec
Read bandwidth = 880.925184 Mbytes/sec
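For comparison, here is a minimal self-contained baseline (my sketch, not
part of the original test; the file name baseline_testfile.dat is made up
and simply mirrors the pvfs2: path used below) in which each rank writes
one contiguous block of the same size as one mpicube iteration (~137 MB)
at its own offset with MPI_File_write_at_all, i.e. no subarray file view.
If this runs at roughly the mpi-io-test rate above while mpicube does not,
the problem is in the noncontiguous collective path (ROMIO) rather than in
PVFS2 itself.

// baseline.cpp -- contiguous-write baseline, one block per rank
#include <mpi.h>
#include <iostream>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    // same element count as one mpicube write: 600*12*10*250 doubles (~137 MB)
    const long long count = 600LL * 12 * 10 * 250;
    double *buf = new double[count];
    for (long long i = 0; i < count; i++)
        buf[i] = 3.1415;

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "pvfs2:/mnt/pvfs2/baseline_testfile.dat",
                  MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);

    // each rank writes its block at a disjoint, contiguous offset
    MPI_Offset offset = (MPI_Offset)rank * count * sizeof(double);
    MPI_Barrier(MPI_COMM_WORLD);
    double t0 = MPI_Wtime();
    MPI_File_write_at_all(fh, offset, buf, (int)count, MPI_DOUBLE, MPI_STATUS_IGNORE);
    double t1 = MPI_Wtime();

    double mb = (double)count * sizeof(double) / (1024.0 * 1024.0);
    std::cout << "rank " << rank << ": " << mb << " MB in " << (t1 - t0)
              << " s = " << mb / (t1 - t0) << " MB/s" << std::endl;

    MPI_File_close(&fh);
    delete[] buf;
    MPI_Finalize();
    return 0;
}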
___
Command line used to run the program given below:
mpdrun -1 -np 2 ./mpicube
___
Programm "mpicube.cpp":
#include "mpi.h"
#include <stdio.h>
#include <stdexcept>
#include <stdlib.h>
#include <sstream>
#include <iostream>
char filename[] = "pvfs2:/mnt/pvfs2/mpicube_testfile.dat";
// the following lines might not be needed if not linked with the
boost library
namespace boost
{
void assertion_failed(char const * expr, char const * function,
char const * file, long line)
{
std::ostringstream ss;
ss << "BOOST_ASSERT failed for expr " << expr << ", function "
<< function << " in file " << file << " at line " << line <<
std::endl;
throw std::runtime_error(ss.str());
}
}
int main( int argc, char *argv[] )
{
int rank;
int err;
int worldsize;
MPI_Offset headerOffset = 0;
MPI_File fh;
MPI_Datatype filetype;
MPI_Datatype datatype = MPI_DOUBLE;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
printf("Hello world from process %d of %d with filename %s\n",
rank, worldsize, filename);
int iterations = 10;
int extent0 = 600;
int extent1 = 12;
int extent2 = 10;
int numSamples = 5000;
int numSamplesPerBlock = numSamples / worldsize / iterations;
int numIterConcurrent = 1;
int numFinalConcurrent = 0;
int groupColor = 0;
int current;
int gsizes[4];
int lsizes[4];
int indices[4];
gsizes[0] = extent0;
gsizes[1] = extent1;
gsizes[2] = extent2;
gsizes[3] = numSamples;
lsizes[0] = extent0;
lsizes[1] = extent1;
lsizes[2] = extent2;
lsizes[3] = numSamplesPerBlock;
indices[0] = 0;
indices[1] = 0;
indices[2] = 0;
MPI_Comm groupcomm = MPI_COMM_WORLD;
std::cout << "opening file <" << filename << ">" << std::flush <<
std::endl;
MPI_File_open(groupcomm, filename, MPI_MODE_RDWR |
MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &fh);
std::cout << "opened file" << std::flush << std::endl;
// number of elements of type T to be stored
long long lcubesize = lsizes[0]*lsizes[1]*lsizes[2]*lsizes[3];
long long gcubesize = gsizes[0]*gsizes[1]*gsizes[2]*gsizes[3];
std::cout << "local cube size * 8 = " << (long long)lcubesize /
1024 / 1024 * 8 << " MB " << std::flush << std::endl;
std::cout << "global cube size * 8 = " << (long long)gcubesize /
1024 / 1024 * 8 << " MB " << std::flush << std::endl;
double *cube = new double[extent0 * extent1 * extent2 *
numSamplesPerBlock];
for(int j = 0; j < extent0 * extent1 * extent2 *
numSamplesPerBlock; j++)
cube[j] = 3.1415;
for(int i = 0; i < iterations; i++){
indices[3] = (i + rank*iterations)*numSamplesPerBlock;
std::cout << "iteration = " << i << std::endl;
std::cout << "indices[3] = " << indices[3] << std::endl;
// create a data type to get desired view of file
err = MPI_Type_create_subarray(4, gsizes, lsizes, indices,
MPI_ORDER_C, MPI_DOUBLE, &filetype);
if (err != MPI_SUCCESS)
std::cerr << "could not create subarray" << std::endl;
err = MPI_Type_commit(&filetype);
if (err != MPI_SUCCESS)
std::cerr << "could not commit datatype" << std::endl;
std::cout << "writeSubCube: setting view" << std::endl;
// store the view into file
err = MPI_File_set_view(fh, 0, datatype, filetype, "native",
MPI_INFO_NULL);
if (err != MPI_SUCCESS)
std::cerr << "could not set view" << std::endl;
std::cout << "allocating cube" << std::endl;
std::cout << "starting write all" << std::endl;
err = MPI_File_write_all(fh, &cube[0], lcubesize, datatype,
MPI_STATUS_IGNORE);
if (err != MPI_SUCCESS)
std::cerr << "could not write to file" << std::endl;
std::cout << "done write all" << std::endl;
err = MPI_Type_free(&filetype);
if (err != MPI_SUCCESS)
std::cerr << "could not free datatype" << std::endl;
}
MPI_File_close(&fh);
std::cout << "closed file" << std::flush << std::endl;
MPI_Finalize();
return 0;
}
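One further experiment (again only a sketch on my part; the hint keys are
documented ROMIO hints, but whether they help on this particular PVFS2
setup is an assumption to be tested, not a known fix) is to open the file
with an MPI_Info object instead of MPI_INFO_NULL and steer ROMIO's
collective buffering and data sieving, which usually dominate the cost of
noncontiguous collective writes like the subarray view above:

// sketch: open the file with ROMIO hints instead of MPI_INFO_NULL
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    MPI_Info info;
    MPI_Info_create(&info);
    MPI_Info_set(info, "romio_cb_write", "enable");    // force collective buffering on writes
    MPI_Info_set(info, "cb_buffer_size", "16777216");  // 16 MB collective buffer
    MPI_Info_set(info, "romio_ds_write", "disable");   // turn off write data sieving

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "pvfs2:/mnt/pvfs2/mpicube_testfile.dat",
                  MPI_MODE_RDWR | MPI_MODE_CREATE, info, &fh);

    /* ... set the file view and write as in mpicube.cpp ... */

    MPI_File_close(&fh);
    MPI_Info_free(&info);
    MPI_Finalize();
    return 0;
}

Varying values such as cb_buffer_size and watching whether the write
bandwidth reacts at all would at least confirm whether ROMIO's two-phase
collective path is where the time is being spent.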