Comentarios: Esta version del codigo es
best effort en el sentido de que he usado todas las posibilidades de C/C++ para obtener un codigo lo mas rapido posible. Por un lado esto hace que la comparacion con otros programas pueda ser un poco distorsionada ya que incluso en modo secuencial hay una gran ganancia. En principio el hacer el codigo muy rapido hace mas dificil obtener altas eficiencias en la paralelizacion, ya que el tiempo de comunicacion/sincronizacion pesa mas con respecto al de calculo. De todas formas he obtenido eficiencias del orden del 98% usando balance de carga en tableros de 4000x4000 celdas. (ver
ProcessorPerformance)
// Copyright (C) 1999-2002, Mario Alberto Storti, Centro
// Internacional de Metodos Numericos en Ingenieria
// (CIMEC-Argentina), Universidad Nacional del Litoral
// (UNL-Argentina), Consejo Nacional de Investigaciones Cientificas y
// Tecnicas (UNL-Argentina).
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307,
// USA.
// $Id: GameOfLife.txt,v 1.4 2014/09/05 23:06:01 MarioStorti Exp $
#define _GNU_SOURCE
#include <mpi.h>
#include <cstdio>
#include <cassert>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <cstring>
#include <vector>
#include <cmath>
#include <time.h>
#include <sys/time.h>
// Simulates the `Game of Life' in parallel using MPI.
/// This represents the board.
class Board {
private:
// N is the size of the board, so that it has N x N cells NN = N+2
// is the size with two rows added at each end for simulating the
// walls of the board. We use a 0-based (C-style) indices i,j for
// rows and columns. Each processor owns a range of consecutive
// rowns with indices i1<= i <i2. Currently the number of rows in
// each processor is almost the same (no load balance). For
// instance, if N=20 and size=2, then i1=0, i2=10 in processor 0 and
// i1=10, i2=20 in processor 1. So that each processor owns i2-i1
// rows. We store a board that has one additional row at each end
// (so called `ghost' rows), so that in fact at each processor we
// store i2-i1+2 rows. We define I1=i1-1 and I2=i2+1 so that the
// rows stored at this processor are I1 <= i < I2. Ghost rows I1 and
// I2-1 are computed at processors `myrank-1' and `myrank+1'. So
// that they have to be transmitted at each time step with MPI. The
// flag `periodic' indicates whether we use periodic boundary
// conditions or not (not used currently though!!).
int N,NN,i1,i2,I1,I2,size,myrank,periodic;
// Weights of processors
vector<double> w;
// Number of rows in each processor
vector<int> rows_proc;
// This arraus store the board. `board2' is a copy of the board for
// when updating the board.
char **board,*row1,*row2;
// returns the position in the board of cell j,k
inline char *pos(int j, int k) { return &board[(k-I1)][j+1]; }
void print_row(int k);
public:
// Read weights file from "weights.dat"
void read_weights_file(int flag);
// Initializes the board.
void init(int N_,int periodic=0);
// Sets the the state of a cell.
void setv(int j,int k,int val) {
if (k>=i1 && k<i2) *pos(j,k) = val;
}
// Prints the board
void print();
// Advances the board one generation.
void advance();
Board() { board = NULL; }
~Board();
};
void Board::read_weights_file(int flag) {
// Read processors speed from file
double r;
char *line=NULL;
size_t n=0;
w.clear();
if (flag) {
// if (myrank==0) printf("Reading weights file\n");
FILE *fid = fopen("weights.dat","r");
assert(fid);
int p=0;
while (1) {
int nread = getline(&line,&n,fid);
if (nread == EOF) break;
sscanf(line,"%lf",&r);
// if (myrank==0) printf("[%d] weight %f",++p,r);
w.push_back(r);
}
fclose(fid);
if (line) free(line);
}
}
void Board::print() {
// This is done in parallel and may not work
// correctly sometimes.
// Loop over processors. This is synchronized by the barrier below.
for (int r=0; r<size; r++ ) {
if (r==myrank) {
for (int k=i1; k<i2; k++) {
char *p = pos(0,k), *pe=p+N;
while (p<pe) {
putc((*p++ ? '*' : '.'),stdout);
putc(' ',stdout);
}
putc('\n',stdout);
}
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
if (myrank==0) fflush(stdout);
}
if (myrank==0) printf("\n");
}
void Board::print_row(int k) {
char *p = pos(0,k), *pe = p+N;
printf("row %3d -> ",k);
while (p < pe) {
putc((*p++ ? '*' : '.'),stdout);
putc(' ',stdout);
}
putc('\n',stdout);
}
// Computes next generation
void Board::advance() {
#define SEND(row,proc) \
MPI_Send(pos(0,(row)),N,MPI_CHAR,(proc),0,MPI_COMM_WORLD)
#define RECEIVE(row,proc) \
MPI_Recv(pos(0,(row)),N,MPI_CHAR,(proc),MPI_ANY_TAG,MPI_COMM_WORLD,&stat)
// First stage 2j -> 2j+1
MPI_Status stat;
if (myrank % 2 == 0 ) {
if (myrank<size-1) {
SEND(i2-1,myrank+1);
RECEIVE(I2-1,myrank+1);
}
if (myrank>0) {
SEND(i1,myrank-1);
RECEIVE(I1,myrank-1);
}
} else {
RECEIVE(I1,myrank-1);
SEND(i1,myrank-1);
if (myrank<size-1) {
RECEIVE(I2-1,myrank+1);
SEND(i2-1,myrank+1);
}
}
// Initialize row2
{ char *q = row2, *qe = q+NN;
while (q<qe) *q++ = 0;
}
// Loop over rows in this processor (not included the `ghost' ones
// (I1 and I2-1)
int k; char *tmp;
register char SW,S,SE, W,O,E, NW,NN,NE;
for (k=i1; k<i2; k++) {
char *p = pos(0,k),
*pe = p+N,
*pNE = pos(1,k+1),
*pSE = pos(1,k-1),
*pp = row1+1;
SW = *(pSE-2);
S = *(pSE-1);
SE = *(pSE );
W = *(p-1);
O = *(p );
E = *(p+1);
NW = *(pNE-2);
NN = *(pNE-1);
NE = *(pNE );
// Make it periodic
if (periodic) {
*(p-1) = *(pe-1);
*pe = *p;
assert(size==1); // not implemented yet
}
// Cells computed in this processor in this row have
// p <= address < pe
int alive;
while (p < pe) {
// Neighbors at positions one row below
alive = SW + S + SE + W + E + NW + NN + NE;
if (alive==0) { // We treat this case before, perhaps we gain
*pp = 0; // some acceleration. Cell becomes dead
} else if (alive==3) {
*pp = 1; // Cell becomes alive
} else if (alive!=2) {
*pp = 0; // Cell becomes dead
} else *pp = *p; // cell remains in its state
p++; pSE++; pNE++; pp++; // update positions in board to East
SW = S; S = SE ; SE = *pSE;
NW = NN; NN = NE ; NE = *pNE;
W = O; O = E; E = *(p+1);
}
tmp = board[k-1-I1];
board[k-1-I1] = row2;
row2 = row1;
row1 = tmp;
}
tmp = board[k-1-I1];
board[k-1-I1] = row2;
row2 = row1;
row1 = tmp;
#if 0
// Swap boards
char *temp = board;
board = board2;
board2 = temp;
#endif
}
// Initialize board
void Board::init(int N_,int per=0) {
// size = number of processors
MPI_Comm_size(MPI_COMM_WORLD,&size);
// my rank in the row of processors
MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
// flags whther periodic b.c.'s are used or not
periodic = per;
// size of board
N = N_;
if (w.size()>0 && w.size()<size ) {
if (myrank==0) printf("number of weights in \"weights\" "
"file doesn't match number of procs.\n");
MPI_Finalize();
exit(1);
}
if (w.size()==0) w.resize(size,1.);
rows_proc.resize(size);
double sum_w = 0., carry=0., a;
for (int p=0; p<size; p++) sum_w += w[p];
int j = 0;
double tol = 1e-8;
for (int p=0; p<size; p++) {
w[p] /= sum_w;
a = w[p]*N + carry + tol;
rows_proc[p] = int(floor(a));
carry = a - rows_proc[p] - tol;
if (p==myrank) {
i1 = j;
i2 = i1 + rows_proc[p];
}
j += rows_proc[p];
}
assert(j==N);
assert(fabs(carry) < tol);
I1=i1-1;
I2=i2+1;
// Alloc memory
NN = N+2; // number of stored rows (include ghosts)
board = new (char *)[I2-I1]; // allocate memory for boards
for (int k=0; k<I2-I1; k++) board[k] = new char[NN];
row1 = new char[NN];
row2 = new char[NN];
// Initialize board
for (int k=I1; k<I2; k++) {
char *p = pos(-1,k), *pe = p+N+2;
while (p < pe) *p++ = 0;
}
}
// Destructor
Board::~Board() {
for (int k=0; k<I2-I1; k++)
delete[] board[k];
delete[] board;
board=NULL;
delete[] row1;
row1 = NULL;
delete[] row2;
row2 = NULL;
}
double etime(timeval &x,timeval &y) {
return double(y.tv_sec)-double(x.tv_sec)
+(double(y.tv_usec)-double(x.tv_usec))/1e6;
}
// Main program
int main(int argc,char **args) {
int size,myrank;
char *pattern="cross";;
// Initializes MPI
MPI_Init(&argc,&args);
// size = number of processors
MPI_Comm_size(MPI_COMM_WORLD,&size);
// my rank in the row of processors
MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
int N=20,nstep=30,opt_q=0,opt_v=0,opt_w=0;
char c;
while ((c = getopt(argc, args, "N:s:hqp:vw")) != -1) {
switch (c) {
case 'h':
if (myrank==0) {
printf(" usage: $ mpirun [OPTIONS] life\n\n"
"MPI options: -np <number-of-processors>\n"
" -machinefile <machine-file>\n\n"
"Other options: -N <number of columns/rows>\n"
" -s <numer of time steps>\n"
" -q # quiet (no output)\n"
" -v # verbose (print board at each step)\n"
" -p <pattern> # may be ('cross',line10)\n"
" -w # read weights from \"weigths.dat\"\n"
" -h # print this help\n"
);
}
MPI_Finalize();
case 'N':
sscanf(optarg,"%d",&N);
break;
case 'w':
opt_w = 1;
break;
case 's':
sscanf(optarg,"%d",&nstep);
break;
case 'q':
opt_q = 1;
break;
case 'v':
opt_v = 1;
break;
case 'p':
pattern = new char[strlen(optarg+1)];
strcpy(pattern,optarg);
break;
case '?':
if (isprint (optopt))
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
else
fprintf (stderr,
"Unknown option character `\\x%x'.\n",
optopt);
return 1;
default:
if (isprint (optopt))
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
else
fprintf (stderr,
"Unknown option character `\\x%x'.\n",
optopt);
abort ();
}
}
/// time related quantities
struct timeval start, end;
// Defines and initializes the board
Board board;
board.read_weights_file(opt_w);
board.init(N);
if (!strcmp(pattern,"cross")) {
for (int k=0; k<N; k++) {
board.setv(N/2,k,1);
board.setv(k,N/2,1);
}
} else if (!strcmp(pattern,"line10")) {
assert(N>20);
for (int k=-5; k<5; k++) board.setv(N/2,N/2+k,1);
} else if (!strcmp(pattern,"block")) {
assert(N>3);
board.setv(N/2 ,N/2 ,1);
board.setv(N/2 ,N/2-1,1);
board.setv(N/2-1,N/2 ,1);
board.setv(N/2-1,N/2-1,1);
} else {
if (myrank==0)
printf("Not known pattern \"%s\"\n",pattern);
MPI_Finalize();
exit(0);
}
if (!opt_q) board.print();
gettimeofday (&start,NULL);
for (int k=0; k<nstep; k++) {
board.advance();
if (opt_v) {
if (myrank==0) printf("-- Generation %d:\n",k+1);
board.print();
}
}
if (myrank==0) {
gettimeofday (&end,NULL);
double elapsed = etime(start,end);
double rate = double(N)*double(N)*double(nstep)/elapsed/1e6;
printf("N %d, steps %d, elapsed %f, rate: %f Mcell/sec\n",
N,nstep,elapsed,rate);
}
gettimeofday (&end,NULL);
if (!opt_q && !opt_v) {
if (myrank==0) printf("-- Final board:\n");
board.print();
}
MPI_Finalize();
}
--
MarioStorti - 03 May 2002