cdvmLDe (1158401), страница 8
Текст из файла (страница 8)
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define DVM(dvmdir)
#define DO(v,l,h,s) for(v=l; v<=h; v+=s)
#define N 10
int main (int argn, char **args)
{
long i, j, k;
/* declaration of dynamic distributed arrays */
DVM(DISTRIBUTE [BLOCK] []) float (*A)[N+1];
DVM(DISTRIBUTE [BLOCK]) float (*X);
/* creation of arrays */
A = malloc( N*(N+1)*sizeof(float));
X = malloc( N*sizeof(float));
/* initialize array A*/
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,N-1,1)
DO(j,0,N,1)
if (i==j || j==N) A[i][j] = 1.f;
else A[i][j]=0.f;
/* elimination */
for (i=0; i<N-1; i++)
{
DVM(PARALLEL [k][j] ON A[k][j]; REMOTE_ACCESS A[i][])
DO (k,i+1,N-1,1)
DO (j,i+1,N,1)
A[k][j] = A[k][j]-A[k][i]*A[i][j]/A[i][i];
}
/* reverse substitution */
X[N-1] = A[N-1][N]/A[N-1][N-1];
for (j=N-2; j>=0; j-=1)
{
DVM(PARALLEL [k] ON A[k][]; REMOTE_ACCESS X[j+1])
DO (k,0,j,1)
A[k][N] = A[k][N]-A[k][j+1]*X[j+1];
X[j]=A[j][N]/A[j][j];
DVM(REMOTE_ACCESS X[j])
printf(“j=%4i X[j]=%3.3E\n”,j,X[j]);
}
return 0;
}
Example 2. Jacobi algorithm
/* JACOBI program */
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define Max(a,b) ((a)>(b)?(a): (b))
#define DVM(dvmdir)
#define DO(v,l,h,s) for(v=l; v<=h; v+=s)
#define L 8
#define ITMAX 20
int i,j,it,k;
double eps;
double MAXEPS = 0.5;
FILE *f;
/* 2D arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN[i][j] WITH A[i][j]) double B[L][L];
int main(int argn, char **args)
{
/* 2D loop with base array A */
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,L-1,1)
DO(j,0,L-1,1)
{A[i][j]=0.;
B[i][j]=1.+i+j;
}
/****** iteration loop *************************/
DO(it,1,ITMAX,1)
{
eps= 0.;
/* Parallel loop with base array A */
/* calculating maximum in variable eps */
DVM(PARALLEL [i][j] ON A[i][j]; REDUCTION MAX(eps))
DO(i,1,L-2,1)
DO(j,1,L-2,1)
{eps = Max(fabs(B[i][j]-A[i][j]),eps);
A[i][j] = B[i][j];
}
/* Parallel loop with base array B and */
/* with prior updating shadow elements of array A */
DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_RENEW A)
DO(i,1,L-2,1)
DO(j,1,L-2,1)
B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4.;
printf(“it=%4i eps=%3.3E\n”, it,eps);
if (eps < MAXEPS) break;
}/*DO it*/
f=fopen("jacobi.dat","wb");
fwrite(B,sizeof(double),L*L,f);
return 0;
}
Example 3. Jacobi Algorithm (asynchronous version)
/* Asynchronous JACOBI */
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define Max(a,b) ((a)>(b)? (a): (b))
#define DVM(dvmdir)
#define DO(v,l,u,s) for(v=l; v<=u; v+=s)
#define L 8
#define ITMAX 20
int i,j,it,k;
double eps;
double MAXEPS = 0.5;
FILE *f;
/* declare groups for shadow and reduction operations */
DVM(SHADOW_GROUP) void *grshad;
DVM(REDUCTION_GROUP) void *emax;
/* 2D arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN [i][j] WITH A[i][j]) double B[L][L];
int main(int argn, char **args)
{
/* 2D parallel loop with base array A */
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,L-1,1)
DO(j,0,L-1,1)
{A[i][j]=0.;
B[i][j]=1.+i+j;}
/* Create group for shadow operation */
DVM(CREATE_SHADOW_GROUP grshad: A);
/************ iteration loop *************************/
DO(it,1,ITMAX,1)
{
eps= 0.;
/* Parallel loop with base array A: */
/* at first elements of array A exported by the */
/* neighbor processor are calculated and sent and */
/* then internal elements of array A are calculated */
DVM(PARALLEL [i][j] ON A[i][j];
SHADOW_START grshad; REDUCTION emax : MAX(eps))
DO(i,1,L-2,1)
DO(j,1,L-2,1)
{eps = max(fabs(B[i][j]-A[i][j]),eps);
A[i][j] = B[i][j];
}
/* Start asynchronous calculation of maximum */
DVM(REDUCTION_START emax);
/* Parallel loop with base array B: */
/* internal elements of array B are calculated at first */
/* then completion of array A shadow edges update is */
/* awaited and the loop iterations, requiring shadow */
/* elements of array A are calculated */
DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_WAIT grshad)
DO(i,1,L-2,1)
DO(j,1,L-2,1)
B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4;
/* Wait for completion of reduction */
DVM(REDUCTION_WAIT emax);
printf( "it=%4i eps=%3.3E\n”, it,eps);
if (eps < MAXEPS) break;
}/*DO it*/
f=fopen("jacobi.dat","wb");
fwrite(B,sizeof(double),L*L,f);
return 0;
}
Example 4. Red-Black Successive Over-Relaxation
The idea of the method is to paint the points of the grid (the variables) in two colors - red and black - as on a chess-board. Non-rectangular index space is defined by the special form of a loop header: DO (var, a+b%2, ub, 2)
/* RED-BLACK program */
#include <stdlib.h>
#include <math.h>
#define DVM(dvmdir)
#define DO(v,l,h,s) for(v=l; v<=h; v+=s)
#define Max(a,b) ((a)>(b)?(a):(b))
#define ITMAX 10
int main(int an, char ** as)
{long i, j, it, irb;
float eps=0.;
float MAXEPS = 0.5E-5;
float w = 0.5;
/* Groups of asynchronous operations */
DVM(SHADOW_GROUP) void *sha;
DVM(REDUCTION_GROUP) void *rg;
int N;
/* redefine N as constant */
#define N 8
DVM(DISTRIBUTE [BLOCK] [BLOCK]) float (*A)[N];
#undef N /* restore N */
/* "Calculate" of array size (=8!). Create array */
N = 8;
A = malloc( N*N*sizeof(float));
/* Specification of members of shadow group */
DVM(CREATE_SHADOW_GROUP sha : A);
/* Initialization: parallel loop with surpassing */
/* calculation and sending of exported data */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha)
DO (i,0,N-1,1)
DO (j,0,N-1,1)
if(i==0 || i==N-1 || j==0 || j==N-1) A[i][j]=0.;
else A[i][j] = 1.+i+j;
DVM(SHADOW_WAIT sha);
/* iteration loop */
DO (it,0,ITMAX,1)
{
eps = 0.;
/* Parallel loop with reduction on RED points */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha;
REDUCTION rg: MAX(eps) )
DO (i,1,N-2,1)
DO (j,1+i%2,N-2,2)
{float s;
s = A[i][j];
A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+
A[i][j-1]+A[i][j+1])+(1-w)*A[i][j];
eps = Max (fabs(s-A[i][j]), eps);
}
/* Start red reduction -- as early as possible */
/* Then wait shadows */
DVM(REDUCTION_START rg);
DVM(SHADOW_WAIT sha);
/* Parallel loop on BLACK points (without reduction) */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha)
DO (i,1,N-2,1)
DO (j,1+(i+1)%2,N-2,2)
{A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+
A[i][j-1]+A[i][j+1])+(1-w)*A[i][j];
}
/* Wait shadows, then */
/* wait red point's reduction -- as late as possible */
DVM(SHADOW_WAIT sha);
DVM(REDUCTION_WAIT rg);
printf("it=%d eps=%e\n",it,eps);
if (eps < MAXEPS) break;
} /* DO it */
free(A);
return 0;
}
Example 5. Multigrid method program
/* MULTIGRID program */
#include <stdio.h>
#include <stdlib.h>
#define DVM(dvm)
#define DO(v,l,h,s) for(v=(l); v<=(h); v+=(s))
void oper(
DVM(*DISTRIBUTE [*][*][*]) float *AA,
int AAN, int AAM, int AAK,
DVM(*DISTRIBUTE [*][*][*]) float *BB,
int BBN, int BBM, int BBK)
/*Parameters: two distributed 3D arrays and */
/* their actual dimensions */
{
#define AA(i,j,k) AA[((i)*AAM+(j))*AAK+(k)]
#define BB(i,j,k) AA[((i)*BBM+(j))*BBK+(k)]
int i, j,k;
/* Alignment of array BB with array AA using stretching*/
DVM(REALIGN BB[i][j][k] WITH AA[2*i][2*j][2*k] NEW);
/* forming array BB from elements of array AA with even indexes */
DVM(PARALLEL [i][j][k] ON BB[i][j][k])
FOR(i,BBN)
FOR(j,BBM)
FOR(k,BBK)
BB(i,j,k)=AA(i*2,j*2,k*2);
#undef AA
#undef BB
}
int main(int argn, char **args)
{
int N1=8,M1=12,K1=16;
int N2,M2,K2;
int Narr=5,Nit=5;
int grid=0;
int ACM,ACK;
int i,j,k;
int step_grid=1;
/* Up to 20 distributed arrays */
DVM(DISTRIBUTE[BLOCK][BLOCK][]) float *A[20];
/* Pointer to current distributed array */
DVM(*DISTRIBUTE[*][*][*]) float *AC;
/* creation of array A[0] */
A[0]=malloc(N1*M1*K1*sizeof(float));
AC=A[0];
ACM=M1;
ACK=K1;
#define AC(i,j,k) AC[((i)*ACM+(j))*ACK+(k)]
/* initialization of source array */
DVM(PARALLEL [i][j][k] ON AC[i][j][k])
DO(i,0,N1-1,1)
DO(j,0,M1-1,1)
DO(k,0,K1-1,1)
AC(i,j,k)=1.+i+j+k ;
#undef AC
do{
printf("N1=%d M1=%d K1=%d \n”,N1,M1,K1);
N2=N1/2;
M2=M1/2;
K2=K1/2;
grid++;
/* creation of array A[grid] */
A[grid]=malloc(N2*M2*K2*sizeof(float));
oper(A[grid-1],N1,M1,K1,A[grid],N2,M2,K2);
N1=N2;
M1=M2;
K1=K2;
} while (N2>2 && grid<Narr)
for(i=0;i<=grid;i++)
free(A[i]);
return 0;
}
Example 6. Task Parallelism for Multiblock Code
/* PROGRAM TASKS */
/* rectangular grid is distributed on two blocks */
/* */
/* <------- A2,B2 -----> */
/* 0 1 2 ... N2 */
/* <--- A1,B1 --------> */
/* 0 1 ... N1-1 N1 N1+1 ... N1+N2-1 */
/*------------------------------------------------*/
/* 0 | . . . . . ... . */
/* ... | */
/* K-1 | . . . . . ... . */
/**************************************************/
#include <stdlib.h>
#include <math.h>
#define DVM(dvmdir)
#define DO(v,l,h,s) for(v=l; v<=h; v+=s)
#define FOR(v,n) for(v=0; v<n; v++)
#define Max(a,b) ((a)>(b)?(a):(b))
#define NUMBER_OF_PROCESSORS() 1
#define K 8
#define N1 4
#define N2 K-N1
#define ITMAX 10
DVM(PROCESSORS) void * P[NUMBER_OF_PROCESSORS()];
DVM(TASK) void * MB[2];
double eps0;
DVM(DISTRIBUTE) float (*A1)[K], (*A2)[K];
DVM(ALIGN) float (*B1)[K], (*B2)[K];
int main(int argn, char** args)
{
int i,j, it;
int NP;
printf("---------- starting --------------\n");
DVM(DEBUG 1 -d0)
{
NP = NUMBER_OF_PROCESSORS() / 2;
}
DVM(MAP MB[0] ONTO P[0:(NP?NP-1:0)]);
DVM(MAP MB[1] ONTO P[NP:(NP?2*NP-1:NP)]);
A1=malloc((N1+1)*K*sizeof(float));
DVM(REDISTRIBUTE A1[][BLOCK] ONTO MB[0]);
B1=malloc((N1+1)*K*sizeof(float));
DVM(REALIGN B1[i][j] WITH A1[i][j]);
A2=malloc((N2+1)*K*sizeof(float));
DVM(REDISTRIBUTE A2[][BLOCK] ONTO MB[1]);
B2=malloc((N2+1)*K*sizeof(float));
DVM(REALIGN B2[i][j] WITH A2[i][j]);
/* Initialization */
DVM(PARALLEL [i][j] ON A1[i][j])
FOR(i,N1+1)
FOR(j,K)
{if (i==0 || j==0 || j==K-1) {
A1[i][j] = 0.f;
B1[i][j] = 0.f;
} else {
B1[i][j] = 1.f + i + j ;
A1[i][j] = B1[i][j];
}}
DVM(PARALLEL [i][j] ON A2[i][j])
FOR(i,N2+1)
FOR(j,K)
{if(i == N2 || j==0 || j==K-1) {















