CDVM2 (1158340), страница 8
Текст из файла (страница 8)
{
DVM(PROCESSORS) void * PA[ ACTIVE_NUM_PROC() ];
/* local array aligned with formal argument */
DVM(ALIGN [I][J] WITH C[I][J] ) float *X;
/* distributed local array */
DVM(DISTRIBUTE Y [][BLOCK] ONTO PA) float *Y;
Note that here dimensions of actual parameters are not known. So the technique described in subsection 4.2.3 should be used.
9. Input/output
In C-DVM, the following I/O statements are allowed for replicated data:
fopen, fclose, feof, fprintf, printf, fscanf, fputc, putc, fputs, puts, fgetc, getc, fgets, gets, fflush, fseek, ftell, ferror, clearerr, perror, as well as fread, fwrite.
These statements are executed on a single processor that the runtime system designates for this purpose. Output values are taken from this processor; input data are read on this processor and then sent to the other processors.
Only unformatted I/O of the whole array (with normal order of elements) is allowed for distributed arrays. In the source program it is coded as fread (fwrite) statement with the array identifier as a pointer to the memory area and with the area size.
The I/O statements cannot be used in a parallel loop or in a TASK_REGION block. For example, executing print statements inside a parallel loop on several processors might destroy the normal order and format of the printed information.
References
-
N.A.Konovalov, V.A.Krukov, S.N.Mihailov and A.A.Pogrebtsov, “Fortran-DVM language for portable parallel programs development”, Proceedings of Software for Multiprocessors & Supercomputers: Theory, Practice, Experience (SMS-TPE 94), Inst. for System Programming RAS, Moscow, Sept. 1994.
-
High Performance Fortran Forum. High Performance Fortran Language Specification. Version 2.0, January 31, 1997.
-
Y.Hu, S.L.Johnsson, S.-H.Teng. High Performance Fortran for Highly Irregular Problems. CACM, v.32, N.7, 1997, pp.13-24.
Appendix 1. Examples of C-DVM programs.
The following basic methods are used to illustrate CDVM language features.
Direct methods. The well-known Gaussian Elimination method is the most commonly used algorithm of this class. The main idea of this algorithm is to reduce the matrix A to upper triangular form and then to use backward substitution to diagonalize the matrix.
Explicit iteration methods. Jacobi Relaxation is the best-known algorithm of this class. The algorithm performs the following computation iteratively:
$$x_{i,j}^{\mathrm{new}} = \left( x_{i-1,j}^{\mathrm{old}} + x_{i,j-1}^{\mathrm{old}} + x_{i+1,j}^{\mathrm{old}} + x_{i,j+1}^{\mathrm{old}} \right) / 4$$
Implicit iteration methods. Successive Over Relaxation (SOR) belongs to this class. The algorithm performs the following calculation iteratively
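$$x_{i,j}^{\mathrm{new}} = \frac{w}{4} \left( x_{i-1,j}^{\mathrm{new}} + x_{i,j-1}^{\mathrm{new}} + x_{i+1,j}^{\mathrm{old}} + x_{i,j+1}^{\mathrm{old}} \right) + (1-w)\, x_{i,j}^{\mathrm{old}}$$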
With “red-black” coloring of the variables, each step of SOR consists of two half Jacobi steps: one processes the “red” variables and the other processes the “black” variables. Coloring the variables makes it possible to overlap calculation and communication.
Example 1. The Gauss elimination algorithm.
This is a program to solve a system of linear equations Ax = B by Gauss elimination method. The coefficient matrix A is represented by an array section A[0:N-1][0:N-1], and the vector B is represented by the section A[0:N-1][N] of the same array.
/* GAUSS program */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* DVM(...) expands to nothing here, so a plain C compiler ignores every directive */
#define DVM(dvmdir)
/* Counting loop: v runs from l to h inclusive, advancing by s */
#define DO(v,l,h,s) for(v=l; v<=h; v+=s)
/* Number of equations in the linear system */
#define N 100
/*
 * GAUSS: solves the linear system Ax = B by Gauss elimination.
 *
 * Columns 0..N-1 of A hold the coefficient matrix and column N holds the
 * right-hand side B; the solution is built in X by reverse substitution.
 * Every component X[j] for j = N-2 down to 0 is printed as soon as it has
 * been computed.
 *
 * The command-line arguments are not used. Returns 0.
 */
int main (int argn, char ** args)
{
    long i, j, k;
    /* declaration of dynamic distributed arrays */
    DVM(DISTRIBUTE [BLOCK] []) float (*A)[N+1];
    DVM(DISTRIBUTE [BLOCK]) float (*X);
    /* creation of arrays */
    /* NOTE(review): the allocation results are not checked; how a C-DVM
       translator treats these statements for distributed arrays is not
       visible here, so they are left exactly as written. */
    A = malloc( N*(N+1)*sizeof(float));
    X = malloc( N*sizeof(float));
    /* initialization of array A: unit matrix, right-hand side of all ones */
    DVM(PARALLEL [i][j] ON A[i][j])
    DO(i,0,N-1,1)
    DO(j,0,N,1)
        if (i==j || j==N) A[i][j] = 1.f;
        else A[i][j]=0.f;
    /* elimination: row i is the pivot row used to update rows i+1..N-1 */
    for (i=0; i<=N-1; i++)
    {
        DVM(PARALLEL [k][j] ON A[k][j]; REMOTE_ACCESS A[i][])
        DO (k,i+1,N-1,1)
        DO (j,i+1,N,1)
            A[k][j] = A[k][j]-A[k][i]*A[i][j]/A[i][i];
    }
    /* reverse substitution */
    DVM(OWN) X[N-1] = A[N-1][N]/A[N-1][N-1];
    for (j=N-2; j>=0; j-=1)
    {
        /* remove the contribution of the already known X[j+1] from rows 0..j */
        DVM(PARALLEL [k] ON A[k][]; REMOTE_ACCESS X[j+1])
        DO (k,0,j,1)
            A[k][N] = A[k][N]-A[k][j+1]*X[j+1];
        DVM(OWN) X[j]=A[j][N]/A[j][j];
        /* j is declared long, so it needs the l length modifier in the format */
        DVM(REMOTE_ACCESS X[j]) printf("j=%4li X[j]=%3.3E\n",j,X[j]);
    }
    return 0;
}
Example 2. Jacobi Algorithm
/* JACOBI program */
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
/* Larger of a and b; note that an argument may be evaluated twice */
#define max(a,b) ((a)>(b)?(a): (b))
/* C-DVM macros */
/* DVM(...) expands to nothing here, so a plain C compiler ignores every directive */
#define DVM(dvmdir)
/* Counting loop: v runs from l to h inclusive, advancing by s */
#define DO(v,l,h,s) for(v=l; v<=h; v+=s)
/* Size of each dimension of the arrays A and B */
#define L 8
/* Upper bound on the number of iterations */
#define ITMAX 20
/* Loop indices (i, j), iteration counter (it); k is not used in the code shown */
int i,j,it,k;
/* Largest absolute difference between B and A found in the current iteration */
double eps;
/* The iteration loop stops as soon as eps drops below this threshold */
double MAXEPS = 0.5;
/* Output stream for the result file */
FILE *f;
/* two-dimensional arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN [i][j] WITH A[i][j]) double B[L][L];
/*
 * JACOBI: Jacobi relaxation on the L x L arrays A and B.
 *
 * A is cleared and B is set to 1+i+j. Each of at most ITMAX iterations
 * computes eps as the largest |B - A| over the interior points, copies the
 * interior of B into A, and recomputes the interior of B as the average of
 * the four neighbours in A. The loop ends early once eps < MAXEPS.
 * Finally B is written in binary form to "jacobi.dat".
 *
 * The command-line arguments are not used.
 * Returns 0 on success, EXIT_FAILURE if the output file cannot be opened.
 */
int main(int argn, char ** args)
{
    int rc = 0;
    /* two-dimensional loop with base array A */
    DVM(PARALLEL [i][j] ON A[i][j])
    DO(i,0,L-1,1)
    DO(j,0,L-1,1)
    {
        A[i][j]=0.;
        B[i][j]=1.+i+j;
    }
    /****** iteration loop *************************/
    DO(it,1,ITMAX,1)
    {
        eps= 0.;
        /* Parallel loop with base array A */
        /* calculating maximum in variable eps */
        DVM(PARALLEL [i][j] ON A[i][j]; REDUCTION MAX(eps))
        DO(i,1,L-2,1)
        DO(j,1,L-2,1)
        {
            eps = max(fabs(B[i][j]-A[i][j]),eps);
            A[i][j] = B[i][j];
        }
        /* Parallel loop with base array B and */
        /* with prior updating shadow elements of array A */
        DVM(PARALLEL [i][j] ON B[i][j]; SHADOW_RENEW A)
        DO(i,1,L-2,1)
        DO(j,1,L-2,1)
            B[i][j] = (A[i-1][j]+A[i+1][j]+ A[i][j-1]+A[i][j+1])/4.;
        printf( "it=%4i eps=%3.3E\n", it,eps);
        if (eps < MAXEPS) break;
    }/*DO it*/
    f=fopen("jacobi.dat","wb");
    if (f == NULL)
    {
        /* fopen failed: report it instead of handing a null stream to fwrite */
        perror("jacobi.dat");
        rc = EXIT_FAILURE;
    }
    else
    {
        fwrite(B,sizeof(double),L*L,f);
        /* close the stream explicitly instead of leaving it open until exit */
        fclose(f);
    }
    return rc;
}
Example 3. Jacobi Algorithm (asynchronous version)
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
/* Larger of a and b; note that an argument may be evaluated twice */
#define max(a,b) ((a)>(b)? (a): (b))
/* DVM(...) expands to nothing here, so a plain C compiler ignores every directive */
#define DVM(dvmdir)
/* Counting loop: v runs from l to u inclusive, advancing by s */
#define DO(v,l,u,s) for(v=l; v<=u; v+=s)
/* Size of each dimension of the arrays A and B */
#define L 8
/* Upper bound on the number of iterations */
#define ITMAX 20
/* Loop indices (i, j), iteration counter (it); k is not used in the code shown */
int i,j,it,k;
/* Largest absolute difference between B and A found in the current iteration */
double eps;
/* The iteration loop stops as soon as eps drops below this threshold */
double MAXEPS = 0.5;
/* Output stream for the result file */
FILE *f;
/* declaration of operation group of shadow element update */
DVM(SHADOW_GROUP) void *grshad;
/* declaration of the group reduction */
DVM(REDUCTION_GROUP) void *emax;
/* two-dimensional arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK])
double A[L][L], B[L][L];
/*
 * JACOBI (asynchronous version): Jacobi relaxation on the L x L arrays A
 * and B, using the shadow group grshad and the reduction group emax.
 *
 * A is cleared and B is set to 1+i+j. Each of at most ITMAX iterations
 * computes eps as the largest |B - A| over the interior points, copies the
 * interior of B into A, and recomputes the interior of B as the average of
 * the four neighbours in A. The loop ends early once eps < MAXEPS.
 * Finally B is written in binary form to "jacobi.dat".
 *
 * The command-line arguments are not used.
 * Returns 0 on success, EXIT_FAILURE if the output file cannot be opened.
 */
int main(int argn, char **args)
{
    int rc = 0;
    /* two-dimensional parallel loop with base array A */
    DVM(PARALLEL [i][j] ON A[i][j])
    DO(i,0,L-1,1)
    DO(j,0,L-1,1)
    {
        A[i][j]=0.;
        B[i][j]=1.+i+j;
    }
    /* Specification of members of shadow edges update operation group */
    DVM(CREATE_SHADOW_GROUP grshad: A);
    /************ iteration loop *************************/
    DO(it,1,ITMAX,1)
    {
        eps= 0.;
        /* Specification of members of group reduction */
        DVM(CREATE_REDUCTION_GROUP emax : MAX(eps));
        /* Parallel loop with base array A: */
        /* at first elements of array A exported by neighbor processor */
        /* are calculated and sent */
        /* and then internal elements of array A are calculated */
        DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START grshad)
        DO(i,1,L-2,1)
        DO(j,1,L-2,1)
        {
            eps = max(fabs(B[i][j]-A[i][j]),eps);
            A[i][j] = B[i][j];
        }
        /* Start asynchronous calculation of maximum */
        DVM(REDUCTION_START emax);
        /* Parallel loop with base array B: */
        /* internal elements of array B are calculated at first */
        /* then completion of array A shadow edges updates is awaited */
        /* and the loop iterations, that require shadow elements of array A */
        /* are calculated */
        DVM(PARALLEL [i][j] ON B[i][j]; SHADOW_WAIT grshad)
        DO(i,1,L-2,1)
        DO(j,1,L-2,1)
            B[i][j] = (A[i-1][j]+A[i+1][j]+ A[i][j-1]+A[i][j+1])/4;
        /* Awaiting completion of reduction */
        DVM(REDUCTION_WAIT emax);
        printf( "it=%4i eps=%3.3E\n", it,eps);
        if (eps < MAXEPS) break;
    }/*DO it*/
    f=fopen("jacobi.dat","wb");
    if (f == NULL)
    {
        /* fopen failed: report it instead of handing a null stream to fwrite */
        perror("jacobi.dat");
        rc = EXIT_FAILURE;
    }
    else
    {
        fwrite(B,sizeof(double),L*L,f);
        /* close the stream explicitly instead of leaving it open until exit */
        fclose(f);
    }
    return rc;
}
Example 4. Irregular Calculations Based on Jacobi Algorithm
/* IRREG program */
/* Irregular calculations model on a rectangular grid
   numbering graph nodes for K = 4 :

      1 ---  2 ---  3 ---  4
      |      |      |      |
      5 ---  6 ---  7 ---  8
      |      |      |      |
      9 --- 10 --- 11 --- 12
      |      |      |      |
     13 --- 14 --- 15 --- 16
**********************************************************/
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
/* DVM(...) expands to nothing here, so a plain C compiler ignores every directive */
#define DVM(dvmdir)
/* Counting loop: v runs from l to u inclusive, advancing by s */
#define DO(v,l,u,s) for(v=l; v<=u; v+=s)
/* Stand-in used in a plain C build: always yields 1 */
#define NUMBER_OF_PROCESSORS() 1
/* Side of the grid; the arrays below hold K*K elements (one per graph node) */
#define K 100
/* presumably the upper bound on the number of iterations, as in the other examples -- its use is not visible here */
#define ITMAX 20
/* Processor array with NUMBER_OF_PROCESSORS() elements */
DVM(PROCESSORS) void * P[NUMBER_OF_PROCESSORS()];
/* A is block distributed onto P; B is aligned element by element with A */
DVM(DISTRIBUTE [BLOCK] ONTO P) float A[K*K];
DVM(ALIGN [i] WITH A[i]) float B[K*K];
/* Current error and convergence threshold (MAXEPS is assigned at the start of main) */
float EPS, MAXEPS;
/* 5 x K*K integer table whose second dimension is aligned with A;
   presumably the graph edge table mentioned in main -- confirm against the rest of the program */
DVM(ALIGN [][i] WITH A[i]) int MATEDG[5][K*K];
/* Handle declared with the INDIRECT_GROUP directive; its use is not visible here */
DVM(INDIRECT_GROUP) void * GRM;
int main(int argn, char **args)
{
printf(“********** TEST_IRREG **********\n”);
MAXEPS = 0.5E – 7;
/* initialisation of array B and graph edge table */
DVM(PARALLEL [L] ON B[L])
FOR(L, K*K)
{
I = L/K;
J = L%K;
if(I==0 || J==0 || I==K-1 || J==K-1) {















