fdvmLDe (1158420), страница 12
Текст из файла (страница 12)
2 CONTINUE
3 OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
WRITE (3,*) B
CLOSE (3)
END
Example 3. Jacobi algorithm (asynchronous version)
C     JACOB1 - Jacobi iteration on a K x K grid, asynchronous version.
C     The shadow-edge exchange of A and the MAX reduction of EPS are
C     started in one loop and waited for later (SHADOW_START /
C     SHADOW_WAIT, REDUCTION_START / REDUCTION_WAIT).
C     Iteration stops after ITMAX steps or when EPS < MAXEPS; array B
C     is then written to file JACOBI.DAT.
C     Fixed source form: labels in columns 1-5, statements from
C     column 7, DVM sentinels in columns 1-5.
      PROGRAM JACOB1
      PARAMETER (K=8, ITMAX=20)
      REAL A(K,K), B(K,K), EPS, MAXEPS
CDVM$ DISTRIBUTE A ( BLOCK, BLOCK)
CDVM$ ALIGN B( I, J ) WITH A( I, J )
C     arrays A and B with block distribution
CDVM$ REDUCTION_GROUP REPS
      PRINT *, '********** TEST_JACOBI_ASYNCHR **********'
C     creation of shadow edge group; the sentinel has to be CDVM$,
C     a line starting with a bare CDVM is only an ordinary comment
CDVM$ SHADOW_GROUP SA ( A )
      MAXEPS = 0.5E-7
CDVM$ PARALLEL (J,I) ON A(I, J)
C     nest of two parallel loops, iteration (i,j) will be executed on
C     processor, which is owner of element A(i,j)
      DO 1 J = 1, K
      DO 1 I = 1, K
         A(I,J) = 0.
C        zero on the grid border, 1+I+J in the interior
         IF (I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
            B(I,J) = 0.
         ELSE
            B(I,J) = 1. + I + J
         ENDIF
    1 CONTINUE
      DO 2 IT = 1, ITMAX
         EPS = 0.
C     group of reduction operations is created
C     and initial values of reduction variables are stored
C     NOTE(review): the other examples spell the reduction clause
C     REDUCTION (...); confirm that REDUCTION_GROUP (group : op) is
C     the intended spelling of the clause below.
CDVM$ PARALLEL (J, I) ON A(I, J), SHADOW_START SA,
CDVM$*REDUCTION_GROUP ( REPS : MAX( EPS ))
C     the loops iteration order is changed:
C     at first boundary elements of A are calculated and sent,
C     then internal elements of array A are calculated
         DO 21 J = 2, K-1
         DO 21 I = 2, K-1
            EPS = MAX(EPS, ABS(B(I,J) - A(I,J)))
            A(I,J) = B(I,J)
   21    CONTINUE
CDVM$ REDUCTION_START REPS
C     start of reduction operation to accumulate the partial results
C     calculated in copies of variable EPS on every processor
CDVM$ PARALLEL (J, I) ON B(I, J), SHADOW_WAIT SA
C     the loops iteration order is changed:
C     at first internal elements of B are calculated, then shadow
C     edge elements of array A from neighboring processors are
C     received, then boundary elements of array B are calculated
         DO 22 J = 2, K-1
         DO 22 I = 2, K-1
            B(I,J) = (A(I-1,J) + A(I,J-1) +
     *                A(I+1,J) + A(I,J+1)) / 4
   22    CONTINUE
CDVM$ REDUCTION_WAIT REPS
C     waiting completion of reduction operation
         PRINT *, 'IT = ', IT, ' EPS = ', EPS
C        converged: leave the iteration loop and write the result
         IF (EPS .LT. MAXEPS) GO TO 3
    2 CONTINUE
    3 OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
      WRITE (3,*) B
      CLOSE (3)
      END
Example 4. Successive over-relaxation
C     SOR - successive over-relaxation on an N x N array A with
C     relaxation factor W.  Iteration stops after ITMAX steps or when
C     the largest change EPS of an element drops below MAXEPS; A is
C     then printed.
C     Fixed source form: labels in columns 1-5, continuation mark in
C     column 6, statements from column 7.
      PROGRAM SOR
      PARAMETER ( N = 100 )
      REAL A( N, N ), EPS, MAXEPS, W
      INTEGER ITMAX
CDVM$ DISTRIBUTE A ( BLOCK, BLOCK )
      ITMAX = 20
      MAXEPS = 0.5E-5
      W = 0.5
*DVM$ PARALLEL ( I, J ) ON A( I, J )
C     initial values: N+2 on the main diagonal, -1 elsewhere
      DO 1 I = 1, N
      DO 1 J = 1, N
         IF (I .EQ. J) THEN
            A(I,J) = N + 2
         ELSE
            A(I,J) = -1.0
         ENDIF
    1 CONTINUE
      DO 2 IT = 1, ITMAX
         EPS = 0.
*DVM$ PARALLEL ( I, J) ON A( I, J), NEW (S),
*DVM$* REDUCTION ( MAX( EPS )), ACROSS (A(1:1,1:1))
C     S variable - private variable
C     (its usage is localized in one iteration range)
C     EPS variable is used for maximum calculation
         DO 21 I = 2, N-1
         DO 21 J = 2, N-1
C           S keeps the old value so the change can be measured
            S = A(I,J)
            A(I,J) = (W / 4) * (A(I-1,J) + A(I+1,J) + A(I,J-1) +
     *               A(I,J+1)) + (1-W) * A(I,J)
            EPS = MAX(EPS, ABS(S - A(I,J)))
   21    CONTINUE
         PRINT *, 'IT = ', IT, ' EPS = ', EPS
         IF (EPS .LT. MAXEPS) GO TO 4
    2 CONTINUE
    4 PRINT *, A
      END
Example 5. Red-black successive over-relaxation
C     REDBLACK - red-black successive over-relaxation on an N x N
C     array A with relaxation factor W.  Every step IT makes two
C     passes (IRB = 1, 2); the J loop starts at 2 + MOD(I+IRB,2) and
C     advances by 2, so one pass updates every second point of a row.
C     Iteration stops after ITMAX steps or when EPS < MAXEPS; A is
C     then printed.
C     Fixed source form: labels in columns 1-5, continuation mark in
C     column 6, statements from column 7.
      PROGRAM REDBLACK
      PARAMETER ( N = 100 )
      REAL A( N, N ), EPS, MAXEPS, W
      INTEGER ITMAX
*DVM$ DISTRIBUTE A ( BLOCK, BLOCK )
      ITMAX = 20
      MAXEPS = 0.5E-5
      W = 0.5
*DVM$ PARALLEL ( I, J ) ON A( I, J )
C     initial values: N+2 on the main diagonal, -1 elsewhere
      DO 1 I = 1, N
      DO 1 J = 1, N
         IF (I .EQ. J) THEN
            A(I,J) = N + 2
         ELSE
            A(I,J) = -1.0
         ENDIF
    1 CONTINUE
      DO 2 IT = 1, ITMAX
         EPS = 0.
C     loop for red and black variables
         DO 3 IRB = 1, 2
*DVM$ PARALLEL ( I, J) ON A( I, J), NEW (S),
*DVM$* REDUCTION ( MAX( EPS )), SHADOW_RENEW (A)
C     variable S - private variable in loop iterations
C     variable EPS is used for calculation of maximum value
C     Exception : iteration space is not rectangular
            DO 21 I = 2, N-1
            DO 21 J = 2 + MOD(I+IRB, 2), N-1, 2
C              S keeps the old value so the change can be measured
               S = A(I,J)
               A(I,J) = (W / 4) * (A(I-1,J) + A(I+1,J) +
     *                  A(I,J-1) + A(I,J+1)) + (1-W) * A(I,J)
               EPS = MAX(EPS, ABS(S - A(I,J)))
   21       CONTINUE
    3    CONTINUE
         PRINT *, 'IT = ', IT, ' EPS = ', EPS
         IF (EPS .LT. MAXEPS) GO TO 4
    2 CONTINUE
    4 PRINT *, A
      END
Example 6. Static tasks (parallel sections)
C     TASKS - Jacobi iteration on a grid split into two areas
C     (A1,B1 and A2,B2).  Each area is mapped onto its own section of
C     the processor array and is processed inside a TASK_REGION as a
C     static task.  Before every step the two areas exchange one edge
C     row through REMOTE_ACCESS.  After ITMAX steps A1 and A2 are
C     printed.
C     Fixed source form: labels in columns 1-5, continuation mark in
C     column 6, statements from column 7.
      PROGRAM TASKS
C     rectangular grid is subdivided on two areas
C
C                 K
C        N1    A1, B1
C        N2    A2, B2
C
      PARAMETER (K=100, N1 = 50, ITMAX=10, N2 = K - N1 )
CDVM$ PROCESSORS P(NUMBER_OF_PROCESSORS( ))
      REAL A1(N1+1,K), A2(N2+1,K), B1(N1+1,K), B2(N2+1,K)
      INTEGER LP(2), HP(2)
CDVM$ TASK MB( 2 )
CDVM$ ALIGN B1( I, J ) WITH A1( I, J )
CDVM$ ALIGN B2( I, J ) WITH A2( I, J )
CDVM$ DISTRIBUTE :: A1, A2
CDVM$ REMOTE_GROUP BOUND
C     LP(i):HP(i) is the processor range given to task i
      CALL DPT(LP, HP, 2)
C     Task (area) distribution over processors
C     Array distribution over tasks
CDVM$ MAP MB( 1 ) ONTO P( LP(1) : HP(1) )
CDVM$ REDISTRIBUTE A1( *, BLOCK ) ONTO MB( 1 )
CDVM$ MAP MB( 2 ) ONTO P( LP(2) : HP(2) )
CDVM$ REDISTRIBUTE A2( *, BLOCK ) ONTO MB( 2 )
C     Initialization
CDVM$ PARALLEL ( J, I ) ON A1(I, J)
      DO 10 J = 1, K
      DO 10 I = 1, N1
         IF (I.EQ.1 .OR. J.EQ.1 .OR. J.EQ.K) THEN
            A1(I,J) = 0.
            B1(I,J) = 0.
         ELSE
            B1(I,J) = 1. + I + J
            A1(I,J) = B1(I,J)
         ENDIF
   10 CONTINUE
CDVM$ PARALLEL ( J, I ) ON A2(I, J)
      DO 20 J = 1, K
      DO 20 I = 2, N2+1
         IF (I.EQ.N2+1 .OR. J.EQ.1 .OR. J.EQ.K) THEN
            A2(I,J) = 0.
            B2(I,J) = 0.
         ELSE
C           I + N1 - 1 is the row index shifted by the N1 rows of the
C           first area
            B2(I,J) = 1. + (I + N1 - 1) + J
            A2(I,J) = B2(I,J)
         ENDIF
   20 CONTINUE
      DO 2 IT = 1, ITMAX
CDVM$ PREFETCH BOUND
C     exchange of edges
CDVM$ PARALLEL ( J ) ON A1(N1+1, J),
CDVM$*REMOTE_ACCESS (BOUND : B2( 2, J ) )
         DO 30 J = 1, K
   30    A1(N1+1,J) = B2(2,J)
CDVM$ PARALLEL ( J ) ON A2( 1, J),
CDVM$*REMOTE_ACCESS (BOUND : B1( N1, J ) )
         DO 40 J = 1, K
   40    A2(1,J) = B1(N1,J)
CDVM$ TASK_REGION MB
CDVM$ ON MB( 1 )
CDVM$ PARALLEL ( J, I ) ON B1(I, J),
CDVM$*SHADOW_RENEW ( A1 )
         DO 50 J = 2, K-1
         DO 50 I = 2, N1
   50    B1(I,J) = (A1(I-1,J) + A1(I,J-1) +
     *              A1(I+1,J) + A1(I,J+1)) / 4
CDVM$ PARALLEL ( J, I ) ON A1(I, J)
         DO 60 J = 2, K-1
         DO 60 I = 2, N1
   60    A1(I,J) = B1(I,J)
CDVM$ END ON
CDVM$ ON MB( 2 )
CDVM$ PARALLEL ( J, I ) ON B2(I, J),
CDVM$*SHADOW_RENEW ( A2 )
         DO 70 J = 2, K-1
         DO 70 I = 2, N2
   70    B2(I,J) = (A2(I-1,J) + A2(I,J-1) +
     *              A2(I+1,J) + A2(I,J+1)) / 4
CDVM$ PARALLEL ( J, I ) ON A2(I, J)
         DO 80 J = 2, K-1
         DO 80 I = 2, N2
   80    A2(I,J) = B2(I,J)
CDVM$ END ON
CDVM$ END TASK_REGION
    2 CONTINUE
      PRINT *, 'A1 '
      PRINT *, A1
      PRINT *, 'A2 '
      PRINT *, A2
      END
C     DPT - choose the processor range of each task.
C     LP(i) and HP(i) receive the first and the last processor number
C     given to task i.  NT is the number of tasks; only two ranges are
C     filled in (LP and HP have two elements).
C     With one processor both tasks get processor 1; otherwise task 1
C     gets 1..NP/NT and task 2 gets NP/NT+1..NP.
C     Fixed source form: statements start in column 7.
      SUBROUTINE DPT( LP, HP, NT )
C     processor distribution for NT tasks (NT = 2)
      INTEGER LP(2), HP(2)
C     NOTE(review): the next line is a statement function that makes
C     NUMBER_OF_PROCESSORS() evaluate to 1 - presumably a stand-in
C     for serial compilation; confirm how the DVM compiler treats it.
      NUMBER_OF_PROCESSORS( ) = 1
      NP = NUMBER_OF_PROCESSORS( )
C     NTP - number of processors given to the first task
      NTP = NP/NT
      IF (NP.EQ.1) THEN
         LP(1) = 1
         HP(1) = 1
         LP(2) = 1
         HP(2) = 1
      ELSE
         LP(1) = 1
         HP(1) = NTP
         LP(2) = NTP+1
         HP(2) = NP
      END IF
      END
Example 7. Dynamic tasks (task loop)
PROGRAM MULTIBLOCK
C Model of multi-area task.
C Area amount, size of each area, external and internal edges
C are defined during program execution.
C Test of following DVM-constructions: dynamic arrays,
C dynamic tasks, asynchronous REMOTE_ACCESS for dynamic
C arrays (formal parameters)
*DVM$ PROCESSORS MBC100( NUMBER_OF_PROCESSORS( ) )
PARAMETER (M = 8, N =8, NTST = 1)
C MXSIZE – dynamic memory size
C MXBL – maximal number of areas
PARAMETER ( MXS=10000 )
PARAMETER ( MXBL=2 )
C HEAP – dynamic memory
REAL HEAP(MXS)
C PA,PB – arrays of pointers for dynamic arrays
C PA(I),PB(I) – function value on previous and current step in I-th area
*DVM$ REAL, POINTER ( :, : ) :: PA, PB, P1, P2
*DVM$ DYNAMIC PA, PB, P1, P2
INTEGER PA(MXBL), PB(MXBL), P1, P2
C SIZE( 1:2, I) – sizes of dimensions of I-th area
INTEGER SIZE( 2, MXBL ) , ALLOCATE
C TINB( :,I ) – table of internal edges of I-th area
C TINB( 1,I ) - - edge amount (from 1 till 4)
C TINB( 2,I ) = J - number of adjacent area
C TINB( 3,I ), TINB( 4,I ) - edges of one-dimensional section
C TINB( 5,I ) - dimension number in I-th area (1 or 2)
C TINB( 6,I ) - dimension coordinate in I-th area
C TINB( 7,I ) - dimension number in J-th area (1 or 2)
C TINB( 8,I ) - dimension coordinate in J-th area
INTEGER TINB( 29, MXBL )
C TEXB( :,I ) – table of external edges of I-th area
C TEXB( 1,I ) - edges amount (from 1 to 4)
C TEXB( 2,I ), TEXB( 3,I ) - coordinates of one-dimensional section of array
C for 1-th edge
C TEXB( 4,I ) - dimension number (1 or 2)
C TEXB( 5,I ) - coordinate of given dimension
INTEGER TEXB(17,MXBL)
C NBL - a number of blocks
C NTST – a number of steps
INTEGER NBL, NTST
C IDM – pointer to free dynamic memory
INTEGER IDM
COMMON IDM,MXSIZE
C postponed distribution of arrays on each area
*DVM$ DISTRIBUTE :: PA, P1
*DVM$ ALIGN :: PB, P2
C task array
*DVM$ TASK TSA ( MXBL )
C name of group exchange of internal edges
*DVM$ REMOTE_GROUP GRINB
C LP( I ), HP( I ) – edges of processor array section of I-th area
INTEGER LP(MXBL), HP(MXBL)
C TGLOB( :, I ) – table of global coordinates in Jacobi algorithm grid
C for I-th area
C TGLOB( 1, I ) – 1-th dimension coordinate
C TGLOB( 2, I ) – 2-th dimension coordinate
INTEGER TGLOB(2,MXBL)
MXSIZE = MXS
C subdividing M*N area on sub-areas
CALL DISDOM(NBL,TGLOB,TEXB,TINB,SIZE,M,N,MXBL)
C Dividing processor array on areas
CALL MPROC(LP,HP,SIZE,NBL)
C Distribution of tasks (areas) over processors.
C Array distribution over tasks
IDM = 1
DO 10 IB = 1, NBL
*DVM$ MAP TSA( IB ) ONTO MBC100( LP(IB) : HP(IB) )















