fdvmLDe (1158336), страница 13
Текст из файла (страница 13)
A x = b
where A is the matrix of coefficients,
b is the vector of constant terms (the right-hand side),
x is the vector of unknowns.
The following basic methods are used for solving this system.
Direct methods. The well-known Gaussian elimination method is the most commonly used algorithm of this class. The main idea of this algorithm is to reduce the matrix A to upper triangular form and then to use backward substitution to compute the unknowns.
Explicit iteration methods. Jacobi relaxation is the best-known algorithm of this class. The algorithm performs the following computation iteratively
$$x_{i,j}^{new} = \left( x_{i-1,j}^{old} + x_{i,j-1}^{old} + x_{i+1,j}^{old} + x_{i,j+1}^{old} \right) / 4$$
Implicit iteration methods. Successive Over Relaxation (SOR) belongs to this class. The algorithm performs the following calculation iteratively
$$x_{i,j}^{new} = \frac{w}{4} \left( x_{i-1,j}^{new} + x_{i,j-1}^{new} + x_{i+1,j}^{old} + x_{i,j+1}^{old} \right) + (1 - w)\, x_{i,j}^{old}$$
By using «red-black» coloring of variables, each step of SOR consists of two half Jacobi steps. One processes the «red» variables and the other processes the «black» variables. Coloring of variables allows calculation and communication to be overlapped.
Example 1. Gauss elimination algorithm
PROGRAM GAUSS
C Solves the linear equation system Ax = b by Gaussian elimination
C (reduction to upper triangular form, no pivoting) followed by
C backward substitution, then prints the solution vector X.
PARAMETER ( N = 100 )
REAL A( N, N+1 ), X( N )
C A : Coefficient matrix with dimension (N,N+1)
C     The right-hand-side vector of the linear equations is stored
C     in the last, (N+1)-th, column of matrix A
C X : Vector of unknowns
C N : Number of linear equations
*DVM$ DISTRIBUTE A (BLOCK,*)
*DVM$ ALIGN X(I) WITH A(I,N+1)
C
C Initialization
C
C Diagonal entries are set to 2.0 and the right-hand-side column
C (J = N+1) is set to 0.0.
C NOTE(review): the remaining off-diagonal entries are not assigned
C by this loop - confirm whether they should be initialized too.
*DVM$ PARALLEL ( I ) ON A(I,*)
DO 100 I = 1, N
DO 100 J = 1, N+1
IF ( I .EQ. J ) THEN
A(I,J) = 2.0
ELSE
IF ( J .EQ. N+1) THEN
A(I,J) = 0.0
ENDIF
ENDIF
100 CONTINUE
C
C Elimination
C
C Row I is the pivot row. For every row J below it, the multiple
C A(J,I) / A(I,I) of row I is subtracted in columns I+1 .. N+1.
C Column I itself is not updated because it is never read again.
DO 1 I = 1, N
C the I-th row of array A will be buffered before
C execution of I-th iteration, and references A(I,K), A(I,I)
C will be replaced with corresponding reference to buffer
*DVM$ PARALLEL ( J ) ON A(J,*), REMOTE_ACCESS ( A(I,:) )
DO 5 J = I+1, N
DO 5 K = I+1, N+1
A(J,K) = A(J,K) - A(J,I) * A(I,K) / A(I,I)
5 CONTINUE
1 CONTINUE
C First calculate X(N)
X(N) = A(N,N+1) / A(N,N)
C
C Solve X(N-1), X(N-2), ...,X(1) by backward substitution
C
C For each J the already known X(J+1) is eliminated from the
C right-hand side of rows 1 .. J, after which X(J) follows directly.
DO 6 J = N-1, 1, -1
C the (J+1)-th element of array X will be buffered before
C execution of J-th iteration, and reference X(J+1)
C will be replaced with reference to a temporary variable
*DVM$ PARALLEL ( I ) ON A(I,*), REMOTE_ACCESS ( X(J+1) )
DO 7 I = 1, J
A(I,N+1) = A(I,N+1) - A(I,J+1) * X(J+1)
7 CONTINUE
X(J) = A(J,N+1) / A(J,J)
6 CONTINUE
PRINT *, X
END
Example 2. Jacobi algorithm
PROGRAM JACOB
C Jacobi iteration on a K x K grid held in two arrays, A and B.
C Each pass of the outer loop first copies the interior of B into A
C while recording the largest absolute change in EPS, and then
C recomputes the interior of B as the average of the four
C neighbouring values of A. The outer loop runs at most ITMAX times
C and is left early once EPS is below MAXEPS. Finally B is written
C to the file JACOBI.DAT.
PARAMETER (K=8, ITMAX=20)
REAL A(K,K), B(K,K), EPS, MAXEPS
CDVM$ DISTRIBUTE A (BLOCK, BLOCK)
CDVM$ ALIGN B(I,J) WITH A(I,J)
C arrays A and B with block distribution
PRINT *, '********** TEST_JACOBI **********'
C convergence threshold compared with EPS after every pass
C (blanks inside the literal are ignored in fixed source form)
MAXEPS = 0.5E - 7
CDVM$ PARALLEL (J,I) ON A(I,J)
C nest of two parallel loops, iteration (i,j) will be executed on
C processor, which is owner of element A(i,j)
C Initial state: A is zero everywhere; B is zero on the border
C of the grid and 1 + I + J in the interior.
DO 1 J = 1, K
DO 1 I = 1, K
A(I,J) = 0.
IF(I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
B(I,J) = 0.
ELSE
B(I,J) = 1. + I + J
ENDIF
1 CONTINUE
DO 2 IT = 1, ITMAX
C EPS is reset so that it holds the maximum change of this pass only
EPS = 0.
CDVM$ PARALLEL (J,I) ON A(I,J), REDUCTION ( MAX( EPS ))
C variable EPS is used for calculation of maximum value
DO 21 J = 2, K-1
DO 21 I = 2, K-1
EPS = MAX ( EPS, ABS( B(I,J) - A(I,J)))
A(I,J) = B(I,J)
21 CONTINUE
CDVM$ PARALLEL (J,I) ON B(I,J), SHADOW_RENEW (A)
C copying shadow elements of array A from
C neighboring processors before loop execution
DO 22 J = 2, K-1
DO 22 I = 2, K-1
B(I,J) = (A(I-1,J) + A(I,J-1) + A(I+1,J) + A(I,J+1)) / 4
22 CONTINUE
PRINT *, 'IT = ', IT, ' EPS = ', EPS
C leave the iteration loop as soon as the change is small enough
IF ( EPS . LT . MAXEPS ) GO TO 3
2 CONTINUE
C write the resulting array B to a formatted file on unit 3
3 OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
WRITE (3,*) B
CLOSE (3)
END
Example 3. Jacobi algorithm (asynchronous version)
PROGRAM JACOB1
C Jacobi iteration on a K x K grid, asynchronous version.
C The computation is the same as in a plain Jacobi sweep: the
C interior of B is copied into A while EPS records the largest
C absolute change, then the interior of B is recomputed as the
C average of the four neighbouring values of A. Here the shadow
C edge exchange of A and the reduction of EPS are split into
C separate start and wait directives (SHADOW_START / SHADOW_WAIT,
C REDUCTION_START / REDUCTION_WAIT) placed around the two loops.
PARAMETER (K=8, ITMAX=20)
REAL A(K,K), B(K,K), EPS, MAXEPS
CDVM$ DISTRIBUTE A (BLOCK, BLOCK)
CDVM$ ALIGN B(I,J) WITH A(I,J)
C arrays A and B with block distribution
CDVM$ REDUCTION_GROUP REPS
PRINT *, '********** TEST_JACOBI_ASYNCHR **********'
CDVM$ SHADOW_GROUP SA (A)
C creation of shadow edge group
C convergence threshold compared with EPS after every pass
C (blanks inside the literal are ignored in fixed source form)
MAXEPS = 0.5E - 7
CDVM$ PARALLEL (J,I) ON A(I,J)
C nest of two parallel loops, iteration (i,j) will be executed on
C processor, which is owner of element A(i,j)
C Initial state: A is zero everywhere; B is zero on the border
C of the grid and 1 + I + J in the interior.
DO 1 J = 1, K
DO 1 I = 1, K
A(I,J) = 0.
IF(I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
B(I,J) = 0.
ELSE
B(I,J) = 1. + I + J
ENDIF
1 CONTINUE
DO 2 IT = 1, ITMAX
C EPS is reset so that it holds the maximum change of this pass only
EPS = 0.
C group of reduction operations is created
C and initial values of reduction variables are stored
CDVM$ PARALLEL (J,I) ON A(I,J), SHADOW_START SA,
CDVM$* REDUCTION_GROUP ( REPS : MAX( EPS ))
C the loops iteration order is changed:
C at first boundary elements of A are calculated and sent,
C then internal elements of array A are calculated
DO 21 J = 2, K-1
DO 21 I = 2, K-1
EPS = MAX ( EPS, ABS( B(I,J) - A(I,J)))
A(I,J) = B(I,J)
21 CONTINUE
CDVM$ REDUCTION_START REPS
C start of reduction operation to accumulate the partial results
C calculated in copies of variable EPS on every processor
CDVM$ PARALLEL (J,I) ON B(I,J), SHADOW_WAIT SA
C the loops iteration order is changed:
C at first internal elements of B are calculated,
C then shadow edge elements of array A from neighboring processors
C are received, then boundary elements of array B are calculated
DO 22 J = 2, K-1
DO 22 I = 2, K-1
B(I,J) = (A(I-1,J) + A(I,J-1) + A(I+1,J) + A(I,J+1)) / 4
22 CONTINUE
CDVM$ REDUCTION_WAIT REPS
C waiting for completion of the reduction operation; EPS is read
C only after this point (in the PRINT and the convergence test)
PRINT *, 'IT = ', IT, ' EPS = ', EPS
C leave the iteration loop as soon as the change is small enough
IF ( EPS . LT . MAXEPS ) GO TO 3
2 CONTINUE
C write the resulting array B to a formatted file on unit 3
3 OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
WRITE (3,*) B
CLOSE (3)
END
Example 4. Successive over-relaxation
PROGRAM SOR
C Successive over-relaxation on an N x N array A, updated in place.
C Every pass replaces each interior element by a weighted sum of
C its four neighbours and its own previous value, and records the
C largest absolute change in EPS. At most ITMAX passes are made;
C the loop is left early once EPS is below MAXEPS. A is printed
C at the end.
PARAMETER ( N = 100 )
REAL A( N, N ), EPS, MAXEPS, W
INTEGER ITMAX
*DVM$ DISTRIBUTE A (BLOCK,BLOCK)
C ITMAX : maximum number of passes
C MAXEPS: convergence threshold compared with EPS
C         (blanks inside the literal are ignored in fixed form)
C W     : relaxation weight used in the update formula below
ITMAX = 20
MAXEPS = 0.5E - 5
W = 0.5
C Initial state: N + 2 on the diagonal, -1.0 everywhere else
*DVM$ PARALLEL (I,J) ON A(I,J)
DO 1 I = 1, N
DO 1 J = 1, N
IF ( I .EQ.J) THEN
A(I,J) = N + 2
ELSE
A(I,J) = -1.0
ENDIF
1 CONTINUE
DO 2 IT = 1, ITMAX
C EPS is reset so that it holds the maximum change of this pass only
EPS = 0.
*DVM$ PARALLEL (I,J) ON A(I,J), NEW (S),
*DVM$* REDUCTION ( MAX( EPS )), ACROSS (A(1:1,1:1))
C S is a private variable
C (its usage is localized in the range of one iteration)
C EPS variable is used for maximum calculation
C Because A is updated in place, A(I-1,J) and A(I,J-1) already
C hold values of the current pass while A(I+1,J) and A(I,J+1)
C still hold values of the previous pass.
C NOTE(review): ACROSS presumably declares this dependence with
C width 1 in both dimensions - confirm against the DVM manual.
DO 21 I = 2, N-1
DO 21 J = 2, N-1
C keep the old value so that the change can be measured afterwards
S = A(I,J)
A(I,J) = (W / 4) * (A(I-1,J) + A(I+1,J) + A(I,J-1) +
* A(I,J+1)) + ( 1-W ) * A(I,J)
EPS = MAX ( EPS, ABS( S - A(I,J)))
21 CONTINUE
PRINT *, 'IT = ', IT, ' EPS = ', EPS
C leave the iteration loop as soon as the change is small enough
IF (EPS .LT. MAXEPS ) GO TO 4
2 CONTINUE
4 PRINT *, A
END
Example 5. Red-black successive over-relaxation
PROGRAM REDBLACK
C Red-black successive over-relaxation on an N x N array A.
C Every outer iteration makes two half-sweeps (IRB = 1, 2). Each
C half-sweep updates every second element of a row, and the
C starting column alternates with the parity of I + IRB, so the
C two half-sweeps together cover the whole interior. EPS records
C the largest absolute change over both half-sweeps. At most ITMAX
C outer iterations are made; the loop is left early once EPS is
C below MAXEPS. A is printed at the end.
PARAMETER ( N = 100 )
REAL A( N, N ), EPS, MAXEPS, W
INTEGER ITMAX
*DVM$ DISTRIBUTE A (BLOCK,BLOCK)
C ITMAX : maximum number of outer iterations
C MAXEPS: convergence threshold compared with EPS
C         (blanks inside the literal are ignored in fixed form)
C W     : relaxation weight used in the update formula below
ITMAX = 20
MAXEPS = 0.5E - 5
W = 0.5
C Initial state: N + 2 on the diagonal, -1.0 everywhere else
*DVM$ PARALLEL (I,J) ON A(I,J)
DO 1 I = 1, N
DO 1 J = 1, N
IF ( I .EQ.J) THEN
A(I,J) = N + 2
ELSE
A(I,J) = -1.0
ENDIF
1 CONTINUE
DO 2 IT = 1, ITMAX
C EPS is reset once per outer iteration, before both half-sweeps
EPS = 0.
C loop for red and black variables
DO 3 IRB = 1,2
*DVM$ PARALLEL (I,J) ON A(I,J), NEW (S),
*DVM$* REDUCTION ( MAX( EPS )), SHADOW_RENEW (A)
C variable S - private variable in loop iterations
C variable EPS is used for calculation of maximum value
C Exception : iteration space is not rectangular
C (the lower bound of J depends on I and IRB, and J steps by 2)
DO 21 I = 2, N-1
DO 21 J = 2 + MOD( I+ IRB, 2 ), N-1, 2
C keep the old value so that the change can be measured afterwards
S = A(I,J)
A(I,J) = (W / 4) * (A(I-1,J) + A(I+1,J) + A(I,J-1) +
* A(I,J+1)) + ( 1-W ) * A(I,J)
EPS = MAX ( EPS, ABS( S - A(I,J)))
21 CONTINUE
3 CONTINUE
PRINT *, 'IT = ', IT, ' EPS = ', EPS
C leave the iteration loop as soon as the change is small enough
IF (EPS .LT. MAXEPS ) GO TO 4
2 CONTINUE
4 PRINT *, A
END
Example 6. Static tasks (parallel sections)
PROGRAM TASKS
C rectangular grid is subdivided on two blocks
C
C | K | |
C | N1 | A1, B1 |
C | N2 | A2, B2 |
C
PARAMETER (K=100, N1 = 50, ITMAX=10, N2 = K – N1 )
CDVM$ PROCESSORS P(NUMBER_OF_PROCESSORS( ))
REAL A1(N1+1,K), A2(N2+1,K), B1(N1+1,K), B2(N2+1,K)
INTEGER LP(2), HP(2)
CDVM$ TASK MB( 2 )
CDVM$ ALIGN B1(I,J) WITH A1(I,J)
CDVM$ ALIGN B2(I,J) WITH A2(I,J)
CDVM$ DISTRIBUTE :: A1, A2
CDVM$ REMOTE_GROUP BOUND
CALL DPT(LP, HP, 2)
C Task (block) distribution over processors
C Array distribution over tasks
CDVM$ MAP MB( 1 ) ONTO P( LP(1) : HP(1) )
CDVM$ REDISTRIBUTE A1( *, BLOCK ) ONTO MB( 1 )
CDVM$ MAP MB( 2 ) ONTO P( LP(2) : HP(2) )
CDVM$ REDISTRIBUTE A2(*,BLOCK) ONTO MB( 2 )
C Initialization
CDVM$ PARALLEL (J,I) ON A1(I,J)
DO 10 J = 1, K
DO 10 I = 1, N1
IF(I.EQ.1 .OR. J.EQ.1 .OR. J.EQ.K) THEN
A1(I,J) = 0.
B1(I,J) = 0.
ELSE
B1(I,J) = 1. + I + J
A1(I,J) = B1(I, J)
ENDIF
10 CONTINUE
CDVM$ PARALLEL (J,I) ON A2(I,J)
DO 20 J = 1, K
DO 20 I = 2, N2+1
IF(I.EQ.N2+1 .OR. J.EQ.1 .OR. J.EQ.K) THEN
A2(I,J) = 0.
B2(I,J) = 0.
ELSE
B2(I,J) = 1. + ( I + N1 – 1 ) + J
A2(I,J) = B2(I,J)
ENDIF
20 CONTINUE
DO 2 IT = 1, ITMAX
CDVM$ PREFETCH BOUND
C exchange of edges
CDVM$ PARALLEL ( J ) ON A1(N1+1, J),
CDVM$* REMOTE_ACCESS (BOUND : B2(2,J) )
DO 30 J = 1, K
30 A1(N1+1, J) = B2(2, J)
CDVM$ PARALLEL ( J ) ON A2(1,J),
CDVM$* REMOTE_ACCESS (BOUND : B1(N1,J) )
DO 40 J = 1, K
40 A2(1,J) = B1(N1,J)
CDVM$ TASK_REGION MB
CDVM$ ON MB( 1 )
CDVM$ PARALLEL (J,I) ON B1(I,J),
CDVM$* SHADOW_RENEW ( A1 )
DO 50 J = 2, K-1
DO 50 I = 2, N1
50 B1(I,J) = (A1(I-1,J) + A1(I,J-1) + A1(I+1,J) + A1(I,J+1)) / 4
CDVM$ PARALLEL (J,I) ON A1(I,J)
DO 60 J = 2, K-1
DO 60 I = 2, N1
60 A1(I,J) = B1(I,J)
CDVM$ END ON
CDVM$ ON MB( 2 )
CDVM$ PARALLEL (J,I) ON B2(I,J),
CDVM$* SHADOW_RENEW ( A2 )
DO 70 J = 2, K-1
DO 70 I = 2, N2
70 B2(I,J) = (A2(I-1,J) + A2(I,J-1) + A2(I+1,J) + A2(I,J+1)) / 4
CDVM$ PARALLEL (J,I) ON A2(I,J)
DO 80 J = 2, K-1
DO 80 I = 2, N2
80 A2(I,J) = B2(I,J)