FDVM2 (1158347), страница 11
Текст из файла (страница 11)
A x = b
where A - matrix of coefficients,
b - right-hand-side vector (the constant terms),
x - vector of unknowns.
The following basic methods are used for solving this system.
Direct methods. The well-known Gaussian Elimination method is the most commonly used algorithm of this class. The main idea of this algorithm is to reduce the matrix A to upper triangular form and then to use backward substitution to compute the unknowns, from the last one to the first.
Explicit iteration methods. Jacobi Relaxation is the best-known algorithm of this class. The algorithm performs the following computation iteratively:
$$x_{i,j}^{new} = \left( x_{i-1,j}^{old} + x_{i,j-1}^{old} + x_{i+1,j}^{old} + x_{i,j+1}^{old} \right) / 4$$
Implicit iteration methods. Successive Over-Relaxation (SOR) belongs to this class. The algorithm performs the following calculation iteratively:
$$x_{i,j}^{new} = \frac{w}{4} \left( x_{i-1,j}^{new} + x_{i,j-1}^{new} + x_{i+1,j}^{old} + x_{i,j+1}^{old} \right) + (1 - w)\, x_{i,j}^{old}$$
With «red-black» coloring of the variables, each step of SOR consists of two half Jacobi steps: one processes the «red» variables and the other processes the «black» variables. Coloring the variables makes it possible to overlap calculation and communication.
2.1. Gauss Elimination Algorithm
C Gaussian elimination followed by backward substitution for A x = b.
      PROGRAM GAUSS
C     Solving linear equation system A x = b
      PARAMETER ( N = 100 )
      REAL A( N, N+1 ), X( N )
CHPF$ PROCESSORS P( 10 )
C     A : Coefficient matrix with dimension (N,N+1).
C         The right-hand side vector of the linear equations is
C         stored in the last, (N+1)-th, column of matrix A
C     X : Unknown vector
C     N : Number of linear equations
CHPF$ DISTRIBUTE A ( BLOCK, *) ONTO P
CHPF$ ALIGN X(I) WITH A(I, N+1)
C
C     Initialization
C
C     Every element is given a value: 2.0 on the diagonal and 0.0
C     elsewhere (including the right-hand side column N+1), so the
C     elimination below never reads an undefined off-diagonal
C     element A(J,I) or A(I,K).
*DVM$ PARALLEL ( I ) ON A( I , * )
      DO 100 I = 1, N
        DO 100 J = 1, N+1
          IF ( I .EQ. J ) THEN
            A( I, J ) = 2.0
          ELSE
            A( I, J ) = 0.0
          ENDIF
100   CONTINUE
C
C     Elimination
C
      DO 1 I = 1, N
C     the I-th row of array A will be buffered before
C     execution of I-th iteration, and references A(I,K), A(I,I)
C     will be replaced with corresponding references to the buffer
*DVM$ PARALLEL ( J ) ON A( J, * ) , REMOTE_ACCESS (A ( I, : ))
        DO 5 J = I+1, N
          DO 5 K = I+1, N+1
            A( J, K ) = A( J, K ) - A( J, I ) * A( I, K ) / A( I, I )
5     CONTINUE
1     CONTINUE
C     First calculate X(N)
*DVM$ OWN
      X( N ) = A( N, N+1 ) / A( N, N )
C
C     Solve X(N-1), X(N-2), ...,X(1) by backward substitution
C
      DO 6 J = N-1, 1, -1
C     the (J+1)-th element of array X will be buffered before
C     execution of J-th iteration, and reference X(J+1)
C     will be replaced with a reference to a temporary variable
*DVM$ PARALLEL ( I ) ON A( I , * ) , REMOTE_ACCESS ( X( J+1 ))
        DO 7 I = 1, J
          A( I, N+1 ) = A( I, N+1 ) - A( I, J+1 ) * X( J+1 )
7       CONTINUE
*DVM$ OWN
        X( J ) = A( J, N+1 ) / A( J, J)
6     CONTINUE
      PRINT *, X
      END
2.2. Jacobi Algorithm
C Jacobi relaxation on a K x K grid (synchronous version).
      PROGRAM JACOB
      PARAMETER (K=8, ITMAX=20)
      REAL A(K,K), B(K,K), EPS, MAXEPS
CHPF$ PROCESSORS P(4,4)
CHPF$ DISTRIBUTE A ( BLOCK, BLOCK) ONTO P
CHPF$ ALIGN B( I, J ) WITH A( I, J )
C     A is distributed (BLOCK, BLOCK); B is aligned element-wise
C     with A
      PRINT *, '********** TEST_JACOBI **********'
      MAXEPS = 0.5E-7
C     Nest of two parallel loops: iteration (I,J) is executed on
C     the processor that owns element A(I,J).
C     A is cleared; B is 0 on the grid border and 1+I+J inside.
CDVM$ PARALLEL (J,I) ON A(I, J)
      DO 10 J = 1, K
        DO 11 I = 1, K
          A(I, J) = 0.0
          IF (I.GT.1 .AND. I.LT.K .AND. J.GT.1 .AND. J.LT.K) THEN
            B(I, J) = 1.0 + I + J
          ELSE
            B(I, J) = 0.0
          ENDIF
11      CONTINUE
10    CONTINUE
      DO 200 IT = 1, ITMAX
        EPS = 0.0
C     EPS accumulates the maximum change over interior points
C     while B is copied into A
CDVM$ PARALLEL (J, I) ON A(I, J), REDUCTION ( MAX( EPS ))
        DO 210 J = 2, K-1
          DO 211 I = 2, K-1
            EPS = MAX ( EPS, ABS( B(I, J) - A(I, J) ) )
            A(I, J) = B(I, J)
211       CONTINUE
210     CONTINUE
C     shadow elements of array A are copied from neighboring
C     processors before the loop is executed
CDVM$ PARALLEL (J, I) ON B(I, J), SHADOW_RENEW (A)
        DO 220 J = 2, K-1
          DO 221 I = 2, K-1
            B(I,J) = (A(I-1,J) + A(I,J-1) + A(I+1,J) + A(I,J+1)) / 4
221       CONTINUE
220     CONTINUE
        PRINT *, 'IT = ', IT, ' EPS = ', EPS
        IF ( EPS .LT. MAXEPS ) GO TO 900
200   CONTINUE
C     write the final grid B to a formatted file
900   OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
      WRITE (3,*) B
      CLOSE (3)
      END
2.3. Jacobi Algorithm (Asynchronous Version)
C Jacobi relaxation on a K x K grid (asynchronous version):
C shadow-edge exchange and the EPS reduction are started and
C waited for by separate directives.
      PROGRAM JACOB
      PARAMETER (K=8, ITMAX=20)
      REAL A(K,K), B(K,K), EPS, MAXEPS
CHPF$ PROCESSORS P(4,4)
CHPF$ DISTRIBUTE A ( BLOCK, BLOCK) ONTO P
CHPF$ ALIGN B( I, J ) WITH A( I, J )
C     arrays A and B with block distribution
      PRINT *, '********** TEST_JACOBI_ASYNCHR **********'
C     creation of shadow edge group SA; the directive sentinel
C     must be CDVM$ (without the $ the line is an ordinary comment
C     and SA would not exist for SHADOW_START / SHADOW_WAIT below)
CDVM$ SHADOW_GROUP SA ( A )
      MAXEPS = 0.5E-7
C     nest of two parallel loops, iteration (i,j) will be executed
C     on the processor which is the owner of element A(i,j)
CDVM$ PARALLEL (J,I) ON A(I, J)
      DO 1 J = 1, K
        DO 1 I = 1, K
          A(I, J) = 0.
          IF(I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
            B(I, J) = 0.
          ELSE
            B(I, J) = 1. + I + J
          ENDIF
1     CONTINUE
      DO 2 IT = 1, ITMAX
        EPS = 0.
C     group of reduction operations is created
C     and initial values of reduction variables are stored
CDVM$ REDUCTION_GROUP REPS ( MAX( EPS ))
C     the loops iteration order is changed:
C     at first boundary elements of A are calculated and sent,
C     then internal elements of array A are calculated
CDVM$ PARALLEL (J, I) ON A(I, J), SHADOW_START SA
        DO 21 J = 2, K-1
          DO 21 I = 2, K-1
            EPS = MAX ( EPS, ABS( B( I, J) - A( I, J)))
            A(I, J) = B(I, J)
21      CONTINUE
C     start of reduction operation to accumulate the partial
C     results calculated in copies of variable EPS on every
C     processor
CDVM$ REDUCTION_START REPS
C     the loops iteration order is changed:
C     at first internal elements of B are calculated, then shadow
C     edge elements of array A from neighboring processors are
C     received, then boundary elements of array B are calculated
CDVM$ PARALLEL (J, I) ON B(I, J), SHADOW_WAIT SA
        DO 22 J = 2, K-1
          DO 22 I = 2, K-1
C     statement written compactly so that it ends before column 72
            B(I,J) = (A(I-1,J) + A(I,J-1) + A(I+1,J) + A(I,J+1)) / 4
22      CONTINUE
C     waiting for completion of the reduction operation
CDVM$ REDUCTION_WAIT REPS
        PRINT *, 'IT = ', IT, ' EPS = ', EPS
        IF ( EPS .LT. MAXEPS ) GO TO 3
2     CONTINUE
3     OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
      WRITE (3,*) B
      CLOSE (3)
      END
2.4. Irregular Calculations Based on Jacobi Algorithm
C Jacobi-style relaxation expressed through an explicit edge table.
      PROGRAM IRREG
C     Irregular calculations model on a rectangular grid
C     numbering graph nodes for K = 4 :
C
C        1 --  2 --  3 --  4
C        |     |     |     |
C        5 --  6 --  7 --  8
C        |     |     |     |
C        9 -- 10 -- 11 -- 12
C        |     |     |     |
C       13 -- 14 -- 15 -- 16
C
      PARAMETER (K=100, ITMAX=20)
      REAL A(K*K), B(K*K), EPS, MAXEPS
C     MATEDG(1,L)   : number of neighbors of node L (0 or 4)
C     MATEDG(2:5,L) : node numbers of those neighbors
      INTEGER MATEDG(5,K*K)
CHPF$ PROCESSORS P(NUMBER_OF_PROCESSORS( ))
CHPF$ DISTRIBUTE A ( BLOCK) ONTO P
C     array A with block distribution
CHPF$ ALIGN B( I ) WITH A( I )
CHPF$ ALIGN MATEDG( *, I ) WITH A( I )
CDVM$ INDIRECT_GROUP GRM
      PRINT *, '********** TEST_IRREG **********'
      MAXEPS = 0.5E-7
C     initialization of array B and graph edge table
CDVM$ PARALLEL ( L ) ON B( L )
      DO 1 L = 1, K*K
C     row I and column J of node L, both in 1..K (row-major
C     numbering as in the picture above); L-1 is used so that the
C     last node of each row, L = m*K, gets J = K and not J = 0
        I = (L-1)/K + 1
        J = MOD(L-1, K) + 1
        IF(I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
C     boundary node: no neighbors, value stays 0
          MATEDG(1, L) = 0
          B(L) = 0.
        ELSE
C     interior node: left, right, upper and lower neighbors
          MATEDG(1, L) = 4
          MATEDG(2, L) = L-1
          MATEDG(3, L) = L+1
          MATEDG(4, L) = L-K
          MATEDG(5, L) = L+K
          B(L) = 1. + I + J
        ENDIF
1     CONTINUE
      DO 2 IT = 1, ITMAX
        EPS = 0.
C     NOTE(review): PREFETCH is issued before A is updated in the
C     loop below - confirm the intended ordering against the DVM
C     specification
CDVM$ PREFETCH GRM
CDVM$ PARALLEL ( L ) ON A( L ), REDUCTION ( MAX( EPS ))
        DO 21 L = 1, K*K
C     A and B are one-dimensional, so they are indexed by L only;
C     EPS must be computed before A(L) is overwritten
          EPS = MAX ( EPS, ABS( B( L ) - A( L )))
          A( L ) = B( L )
21      CONTINUE
CDVM$ PARALLEL ( L ) ON B( L ),
CDVM$*INDIRECT_ACCESS ( GRM : A(MATEDG(:,L)))
C
C
        DO 22 L = 1, K*K
          IF (MATEDG(1,L) .NE. 0) THEN
            B(L) = 0.
C     neighbor numbers are stored in rows 2 .. MATEDG(1,L)+1;
C     row 1 holds the neighbor count and is not a node number
            DO 23 M = 2, MATEDG(1,L) + 1
              B(L) = B(L) + A( MATEDG(M,L) ) / 4
23          CONTINUE
          ENDIF
22      CONTINUE
        PRINT *, 'IT = ', IT, ' EPS = ', EPS
        IF ( EPS .LT. MAXEPS ) GO TO 3
2     CONTINUE
3     OPEN (3, FILE='JACOBI.DAT', FORM='FORMATTED')
      WRITE (3,*) B
      CLOSE (3)
      END
2.5. Red-Black Successive Over-Relaxation
PROGRAM REDBLACK
PARAMETER ( N = 100 )
REAL A( N, N ), EPS, MAXEPS, W
INTEGER ITMAX
CHPF$ PROCESSORS P( 5, 5 )
CHPF$ DISTRIBUTE A ( BLOCK, BLOCK ) ONTO P
ITMAX=20
MAXEPS = 0.5E - 5
W = 0.5
*DVM$ PARALLEL ( I, J ) ON A( I, J )
DO 1 I = 1, N
DO 1 J = 1, N
IF ( I .EQ.J) THEN
A( I, J ) = N + 2
ELSE
A( I, J ) = -1.0
ENDIF
1 CONTINUE
DO 2 IT = 1, ITMAX
EPS = 0.
C loop for red and black variables
DO 3 IRB = 1,2
*DVM$ PARALLEL ( I, J) ON A( I, J), NEW (S),
*DVM$* REDUCTION ( MAX( EPS )), SHADOW_RENEW (A)
C variable S - private variable in loop iterations
C variable EPS is used for calculation of maximum value
C Exception : iteration space is not rectangular
DO 21 I = 2, N-1
DO 21 J = 2 + MOD ( I+ IRB, 2 ), N-1, 2
S = A( I, J )
A( I, J ) = (W / 4) * (A( I-1, J ) + A( I+1, J ) + A( I, J-1 ) +
* A( I, J+1 )) + ( 1-W ) * A( I, J)















