C Compiler listing produced by the command:
C   f90 -O3 -mips4 -r12000 -64 -r8 -extend_source -flist mat_mul.f
C ***********************************************************
C Fortran file translated from WHIRL Thu Aug 16 15:21:34 2001
C ***********************************************************
C
C NOTE(review): this file is compiler output for mat_mul.f.  The
C corrections marked "FIX" below (NSZ, zeroing of CC, bounded
C sample print) should also be applied to mat_mul.f itself.
C
C MAIN reads the matrix order N from unit 5, reads a thread count
C from the OMP_NUM_THREADS environment variable, and passes both
C to array_mult.

      PROGRAM MAIN
      IMPLICIT NONE
C
C     **** Variables and functions ****
C
      CHARACTER*8 ENV_VALUE
      INTEGER(4) N
      INTEGER(4) OMP_NUM_THREADS
      INTEGER(8) t$1(1_8)
      SAVE t$1
      EXTERNAL getenv
      INTEGER(4) getenv
C
C     **** Temporary variables ****
C
      INTEGER(4) tmp0
C
C     **** Initializers ****
C
C     t$1 is used as the FORMAT of the internal READ below.  The
C     constant equals hex 2849382900000000; read as big-endian
C     bytes that is the text '(I8)' followed by NULs.
      DATA t$1 / 2902913183561809920_8 /
C
C     **** statements ****
C
      READ(5, *) N
C     NOTE(review): tmp0 (the getenv result) is never checked, so
C     ENV_VALUE may be undefined if the variable is not set.
      tmp0 = getenv('OMP_NUM_THREADS', ENV_VALUE)
C     NOTE(review): in standard substring notation (1:1) selects
C     only the first character, so a value such as '16' would be
C     read as 1.  Confirm against mat_mul.f before changing.
      READ(ENV_VALUE(1_8 : 1), t$1) OMP_NUM_THREADS
      CALL array_mult(N, OMP_NUM_THREADS)
      STOP
      END

C array_mult: fills two N x N matrices AA and BB, accumulates the
C product AA*BB into CC with a tiled/unrolled loop nest, and
C prints the elapsed time together with three sample CC elements.
C
C   N      (in) matrix order; also the extent of both dimensions
C               of the pointee arrays (t$4 = N).
C   N_CPUS (in) only echoed in the report; no parallel construct
C               is visible in this listing.
C
C AA, BB and CC are Cray-pointer pointees whose storage comes
C from OPR_ALLOCA and is released with OPR_DEALLOCA before RETURN.

      SUBROUTINE array_mult(N, N_CPUS)
      IMPLICIT NONE
      INTEGER(4) N
      INTEGER(4) N_CPUS
C
C     **** Variables and functions ****
C
      REAL(8) deref_p_AA(t$4, t$4)
      POINTER(p_AA, deref_p_AA)
      INTEGER(8) t$4
      INTEGER(8) t$5
      REAL(8) ANG_INCR
      REAL(8) deref_p_BB(t$4, t$4)
      POINTER(p_BB, deref_p_BB)
      REAL(8) deref_p_CC(t$4, t$4)
      POINTER(p_CC, deref_p_CC)
      INTEGER(4) J
      INTEGER(4) K
      INTEGER(4) NSZ
      REAL(8) SECS
      REAL(8) TM1
      INTEGER(8) t$6
      INTEGER(8) t$7
      REAL(8) t$11
      REAL(8) deref_se1_F8(*)
      POINTER(se1_F8, deref_se1_F8)
      REAL(8) deref_se2_F8(*)
      POINTER(se2_F8, deref_se2_F8)
      EXTERNAL timef
      REAL(8) timef
C
C     **** Temporary variables ****
C
      INTEGER(8) f90sp_1
      INTEGER(8) f90sp_2
      INTEGER(8) f90sp_3
      INTEGER(8) se1__$stk
      INTEGER(8) se2__$stk
      INTEGER(4) seonly0I
      INTEGER(4) I4
      INTEGER(4) tile2J
      INTEGER(4) tile1I
      INTEGER(4) J0
      REAL(8) mi0
      REAL(8) mi1
      REAL(8) mi2
      REAL(8) mi3
      REAL(8) mi4
      REAL(8) mi5
      REAL(8) mi6
      REAL(8) mi7
      INTEGER(4) I0
      INTEGER(4) wd_J0
      REAL(8) mi8
      REAL(8) mi9
      REAL(8) mi10
      REAL(8) mi11
      INTEGER(4) I1
      INTEGER(4) wd_K
      INTEGER(4) J1
      REAL(8) mi12
      REAL(8) mi13
      INTEGER(4) I2
      INTEGER(4) wd_J
      REAL(8) mi14
      INTEGER(4) I3
      REAL(8) t$
C     FIX: loop indices used only to zero CC (see below).
      INTEGER(4) IZ
      INTEGER(4) JZ
C
C     **** statements ****
C
C     Array extents and sizes derived from N.  t$5, t$6 and t$7
C     are assigned here but not read again in this listing.
      t$4 = N
      t$5 = MAX(N, 0)
      t$6 = (MAX(N, 0) * 2)
      t$7 = (MAX(N, 0) * MAX(N, 0))
C     Each f90sp_* value is handed back to OPR_DEALLOCA at the end
C     together with the matching pointer.  Each matrix gets
C     MAX(N,0)**2 * 8 bytes.
      f90sp_1 = OPR_ALLOCA(0)
      p_AA = OPR_ALLOCA(((MAX(N, 0) * MAX(N, 0)) * 8))
      f90sp_2 = OPR_ALLOCA(0)
      p_BB = OPR_ALLOCA(((MAX(N, 0) * MAX(N, 0)) * 8))
      f90sp_3 = OPR_ALLOCA(0)
      p_CC = OPR_ALLOCA(((MAX(N, 0) * MAX(N, 0)) * 8))
      ANG_INCR = (1.1769021394429878D+03 / DBLE((N * N)))
C     FIX: NSZ was read below without ever being assigned.  The
C     expression I + NSZ*(J-1) is the column-major linear index of
C     element (I,J) when NSZ is the leading dimension, so N is
C     assumed here -- TODO confirm against mat_mul.f.
      NSZ = N
C
C     Fill column J of AA and BB in strips of at most 1000 rows:
C     the strip's arguments are staged in the se1/se2 buffers and
C     passed to vcos$ / vsin$ (presumably vector cosine and sine
C     writing into AA and BB -- TODO confirm).
      DO J = 1, N, 1
        se1__$stk = OPR_ALLOCA(0)
        se1_F8 = OPR_ALLOCA((MIN(N, 1000) * 8))
        se2__$stk = OPR_ALLOCA(0)
        se2_F8 = OPR_ALLOCA((MIN(N, 1000) * 8))
        DO seonly0I = 1, N, 1000
          DO I4 = seonly0I, MIN(N, (seonly0I + 999)), 1
            deref_se2_F8((I4 - seonly0I) + 1) =
     &        (DBLE((I4 +(NSZ *(J + -1)))) * ANG_INCR)
            deref_se1_F8((I4 - seonly0I) + 1) =
     &        (DBLE((I4 +(NSZ *(J + -1)))) * ANG_INCR)
          END DO
          CALL vcos$(deref_se2_F8(1), deref_p_AA(seonly0I, J),
     &      %val(((MIN(N, (seonly0I + 999)) - seonly0I) + 1)),
     &      %val(1_8), %val(1_8))
          CALL vsin$(deref_se1_F8(1), deref_p_BB(seonly0I, J),
     &      %val(((MIN(N, (seonly0I + 999)) - seonly0I) + 1)),
     &      %val(1_8), %val(1_8))
        END DO
        CALL OPR_DEALLOCA(se2__$stk, se2_F8)
        CALL OPR_DEALLOCA(se1__$stk, se1_F8)
      END DO
C
C     FIX: CC is only ever updated as CC = CC + AA*BB and was
C     never given an initial value.  Zero it before the timer
C     starts so the timed region is unchanged.
      DO JZ = 1, N, 1
        DO IZ = 1, N, 1
          deref_p_CC(IZ, JZ) = 0.0D0
        END DO
      END DO
C
      t$11 = timef()
      TM1 = t$11
C
C     CC(i,j) = CC(i,j) + AA(i,k) * BB(k,j), with j tiled by 160
C     and i by 240, k unrolled by 4 and j unrolled by 2.  The
C     "wd_" loops handle the leftovers of each unrolled loop and
C     start from the value the unrolled loop's index holds after
C     that loop has finished.
      DO tile2J = 1, N, 160
        DO tile1I = 1, N, 240
          DO K = 1, (N + -3_8), 4
            DO J0 = tile2J, MIN((tile2J + 158), (N + -1_8)), 2
              mi0 = deref_p_BB(K, J0)
              mi1 = deref_p_BB(K + 3, J0 + 1)
              mi2 = deref_p_BB(K + 2, J0 + 1)
              mi3 = deref_p_BB(K + 1, J0 + 1)
              mi4 = deref_p_BB(K, J0 + 1)
              mi5 = deref_p_BB(K + 1, J0)
              mi6 = deref_p_BB(K + 3, J0)
              mi7 = deref_p_BB(K + 2, J0)
              DO I0 = tile1I, MIN(N, (tile1I + 239)), 1
                deref_p_CC(I0, J0) = (deref_p_CC(I0, J0) +
     &            (deref_p_AA(I0, K) * mi0))
                deref_p_CC(I0, J0) = (deref_p_CC(I0, J0) +
     &            (deref_p_AA(I0, K + 1) * mi5))
                deref_p_CC(I0, J0) = (deref_p_CC(I0, J0) +
     &            (deref_p_AA(I0, K + 2) * mi7))
                deref_p_CC(I0, J0) = (deref_p_CC(I0, J0) +
     &            (deref_p_AA(I0, K + 3) * mi6))
                deref_p_CC(I0, J0 + 1) = (deref_p_CC(I0, J0 + 1) +
     &            (deref_p_AA(I0, K) * mi4))
                deref_p_CC(I0, J0 + 1) = (deref_p_CC(I0, J0 + 1) +
     &            (deref_p_AA(I0, K + 1) * mi3))
                deref_p_CC(I0, J0 + 1) = (deref_p_CC(I0, J0 + 1) +
     &            (deref_p_AA(I0, K + 2) * mi2))
                deref_p_CC(I0, J0 + 1) = (deref_p_CC(I0, J0 + 1) +
     &            (deref_p_AA(I0, K + 3) * mi1))
              END DO
            END DO
C           Leftover column(s) of this J tile, same four K values.
            DO wd_J0 = J0, MIN(N, (tile2J + 159)), 1
              mi8 = deref_p_BB(K, wd_J0)
              mi9 = deref_p_BB(K + 3, wd_J0)
              mi10 = deref_p_BB(K + 1, wd_J0)
              mi11 = deref_p_BB(K + 2, wd_J0)
              DO I1 = tile1I, MIN(N, (tile1I + 239)), 1
                deref_p_CC(I1, wd_J0) = (deref_p_CC(I1, wd_J0) +
     &            (deref_p_AA(I1, K) * mi8))
                deref_p_CC(I1, wd_J0) = (deref_p_CC(I1, wd_J0) +
     &            (deref_p_AA(I1, K + 1) * mi10))
                deref_p_CC(I1, wd_J0) = (deref_p_CC(I1, wd_J0) +
     &            (deref_p_AA(I1, K + 2) * mi11))
                deref_p_CC(I1, wd_J0) = (deref_p_CC(I1, wd_J0) +
     &            (deref_p_AA(I1, K + 3) * mi9))
              END DO
            END DO
          END DO
C         Leftover K values (fewer than four), one at a time.
          DO wd_K = K, N, 1
            DO J1 = tile2J, MIN((tile2J + 158), (N + -1_8)), 2
              mi12 = deref_p_BB(wd_K, J1)
              mi13 = deref_p_BB(wd_K, J1 + 1)
              DO I2 = tile1I, MIN(N, (tile1I + 239)), 1
                deref_p_CC(I2, J1) = (deref_p_CC(I2, J1) +
     &            (deref_p_AA(I2, wd_K) * mi12))
                deref_p_CC(I2, J1 + 1) = (deref_p_CC(I2, J1 + 1) +
     &            (deref_p_AA(I2, wd_K) * mi13))
              END DO
            END DO
            DO wd_J = J1, MIN(N, (tile2J + 159)), 1
              mi14 = deref_p_BB(wd_K, wd_J)
              DO I3 = tile1I, MIN(N, (tile1I + 239)), 1
                deref_p_CC(I3, wd_J) = (deref_p_CC(I3, wd_J) +
     &            (deref_p_AA(I3, wd_K) * mi14))
              END DO
            END DO
          END DO
        END DO
      END DO
C
C     The timef difference is scaled by 1.0D-03 and reported as
C     seconds (presumably timef returns milliseconds -- confirm).
      t$ = timef()
      SECS = (((t$ - TM1)) * 1.0D-03)
      WRITE(6, *)
      WRITE(6, *) 'NUMBER OF CPUS/TASKS ASSIGNED = ', N_CPUS
      WRITE(6, *) 'Parallelized Matrix Multiplication for ',
     &  N, 'x', N, ' Size'
      WRITE(6, *)
      WRITE(6, *) 'Elapsed Wall Clock Time ', SECS, ' Seconds'
      WRITE(6, *)
C     FIX: the sample rows 1234, 8023 and 2345 were read
C     unconditionally, which is out of bounds for N < 8023.  The
C     row indices are clamped to N and the print is skipped for an
C     empty matrix; output is unchanged when N >= 8023.
      IF (N .GE. 1) THEN
        WRITE(*, *) deref_p_CC(MIN(1234, N), 1),
     &    deref_p_CC(MIN(8023, N), 1),
     &    deref_p_CC(MIN(2345, N), 1)
      END IF
      CALL OPR_DEALLOCA(f90sp_3, p_CC)
      CALL OPR_DEALLOCA(f90sp_2, p_BB)
      CALL OPR_DEALLOCA(f90sp_1, p_AA)
      RETURN
      END SUBROUTINE